From e283a40d17bbbeeafda84fad9b75bbc458950934 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 5 Sep 2022 21:32:20 -0700 Subject: [PATCH 01/68] translate-c: convert tabs to `\t` in object-like macro string literals Closes #12549 --- src/translate_c.zig | 9 ++++++++- test/behavior/translate_c_macros.h | 2 ++ test/behavior/translate_c_macros.zig | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/translate_c.zig b/src/translate_c.zig index faa8a456f5..e62f5b4568 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -5799,7 +5799,7 @@ fn zigifyEscapeSequences(ctx: *Context, m: *MacroCtx) ![]const u8 { } } for (source) |c| { - if (c == '\\') { + if (c == '\\' or c == '\t') { break; } } else return source; @@ -5876,6 +5876,13 @@ fn zigifyEscapeSequences(ctx: *Context, m: *MacroCtx) ![]const u8 { state = .Start; }, .Start => { + if (c == '\t') { + bytes[i] = '\\'; + i += 1; + bytes[i] = 't'; + i += 1; + continue; + } if (c == '\\') { state = .Escape; } diff --git a/test/behavior/translate_c_macros.h b/test/behavior/translate_c_macros.h index 222a7ded6c..439577fecc 100644 --- a/test/behavior/translate_c_macros.h +++ b/test/behavior/translate_c_macros.h @@ -50,3 +50,5 @@ typedef _Bool uintptr_t; #define CAST_TO_UINTPTR(X) (uintptr_t)(X) #define LARGE_INT 18446744073709550592 + +#define EMBEDDED_TAB "hello " diff --git a/test/behavior/translate_c_macros.zig b/test/behavior/translate_c_macros.zig index d670e0cbd4..314a9028df 100644 --- a/test/behavior/translate_c_macros.zig +++ b/test/behavior/translate_c_macros.zig @@ -2,6 +2,7 @@ const builtin = @import("builtin"); const std = @import("std"); const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; +const expectEqualStrings = std.testing.expectEqualStrings; const h = @cImport(@cInclude("behavior/translate_c_macros.h")); @@ -123,3 +124,13 @@ test "large integer macro" { try expectEqual(@as(c_ulonglong, 18446744073709550592), h.LARGE_INT); } + +test "string literal macro with embedded tab character" { + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + try expectEqualStrings("hello\t", h.EMBEDDED_TAB); +} From e02b9f458dd7e48bef5b436242ba0ab3550224da Mon Sep 17 00:00:00 2001 From: bfredl Date: Tue, 6 Sep 2022 13:28:31 +0200 Subject: [PATCH 02/68] build-exe: allow combination of -fno-emit-bin and --verbose-air Currently, `zig build-exe -fno-emit-bin --verbose-air src/main.zig` results in no output at all. 
With this refactor, it dumps AIR and then exits without invoking LLVM, as expected. --- src/Module.zig | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Module.zig b/src/Module.zig index c63fe43158..3ae6c48edd 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -4274,11 +4274,14 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { const comp = mod.comp; - if (comp.bin_file.options.emit == null and + const no_bin_file = (comp.bin_file.options.emit == null and comp.emit_asm == null and comp.emit_llvm_ir == null and - comp.emit_llvm_bc == null) - { + comp.emit_llvm_bc == null); + + const dump_air = builtin.mode == .Debug and comp.verbose_air; + + if (no_bin_file and !dump_air) { return; } @@ -4286,7 +4289,7 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { var liveness = try Liveness.analyze(gpa, air); defer liveness.deinit(gpa); - if (builtin.mode == .Debug and comp.verbose_air) { + if (dump_air) { const fqn = try decl.getFullyQualifiedName(mod); defer mod.gpa.free(fqn); @@ -4295,6 +4298,10 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { std.debug.print("# End Function AIR: {s}\n\n", .{fqn}); } + if (no_bin_file) { + return; + } + comp.bin_file.updateFunc(mod, func, air, liveness) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, error.AnalysisFail => { From 924679abc46deeaae9284ab6ce928aaddb0fae95 Mon Sep 17 00:00:00 2001 From: Dan Ellis Echavarria <19101das@gmail.com> Date: Wed, 7 Sep 2022 07:22:30 -0500 Subject: [PATCH 03/68] std.simd: change T to u16 The `element_bit_size` would break if `T` was signed due to `ceilPowerOfTwo` only working on unsigned numbers. --- lib/std/simd.zig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/std/simd.zig b/lib/std/simd.zig index b2655758c0..972bf136e9 100644 --- a/lib/std/simd.zig +++ b/lib/std/simd.zig @@ -9,7 +9,7 @@ const builtin = @import("builtin"); pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?usize { // This is guesswork, if you have better suggestions can add it or edit the current here // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it - const element_bit_size = @maximum(8, std.math.ceilPowerOfTwo(T, @bitSizeOf(T)) catch unreachable); + const element_bit_size = @maximum(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable); const vector_bit_size: u16 = blk: { if (cpu.arch.isX86()) { if (T == bool and std.Target.x86.featureSetHas(.prefer_mask_registers)) return 64; @@ -57,6 +57,15 @@ pub fn suggestVectorSize(comptime T: type) ?usize { return suggestVectorSizeForCpu(T, builtin.cpu); } +test "suggestVectorSizeForCpu works with signed and unsigned values" { + comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64); + comptime cpu.features.addFeature(@enumToInt(std.Target.x86.Feature.avx512f)); + const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?; + const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?; + try std.testing.expectEqual(@as(usize, 16), unsigned_integer_size); + try std.testing.expectEqual(@as(usize, 16), signed_integer_size); +} + fn vectorLength(comptime VectorType: type) comptime_int { return switch (@typeInfo(VectorType)) { .Vector => |info| info.len, From 9a92f3d290694bfefbc7d71b5ba1823edb6c547f Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 29 Aug 2022 20:13:55 +0200 Subject: [PATCH 04/68] wasm/Object: parse debug sections into reloc data
Rather than storing the name of a debug section into the structure `RelocatableData`, we use the `index` field as an offset into the debug names table. This means we do not have to store an extra 16 bytes for non-debug sections, which can be a massive saving for object files where each data symbol has its own data section. The name of a debug section can then be retrieved again when needed by using the offset and then reading until the 0-delimiter. --- src/link/Wasm.zig | 2 +- src/link/Wasm/Object.zig | 49 +++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 050d9287a5..a62a11a389 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1782,7 +1782,7 @@ pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32 }); break :blk index; }, - .custom => return error.@"TODO: Custom section relocations for wasm", + .debug => return error.@"TODO: Custom section relocations for wasm", } } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 50827ca9fb..d07d0b39ea 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -63,16 +63,21 @@ relocatable_data: []const RelocatableData = &.{}, /// import name, module name and export names. Each string will be deduplicated /// and returns an offset into the table. string_table: Wasm.StringTable = .{}, +/// All the names of each debug section found in the current object file. +/// Each name is terminated by a null-terminator. The name can be found +/// from the `index` offset within the `RelocatableData`. +debug_names: [:0]const u8, /// Represents a single item within a section (depending on its `type`) const RelocatableData = struct { /// The type of the relocatable data - type: enum { data, code, custom }, + type: enum { data, code, debug }, /// Pointer to the data of the segment, where its length is written to `size` data: [*]u8, /// The size in bytes of the data representing the segment within the section size: u32, - /// The index within the section itself + /// The index within the section itself, or in case of a debug section, + /// the offset within the `debug_names` table. index: u32, /// The offset within the section where the data starts offset: u32, @@ -96,7 +101,7 @@ const RelocatableData = struct { return switch (self.type) { .data => .data, .code => .function, - .custom => .section, + .debug => unreachable, // illegal, debug sections are not represented by a symbol }; } }; @@ -111,6 +116,7 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz var object: Object = .{ .file = file, .name = try gpa.dupe(u8, name), + .debug_names = &.{}, }; var is_object_file: bool = false; @@ -197,6 +203,11 @@ pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32 } else i; } +/// From a given `RelocatableData`, find the corresponding debug section name +pub fn getDebugName(self: *const Object, relocatable_data: RelocatableData) []const u8 { + return std.mem.sliceTo(self.debug_names[relocatable_data.index..], 0); +} + /// Checks if the object file is an MVP version. /// When that's the case, we check if there's an import table definition with its name /// set to '__indirect_function_table'.
When that's also the case, @@ -328,10 +339,15 @@ fn Parser(comptime ReaderType: type) type { self.object.version = version; var relocatable_data = std.ArrayList(RelocatableData).init(gpa); + var debug_names = std.ArrayList(u8).init(gpa); - errdefer while (relocatable_data.popOrNull()) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); - } else relocatable_data.deinit(); + errdefer { + while (relocatable_data.popOrNull()) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } else relocatable_data.deinit(); + gpa.free(debug_names.items); + debug_names.deinit(); + } var section_index: u32 = 0; while (self.reader.reader().readByte()) |byte| : (section_index += 1) { @@ -352,6 +368,24 @@ fn Parser(comptime ReaderType: type) type { try self.parseRelocations(gpa); } else if (std.mem.eql(u8, name, "target_features")) { try self.parseFeatures(gpa); + } else if (std.mem.startsWith(u8, name, ".debug")) { + const debug_size = @intCast(u32, reader.context.bytes_left); + const debug_content = try gpa.alloc(u8, debug_size); + errdefer gpa.free(debug_content); + try reader.readNoEof(debug_content); + + const debug_name_index = @intCast(u32, debug_names.items.len); + try debug_names.ensureUnusedCapacity(name.len + 1); + debug_names.appendSliceAssumeCapacity(try gpa.dupe(u8, name)); + debug_names.appendAssumeCapacity(0); + try relocatable_data.append(.{ + .type = .debug, + .data = debug_content.ptr, + .size = debug_size, + .index = debug_name_index, + .offset = len - debug_size, + .section_index = section_index, + }); } else { try reader.skipBytes(reader.context.bytes_left, .{}); } }, @@ -517,6 +551,9 @@ fn Parser(comptime ReaderType: type) type { else => |e| return e, } self.object.relocatable_data = relocatable_data.toOwnedSlice(); + + const names = debug_names.toOwnedSlice(); + self.object.debug_names = names[0 .. names.len - 1 :0]; } /// Based on the "features" custom section, parses it into a list of From f060edb0f3e23a18b17af9b619f7f499ac8e4e7f Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Tue, 30 Aug 2022 21:47:05 +0200 Subject: [PATCH 05/68] wasm-linker: create atoms from debug sections --- src/link/Wasm.zig | 173 ++++++++++++++++++++++++++++----------- src/link/Wasm/Object.zig | 39 +++++---- 2 files changed, 146 insertions(+), 66 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index a62a11a389..ae995bd0b9 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -67,6 +67,18 @@ code_section_index: ?u32 = null, debug_info_index: ?u32 = null, /// The index of the segment representing the custom '.debug_line' section. debug_line_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_loc' section. +debug_loc_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_ranges' section. +debug_ranges_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubnames' section. +debug_pubnames_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_pubtypes_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_str' section. +debug_str_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_abbrev' section. +debug_abbrev_index: ?u32 = null, /// The count of imported functions. This number will be appended /// to the function indexes as their index starts at the lowest non-extern function.
imported_functions_count: u32 = 0, @@ -1753,7 +1765,7 @@ fn setupMemory(self: *Wasm) !void { /// From a given object's index and the index of the segment, returns the corresponding /// index of the segment within the final data section. When the segment does not yet /// exist, a new one will be initialized and appended. The new index will be returned in that case. -pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32) !u32 { +pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32) !?u32 { const object: Object = self.objects.items[object_index]; const relocatable_data = object.relocatable_data[relocatable_index]; const index = @intCast(u32, self.segments.items.len); @@ -1765,27 +1777,83 @@ pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32 const result = try self.data_segments.getOrPut(self.base.allocator, segment_info.outputName(merge_segment)); if (!result.found_existing) { result.value_ptr.* = index; - try self.segments.append(self.base.allocator, .{ - .alignment = 1, - .size = 0, - .offset = 0, - }); + try self.appendDummySegment(); return index; } else return result.value_ptr.*; }, .code => return self.code_section_index orelse blk: { self.code_section_index = index; - try self.segments.append(self.base.allocator, .{ - .alignment = 1, - .size = 0, - .offset = 0, - }); + try self.appendDummySegment(); break :blk index; }, - .debug => return error.@"TODO: Custom section relocations for wasm", + .debug => { + const debug_name = object.getDebugName(relocatable_data); + if (mem.eql(u8, debug_name, ".debug_info")) { + return self.debug_info_index orelse blk: { + self.debug_info_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_line")) { + return self.debug_line_index orelse blk: { + self.debug_line_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_loc")) { + return self.debug_loc_index orelse blk: { + self.debug_loc_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_ranges")) { + return self.debug_ranges_index orelse blk: { + self.debug_ranges_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_pubnames")) { + return self.debug_pubnames_index orelse blk: { + self.debug_pubnames_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_pubtypes")) { + return self.debug_pubtypes_index orelse blk: { + self.debug_pubtypes_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_abbrev")) { + return self.debug_abbrev_index orelse blk: { + self.debug_abbrev_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_str")) { + return self.debug_str_index orelse blk: { + self.debug_str_index = index; + try self.appendDummySegment(); + break :blk index; + }; + } else { + log.warn("found unknown debug section '{s}'", .{debug_name}); + log.warn(" debug section will be skipped", .{}); + return null; + } + }, } } +/// Appends a new segment with default field values +fn appendDummySegment(self: *Wasm) !void { + try self.segments.append(self.base.allocator, .{ + .alignment = 1, + .size = 0, + .offset = 0, + }); +} + /// Returns the symbol index of the error name table.
/// /// When the symbol does not yet exist, it will create a new one instead. @@ -1936,17 +2004,18 @@ fn resetState(self: *Wasm) void { for (self.segment_info.items) |*segment_info| { self.base.allocator.free(segment_info.name); } - const mod = self.base.options.module.?; - var decl_it = self.decls.keyIterator(); - while (decl_it.next()) |decl_index_ptr| { - const decl = mod.declPtr(decl_index_ptr.*); - const atom = &decl.link.wasm; - atom.next = null; - atom.prev = null; + if (self.base.options.module) |mod| { + var decl_it = self.decls.keyIterator(); + while (decl_it.next()) |decl_index_ptr| { + const decl = mod.declPtr(decl_index_ptr.*); + const atom = &decl.link.wasm; + atom.next = null; + atom.prev = null; - for (atom.locals.items) |*local_atom| { - local_atom.next = null; - local_atom.prev = null; + for (atom.locals.items) |*local_atom| { + local_atom.next = null; + local_atom.prev = null; + } } } self.functions.clearRetainingCapacity(); @@ -2036,29 +2105,34 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod defer self.resetState(); try self.setupStart(); try self.setupImports(); - const mod = self.base.options.module.?; - var decl_it = self.decls.keyIterator(); - while (decl_it.next()) |decl_index_ptr| { - const decl = mod.declPtr(decl_index_ptr.*); - if (decl.isExtern()) continue; - const atom = &decl.*.link.wasm; - if (decl.ty.zigTypeTag() == .Fn) { - try self.parseAtom(atom, .{ .function = decl.fn_link.wasm }); - } else if (decl.getVariable()) |variable| { - if (!variable.is_mutable) { - try self.parseAtom(atom, .{ .data = .read_only }); - } else if (variable.init.isUndefDeep()) { - try self.parseAtom(atom, .{ .data = .uninitialized }); + if (self.base.options.module) |mod| { + var decl_it = self.decls.keyIterator(); + while (decl_it.next()) |decl_index_ptr| { + const decl = mod.declPtr(decl_index_ptr.*); + if (decl.isExtern()) continue; + const atom = &decl.*.link.wasm; + if (decl.ty.zigTypeTag() == .Fn) { + try self.parseAtom(atom, .{ .function = decl.fn_link.wasm }); + } else if (decl.getVariable()) |variable| { + if (!variable.is_mutable) { + try self.parseAtom(atom, .{ .data = .read_only }); + } else if (variable.init.isUndefDeep()) { + try self.parseAtom(atom, .{ .data = .uninitialized }); + } else { + try self.parseAtom(atom, .{ .data = .initialized }); + } } else { - try self.parseAtom(atom, .{ .data = .initialized }); + try self.parseAtom(atom, .{ .data = .read_only }); + } + + // also parse atoms for a decl's locals + for (atom.locals.items) |*local_atom| { + try self.parseAtom(local_atom, .{ .data = .read_only }); } - } else { - try self.parseAtom(atom, .{ .data = .read_only }); } - // also parse atoms for a decl's locals - for (atom.locals.items) |*local_atom| { - try self.parseAtom(local_atom, .{ .data = .read_only }); + if (self.dwarf) |*dwarf| { + try dwarf.flushModule(&self.base, self.base.options.module.?); } } @@ -2066,9 +2140,6 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_index), self); } - if (self.dwarf) |*dwarf| { - try dwarf.flushModule(&self.base, self.base.options.module.?); - } try self.allocateAtoms(); try self.setupMemory(); self.mapFunctionTable(); @@ -2425,12 +2496,14 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } else if (!self.base.options.strip) { if (self.dwarf) |*dwarf| { if (self.debug_info_index != null) { - try dwarf.writeDbgAbbrev(&self.base); - // for debug info 
and ranges, the address is always 0, - // as locations are always offsets relative to 'code' section. - try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); - try dwarf.writeDbgAranges(&self.base, 0, code_section_size); - try dwarf.writeDbgLineHeader(&self.base, mod); + if (self.base.options.module) |mod| { + try dwarf.writeDbgAbbrev(&self.base); + // for debug info and ranges, the address is always 0, + // as locations are always offsets relative to 'code' section. + try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); + try dwarf.writeDbgAranges(&self.base, 0, code_section_size); + try dwarf.writeDbgLineHeader(&self.base, mod); + } try emitDebugSection(file, self.debug_info.items, ".debug_info"); try emitDebugSection(file, self.debug_aranges.items, ".debug_ranges"); diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index d07d0b39ea..bb00eeb821 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -889,12 +889,9 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin } for (self.relocatable_data) |relocatable_data, index| { - const symbols = symbol_for_segment.getPtr(.{ - .kind = relocatable_data.getSymbolKind(), - .index = @intCast(u32, relocatable_data.index), - }) orelse continue; // encountered a segment we do not create an atom for - const sym_index = symbols.pop(); - const final_index = try wasm_bin.getMatchingSegment(object_index, @intCast(u32, index)); + const final_index = (try wasm_bin.getMatchingSegment(object_index, @intCast(u32, index))) orelse { + continue; // found unknown section, so skip parsing into atom as we do not know how to handle it. + }; const atom = try gpa.create(Atom); atom.* = Atom.empty; @@ -907,7 +904,6 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin atom.file = object_index; atom.size = relocatable_data.size; atom.alignment = relocatable_data.getAlignment(self); - atom.sym_index = sym_index; const relocations: []types.Relocation = self.relocations.get(relocatable_data.section_index) orelse &.{}; for (relocations) |relocation| { @@ -929,19 +925,30 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]); - // symbols referencing the same atom will be added as alias - // or as 'parent' when they are global. - while (symbols.popOrNull()) |idx| { - const alias_symbol = self.symtable[idx]; - const symbol = self.symtable[atom.sym_index]; - if (alias_symbol.isGlobal() and symbol.isLocal()) { - atom.sym_index = idx; + if (relocatable_data.type != .debug) { + const symbols = symbol_for_segment.getPtr(.{ + .kind = relocatable_data.getSymbolKind(), + .index = @intCast(u32, relocatable_data.index), + }) orelse continue; // encountered a segment we do not create an atom for + const sym_index = symbols.pop(); + atom.sym_index = sym_index; + + // symbols referencing the same atom will be added as alias + // or as 'parent' when they are global. 
+ while (symbols.popOrNull()) |idx| { + const alias_symbol = self.symtable[idx]; + const symbol = self.symtable[atom.sym_index]; + if (alias_symbol.isGlobal() and symbol.isLocal()) { + atom.sym_index = idx; + } } + try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom); } - try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom); const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index]; - segment.alignment = std.math.max(segment.alignment, atom.alignment); + if (relocatable_data.type == .data) { //code section and debug sections are 1-byte aligned + segment.alignment = std.math.max(segment.alignment, atom.alignment); + } if (wasm_bin.atoms.getPtr(final_index)) |last| { last.*.next = atom; From c347751338a4a1e2874207674fb47908fd601484 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 31 Aug 2022 23:03:29 +0200 Subject: [PATCH 06/68] wasm-linker: write debug sections from objects We now link relocatable debug sections with the correct section symbol and then allocate and resolve the debug atoms before writing them into the final binary. Although this does perform the relocation, the actual relocations are not done correctly yet. --- src/link/Wasm.zig | 47 +++++++++++++++++++++++++++------------ src/link/Wasm/Object.zig | 48 ++++++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 36 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index ae995bd0b9..b17eeb57e9 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -2495,21 +2495,39 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } } else if (!self.base.options.strip) { if (self.dwarf) |*dwarf| { - if (self.debug_info_index != null) { - if (self.base.options.module) |mod| { - try dwarf.writeDbgAbbrev(&self.base); - // for debug info and ranges, the address is always 0, - // as locations are always offsets relative to 'code' section. - try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); - try dwarf.writeDbgAranges(&self.base, 0, code_section_size); - try dwarf.writeDbgLineHeader(&self.base, mod); - } + const mod = self.base.options.module.?; + try dwarf.writeDbgAbbrev(&self.base); + // for debug info and ranges, the address is always 0, + // as locations are always offsets relative to 'code' section. 
+ try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); + try dwarf.writeDbgAranges(&self.base, 0, code_section_size); + try dwarf.writeDbgLineHeader(&self.base, mod); + } - try emitDebugSection(file, self.debug_info.items, ".debug_info"); - try emitDebugSection(file, self.debug_aranges.items, ".debug_ranges"); - try emitDebugSection(file, self.debug_abbrev.items, ".debug_abbrev"); - try emitDebugSection(file, self.debug_line.items, ".debug_line"); - try emitDebugSection(file, dwarf.strtab.items, ".debug_str"); + var debug_bytes = std.ArrayList(u8).init(self.base.allocator); + defer debug_bytes.deinit(); + + const debug_sections = .{ + .{ ".debug_info", self.debug_info_index }, + .{ ".debug_pubtypes", self.debug_pubtypes_index }, + .{ ".debug_abbrev", self.debug_abbrev_index }, + .{ ".debug_line", self.debug_line_index }, + .{ ".debug_str", self.debug_str_index }, + .{ ".debug_pubnames", self.debug_pubnames_index }, + .{ ".debug_loc", self.debug_loc_index }, + .{ ".debug_ranges", self.debug_ranges_index }, + }; + + inline for (debug_sections) |item| { + if (item[1]) |index| { + var atom = self.atoms.get(index).?.getFirst(); + while (true) { + atom.resolveRelocs(self); + try debug_bytes.appendSlice(atom.code.items); + atom = atom.next orelse break; + } + try emitDebugSection(file, debug_bytes.items, item[0]); + debug_bytes.clearRetainingCapacity(); } } try self.emitNameSection(file, arena); } fn emitDebugSection(file: fs.File, data: []const u8, name: []const u8) !void { + if (data.len == 0) return; const header_offset = try reserveCustomSectionHeader(file); const writer = file.writer(); try leb.writeULEB128(writer, @intCast(u32, name.len)); diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index bb00eeb821..fac5ce3aa8 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -77,7 +77,7 @@ const RelocatableData = struct { /// The size in bytes of the data representing the segment within the section size: u32, /// The index within the section itself, or in case of a debug section, - /// the offset within the `debug_names` table. + /// the offset within the `string_table`. index: u32, /// The offset within the section where the data starts offset: u32, @@ -101,9 +101,16 @@ const RelocatableData = struct { return switch (self.type) { .data => .data, .code => .function, - .debug => unreachable, // illegal, debug sections are not represented by a symbol + .debug => .section, }; } + + /// Returns the index within a section itself, or in case of a debug section, + /// returns the section index within the object file. + pub fn getIndex(self: RelocatableData) u32 { + if (self.type == .debug) return self.section_index; + return self.index; + } }; @@ -205,7 +212,7 @@ pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32 /// From a given `RelocatableData`, find the corresponding debug section name pub fn getDebugName(self: *const Object, relocatable_data: RelocatableData) []const u8 { - return std.mem.sliceTo(self.debug_names[relocatable_data.index..], 0); + return self.string_table.get(relocatable_data.index); } /// Checks if the object file is an MVP version.
@@ -363,6 +370,7 @@ fn Parser(comptime ReaderType: type) type { if (std.mem.eql(u8, name, "linking")) { is_object_file.* = true; + self.object.relocatable_data = relocatable_data.items; // at this point no new relocatable sections will appear so we're free to store them. try self.parseMetadata(gpa, @intCast(usize, reader.context.bytes_left)); } else if (std.mem.startsWith(u8, name, "reloc")) { try self.parseRelocations(gpa); @@ -374,19 +382,16 @@ fn Parser(comptime ReaderType: type) type { errdefer gpa.free(debug_content); try reader.readNoEof(debug_content); - const debug_name_index = @intCast(u32, debug_names.items.len); - try debug_names.ensureUnusedCapacity(name.len + 1); - debug_names.appendSliceAssumeCapacity(try gpa.dupe(u8, name)); - debug_names.appendAssumeCapacity(0); try relocatable_data.append(.{ .type = .debug, .data = debug_content.ptr, .size = debug_size, - .index = debug_name_index, + .index = try self.object.string_table.put(gpa, name), .offset = len - debug_size, .section_index = section_index, }); } else { + log.info("found unknown custom section '{s}' - skipping parsing", .{name}); try reader.skipBytes(reader.context.bytes_left, .{}); } }, @@ -551,9 +556,6 @@ fn Parser(comptime ReaderType: type) type { else => |e| return e, } self.object.relocatable_data = relocatable_data.toOwnedSlice(); - - const names = debug_names.toOwnedSlice(); - self.object.debug_names = names[0 .. names.len - 1 :0]; } /// Based on the "features" custom section, parses it into a list of @@ -774,7 +776,12 @@ fn Parser(comptime ReaderType: type) type { }, .section => { symbol.index = try leb.readULEB128(u32, reader); - symbol.name = try self.object.string_table.put(gpa, @tagName(symbol.tag)); + for (self.object.relocatable_data) |data| { + if (data.section_index == symbol.index) { + symbol.name = data.index; + break; + } + } }, else => { symbol.index = try leb.readULEB128(u32, reader); @@ -864,7 +871,6 @@ fn assertEnd(reader: anytype) !void { /// Parses an object file into atoms, for code and data sections pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void { - log.debug("Parsing data section into atoms", .{}); const Key = struct { kind: Symbol.Tag, index: u32, @@ -876,7 +882,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin for (self.symtable) |symbol, symbol_index| { switch (symbol.tag) { - .function, .data => if (!symbol.isUndefined()) { + .function, .data, .section => if (!symbol.isUndefined()) { const gop = try symbol_for_segment.getOrPut(.{ .kind = symbol.tag, .index = symbol.index }); const sym_idx = @intCast(u32, symbol_index); if (!gop.found_existing) { @@ -925,13 +931,11 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]); - if (relocatable_data.type != .debug) { - const symbols = symbol_for_segment.getPtr(.{ - .kind = relocatable_data.getSymbolKind(), - .index = @intCast(u32, relocatable_data.index), - }) orelse continue; // encountered a segment we do not create an atom for - const sym_index = symbols.pop(); - atom.sym_index = sym_index; + if (symbol_for_segment.getPtr(.{ + .kind = relocatable_data.getSymbolKind(), + .index = relocatable_data.getIndex(), + })) |symbols| { + atom.sym_index = symbols.pop(); // symbols referencing the same atom will be added as alias // or as 'parent' when they are global. 
@@ -957,7 +961,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin } else { try wasm_bin.atoms.putNoClobber(gpa, final_index, atom); } - log.debug("Parsed into atom: '{s}'", .{self.string_table.get(self.symtable[atom.sym_index].name)}); + log.debug("Parsed into atom: '{s}' at segment index {d}", .{ self.string_table.get(self.symtable[atom.sym_index].name), final_index }); } } From 46c932a2c9650f14ae8035d7382d825bfabdc0a5 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Thu, 1 Sep 2022 22:02:24 +0200 Subject: [PATCH 07/68] wasm-linker: perform debug relocations This correctly performs a relocation for debug sections. The result is that the wasm-linker can now correctly create a binary from object files while preserving all debug information. --- src/link/Wasm.zig | 12 ++++++++++++ src/link/Wasm/Atom.zig | 7 +++---- src/link/Wasm/Object.zig | 3 +-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index b17eeb57e9..2ff631d9ba 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -221,6 +221,18 @@ pub const SymbolLoc = struct { } return wasm_bin.string_table.get(wasm_bin.symbols.items[self.index].name); } + + /// From a given symbol location, returns the final location. + /// e.g. when a symbol was resolved and replaced by the symbol + /// in a different file, this will return said location. + /// If the symbol wasn't replaced by another, this will return + /// the given location itself. + pub fn finalLoc(self: SymbolLoc, wasm_bin: *const Wasm) SymbolLoc { + if (wasm_bin.discarded.get(self)) |new_loc| { + return new_loc.finalLoc(wasm_bin); + } + return self; + } }; /// Generic string table that duplicates strings diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 9e7f7a5a76..64efa8320f 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -145,7 +145,7 @@ pub fn resolveRelocs(self: *Atom, wasm_bin: *const Wasm) void { /// All values will be represented as a `u64` as all values can fit within it. /// The final value must be casted to the correct size. 
fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { - const target_loc: Wasm.SymbolLoc = .{ .file = self.file, .index = relocation.index }; + const target_loc = (Wasm.SymbolLoc{ .file = self.file, .index = relocation.index }).finalLoc(wasm_bin); const symbol = target_loc.getSymbol(wasm_bin).*; switch (relocation.relocation_type) { .R_WASM_FUNCTION_INDEX_LEB => return symbol.index, @@ -174,8 +174,7 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa => { std.debug.assert(symbol.tag == .data and !symbol.isUndefined()); const merge_segment = wasm_bin.base.options.output_mode != .Obj; - const target_atom_loc = wasm_bin.discarded.get(target_loc) orelse target_loc; - const target_atom = wasm_bin.symbol_atom.get(target_atom_loc).?; + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; const segment_info = if (target_atom.file) |object_index| blk: { break :blk wasm_bin.objects.items[object_index].segment_info; } else wasm_bin.segment_info.items; @@ -187,6 +186,6 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa .R_WASM_EVENT_INDEX_LEB => return symbol.index, .R_WASM_SECTION_OFFSET_I32, .R_WASM_FUNCTION_OFFSET_I32, - => return relocation.offset, + => return relocation.addend orelse 0, } } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index fac5ce3aa8..86c9cdc023 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -387,11 +387,10 @@ fn Parser(comptime ReaderType: type) type { .data = debug_content.ptr, .size = debug_size, .index = try self.object.string_table.put(gpa, name), - .offset = len - debug_size, + .offset = 0, // debug sections only contain 1 entry, so no need to calculate offset .section_index = section_index, }); } else { - log.info("found unknown custom section '{s}' - skipping parsing", .{name}); try reader.skipBytes(reader.context.bytes_left, .{}); } }, From b2718e213ed7e7cd8bcd85bdf49d7ae33c857c58 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 2 Sep 2022 21:13:59 +0200 Subject: [PATCH 08/68] wasm-linker: use Atoms for zig debug info Previously we used a single array list for each debug section to hold the debug information that was generated from Zig code (i.e. when a `Module` is available). This information is now stored in Atoms, similarly to debug information from object files. This will allow us to link them together and resolve debug relocations.
--- src/link/Dwarf.zig | 45 +++++++++++++++++++++++-------------- src/link/Wasm.zig | 55 +++++++++++++--------------------------------- 2 files changed, 44 insertions(+), 56 deletions(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 3ae151491f..e610d56df4 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -861,7 +861,9 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - writeDbgLineNopsBuffered(wasm_file.debug_line.items, src_fn.off, 0, &.{}, src_fn.len); + const segment_index = wasm_file.debug_line_index.?; + const debug_line = wasm_file.atoms.get(segment_index).?.code; + writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); }, else => unreachable, } @@ -972,9 +974,9 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugLineIndex(); + const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_line_index); const segment = &wasm_file.segments.items[segment_index]; - const debug_line = &wasm_file.debug_line; + const debug_line = &wasm_file.atoms.get(segment_index).?.code; if (needed_size != segment.size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); if (needed_size > segment.size) { @@ -1146,10 +1148,11 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugInfoIndex(); + const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_info_index); const segment = &wasm_file.segments.items[segment_index]; + const debug_info = &wasm_file.atoms.get(segment_index).?.code; const offset = segment.offset + atom.off; - try writeDbgInfoNopsToArrayList(gpa, &wasm_file.debug_info, offset, 0, &.{0}, atom.len, false); + try writeDbgInfoNopsToArrayList(gpa, debug_info, offset, 0, &.{0}, atom.len, false); }, else => unreachable, } @@ -1276,9 +1279,9 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugInfoIndex(); + const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_info_index); const segment = &wasm_file.segments.items[segment_index]; - const debug_info = &wasm_file.debug_info; + const debug_info = &wasm_file.atoms.get(segment_index).?.code; if (needed_size != segment.size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); if (needed_size > segment.size) { @@ -1337,10 +1340,10 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.getDebugLineIndex() catch unreachable; + const segment_index = wasm_file.debug_line_index.?; const segment = wasm_file.segments.items[segment_index]; const offset = segment.offset + decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); - mem.copy(u8, wasm_file.debug_line.items[offset..], &data); + mem.copy(u8, wasm_file.atoms.get(segment_index).?.code.items[offset..], &data); }, else => unreachable, } @@ -1576,8 +1579,10 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try wasm_file.debug_abbrev.resize(wasm_file.base.allocator, needed_size); - mem.copy(u8, wasm_file.debug_abbrev.items, &abbrev_buf); + const segment_index = try 
wasm_file.getOrSetDebugIndex(&wasm_file.debug_abbrev_index); + const debug_abbrev = &wasm_file.atoms.get(segment_index).?.code; + try debug_abbrev.resize(wasm_file.base.allocator, needed_size); + mem.copy(u8, debug_abbrev.items, &abbrev_buf); }, else => unreachable, } @@ -1687,7 +1692,9 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try writeDbgInfoNopsToArrayList(self.allocator, &wasm_file.debug_info, 0, 0, di_buf.items, jmp_amt, false); + const segment_index = wasm_file.debug_info_index.?; + const debug_info = &wasm_file.atoms.get(segment_index).?.code; + try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); }, else => unreachable, } @@ -2016,8 +2023,10 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try wasm_file.debug_aranges.resize(wasm_file.base.allocator, needed_size); - mem.copy(u8, wasm_file.debug_aranges.items, di_buf.items); + const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_ranges_index); + const debug_ranges = &wasm_file.atoms.get(segment_index).?.code; + try debug_ranges.resize(wasm_file.base.allocator, needed_size); + mem.copy(u8, debug_ranges.items, di_buf.items); }, else => unreachable, } @@ -2139,7 +2148,9 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - writeDbgLineNopsBuffered(wasm_file.debug_line.items, 0, 0, di_buf.items, jmp_amt); + const segment_index = wasm_file.debug_line_index.?; + const debug_line = wasm_file.atoms.get(segment_index).?.code; + writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); }, else => unreachable, } @@ -2287,7 +2298,9 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - mem.copy(u8, wasm_file.debug_info.items[reloc.atom.off + reloc.offset ..], &buf); + const segment_index = wasm_file.debug_info_index.?; + const debug_info = wasm_file.atoms.get(segment_index).?.code; + mem.copy(u8, debug_info.items[reloc.atom.off + reloc.offset ..], &buf); }, else => unreachable, } diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 2ff631d9ba..7204182df2 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -103,16 +103,6 @@ string_table: StringTable = .{}, /// Debug information for wasm dwarf: ?Dwarf = null, -// *debug information* // -/// Contains all bytes for the '.debug_info' section -debug_info: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_line' section -debug_line: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_abbrev' section -debug_abbrev: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_ranges' section -debug_aranges: std.ArrayListUnmanaged(u8) = .{}, - // Output sections /// Output type section func_types: std.ArrayListUnmanaged(wasm.Type) = .{}, @@ -716,11 +706,6 @@ pub fn deinit(self: *Wasm) void { if (self.dwarf) |*dwarf| { dwarf.deinit(); } - - self.debug_info.deinit(gpa); - self.debug_line.deinit(gpa); - self.debug_abbrev.deinit(gpa); - self.debug_aranges.deinit(gpa); } pub fn allocateDeclIndexes(self: *Wasm, decl_index: Module.Decl.Index) !void { @@ -1983,32 +1968,22 @@ fn populateErrorNameTable(self: *Wasm) !void { try self.parseAtom(names_atom, .{ .data = .read_only }); } -pub fn getDebugInfoIndex(self: *Wasm) !u32 
{ - assert(self.dwarf != null); - return self.debug_info_index orelse { - self.debug_info_index = @intCast(u32, self.segments.items.len); - const segment = try self.segments.addOne(self.base.allocator); - segment.* = .{ - .size = 0, - .offset = 0, - // debug sections always have alignment '1' - .alignment = 1, - }; - return self.debug_info_index.?; - }; -} +/// From a given index variable, returns it value if set. +/// When not set, initialises a new segment, sets the index, +/// and returns it value. +/// When a new segment is initialised. It also creates an atom. +pub fn getOrSetDebugIndex(self: *Wasm, index: *?u32) !u32 { + return (index.*) orelse { + const new_index = @intCast(u32, self.segments.items.len); + index.* = new_index; + try self.appendDummySegment(); -pub fn getDebugLineIndex(self: *Wasm) !u32 { - assert(self.dwarf != null); - return self.debug_line_index orelse { - self.debug_line_index = @intCast(u32, self.segments.items.len); - const segment = try self.segments.addOne(self.base.allocator); - segment.* = .{ - .size = 0, - .offset = 0, - .alignment = 1, - }; - return self.debug_line_index.? + const atom = try self.base.allocator.create(Atom); + atom.* = Atom.empty; + atom.alignment = 1; // debug sections are always 1-byte-aligned + try self.managed_atoms.append(self.base.allocator, atom); + try self.atoms.put(self.base.allocator, new_index, atom); + return new_index; }; } From f2c8d09c4f743f4172e0a0a6f0fd59a96e56386d Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 4 Sep 2022 21:00:44 +0200 Subject: [PATCH 09/68] wasm-linker: mix Zig and object debug atoms When linking a Zig compilation with an object file, we allow mixing the debug atoms to make sure debug information is preserved from object files. By default, we now always initialize all debug sections if the `strip` flag is unset. This also fixes relocations for debug information, as previously the offset of an atom wasn't calculated, and neither was the code size itself, which meant that debug lines were off and file names from other object files were missing.
--- src/link/Dwarf.zig | 35 ++++++++++++++++++-------------- src/link/Wasm.zig | 46 +++++++++++++++++++++++++++--------------- src/link/Wasm/Atom.zig | 35 +++++++++++++++++++++++++++++--- 3 files changed, 82 insertions(+), 34 deletions(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index e610d56df4..671425ab71 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -862,7 +862,8 @@ pub fn commitDeclState( .wasm => { const wasm_file = file.cast(File.Wasm).?; const segment_index = wasm_file.debug_line_index.?; - const debug_line = wasm_file.atoms.get(segment_index).?.code; + const atom = wasm_file.atoms.get(segment_index).?; + const debug_line = atom.getFirstZigAtom().code; writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); }, else => unreachable, @@ -974,9 +975,10 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_line_index); + const segment_index = wasm_file.debug_line_index.?; const segment = &wasm_file.segments.items[segment_index]; - const debug_line = &wasm_file.atoms.get(segment_index).?.code; + const atom = wasm_file.atoms.get(segment_index).?; + const debug_line = &atom.getFirstZigAtom().code; if (needed_size != segment.size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); if (needed_size > segment.size) { @@ -1148,9 +1150,10 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_info_index); + const segment_index = wasm_file.debug_info_index.?; const segment = &wasm_file.segments.items[segment_index]; - const debug_info = &wasm_file.atoms.get(segment_index).?.code; + const info_atom = wasm_file.atoms.get(segment_index).?; + const debug_info = &info_atom.getFirstZigAtom().code; const offset = segment.offset + atom.off; try writeDbgInfoNopsToArrayList(gpa, debug_info, offset, 0, &.{0}, atom.len, false); }, @@ -1279,9 +1282,10 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_info_index); + const segment_index = wasm_file.debug_info_index.?; const segment = &wasm_file.segments.items[segment_index]; - const debug_info = &wasm_file.atoms.get(segment_index).?.code; + const info_atom = wasm_file.atoms.get(segment_index).?; + const debug_info = &info_atom.getFirstZigAtom().code; if (needed_size != segment.size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); if (needed_size > segment.size) { @@ -1343,7 +1347,8 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) const segment_index = wasm_file.debug_line_index.?; const segment = wasm_file.segments.items[segment_index]; const offset = segment.offset + decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); - mem.copy(u8, wasm_file.atoms.get(segment_index).?.code.items[offset..], &data); + const atom = wasm_file.atoms.get(segment_index).?.getFirstZigAtom(); + mem.copy(u8, atom.code.items[offset..], &data); }, else => unreachable, } @@ -1579,8 +1584,8 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_abbrev_index); - const debug_abbrev = 
&wasm_file.atoms.get(segment_index).?.code; + const segment_index = wasm_file.debug_abbrev_index.?; + const debug_abbrev = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; try debug_abbrev.resize(wasm_file.base.allocator, needed_size); mem.copy(u8, debug_abbrev.items, &abbrev_buf); }, @@ -1693,7 +1698,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 .wasm => { const wasm_file = file.cast(File.Wasm).?; const segment_index = wasm_file.debug_info_index.?; - const debug_info = &wasm_file.atoms.get(segment_index).?.code; + const debug_info = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); }, else => unreachable, @@ -2023,8 +2028,8 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getOrSetDebugIndex(&wasm_file.debug_ranges_index); - const debug_ranges = &wasm_file.atoms.get(segment_index).?.code; + const segment_index = wasm_file.debug_ranges_index.?; + const debug_ranges = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; try debug_ranges.resize(wasm_file.base.allocator, needed_size); mem.copy(u8, debug_ranges.items, di_buf.items); }, @@ -2149,7 +2154,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { .wasm => { const wasm_file = file.cast(File.Wasm).?; const segment_index = wasm_file.debug_line_index.?; - const debug_line = wasm_file.atoms.get(segment_index).?.code; + const debug_line = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); }, else => unreachable, @@ -2299,7 +2304,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { .wasm => { const wasm_file = file.cast(File.Wasm).?; const segment_index = wasm_file.debug_info_index.?; - const debug_info = wasm_file.atoms.get(segment_index).?.code; + const debug_info = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; mem.copy(u8, debug_info.items[reloc.atom.off + reloc.offset ..], &buf); }, else => unreachable, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 7204182df2..67f229ca84 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -349,6 +349,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option }; } + try wasm_bin.initDebugSections(); return wasm_bin; } @@ -377,6 +378,23 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm { return self; } +/// Initializes symbols and atoms for the debug sections +/// Initialization is only done when compiling Zig code. +/// When Zig is invoked as a linker instead, the atoms +/// and symbols come from the object files instead. +pub fn initDebugSections(self: *Wasm) !void { + if (self.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections + // this will create an Atom and set the index for us. 
+ try self.createDebugSectionForIndex(&self.debug_info_index); + try self.createDebugSectionForIndex(&self.debug_line_index); + try self.createDebugSectionForIndex(&self.debug_loc_index); + try self.createDebugSectionForIndex(&self.debug_abbrev_index); + try self.createDebugSectionForIndex(&self.debug_ranges_index); + try self.createDebugSectionForIndex(&self.debug_str_index); + try self.createDebugSectionForIndex(&self.debug_pubnames_index); + try self.createDebugSectionForIndex(&self.debug_pubtypes_index); +} + fn parseInputFiles(self: *Wasm, files: []const []const u8) !void { for (files) |path| { if (try self.parseObjectFile(path)) continue; @@ -1968,23 +1986,19 @@ fn populateErrorNameTable(self: *Wasm) !void { try self.parseAtom(names_atom, .{ .data = .read_only }); } -/// From a given index variable, returns it value if set. -/// When not set, initialises a new segment, sets the index, -/// and returns it value. -/// When a new segment is initialised. It also creates an atom. -pub fn getOrSetDebugIndex(self: *Wasm, index: *?u32) !u32 { - return (index.*) orelse { - const new_index = @intCast(u32, self.segments.items.len); - index.* = new_index; - try self.appendDummySegment(); +/// From a given index variable, creates a new debug section. +/// This initializes the index, appends a new segment, +/// and finally, creates a managed `Atom`. +pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32) !void { + const new_index = @intCast(u32, self.segments.items.len); + index.* = new_index; + try self.appendDummySegment(); - const atom = try self.base.allocator.create(Atom); - atom.* = Atom.empty; - atom.alignment = 1; // debug sections are always 1-byte-aligned - try self.managed_atoms.append(self.base.allocator, atom); - try self.atoms.put(self.base.allocator, new_index, atom); - return new_index; - }; + const atom = try self.base.allocator.create(Atom); + atom.* = Atom.empty; + atom.alignment = 1; // debug sections are always 1-byte-aligned + try self.managed_atoms.append(self.base.allocator, atom); + try self.atoms.put(self.base.allocator, new_index, atom); } fn resetState(self: *Wasm) void { diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 64efa8320f..440ebea6f4 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -90,6 +90,19 @@ pub fn getFirst(self: *Atom) *Atom { return tmp; } +/// Unlike `getFirst` this returns the first `*Atom` that was +/// produced from Zig code, rather than an object file. +/// This is useful for debug sections where we want to extend +/// the bytes, and don't want to overwrite existing Atoms. +pub fn getFirstZigAtom(self: *Atom) *Atom { + if (self.file == null) return self; + var tmp = self; + return while (tmp.prev) |prev| { + if (prev.file == null) break prev; + tmp = prev; + } else unreachable; // must allocate an Atom first! 
+} + /// Returns the location of the symbol that represents this `Atom` pub fn symbolLoc(self: Atom) Wasm.SymbolLoc { return .{ .file = self.file, .index = self.sym_index }; } @@ -184,8 +197,24 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa return target_atom.offset + segment.offset + (relocation.addend orelse 0); }, .R_WASM_EVENT_INDEX_LEB => return symbol.index, - .R_WASM_SECTION_OFFSET_I32, - .R_WASM_FUNCTION_OFFSET_I32, - => return relocation.addend orelse 0, + .R_WASM_SECTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + return target_atom.offset + (relocation.addend orelse 0); + }, + .R_WASM_FUNCTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + var atom = target_atom.getFirst(); + var offset: u32 = 0; + // TODO: Calculate this during atom allocation, rather than + // this linear calculation. For now it's done here as atoms + // are being sorted after atom allocation, as functions aren't + // merged until later. + while (true) { + offset += 5; // each atom uses 5 bytes to store its body's size + if (atom == target_atom) break; + atom = atom.next.?; + } + return target_atom.offset + offset + (relocation.addend orelse 0); + }, } } From 971327d6e0a2cdcfd1a7695d1dea86dbbebd730e Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 5 Sep 2022 21:35:44 +0200 Subject: [PATCH 10/68] wasm: fix memory leak --- src/arch/wasm/CodeGen.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index b9637bf8e3..a72ae6a423 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -666,6 +666,10 @@ pub fn deinit(self: *Self) void { self.locals.deinit(self.gpa); self.mir_instructions.deinit(self.gpa); self.mir_extra.deinit(self.gpa); + self.free_locals_i32.deinit(self.gpa); + self.free_locals_i64.deinit(self.gpa); + self.free_locals_f32.deinit(self.gpa); + self.free_locals_f64.deinit(self.gpa); self.* = undefined; } From a8d137d05ae36870d4e896cf5e37b591d9fa219c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Tue, 6 Sep 2022 20:21:33 +0200 Subject: [PATCH 11/68] wasm-linker: support incremental debug info Although the wasm-linker previously supported debug information in incremental mode, this no longer worked as-is once support for debug information parsed from object files was added. This commit implements the Zig-created debug information structure from scratch, which is a lot more robust and also allows it to be linked with debug information from other object files.
--- src/link/Dwarf.zig | 73 +++++++----------- src/link/Wasm.zig | 156 ++++++++++++++++++++++++++++----------- src/link/Wasm/Atom.zig | 2 +- src/link/Wasm/Object.zig | 8 +- 4 files changed, 141 insertions(+), 98 deletions(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 671425ab71..474c822ae6 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -861,9 +861,7 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_line_index.?; - const atom = wasm_file.atoms.get(segment_index).?; - const debug_line = atom.getFirstZigAtom().code; + const debug_line = wasm_file.debug_line_atom.?.code; writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); }, else => unreachable, @@ -975,24 +973,21 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_line_index.?; - const segment = &wasm_file.segments.items[segment_index]; - const atom = wasm_file.atoms.get(segment_index).?; - const debug_line = &atom.getFirstZigAtom().code; - if (needed_size != segment.size) { + const atom = wasm_file.debug_line_atom.?; + const debug_line = &atom.code; + const segment_size = debug_line.items.len; + if (needed_size != segment_size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment.size) { - log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment.size}); + if (needed_size > segment_size) { + log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment_size}); try debug_line.resize(self.allocator, needed_size); - mem.set(u8, debug_line.items[segment.size..], 0); + mem.set(u8, debug_line.items[segment_size..], 0); } - segment.size = needed_size; debug_line.items.len = needed_size; } - const offset = segment.offset + src_fn.off; writeDbgLineNopsBuffered( debug_line.items, - offset, + src_fn.off, prev_padding_size, dbg_line_buffer.items, next_padding_size, @@ -1150,12 +1145,8 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_info_index.?; - const segment = &wasm_file.segments.items[segment_index]; - const info_atom = wasm_file.atoms.get(segment_index).?; - const debug_info = &info_atom.getFirstZigAtom().code; - const offset = segment.offset + atom.off; - try writeDbgInfoNopsToArrayList(gpa, debug_info, offset, 0, &.{0}, atom.len, false); + const debug_info = &wasm_file.debug_info_atom.?.code; + try writeDbgInfoNopsToArrayList(gpa, debug_info, atom.off, 0, &.{0}, atom.len, false); }, else => unreachable, } @@ -1282,28 +1273,25 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_info_index.?; - const segment = &wasm_file.segments.items[segment_index]; - const info_atom = wasm_file.atoms.get(segment_index).?; - const debug_info = &info_atom.getFirstZigAtom().code; - if (needed_size != segment.size) { + const info_atom = wasm_file.debug_info_atom.?; + const debug_info = &info_atom.code; + const segment_size = debug_info.items.len; + if (needed_size != segment_size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment.size) { - log.debug(" allocating {d} bytes for 'debug info' information", .{needed_size - segment.size}); + if (needed_size 
> segment_size) { + log.debug(" allocating {d} bytes for 'debug info' information", .{needed_size - segment_size}); try debug_info.resize(self.allocator, needed_size); - mem.set(u8, debug_info.items[segment.size..], 0); + mem.set(u8, debug_info.items[segment_size..], 0); } - segment.size = needed_size; debug_info.items.len = needed_size; } - const offset = segment.offset + atom.off; log.debug(" writeDbgInfoNopsToArrayList debug_info_len={d} offset={d} content_len={d} next_padding_size={d}", .{ - debug_info.items.len, offset, dbg_info_buf.len, next_padding_size, + debug_info.items.len, atom.off, dbg_info_buf.len, next_padding_size, }); try writeDbgInfoNopsToArrayList( gpa, debug_info, - offset, + atom.off, prev_padding_size, dbg_info_buf, next_padding_size, @@ -1344,10 +1332,8 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_line_index.?; - const segment = wasm_file.segments.items[segment_index]; - const offset = segment.offset + decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); - const atom = wasm_file.atoms.get(segment_index).?.getFirstZigAtom(); + const offset = decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); + const atom = wasm_file.debug_line_atom.?; mem.copy(u8, atom.code.items[offset..], &data); }, else => unreachable, @@ -1584,8 +1570,7 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_abbrev_index.?; - const debug_abbrev = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; + const debug_abbrev = &wasm_file.debug_abbrev_atom.?.code; try debug_abbrev.resize(wasm_file.base.allocator, needed_size); mem.copy(u8, debug_abbrev.items, &abbrev_buf); }, @@ -1697,8 +1682,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_info_index.?; - const debug_info = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; + const debug_info = &wasm_file.debug_info_atom.?.code; try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); }, else => unreachable, @@ -2028,8 +2012,7 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_ranges_index.?; - const debug_ranges = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; + const debug_ranges = &wasm_file.debug_ranges_atom.?.code; try debug_ranges.resize(wasm_file.base.allocator, needed_size); mem.copy(u8, debug_ranges.items, di_buf.items); }, @@ -2153,8 +2136,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_line_index.?; - const debug_line = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; + const debug_line = wasm_file.debug_line_atom.?.code; writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); }, else => unreachable, @@ -2303,8 +2285,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.debug_info_index.?; - const debug_info = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code; + const debug_info = 
wasm_file.debug_info_atom.?.code; mem.copy(u8, debug_info.items[reloc.atom.off + reloc.offset ..], &buf); }, else => unreachable, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 67f229ca84..0c5f0e810f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -95,9 +95,10 @@ imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{}, segments: std.ArrayListUnmanaged(Segment) = .{}, /// Maps a data segment key (such as .rodata) to the index into `segments`. data_segments: std.StringArrayHashMapUnmanaged(u32) = .{}, -/// A list of `types.Segment` which provide meta data -/// about a data symbol such as its name -segment_info: std.ArrayListUnmanaged(types.Segment) = .{}, +/// A table of `types.Segment` which provide meta data +/// about a data symbol such as its name where the key is +/// the segment index, which can be found from `data_segments` +segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{}, /// Deduplicated string table for strings used by symbols, imports and exports. string_table: StringTable = .{}, /// Debug information for wasm @@ -158,6 +159,19 @@ export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{}, /// The actual table is populated during `flush`. error_table_symbol: ?u32 = null, +// Debug section atoms. These are only set when the current compilation +// unit contains Zig code. The lifetime of these atoms are extended +// until the end of the compiler's lifetime. Meaning they're not freed +// during `flush()` in incremental-mode. +debug_info_atom: ?*Atom = null, +debug_line_atom: ?*Atom = null, +debug_loc_atom: ?*Atom = null, +debug_ranges_atom: ?*Atom = null, +debug_abbrev_atom: ?*Atom = null, +debug_str_atom: ?*Atom = null, +debug_pubnames_atom: ?*Atom = null, +debug_pubtypes_atom: ?*Atom = null, + pub const Segment = struct { alignment: u32, size: u32, @@ -384,15 +398,16 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm { /// and symbols come from the object files instead. pub fn initDebugSections(self: *Wasm) !void { if (self.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections + assert(self.debug_info_index == null); // this will create an Atom and set the index for us. 
- try self.createDebugSectionForIndex(&self.debug_info_index); - try self.createDebugSectionForIndex(&self.debug_line_index); - try self.createDebugSectionForIndex(&self.debug_loc_index); - try self.createDebugSectionForIndex(&self.debug_abbrev_index); - try self.createDebugSectionForIndex(&self.debug_ranges_index); - try self.createDebugSectionForIndex(&self.debug_str_index); - try self.createDebugSectionForIndex(&self.debug_pubnames_index); - try self.createDebugSectionForIndex(&self.debug_pubtypes_index); + self.debug_info_atom = try self.createDebugSectionForIndex(&self.debug_info_index, ".debug_info"); + self.debug_line_atom = try self.createDebugSectionForIndex(&self.debug_line_index, ".debug_line"); + self.debug_loc_atom = try self.createDebugSectionForIndex(&self.debug_loc_index, ".debug_loc"); + self.debug_abbrev_atom = try self.createDebugSectionForIndex(&self.debug_abbrev_index, ".debug_abbrev"); + self.debug_ranges_atom = try self.createDebugSectionForIndex(&self.debug_ranges_index, ".debug_ranges"); + self.debug_str_atom = try self.createDebugSectionForIndex(&self.debug_str_index, ".debug_str"); + self.debug_pubnames_atom = try self.createDebugSectionForIndex(&self.debug_pubnames_index, ".debug_pubnames"); + self.debug_pubtypes_atom = try self.createDebugSectionForIndex(&self.debug_pubtypes_index, ".debug_pubtypes"); } fn parseInputFiles(self: *Wasm, files: []const []const u8) !void { @@ -676,7 +691,7 @@ pub fn deinit(self: *Wasm) void { for (self.func_types.items) |*func_type| { func_type.deinit(gpa); } - for (self.segment_info.items) |segment_info| { + for (self.segment_info.values()) |segment_info| { gpa.free(segment_info.name); } for (self.objects.items) |*object| { @@ -1364,16 +1379,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { const index = gop.value_ptr.*; self.segments.items[index].size += atom.size; - // segment indexes can be off by 1 due to also containing a segment - // for the code section, so we must check if the existing segment - // is larger than that of the code section, and substract the index by 1 in such case. - var info_add = if (self.code_section_index) |idx| blk: { - if (idx < index) break :blk @as(u32, 1); - break :blk 0; - } else @as(u32, 0); - if (self.debug_info_index != null) info_add += 1; - if (self.debug_line_index != null) info_add += 1; - symbol.index = index - info_add; + symbol.index = @intCast(u32, self.segment_info.getIndex(index).?); // segment info already exists, so free its memory self.base.allocator.free(segment_name); break :result index; @@ -1386,8 +1392,8 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { }); gop.value_ptr.* = index; - const info_index = @intCast(u32, self.segment_info.items.len); - try self.segment_info.append(self.base.allocator, segment_info); + const info_index = @intCast(u32, self.segment_info.count()); + try self.segment_info.put(self.base.allocator, index, segment_info); symbol.index = info_index; break :result index; } @@ -1397,18 +1403,54 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { const segment: *Segment = &self.segments.items[final_index]; segment.alignment = std.math.max(segment.alignment, atom.alignment); - if (self.atoms.getPtr(final_index)) |last| { + try self.appendAtomAtIndex(final_index, atom); +} + +/// From a given index, append the given `Atom` at the back of the linked list. +/// Simply inserts it into the map of atoms when it doesn't exist yet. 
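+/// When atoms already exist at `index`, the given atom becomes the new
+/// tail of the list and its `prev` pointer is updated to the old tail.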
+pub fn appendAtomAtIndex(self: *Wasm, index: u32, atom: *Atom) !void { + if (self.atoms.getPtr(index)) |last| { last.*.next = atom; atom.prev = last.*; last.* = atom; } else { - try self.atoms.putNoClobber(self.base.allocator, final_index, atom); + try self.atoms.putNoClobber(self.base.allocator, index, atom); } } +/// Allocates debug atoms into their respective debug sections +/// to merge them with maybe-existing debug atoms from object files. +fn allocateDebugAtoms(self: *Wasm) !void { + if (self.dwarf == null) return; + + const allocAtom = struct { + fn f(bin: *Wasm, maybe_index: *?u32, atom: *Atom) !void { + const index = maybe_index.* orelse idx: { + const index = @intCast(u32, bin.segments.items.len); + try bin.appendDummySegment(); + maybe_index.* = index; + break :idx index; + }; + atom.size = @intCast(u32, atom.code.items.len); + bin.symbols.items[atom.sym_index].index = index; + try bin.appendAtomAtIndex(index, atom); + } + }.f; + + try allocAtom(self, &self.debug_info_index, self.debug_info_atom.?); + try allocAtom(self, &self.debug_line_index, self.debug_line_atom.?); + try allocAtom(self, &self.debug_loc_index, self.debug_loc_atom.?); + try allocAtom(self, &self.debug_str_index, self.debug_str_atom.?); + try allocAtom(self, &self.debug_ranges_index, self.debug_ranges_atom.?); + try allocAtom(self, &self.debug_abbrev_index, self.debug_abbrev_atom.?); + try allocAtom(self, &self.debug_pubnames_index, self.debug_pubnames_atom.?); + try allocAtom(self, &self.debug_pubtypes_index, self.debug_pubtypes_atom.?); +} + fn allocateAtoms(self: *Wasm) !void { // first sort the data segments try sortDataSegments(self); + try allocateDebugAtoms(self); var it = self.atoms.iterator(); while (it.next()) |entry| { @@ -1426,7 +1468,7 @@ fn allocateAtoms(self: *Wasm) !void { atom.size, }); offset += atom.size; - self.symbol_atom.putAssumeCapacity(atom.symbolLoc(), atom); // Update atom pointers + try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); // Update atom pointers atom = atom.next orelse break; } segment.size = std.mem.alignForwardGeneric(u32, offset, segment.alignment); @@ -1989,20 +2031,35 @@ fn populateErrorNameTable(self: *Wasm) !void { /// From a given index variable, creates a new debug section. /// This initializes the index, appends a new segment, /// and finally, creates a managed `Atom`. 
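+/// The returned `Atom` is owned by the linker: it is appended to
+/// `managed_atoms` and stays alive across incremental updates.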
-pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32) !void { +pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32, name: []const u8) !*Atom { const new_index = @intCast(u32, self.segments.items.len); index.* = new_index; try self.appendDummySegment(); + // _ = index; + + const sym_index = self.symbols_free_list.popOrNull() orelse idx: { + const tmp_index = @intCast(u32, self.symbols.items.len); + _ = try self.symbols.addOne(self.base.allocator); + break :idx tmp_index; + }; + self.symbols.items[sym_index] = .{ + .tag = .section, + .name = try self.string_table.put(self.base.allocator, name), + .index = 0, + .flags = @enumToInt(Symbol.Flag.WASM_SYM_BINDING_LOCAL), + }; const atom = try self.base.allocator.create(Atom); atom.* = Atom.empty; atom.alignment = 1; // debug sections are always 1-byte-aligned + atom.sym_index = sym_index; try self.managed_atoms.append(self.base.allocator, atom); - try self.atoms.put(self.base.allocator, new_index, atom); + try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); + return atom; } fn resetState(self: *Wasm) void { - for (self.segment_info.items) |*segment_info| { + for (self.segment_info.values()) |segment_info| { self.base.allocator.free(segment_info.name); } if (self.base.options.module) |mod| { @@ -2029,6 +2086,12 @@ fn resetState(self: *Wasm) void { self.code_section_index = null; self.debug_info_index = null; self.debug_line_index = null; + self.debug_loc_index = null; + self.debug_str_index = null; + self.debug_ranges_index = null; + self.debug_abbrev_index = null; + self.debug_pubnames_index = null; + self.debug_pubtypes_index = null; } pub fn flush(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -2508,26 +2571,31 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod var debug_bytes = std.ArrayList(u8).init(self.base.allocator); defer debug_bytes.deinit(); - const debug_sections = .{ - .{ ".debug_info", self.debug_info_index }, - .{ ".debug_pubtypes", self.debug_pubtypes_index }, - .{ ".debug_abbrev", self.debug_abbrev_index }, - .{ ".debug_line", self.debug_line_index }, - .{ ".debug_str", self.debug_str_index }, - .{ ".debug_pubnames", self.debug_pubnames_index }, - .{ ".debug_loc", self.debug_loc_index }, - .{ ".debug_ranges", self.debug_ranges_index }, + const DebugSection = struct { + name: []const u8, + index: ?u32, }; - inline for (debug_sections) |item| { - if (item[1]) |index| { + const debug_sections: []const DebugSection = &.{ + .{ .name = ".debug_info", .index = self.debug_info_index }, + .{ .name = ".debug_pubtypes", .index = self.debug_pubtypes_index }, + .{ .name = ".debug_abbrev", .index = self.debug_abbrev_index }, + .{ .name = ".debug_line", .index = self.debug_line_index }, + .{ .name = ".debug_str", .index = self.debug_str_index }, + .{ .name = ".debug_pubnames", .index = self.debug_pubnames_index }, + .{ .name = ".debug_loc", .index = self.debug_loc_index }, + .{ .name = ".debug_ranges", .index = self.debug_ranges_index }, + }; + + for (debug_sections) |item| { + if (item.index) |index| { var atom = self.atoms.get(index).?.getFirst(); while (true) { atom.resolveRelocs(self); try debug_bytes.appendSlice(atom.code.items); atom = atom.next orelse break; } - try emitDebugSection(file, debug_bytes.items, item[0]); + try emitDebugSection(file, debug_bytes.items, item.name); debug_bytes.clearRetainingCapacity(); } } @@ -3242,8 +3310,8 @@ fn emitSegmentInfo(self: *Wasm, file: fs.File, arena: Allocator) !void { var payload = 
std.ArrayList(u8).init(arena); const writer = payload.writer(); try leb.writeULEB128(file.writer(), @enumToInt(types.SubsectionType.WASM_SEGMENT_INFO)); - try leb.writeULEB128(writer, @intCast(u32, self.segment_info.items.len)); - for (self.segment_info.items) |segment_info| { + try leb.writeULEB128(writer, @intCast(u32, self.segment_info.count())); + for (self.segment_info.values()) |segment_info| { log.debug("Emit segment: {s} align({d}) flags({b})", .{ segment_info.name, @ctz(segment_info.alignment), diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 440ebea6f4..3e288fa018 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -190,7 +190,7 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa const target_atom = wasm_bin.symbol_atom.get(target_loc).?; const segment_info = if (target_atom.file) |object_index| blk: { break :blk wasm_bin.objects.items[object_index].segment_info; - } else wasm_bin.segment_info.items; + } else wasm_bin.segment_info.values(); const segment_name = segment_info[symbol.index].outputName(merge_segment); const segment_index = wasm_bin.data_segments.get(segment_name).?; const segment = wasm_bin.segments.items[segment_index]; diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 86c9cdc023..42c3b8a1a0 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -953,13 +953,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin segment.alignment = std.math.max(segment.alignment, atom.alignment); } - if (wasm_bin.atoms.getPtr(final_index)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try wasm_bin.atoms.putNoClobber(gpa, final_index, atom); - } + try wasm_bin.appendAtomAtIndex(final_index, atom); log.debug("Parsed into atom: '{s}' at segment index {d}", .{ self.string_table.get(self.symtable[atom.sym_index].name), final_index }); } } From 11d14a23a3984fa8a72555a6d7e17a06965ad1a0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 22:01:40 +0200 Subject: [PATCH 12/68] win-ci: add missing ZIGPREFIXPATH variable def --- ci/azure/pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/azure/pipelines.yml b/ci/azure/pipelines.yml index 45504c5b6e..633c6389d0 100644 --- a/ci/azure/pipelines.yml +++ b/ci/azure/pipelines.yml @@ -61,6 +61,7 @@ jobs: - pwsh: | Set-Variable -Name ZIGINSTALLDIR -Value "$(Get-Location)\stage3-release" + Set-Variable -Name ZIGPREFIXPATH -Value "$(Get-Location)\$(ZIG_LLVM_CLANG_LLD_NAME)" function CheckLastExitCode { if (!$?) 
{ From a35f156cf60ed3d8095c15c4ab26aee267761a56 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 19:55:39 +0200 Subject: [PATCH 13/68] coff: re-enable default entrypoint for Windows --- lib/std/start.zig | 4 +++ src/arch/x86_64/CodeGen.zig | 24 +++++++++++++-- src/link.zig | 2 +- src/link/Coff.zig | 30 +++++++++++++++++-- .../hello_world_with_updates.0.zig | 2 +- .../hello_world_with_updates.0.zig | 2 +- .../hello_world_with_updates.0.zig | 2 +- 7 files changed, 56 insertions(+), 10 deletions(-) diff --git a/lib/std/start.zig b/lib/std/start.zig index 49094ab02d..9f70cce1ea 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -36,6 +36,10 @@ comptime { if (@typeInfo(@TypeOf(root.main)).Fn.calling_convention != .C) { @export(main2, .{ .name = "main" }); } + } else if (builtin.os.tag == .windows) { + if (!@hasDecl(root, "wWinMainCRTStartup") and !@hasDecl(root, "mainCRTStartup")) { + @export(wWinMainCRTStartup2, .{ .name = "wWinMainCRTStartup" }); + } } else if (builtin.os.tag == .wasi and @hasDecl(root, "main")) { @export(wasiMain2, .{ .name = "_start" }); } else { diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e5d47e589a..d7294e9732 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3999,7 +3999,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .data = undefined, }); } - } else if (self.bin_file.cast(link.File.Coff)) |_| { + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; @@ -4015,8 +4015,26 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. }), .data = undefined, }); - } else if (func_value.castTag(.extern_fn)) |_| { - return self.fail("TODO implement calling extern functions", .{}); + } else if (func_value.castTag(.extern_fn)) |func_payload| { + const extern_fn = func_payload.data; + const decl_name = mod.declPtr(extern_fn.owner_decl).name; + if (extern_fn.lib_name) |lib_name| { + log.debug("TODO enforce that '{s}' is expected in '{s}' library", .{ + decl_name, + lib_name, + }); + } + const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + _ = try self.addInst(.{ + .tag = .call_extern, + .ops = undefined, + .data = .{ + .relocation = .{ + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.coff.sym_index, + .sym_index = sym_index, + }, + }, + }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); } diff --git a/src/link.zig b/src/link.zig index a8845a0d57..986f4e81b6 100644 --- a/src/link.zig +++ b/src/link.zig @@ -473,7 +473,7 @@ pub const File = struct { log.debug("getGlobalSymbol '{s}'", .{name}); switch (base.tag) { // zig fmt: off - .coff => unreachable, + .coff => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name), .elf => unreachable, .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name), .plan9 => unreachable, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e302571671..bf6a32431c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -596,7 +596,7 @@ fn allocateSymbol(self: *Coff) !u32 { self.locals.items[index] = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, @@ -1027,7 +1027,7 @@ pub fn freeDecl(self: *Coff, 
decl_index: Module.Decl.Index) void { log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } - self.locals.items[sym_index].section_number = @intToEnum(coff.SectionNumber, 0); + self.locals.items[sym_index].section_number = .UNDEFINED; _ = self.atom_by_index_table.remove(sym_index); decl.link.coff.sym_index = 0; } @@ -1268,6 +1268,30 @@ pub fn getDeclVAddr( @panic("TODO getDeclVAddr"); } +pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { + const gpa = self.base.allocator; + const sym_name = try gpa.dupe(u8, name); + const global_index = @intCast(u32, self.globals.values().len); + _ = global_index; + const gop = try self.globals.getOrPut(gpa, sym_name); + defer if (gop.found_existing) gpa.free(sym_name); + + if (gop.found_existing) { + // TODO audit this: can we ever reference anything from outside the Zig module? + assert(gop.value_ptr.file == null); + return gop.value_ptr.sym_index; + } + + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + try self.setSymbolName(sym, sym_name); + sym.storage_class = .EXTERNAL; + gop.value_ptr.* = sym_loc; + + return sym_index; +} + pub fn updateDeclLineNumber(self: *Coff, module: *Module, decl: *Module.Decl) !void { _ = self; _ = module; @@ -1614,7 +1638,7 @@ inline fn getSizeOfImage(self: Coff) u32 { /// Returns symbol location corresponding to the set entrypoint (if any). pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { - const entry_name = self.base.options.entry orelse "_start"; // TODO this is incomplete + const entry_name = self.base.options.entry orelse "wWinMainCRTStartup"; // TODO this is incomplete return self.globals.get(entry_name); } diff --git a/test/cases/aarch64-macos/hello_world_with_updates.0.zig b/test/cases/aarch64-macos/hello_world_with_updates.0.zig index dc65cd8279..0de742bdec 100644 --- a/test/cases/aarch64-macos/hello_world_with_updates.0.zig +++ b/test/cases/aarch64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=aarch64-macos // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :107:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-linux/hello_world_with_updates.0.zig b/test/cases/x86_64-linux/hello_world_with_updates.0.zig index 795f4f2991..4816ec1b26 100644 --- a/test/cases/x86_64-linux/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-linux/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=x86_64-linux // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :107:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-macos/hello_world_with_updates.0.zig b/test/cases/x86_64-macos/hello_world_with_updates.0.zig index 9839371e31..998b2f13eb 100644 --- a/test/cases/x86_64-macos/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=x86_64-macos // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :107:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here From 51fba37af70283427a7ef5d2f2fd39f97aaa1e35 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 20:03:41 +0200 Subject: [PATCH 14/68] coff: add relocation for call_extern --- src/arch/x86_64/Emit.zig | 12 ++++++++++++ 1 file changed, 12 
insertions(+) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 12f3e9118f..055216e2bb 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1157,6 +1157,18 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .length = 2, .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. + const atom = coff_file.atom_by_index_table.get(relocation.atom_index).?; + try atom.addRelocation(coff_file, .{ + .@"type" = .direct, + .target = .{ .sym_index = relocation.sym_index, .file = null }, + .offset = offset, + .addend = 0, + .pcrel = true, + .length = 2, + .prev_vaddr = atom.getSymbol(coff_file).value, + }); } else { return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); } From 1ab149c5fc474e73cb52872e11cbd2b916961ede Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 21:15:23 +0200 Subject: [PATCH 15/68] coff: create import atoms and matching bindings --- src/link/Coff.zig | 127 +++++++++++++++++++++++++++++++++++++++-- src/link/Coff/Atom.zig | 10 ++++ 2 files changed, 131 insertions(+), 6 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index bf6a32431c..a690c7cf63 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -51,9 +51,11 @@ got_section_index: ?u16 = null, rdata_section_index: ?u16 = null, data_section_index: ?u16 = null, reloc_section_index: ?u16 = null, +idata_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(coff.Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -63,6 +65,9 @@ strtab_offset: ?u32 = null, got_entries: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +imports_table_free_list: std.ArrayListUnmanaged(u32) = .{}, + /// Virtual address of the entry point procedure relative to image base. entry_addr: ?u32 = null, @@ -109,6 +114,11 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, +/// A table of bindings indexed by the owning them `Atom`. +/// Note that once we refactor `Atom`'s lifetime and ownership rules, +/// this will be a table indexed by index into the list of Atoms. 
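+/// For an import atom, the binding records the external symbol whose
+/// address the loader is expected to place in the atom's slot.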
+bindings: BindingTable = .{}, + pub const Reloc = struct { @"type": enum { got, @@ -124,6 +134,7 @@ pub const Reloc = struct { const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); const BaseRelocationTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(u32)); +const BindingTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(SymbolWithLoc)); const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); const default_file_alignment: u16 = 0x200; @@ -269,10 +280,13 @@ pub fn deinit(self: *Coff) void { self.locals.deinit(gpa); self.globals.deinit(gpa); + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); + self.imports_table.deinit(gpa); + self.imports_table_free_list.deinit(gpa); self.decls.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -299,6 +313,14 @@ pub fn deinit(self: *Coff) void { } self.base_relocs.deinit(gpa); } + + { + var it = self.bindings.valueIterator(); + while (it.next()) |bindings| { + bindings.deinit(gpa); + } + self.bindings.deinit(gpa); + } } fn populateMissingMetadata(self: *Coff) !void { @@ -420,7 +442,7 @@ fn populateMissingMetadata(self: *Coff) !void { .number_of_linenumbers = 0, .flags = .{ .CNT_INITIALIZED_DATA = 1, - .MEM_PURGEABLE = 1, + .MEM_DISCARDABLE = 1, .MEM_READ = 1, }, }; @@ -428,6 +450,30 @@ fn populateMissingMetadata(self: *Coff) !void { try self.sections.append(gpa, .{ .header = header }); } + if (self.idata_section_index == null) { + self.idata_section_index = @intCast(u16, self.sections.slice().len); + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + const off = self.findFreeSpace(file_size, self.page_size); + log.debug("found .idata free space 0x{x} to 0x{x}", .{ off, off + file_size }); + var header = coff.SectionHeader{ + .name = undefined, + .virtual_size = file_size, + .virtual_address = off, + .size_of_raw_data = file_size, + .pointer_to_raw_data = off, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + .number_of_linenumbers = 0, + .flags = .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }, + }; + try self.setSectionName(&header, ".idata"); + try self.sections.append(gpa, .{ .header = header }); + } + if (self.strtab_offset == null) { try self.strtab.buffer.append(gpa, 0); self.strtab_offset = self.findFreeSpace(@intCast(u32, self.strtab.len()), 1); @@ -626,6 +672,27 @@ pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { return index; } +pub fn allocateImportEntry(self: *Coff, target: SymbolWithLoc) !u32 { + const gpa = self.base.allocator; + try self.imports_table.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { + if (self.imports_table_free_list.popOrNull()) |index| { + log.debug(" (reusing import entry index {d})", .{index}); + if (self.imports_table.getIndex(target)) |existing| { + assert(existing == index); + } + break :blk index; + } else { + log.debug(" (allocating import entry at index {d})", .{self.imports_table.keys().len}); + const index = @intCast(u32, self.imports_table.keys().len); + self.imports_table.putAssumeCapacityNoClobber(target, 0); + break :blk index; + } + }; + self.imports_table.keys()[index] = target; + return index; +} + fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const atom = try gpa.create(Atom); @@ -666,6 +733,32 @@ fn createGotAtom(self: *Coff, 
target: SymbolWithLoc) !*Atom { return atom; } +fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + atom.sym_index = try self.allocateSymbol(); + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + self.imports_table.getPtr(target).?.* = atom.sym_index; + + const sym = atom.getSymbolPtr(self); + sym.section_number = @intToEnum(coff.SectionNumber, self.idata_section_index.? + 1); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + + log.debug("allocated import atom at 0x{x}", .{sym.value}); + + const target_sym = self.getSymbol(target); + assert(target_sym.section_number == .UNDEFINED); + try atom.addBinding(self, target); + + return atom; +} + fn growAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u32 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u32, sym.value, alignment) == sym.value; @@ -691,7 +784,7 @@ fn writeAtom(self: *Coff, atom: *Atom, code: []const u8) !void { try self.resolveRelocs(atom); } -fn writeGotAtom(self: *Coff, atom: *Atom) !void { +fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { switch (self.ptr_width) { .p32 => { var buffer: [@sizeOf(u32)]u8 = [_]u8{0} ** @sizeOf(u32); @@ -718,7 +811,12 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; break :blk got_atom.getSymbol(self).value; }, - .direct => self.getSymbol(reloc.target).value, + .direct => blk: { + if (self.getImportAtomForSymbol(reloc.target)) |import_atom| { + break :blk import_atom.getSymbol(self).value; + } + break :blk self.getSymbol(reloc.target).value; + }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; @@ -971,7 +1069,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, sym.value = vaddr; log.debug(" (updating GOT entry)", .{}); const got_atom = self.getGotAtomForSymbol(.{ .sym_index = atom.sym_index, .file = null }).?; - try self.writeGotAtom(got_atom); + try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { self.shrinkAtom(atom, code_len); @@ -992,7 +1090,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; _ = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); - try self.writeGotAtom(got_atom); + try self.writePtrWidthAtom(got_atom); } try self.writeAtom(atom, code); @@ -1227,6 +1325,16 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod sub_prog_node.activate(); defer sub_prog_node.end(); + while (self.unresolved.popOrNull()) |entry| { + assert(entry.value); // We only expect imports generated by the incremental linker for now. 
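+        // Each still-unresolved global gets an import table entry and a
+        // backing atom in the import section, which is then written out
+        // via `writePtrWidthAtom`.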
+ const global = self.globals.values()[entry.key]; + if (self.imports_table.contains(global)) continue; + + _ = try self.allocateImportEntry(global); + const import_atom = try self.createImportAtom(global); + try self.writePtrWidthAtom(import_atom); + } + if (build_options.enable_logging) { self.logSymtab(); } @@ -1272,7 +1380,6 @@ pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { const gpa = self.base.allocator; const sym_name = try gpa.dupe(u8, name); const global_index = @intCast(u32, self.globals.values().len); - _ = global_index; const gop = try self.globals.getOrPut(gpa, sym_name); defer if (gop.found_existing) gpa.free(sym_name); @@ -1288,6 +1395,7 @@ pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { try self.setSymbolName(sym, sym_name); sym.storage_class = .EXTERNAL; gop.value_ptr.* = sym_loc; + try self.unresolved.putNoClobber(gpa, global_index, true); return sym_index; } @@ -1676,6 +1784,13 @@ pub fn getGotAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { return self.atom_by_index_table.get(got_index); } +/// Returns import atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getImportAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { + const imports_index = self.imports_table.get(sym_loc) orelse return null; + return self.atom_by_index_table.get(imports_index); +} + fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !void { if (name.len <= 8) { mem.copy(u8, &header.name, name); diff --git a/src/link/Coff/Atom.zig b/src/link/Coff/Atom.zig index a7608d9a34..1d6e511f3b 100644 --- a/src/link/Coff/Atom.zig +++ b/src/link/Coff/Atom.zig @@ -118,3 +118,13 @@ pub fn addBaseRelocation(self: *Atom, coff_file: *Coff, offset: u32) !void { } try gop.value_ptr.append(gpa, offset); } + +pub fn addBinding(self: *Atom, coff_file: *Coff, target: SymbolWithLoc) !void { + const gpa = coff_file.base.allocator; + log.debug(" (adding binding to target %{d} in %{d})", .{ target.sym_index, self.sym_index }); + const gop = try coff_file.bindings.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, target); +} From 0ebeb58d91b23acbd2ad3a168af19459af63a8f6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 21:49:51 +0200 Subject: [PATCH 16/68] coff: populate import address table dir --- src/link/Coff.zig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index a690c7cf63..b57307c862 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1346,6 +1346,7 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod } } try self.writeBaseRelocations(); + try self.writeImportTable(); if (self.getEntryPoint()) |entry_sym_loc| { self.entry_addr = self.getSymbol(entry_sym_loc).value; @@ -1484,6 +1485,22 @@ fn writeBaseRelocations(self: *Coff) !void { }; } +fn writeImportTable(self: *Coff) !void { + const gpa = self.base.allocator; + _ = gpa; + + const section = self.sections.get(self.idata_section_index.?); + const iat_rva = section.header.virtual_address; + const iat_size = blk: { + const last_atom = section.last_atom.?; + break :blk last_atom.getSymbol(self).value + last_atom.size - iat_rva; + }; + self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{ + .virtual_address = iat_rva, + .size = iat_size, + }; +} + fn writeStrtab(self: *Coff) !void { const allocated_size = self.allocatedSize(self.strtab_offset.?); const needed_size = @intCast(u32, 
self.strtab.len()); From aac4c1d3b225ff4cd7138d9aae599c9540c7f04e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 00:24:06 +0200 Subject: [PATCH 17/68] coff: fix contents of IAT, and ensure codegen loads addr into reg As far as I can see, unlike with MachO, we don't have any stubs helper routines available and need to load a bound pointer into a register to then call it. --- src/arch/x86_64/CodeGen.zig | 76 ++++++++++++++++++++++++++------- src/arch/x86_64/Emit.zig | 5 ++- src/arch/x86_64/Mir.zig | 1 + src/link/Coff.zig | 85 +++++++++++++++++++++++++++++++++---- 4 files changed, 140 insertions(+), 27 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d7294e9732..e6386d3ac7 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -137,6 +137,7 @@ pub const MCValue = union(enum) { /// If the type is a pointer, it means the pointer is referenced indirectly via GOT. /// When lowered, linker will emit a relocation of type X86_64_RELOC_GOT. got_load: u32, + imports_load: u32, /// The value is in memory referenced directly via symbol index. /// If the type is a pointer, it means the pointer is referenced directly via symbol index. /// When lowered, linker will emit a relocation of type X86_64_RELOC_SIGNED. @@ -156,6 +157,7 @@ pub const MCValue = union(enum) { .ptr_stack_offset, .direct_load, .got_load, + .imports_load, => true, else => false, }; @@ -2274,6 +2276,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, array); }, @@ -2618,6 +2621,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .memory, .got_load, .direct_load, + .imports_load, => { const reg = try self.copyToTmpRegister(ptr_ty, ptr); try self.load(dst_mcv, .{ .register = reg }, ptr_ty); @@ -2655,6 +2659,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue switch (ptr) { .got_load, .direct_load, + .imports_load, => |sym_index| { const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*)); const mod = self.bin_file.options.module.?; @@ -2666,6 +2671,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue const flags: u2 = switch (ptr) { .got_load => 0b00, .direct_load => 0b01, + .imports_load => 0b10, else => unreachable, }; _ = try self.addInst(.{ @@ -2763,6 +2769,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, .stack_offset, => { @@ -2783,6 +2790,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { const value_lock: ?RegisterLock = switch (value) { @@ -2854,6 +2862,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { if (abi_size <= 8) { @@ -3565,6 +3574,7 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory, .got_load, .direct_load, + .imports_load, .eflags, => { assert(abi_size <= 8); @@ -3650,7 +3660,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu => { return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 ADD/SUB/CMP source symbol at index in 
linker", .{}); }, .eflags => { @@ -3661,7 +3674,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory => { return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 ADD/SUB/CMP destination symbol at index", .{}); }, } @@ -3729,7 +3745,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3773,7 +3792,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory, .stack_offset => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3784,7 +3806,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply destination symbol at index in linker", .{}); }, } @@ -3948,6 +3973,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .memory => unreachable, .got_load => unreachable, .direct_load => unreachable, + .imports_load => unreachable, .eflags => unreachable, .register_overflow => unreachable, } @@ -4025,15 +4051,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
}); } const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + try self.genSetReg(Type.initTag(.usize), .rax, .{ + .imports_load = sym_index, + }); _ = try self.addInst(.{ - .tag = .call_extern, - .ops = undefined, - .data = .{ - .relocation = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.coff.sym_index, - .sym_index = sym_index, - }, - }, + .tag = .call, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rax, + .flags = 0b01, + }), + .data = undefined, }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); @@ -4443,7 +4470,11 @@ fn genVarDbgInfo( leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable; dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, - .memory, .got_load, .direct_load => { + .memory, + .got_load, + .direct_load, + .imports_load, + => { const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); const is_ptr = switch (tag) { .dbg_var_ptr => true, @@ -4474,7 +4505,10 @@ fn genVarDbgInfo( try dbg_info.append(DW.OP.deref); } switch (mcv) { - .got_load, .direct_load => |index| try dw.addExprlocReloc(index, offset, is_ptr), + .got_load, + .direct_load, + .imports_load, + => |index| try dw.addExprlocReloc(index, offset, is_ptr), else => {}, } }, @@ -5474,6 +5508,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .memory, .direct_load, .got_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5721,6 +5756,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl .memory, .got_load, .direct_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5848,6 +5884,7 @@ fn genInlineMemcpy( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr); }, @@ -5883,6 +5920,7 @@ fn genInlineMemcpy( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr); }, @@ -6021,6 +6059,7 @@ fn genInlineMemset( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr); }, @@ -6261,6 +6300,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, .direct_load, .got_load, + .imports_load, => { switch (ty.zigTypeTag()) { .Float => { @@ -6655,7 +6695,11 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { // TODO Is this the only condition for pointer dereference for memcpy? 
const src: MCValue = blk: { switch (src_ptr) { - .got_load, .direct_load, .memory => { + .got_load, + .direct_load, + .imports_load, + .memory, + => { const reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr); _ = try self.addInst(.{ diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 055216e2bb..a0f34d5732 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -985,8 +985,8 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const relocation = emit.mir.instructions.items(.data)[inst].relocation; switch (ops.flags) { - 0b00, 0b01 => {}, - else => return emit.fail("TODO unused LEA PIC variants 0b10 and 0b11", .{}), + 0b00, 0b01, 0b10 => {}, + else => return emit.fail("TODO unused LEA PIC variant 0b11", .{}), } // lea reg1, [rip + reloc] @@ -1024,6 +1024,7 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .@"type" = switch (ops.flags) { 0b00 => .got, 0b01 => .direct, + 0b10 => .imports, else => unreachable, }, .target = .{ .sym_index = relocation.sym_index, .file = null }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 71aecc5e85..b2e0f204eb 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -180,6 +180,7 @@ pub const Inst = struct { /// ops flags: form: /// 0b00 reg1, [rip + reloc] // via GOT PIC /// 0b01 reg1, [rip + reloc] // direct load PIC + /// 0b10 reg1, [rip + reloc] // via imports table PIC /// Notes: /// * `Data` contains `relocation` lea_pic, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index b57307c862..9f3ccc069c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -123,6 +123,7 @@ pub const Reloc = struct { @"type": enum { got, direct, + imports, }, target: SymbolWithLoc, offset: u32, @@ -812,18 +813,18 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { break :blk got_atom.getSymbol(self).value; }, .direct => blk: { - if (self.getImportAtomForSymbol(reloc.target)) |import_atom| { - break :blk import_atom.getSymbol(self).value; - } break :blk self.getSymbol(reloc.target).value; }, + .imports => blk: { + const import_atom = self.getImportAtomForSymbol(reloc.target) orelse continue; + break :blk import_atom.getSymbol(self).value; + }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; - if (target_vaddr_with_addend == reloc.prev_vaddr) continue; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ - reloc.offset, + source_sym.value + reloc.offset, target_vaddr_with_addend, self.getSymbolName(reloc.target), @tagName(reloc.@"type"), @@ -833,7 +834,7 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const source_vaddr = source_sym.value + reloc.offset; const disp = target_vaddr_with_addend - source_vaddr - 4; try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset); - return; + continue; } switch (self.ptr_width) { @@ -1345,8 +1346,8 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod try self.resolveRelocs(atom.*); } } - try self.writeBaseRelocations(); try self.writeImportTable(); + try self.writeBaseRelocations(); if (self.getEntryPoint()) |entry_sym_loc| { self.entry_addr = self.getSymbol(entry_sym_loc).value; @@ -1487,14 +1488,80 @@ fn writeBaseRelocations(self: *Coff) !void { fn writeImportTable(self: *Coff) !void { const gpa = self.base.allocator; - _ = gpa; const section = self.sections.get(self.idata_section_index.?); const iat_rva = section.header.virtual_address; const iat_size = blk: { const last_atom = 
section.last_atom.?; - break :blk last_atom.getSymbol(self).value + last_atom.size - iat_rva; + break :blk last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer }; + + const dll_name = "KERNEL32.dll"; + + var import_dir_entry = coff.ImportDirectoryEntry{ + .import_lookup_table_rva = @sizeOf(coff.ImportDirectoryEntry) * 2, + .time_date_stamp = 0, + .forwarder_chain = 0, + .name_rva = 0, + .import_address_table_rva = iat_rva, + }; + + // TODO: we currently assume there's only one (implicit) DLL - ntdll + var lookup_table = std.ArrayList(coff.ImportLookupEntry64.ByName).init(gpa); + defer lookup_table.deinit(); + + var names_table = std.ArrayList(u8).init(gpa); + defer names_table.deinit(); + + // TODO: check if import is still valid + for (self.imports_table.keys()) |target| { + const target_name = self.getSymbolName(target); + const start = names_table.items.len; + mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL + try names_table.appendSlice(target_name); + try names_table.append(0); + const end = names_table.items.len; + if (!mem.isAlignedGeneric(usize, end - start, @sizeOf(u16))) { + try names_table.append(0); + } + try lookup_table.append(.{ .name_table_rva = @intCast(u31, start) }); + } + try lookup_table.append(.{ .name_table_rva = 0 }); // the sentinel + + const dir_entry_size = @sizeOf(coff.ImportDirectoryEntry) + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName) + names_table.items.len + dll_name.len + 1; + const needed_size = iat_size + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry); + const sect_capacity = self.allocatedSize(section.header.pointer_to_raw_data); + assert(needed_size < sect_capacity); // TODO: implement expanding .idata section + + // Fixup offsets + const base_rva = iat_rva + iat_size; + import_dir_entry.import_lookup_table_rva += base_rva; + import_dir_entry.name_rva = @intCast(u32, base_rva + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry) - dll_name.len - 1); + + for (lookup_table.items[0 .. 
lookup_table.items.len - 1]) |*lk| { + lk.name_table_rva += @intCast(u31, base_rva + @sizeOf(coff.ImportDirectoryEntry) * 2 + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName)); + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(dir_entry_size + @sizeOf(coff.ImportDirectoryEntry)); + buffer.appendSliceAssumeCapacity(mem.asBytes(&import_dir_entry)); + buffer.appendNTimesAssumeCapacity(0, @sizeOf(coff.ImportDirectoryEntry)); // the sentinel; TODO: I think doing all of the above on bytes directly might be cleaner + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(lookup_table.items)); + buffer.appendSliceAssumeCapacity(names_table.items); + buffer.appendSliceAssumeCapacity(dll_name); + buffer.appendAssumeCapacity(0); + + try self.base.file.?.pwriteAll(buffer.items, section.header.pointer_to_raw_data + iat_size); + // Override the IAT atoms + // TODO: we should rewrite only dirtied atoms, but that's for way later + try self.base.file.?.pwriteAll(mem.sliceAsBytes(lookup_table.items), section.header.pointer_to_raw_data); + + self.data_directories[@enumToInt(coff.DirectoryEntry.IMPORT)] = .{ + .virtual_address = iat_rva + iat_size, + .size = @intCast(u32, @sizeOf(coff.ImportDirectoryEntry) * 2), + }; + self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{ .virtual_address = iat_rva, .size = iat_size, From a19e6adbf90771890ecdbb52d6dafab1943e4cc4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 12:24:55 +0200 Subject: [PATCH 18/68] x86_64: add support for Win64/C calling convention --- src/arch/x86_64/CodeGen.zig | 20 ++++------ src/arch/x86_64/Emit.zig | 7 ++-- src/arch/x86_64/Mir.zig | 61 +++++++++++++++--------------- src/arch/x86_64/abi.zig | 74 ++++++++++++++++++++++++++++++------- 4 files changed, 101 insertions(+), 61 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e6386d3ac7..5e404d00bd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -32,11 +32,6 @@ const abi = @import("abi.zig"); const errUnionPayloadOffset = codegen.errUnionPayloadOffset; const errUnionErrorOffset = codegen.errUnionErrorOffset; -const callee_preserved_regs = abi.callee_preserved_regs; -const caller_preserved_regs = abi.caller_preserved_regs; -const c_abi_int_param_regs = abi.c_abi_int_param_regs; -const c_abi_int_return_regs = abi.c_abi_int_return_regs; - const Condition = bits.Condition; const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; @@ -448,10 +443,11 @@ fn gen(self: *Self) InnerError!void { // Create list of registers to save in the prologue. // TODO handle register classes - var reg_list: Mir.RegisterList(Register, &callee_preserved_regs) = .{}; - inline for (callee_preserved_regs) |reg| { + var reg_list = Mir.RegisterList{}; + const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*); + for (callee_preserved_regs) |reg| { if (self.register_manager.isRegAllocated(reg)) { - reg_list.push(reg); + reg_list.push(callee_preserved_regs, reg); } } const saved_regs_stack_space: u32 = reg_list.count() * 8; @@ -3923,7 +3919,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
try self.spillEflagsIfOccupied(); - for (caller_preserved_regs) |reg| { + for (abi.getCallerPreservedRegs(self.target.*)) |reg| { try self.register_manager.getReg(reg, null); } @@ -7140,7 +7136,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); result.return_value = .{ .register = aliased_reg }; } else { // We simply make the return MCValue a stack offset. However, the actual value @@ -7187,7 +7183,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { else => false, }; if (pass_in_reg) { - if (next_int_reg >= c_abi_int_param_regs.len) break; + if (next_int_reg >= abi.getCAbiIntParamRegs(self.target.*).len) break; try by_reg.putNoClobber(i, next_int_reg); next_int_reg += 1; } @@ -7210,7 +7206,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { const param_size = @intCast(u32, ty.abiSize(self.target.*)); const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); if (by_reg.get(i)) |int_reg| { - const aliased_reg = registerAlias(c_abi_int_param_regs[int_reg], param_size); + const aliased_reg = registerAlias(abi.getCAbiIntParamRegs(self.target.*)[int_reg], param_size); result.args[i] = .{ .register = aliased_reg }; next_int_reg += 1; } else { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index a0f34d5732..66e603aab0 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -283,10 +283,11 @@ fn mirPushPopRegisterList(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerErro const ops = emit.mir.instructions.items(.ops)[inst].decode(); const payload = emit.mir.instructions.items(.data)[inst].payload; const save_reg_list = emit.mir.extraData(Mir.SaveRegisterList, payload).data; - const reg_list = Mir.RegisterList(Register, &abi.callee_preserved_regs).fromInt(save_reg_list.register_list); var disp: i32 = -@intCast(i32, save_reg_list.stack_end); - inline for (abi.callee_preserved_regs) |reg| { - if (reg_list.isSet(reg)) { + const reg_list = Mir.RegisterList.fromInt(save_reg_list.register_list); + const callee_preserved_regs = abi.getCalleePreservedRegs(emit.target.*); + for (callee_preserved_regs) |reg| { + if (reg_list.isSet(callee_preserved_regs, reg)) { switch (tag) { .push => try lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{ .disp = @bitCast(u32, disp), diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index b2e0f204eb..ca19847042 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -461,46 +461,43 @@ pub const Inst = struct { } }; -pub fn RegisterList(comptime Reg: type, comptime registers: []const Reg) type { - assert(registers.len <= @bitSizeOf(u32)); - return struct { - bitset: RegBitSet = RegBitSet.initEmpty(), +pub const RegisterList = struct { + bitset: BitSet = BitSet.initEmpty(), - const RegBitSet = IntegerBitSet(registers.len); - const Self = @This(); + const BitSet = IntegerBitSet(@ctz(@as(u32, 0))); + const Self = @This(); - fn getIndexForReg(reg: Reg) RegBitSet.MaskInt { - inline for (registers) |cpreg, i| { - if (reg.id() == cpreg.id()) return i; - } - unreachable; // register not in input register list! 
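The `RegisterList` change in progress here replaces the comptime-specialized type with a single runtime struct, since the callee-preserved set now depends on the target OS and can no longer be baked into the type. A minimal standalone sketch of the underlying encoding (toy names, not the backend's API): each register's position in the caller-supplied slice becomes a bit index, so a whole save list round-trips through one u32 mask.

    const std = @import("std");

    const RegList = struct {
        mask: u32 = 0,

        fn indexOf(registers: []const u8, reg: u8) u5 {
            for (registers) |r, i| {
                if (r == reg) return @intCast(u5, i);
            }
            unreachable; // register not in input register list
        }

        fn push(self: *RegList, registers: []const u8, reg: u8) void {
            self.mask |= @as(u32, 1) << indexOf(registers, reg);
        }

        fn isSet(self: RegList, registers: []const u8, reg: u8) bool {
            return (self.mask & (@as(u32, 1) << indexOf(registers, reg))) != 0;
        }
    };

    test "save list round-trips through a u32 mask" {
        const callee_preserved = [_]u8{ 3, 12, 13, 14, 15 }; // stand-ins for rbx, r12-r15
        var list = RegList{};
        list.push(&callee_preserved, 12);
        try std.testing.expect(list.isSet(&callee_preserved, 12));
        try std.testing.expect(!list.isSet(&callee_preserved, 15));
    }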
+ fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { + for (registers) |cpreg, i| { + if (reg.id() == cpreg.id()) return @intCast(u32, i); } + unreachable; // register not in input register list! + } - pub fn push(self: *Self, reg: Reg) void { - const index = getIndexForReg(reg); - self.bitset.set(index); - } + pub fn push(self: *Self, registers: []const Register, reg: Register) void { + const index = getIndexForReg(registers, reg); + self.bitset.set(index); + } - pub fn isSet(self: Self, reg: Reg) bool { - const index = getIndexForReg(reg); - return self.bitset.isSet(index); - } + pub fn isSet(self: Self, registers: []const Register, reg: Register) bool { + const index = getIndexForReg(registers, reg); + return self.bitset.isSet(index); + } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } + pub fn asInt(self: Self) u32 { + return self.bitset.mask; + } - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = RegBitSet{ .mask = @intCast(RegBitSet.MaskInt, mask) }, - }; - } + pub fn fromInt(mask: u32) Self { + return .{ + .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, + }; + } - pub fn count(self: Self) u32 { - return @intCast(u32, self.bitset.count()); - } - }; -} + pub fn count(self: Self) u32 { + return @intCast(u32, self.bitset.count()); + } +}; pub const SaveRegisterList = struct { /// Use `RegisterList` to populate. diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 344fe235f3..2f4a7d1681 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -392,23 +392,69 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class { } } -/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them -/// for anything else but stack offset tracking therefore we exclude them from this set. -pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; -/// These registers need to be preserved (saved on the stack) and restored by the caller before -/// the caller relinquishes control to a subroutine via call instruction (or similar). -/// In other words, these registers are free to use by the callee. -pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const SysV = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. + pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; -pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; -pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; + pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; +}; +pub const Win64 = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. 
+ pub const callee_preserved_regs = [_]Register{ .rbx, .rsi, .rdi, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. + pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 }; + + pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{.rax}; +}; + +pub fn getCalleePreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.callee_preserved_regs, + else => &SysV.callee_preserved_regs, + }; +} + +pub fn getCallerPreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.caller_preserved_regs, + else => &SysV.caller_preserved_regs, + }; +} + +pub fn getCAbiIntParamRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_param_regs, + else => &SysV.c_abi_int_param_regs, + }; +} + +pub fn getCAbiIntReturnRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_return_regs, + else => &SysV.c_abi_int_return_regs, + }; +} + +const gp_regs = [_]Register{ + .rax, .rbx, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, +}; const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15, }; -const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs; -pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); +const allocatable_regs = gp_regs ++ sse_avx_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_regs); // Register classes const RegisterBitSet = RegisterManager.RegisterBitSet; @@ -417,15 +463,15 @@ pub const RegisterClass = struct { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ .start = 0, - .end = caller_preserved_regs.len + callee_preserved_regs.len, + .end = gp_regs.len, }, true); break :blk set; }; pub const sse: RegisterBitSet = blk: { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ - .start = caller_preserved_regs.len + callee_preserved_regs.len, - .end = allocatable_registers.len, + .start = gp_regs.len, + .end = allocatable_regs.len, }, true); break :blk set; }; From 49b1716064cb87b5e8ca13dcb1c9e4fc701737bc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 16:01:40 +0200 Subject: [PATCH 19/68] coff: implement lowering unnamed consts --- src/arch/x86_64/CodeGen.zig | 2 +- src/link/Coff.zig | 110 +++++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 5e404d00bd..1805fe7697 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6959,7 +6959,7 @@ fn lowerUnnamedConst(self: *Self, tv: TypedValue) InnerError!MCValue { } else if (self.bin_file.cast(link.File.MachO)) |_| { return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |_| { - return self.fail("TODO lower unnamed const in COFF", .{}); + return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Plan9)) |_| { return self.fail("TODO lower unnamed const in Plan9", 
.{}); } else { diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 9f3ccc069c..05ccfb7710 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -862,6 +862,11 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); + // TODO hashmap + for (self.managed_atoms.items) |owned| { + if (owned == atom) break; + } else atom.deinit(self.base.allocator); + const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; const free_list = &self.sections.items(.free_list)[sect_id]; @@ -955,10 +960,67 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live } pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.Index) !u32 { - _ = self; - _ = tv; - _ = decl_index; - @panic("TODO lowerUnnamedConst"); + const gpa = self.base.allocator; + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + const unnamed_consts = gop.value_ptr; + + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + + atom.sym_index = try self.allocateSymbol(); + const sym = atom.getSymbolPtr(self); + const sym_name = blk: { + const decl_name = try decl.getFullyQualifiedName(mod); + defer gpa.free(decl_name); + + const index = unnamed_consts.items.len; + break :blk try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + }; + defer gpa.free(sym_name); + try self.setSymbolName(sym, sym_name); + sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.?); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + + const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), tv, &code_buffer, .none, .{ + .parent_atom_index = atom.sym_index, + }); + const code = switch (res) { + .externally_managed => |x| x, + .appended => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + log.err("{s}", .{em.msg}); + return error.AnalysisFail; + }, + }; + + const required_alignment = tv.ty.abiAlignment(self.base.options.target); + atom.alignment = required_alignment; + atom.size = @intCast(u32, code.len); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + errdefer self.freeAtom(atom); + + try unnamed_consts.append(gpa, atom); + + log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, sym.value }); + log.debug(" (required alignment 0x{x})", .{required_alignment}); + + try self.writeAtom(atom, code); + + return atom.sym_index; } pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) !void { @@ -1097,6 +1159,20 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writeAtom(atom, code); } +fn freeUnnamedConsts(self: *Coff, decl_index: Module.Decl.Index) void { + const gpa = self.base.allocator; + const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; + for (unnamed_consts.items) |atom| { + self.freeAtom(atom); + self.locals_free_list.append(gpa, atom.sym_index) catch {}; + self.locals.items[atom.sym_index].section_number = .UNDEFINED; + _ = self.atom_by_index_table.remove(atom.sym_index); + log.debug(" adding local symbol index {d} to free 
list", .{atom.sym_index}); + atom.sym_index = 0; + } + unnamed_consts.clearAndFree(gpa); +} + pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); @@ -1110,6 +1186,7 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { const kv = self.decls.fetchRemove(decl_index); if (kv.?.value) |_| { self.freeAtom(&decl.link.coff); + self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. @@ -1372,10 +1449,27 @@ pub fn getDeclVAddr( decl_index: Module.Decl.Index, reloc_info: link.File.RelocInfo, ) !u64 { - _ = self; - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + assert(self.llvm_object == null); + assert(decl.link.coff.sym_index != 0); + + const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; + const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; + const target_sym = self.getSymbol(target); + try atom.addRelocation(self, .{ + .@"type" = .direct, + .target = target, + .offset = @intCast(u32, reloc_info.offset), + .addend = reloc_info.addend, + .pcrel = false, + .length = 3, + .prev_vaddr = target_sym.value, + }); + try atom.addBaseRelocation(self, @intCast(u32, reloc_info.offset)); + + return 0; } pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { From 38573fed0ba796f642e7b4eebdf3f9eafc572f25 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 16:35:09 +0200 Subject: [PATCH 20/68] coff: fix runtime traps --- src/arch/x86_64/CodeGen.zig | 7 ++++++- src/arch/x86_64/abi.zig | 2 +- src/link/Coff.zig | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1805fe7697..6c2db1f25e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4204,6 +4204,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + log.warn("REG = {}", .{reg}); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); @@ -5854,7 +5855,11 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and .rcx if not free, and then restore + // TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore + // How can we do this without context if the value inside .rax or .rcx we preserve contains + // value needed to perform the memcpy in the first place? + // I think we should have an accumulator-based context that we pass with each subsequent helper + // call until we resolve the entire instruction. 
try self.register_manager.getReg(.rax, null); try self.register_manager.getReg(.rcx, null); diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 2f4a7d1681..298fc6656f 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -447,7 +447,7 @@ pub fn getCAbiIntReturnRegs(target: Target) []const Register { } const gp_regs = [_]Register{ - .rax, .rbx, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, + .rbx, .r12, .r13, .r14, .r15, .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, }; const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 05ccfb7710..b5670ce5a1 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1053,7 +1053,7 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = 0, + .parent_atom_index = decl.link.coff.sym_index, }); const code = switch (res) { .externally_managed => |x| x, From 3a4c69c01824fb6f72e90433a5683a8df09ad4c1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 19:04:08 +0200 Subject: [PATCH 21/68] x86_64: implement Windows x64 calling convention --- lib/std/os/windows/kernel32.zig | 8 +- src/arch/x86_64/CodeGen.zig | 136 ++++++++++++++++++-------------- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 9e6f5df97b..8d146def7f 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -348,7 +348,13 @@ pub extern "kernel32" fn WriteFile( in_out_lpOverlapped: ?*OVERLAPPED, ) callconv(WINAPI) BOOL; -pub extern "kernel32" fn WriteFileEx(hFile: HANDLE, lpBuffer: [*]const u8, nNumberOfBytesToWrite: DWORD, lpOverlapped: *OVERLAPPED, lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE) callconv(WINAPI) BOOL; +pub extern "kernel32" fn WriteFileEx( + hFile: HANDLE, + lpBuffer: [*]const u8, + nNumberOfBytesToWrite: DWORD, + lpOverlapped: *OVERLAPPED, + lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE, +) callconv(WINAPI) BOOL; pub extern "kernel32" fn LoadLibraryW(lpLibFileName: [*:0]const u16) callconv(WINAPI) ?HMODULE; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6c2db1f25e..15e14bcbb8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4204,7 +4204,6 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); - log.warn("REG = {}", .{reg}); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); @@ -7129,7 +7128,75 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 1; return result; }, - .Unspecified, .C => { + .C => { + // Return values + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + // TODO: is this even possible for C calling convention? 
+ result.return_value = .{ .none = {} }; + } else { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + if (ret_ty_size == 0) { + assert(ret_ty.isError()); + result.return_value = .{ .immediate = 0 }; + } else if (ret_ty_size <= 8) { + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; + } else { + // TODO: return argument cell should go first + result.return_value = .{ .stack_offset = 0 }; + } + } + + // Input params + var next_stack_offset: u32 = switch (result.return_value) { + .stack_offset => |off| @intCast(u32, off), + else => 0, + }; + + for (param_types) |ty, i| { + assert(ty.hasRuntimeBits()); + + if (self.target.os.tag != .windows) { + return self.fail("TODO SysV calling convention", .{}); + } + + switch (abi.classifyWindows(ty, self.target.*)) { + .integer => blk: { + if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough + result.args[i] = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; + continue; + }, + .sse => return self.fail("TODO float/vector via SSE on Windows", .{}), + .memory => {}, // fallthrough + else => unreachable, + } + + const param_size = @intCast(u32, ty.abiSize(self.target.*)); + const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; + } + // Align the stack to 16bytes before allocating shadow stack space. + const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); + const padding = aligned_next_stack_offset - next_stack_offset; + if (padding > 0) { + for (result.args) |*arg| { + if (arg.isRegister()) continue; + arg.stack_offset += @intCast(i32, padding); + } + } + + // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push + // the args onto the stack so that there is no padding between the first argument and + // the standard preamble. + // alignment padding | ret value (if > 8) | args ... | shadow stack space | $rbp | + result.stack_byte_count = aligned_next_stack_offset + 4 * @sizeOf(u64); + result.stack_align = 16; + }, + .Unspecified => { // Return values if (ret_ty.zigTypeTag() == .NoReturn) { result.return_value = .{ .unreach = {} }; @@ -7141,8 +7208,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); - result.return_value = .{ .register = aliased_reg }; + result.return_value = .{ .register = .rdi }; } else { // We simply make the return MCValue a stack offset. However, the actual value // for the offset will be populated later. We will also push the stack offset @@ -7152,73 +7218,21 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } // Input params - // First, split into args that can be passed via registers. - // This will make it easier to then push the rest of args in reverse - // order on the stack. 
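Two Win64 facts from the code above are worth isolating: the first four integer arguments ride in rcx, rdx, r8, r9 (per the `Win64.c_abi_int_param_regs` table added earlier), and `stack_byte_count` tacks `4 * @sizeOf(u64)` onto the aligned argument area, the 32-byte shadow space a Win64 caller must always reserve for those four register parameters. A hedged sketch of both rules (toy names; register names as strings for illustration only):

    const std = @import("std");

    const win64_int_param_regs = [_][]const u8{ "rcx", "rdx", "r8", "r9" };

    fn win64IntParamHome(arg_index: usize) ?[]const u8 {
        if (arg_index < win64_int_param_regs.len) return win64_int_param_regs[arg_index];
        return null; // spilled to the stack argument area
    }

    test "first four integer args ride in registers" {
        try std.testing.expectEqualStrings("r9", win64IntParamHome(3).?);
        try std.testing.expect(win64IntParamHome(4) == null);
    }

    test "frame size: aligned arg area plus 32-byte shadow space" {
        const next_stack_offset: u32 = 16; // e.g. two u64 args already spilled
        const aligned = std.mem.alignForwardGeneric(u32, next_stack_offset, 16);
        try std.testing.expectEqual(@as(u32, 48), aligned + 4 * @sizeOf(u64));
    }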
- var next_int_reg: usize = 0; - var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator); - defer by_reg.deinit(); - - // If we want debug output, we store all args on stack for better liveness of args - // in debugging contexts such as previewing the args in the debugger anywhere in - // the procedure. Passing the args via registers can lead to reusing the register - // for local ops thus clobbering the input arg forever. - // This of course excludes C ABI calls. - const omit_args_in_registers = blk: { - if (cc == .C) break :blk false; - switch (self.bin_file.options.optimize_mode) { - .Debug => break :blk true, - else => break :blk false, - } - }; - if (!omit_args_in_registers) { - for (param_types) |ty, i| { - if (!ty.hasRuntimeBits()) continue; - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - // For simplicity of codegen, slices and other types are always pushed onto the stack. - // TODO: look into optimizing this by passing things as registers sometimes, - // such as ptr and len of slices as separate registers. - // TODO: also we need to honor the C ABI for relevant types rather than passing on - // the stack here. - const pass_in_reg = switch (ty.zigTypeTag()) { - .Bool => true, - .Int, .Enum => param_size <= 8, - .Pointer => ty.ptrSize() != .Slice, - .Optional => ty.isPtrLikeOptional(), - else => false, - }; - if (pass_in_reg) { - if (next_int_reg >= abi.getCAbiIntParamRegs(self.target.*).len) break; - try by_reg.putNoClobber(i, next_int_reg); - next_int_reg += 1; - } - } - } - var next_stack_offset: u32 = switch (result.return_value) { .stack_offset => |off| @intCast(u32, off), else => 0, }; - var count: usize = param_types.len; - while (count > 0) : (count -= 1) { - const i = count - 1; - const ty = param_types[i]; + + for (param_types) |ty, i| { if (!ty.hasRuntimeBits()) { - assert(cc != .C); result.args[i] = .{ .none = {} }; continue; } const param_size = @intCast(u32, ty.abiSize(self.target.*)); const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); - if (by_reg.get(i)) |int_reg| { - const aliased_reg = registerAlias(abi.getCAbiIntParamRegs(self.target.*)[int_reg], param_size); - result.args[i] = .{ .register = aliased_reg }; - next_int_reg += 1; - } else { - const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); - result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; - next_stack_offset = offset; - } + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; } result.stack_align = 16; From c0e288c78248870bf9881b25ff8354c67753bbe2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 17:09:07 +0200 Subject: [PATCH 22/68] x86_64: implement canonicalising branches in switch expression --- src/arch/x86_64/CodeGen.zig | 192 +++++++++++++++++++++++++++++++----- 1 file changed, 167 insertions(+), 25 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 15e14bcbb8..4a7efda02f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -200,6 +200,34 @@ const Branch = struct { self.inst_table.deinit(gpa); self.* = undefined; } + + const FormatContext = struct { + insts: []const Air.Inst.Index, + mcvs: []const MCValue, + }; + + fn fmt( + ctx: FormatContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + comptime 
assert(unused_format_string.len == 0); + try writer.writeAll("Branch {\n"); + for (ctx.insts) |inst, i| { + const mcv = ctx.mcvs[i]; + try writer.print(" %{d} => {}\n", .{ inst, mcv }); + } + try writer.writeAll("}"); + } + + fn fmtDebug(self: @This()) std.fmt.Formatter(fmt) { + return .{ .data = .{ + .insts = self.inst_table.keys(), + .mcvs = self.inst_table.values(), + } }; + } }; const StackAllocation = struct { @@ -232,7 +260,7 @@ const BigTomb = struct { fn finishAir(bt: *BigTomb, result: MCValue) void { const is_used = !bt.function.liveness.isUnused(bt.inst); if (is_used) { - log.debug("%{d} => {}", .{ bt.inst, result }); + log.debug(" (saving %{d} => {})", .{ bt.inst, result }); const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result); } @@ -795,6 +823,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. + log.debug(" (processing death of %{d})", .{inst}); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -822,8 +851,10 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { + log.debug("finishAir: %{d}, {}, {any}", .{ inst, result, operands }); var tomb_bits = self.liveness.getTombBits(inst); for (operands) |op| { + log.debug(" (processing {})", .{op}); const dies = @truncate(u1, tomb_bits) != 0; tomb_bits >>= 1; if (!dies) continue; @@ -834,7 +865,7 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live } const is_used = @truncate(u1, tomb_bits) == 0; if (is_used) { - log.debug("%{d} => {}", .{ inst, result }); + log.debug(" (saving %{d} => {})", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -4647,6 +4678,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const reloc = try self.genCondBrMir(cond_ty, cond); + log.debug("airCondBr: %{d}", .{inst}); + // If the condition dies here in this condbr instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -4674,15 +4707,17 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // Revert to the previous register and stack allocation state. - var saved_then_branch = self.branch_stack.pop(); - defer saved_then_branch.deinit(self.gpa); + var then_branch = self.branch_stack.pop(); + defer then_branch.deinit(self.gpa); self.revertState(saved_state); try self.performReloc(reloc); - const else_branch = self.branch_stack.addOneAssumeCapacity(); - else_branch.* = .{}; + try self.branch_stack.append(.{}); + errdefer { + _ = self.branch_stack.pop(); + } try self.ensureProcessDeathCapacity(liveness_condbr.else_deaths.len); for (liveness_condbr.else_deaths) |operand| { @@ -4690,6 +4725,9 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } try self.genBody(else_body); + var else_branch = self.branch_stack.pop(); + defer else_branch.deinit(self.gpa); + // At this point, each branch will possibly have conflicting values for where // each instruction is stored. 
They agree, however, on which instructions are alive/dead. // We use the first ("then") branch as canonical, and here emit @@ -4698,15 +4736,23 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. - const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2]; + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count()); + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); + } + + log.debug("Then branch: {}", .{then_branch.fmtDebug()}); + log.debug("Else branch: {}", .{else_branch.fmtDebug()}); + const else_slice = else_branch.inst_table.entries.slice(); const else_keys = else_slice.items(.key); const else_values = else_slice.items(.value); for (else_keys) |else_key, else_idx| { const else_value = else_values[else_idx]; - const canon_mcv = if (saved_then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { + const canon_mcv = if (then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { // The instruction's MCValue is overridden in both branches. parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value); if (else_value == .dead) { @@ -4718,7 +4764,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (else_value == .dead) continue; // The instruction is only overridden in the else branch. - var i: usize = self.branch_stack.items.len - 2; + var i: usize = self.branch_stack.items.len - 1; while (true) { i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? 
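The `i -= 1` walk in the blocks above is easy to misread, so here is the same search in isolation: starting just below the current branch, scan outward until some enclosing scope still knows where the value lives; falling off the bottom would mean liveness failed to mark a death. A sketch with toy types, not the real `Branch`:

    const std = @import("std");

    // Each entry models one branch's inst_table; null means "not overridden here".
    fn resolveInParents(branch_stack: []const ?u32, current: usize) u32 {
        var i: usize = current;
        while (true) {
            i -= 1; // underflow here would mean the instruction should be dead
            if (branch_stack[i]) |mcv| return mcv;
        }
    }

    test "innermost enclosing binding wins" {
        const stack = [_]?u32{ 7, 9, null };
        try std.testing.expectEqual(@as(u32, 9), resolveInParents(&stack, stack.len - 1));
    }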
if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { @@ -4733,8 +4779,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); // TODO track the new register / stack allocation } - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); - const then_slice = saved_then_branch.inst_table.entries.slice(); + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, then_branch.inst_table.count()); + const then_slice = then_branch.inst_table.entries.slice(); const then_keys = then_slice.items(.key); const then_values = then_slice.items(.value); for (then_keys) |then_key, then_idx| { @@ -4746,7 +4792,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (then_value == .dead) continue; const parent_mcv = blk: { - var i: usize = self.branch_stack.items.len - 2; + log.debug("{d}", .{self.branch_stack.items.len}); + var i: usize = self.branch_stack.items.len - 1; while (true) { i -= 1; if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { @@ -4762,11 +4809,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // TODO track the new register / stack allocation } - { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); - } - // We already took care of pl_op.operand earlier, so we're going // to pass .none here return self.finishAir(inst, .unreach, .{ .none, .none, .none }); @@ -5139,6 +5181,8 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { ); defer self.gpa.free(liveness.deaths); + log.debug("airSwitch: %{d}", .{inst}); + // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -5150,6 +5194,15 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { } } + var branch_stack = std.ArrayList(Branch).init(self.gpa); + defer { + for (branch_stack.items) |*bs| { + bs.deinit(self.gpa); + } + branch_stack.deinit(); + } + try branch_stack.ensureTotalCapacityPrecise(switch_br.data.cases_len + 1); + while (case_i < switch_br.data.cases_len) : (case_i += 1) { const case = self.air.extraData(Air.SwitchBr.Case, extra_index); const items = @ptrCast([]const Air.Inst.Ref, self.air.extra[case.end..][0..case.data.items_len]); @@ -5179,10 +5232,9 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(case_body); - // Revert to the previous register and stack allocation state. - var saved_case_branch = self.branch_stack.pop(); - defer saved_case_branch.deinit(self.gpa); + branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + // Revert to the previous register and stack allocation state. self.revertState(saved_state); for (relocs) |reloc| { @@ -5192,10 +5244,13 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { if (switch_br.data.else_body_len > 0) { const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; + + // Capture the state of register and stack allocation state so that we can revert to it. + const saved_state = try self.captureState(); + try self.branch_stack.append(.{}); - defer { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); + errdefer { + _ = self.branch_stack.pop(); } const else_deaths = liveness.deaths.len - 1; @@ -5206,8 +5261,29 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(else_body); - // TODO consolidate returned MCValues between prongs and else branch like we do - // in airCondBr. 
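The capture/revert pairing that airSwitch leans on here is worth seeing in the small: every prong's body is generated from the same snapshot, so stack and register allocations made inside one case cannot leak into the next. A toy version of that discipline (the real `captureState`/`revertState` carry much more state):

    const std = @import("std");

    const State = struct { next_stack_offset: u32 = 0 };

    fn genCaseBody(s: *State) void {
        s.next_stack_offset += 16; // pretend the body allocated a stack slot
    }

    test "every prong starts from the captured snapshot" {
        var state = State{};
        const saved = state; // captureState
        var case: usize = 0;
        while (case < 3) : (case += 1) {
            genCaseBody(&state);
            state = saved; // revertState
        }
        try std.testing.expectEqual(@as(u32, 0), state.next_stack_offset);
    }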
+ branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + + // Revert to the previous register and stack allocation state. + self.revertState(saved_state); + } + + // Consolidate returned MCValues between prongs and else branch like we do + // in airCondBr. + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); + } + for (branch_stack.items) |bs, i| { + log.debug("Case-{d} branch: {}", .{ i, bs.fmtDebug() }); + } + + // TODO: can we reduce the complexity of this algorithm? + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + var i: usize = branch_stack.items.len; + while (i > 1) : (i -= 1) { + const canon_branch = &branch_stack.items[i - 2]; + const target_branch = &branch_stack.items[i - 1]; + try self.canonicaliseBranches(parent_branch, canon_branch, target_branch); } // We already took care of pl_op.operand earlier, so we're going @@ -5215,6 +5291,72 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .unreach, .{ .none, .none, .none }); } +fn canonicaliseBranches(self: *Self, parent_branch: *Branch, canon_branch: *Branch, target_branch: *Branch) !void { + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, target_branch.inst_table.count()); + + const target_slice = target_branch.inst_table.entries.slice(); + const target_keys = target_slice.items(.key); + const target_values = target_slice.items(.value); + + for (target_keys) |target_key, target_idx| { + const target_value = target_values[target_idx]; + const canon_mcv = if (canon_branch.inst_table.fetchSwapRemove(target_key)) |canon_entry| blk: { + // The instruction's MCValue is overridden in both branches. + parent_branch.inst_table.putAssumeCapacity(target_key, canon_entry.value); + if (target_value == .dead) { + assert(canon_entry.value == .dead); + continue; + } + break :blk canon_entry.value; + } else blk: { + if (target_value == .dead) + continue; + // The instruction is only overridden in the else branch. + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? + if (self.branch_stack.items[i].inst_table.get(target_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating target_entry {d} {}=>{}", .{ target_key, target_value, canon_mcv }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(target_key), canon_mcv, target_value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, canon_branch.inst_table.count()); + const canon_slice = canon_branch.inst_table.entries.slice(); + const canon_keys = canon_slice.items(.key); + const canon_values = canon_slice.items(.value); + for (canon_keys) |canon_key, canon_idx| { + const canon_value = canon_values[canon_idx]; + // We already deleted the items from this table that matched the target_branch. + // So these are all instructions that are only overridden in the canon branch. 
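The fetchSwapRemove dance in `canonicaliseBranches` implements a simple merge rule: a key recorded in both branches is consolidated into the parent and deleted from the canonical side, so the second loop only ever sees canon-only keys. The same rule with plain hash maps (illustrative only, u32 standing in for instruction indices and MCValues):

    const std = @import("std");

    test "keys seen in both branches move to the parent" {
        const gpa = std.testing.allocator;
        var parent = std.AutoHashMap(u32, u32).init(gpa);
        defer parent.deinit();
        var canon = std.AutoHashMap(u32, u32).init(gpa);
        defer canon.deinit();
        var target = std.AutoHashMap(u32, u32).init(gpa);
        defer target.deinit();

        try canon.put(1, 10); // %1 overridden in the canonical branch
        try target.put(1, 20); // ...and in the target branch

        var it = target.iterator();
        while (it.next()) |entry| {
            if (canon.fetchRemove(entry.key_ptr.*)) |kv| {
                try parent.put(kv.key, kv.value); // parent owns the canonical location now
            }
        }
        try std.testing.expectEqual(@as(u32, 10), parent.get(1).?);
        try std.testing.expect(canon.get(1) == null);
    }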
+ parent_branch.inst_table.putAssumeCapacity(canon_key, canon_value); + log.debug("canon_value = {}", .{canon_value}); + if (canon_value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(canon_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating canon_entry {d} {}=>{}", .{ canon_key, parent_mcv, canon_value }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(canon_key), parent_mcv, canon_value); + // TODO track the new register / stack allocation + } +} + fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { From b9c31a8703fdd8297673aa65a1bdefd56cd13b77 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 17:19:14 +0200 Subject: [PATCH 23/68] x86_64: refactor cond_br with canonicaliseBranches helper --- src/arch/x86_64/CodeGen.zig | 66 ++----------------------------------- 1 file changed, 2 insertions(+), 64 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4a7efda02f..1164df4b19 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4736,9 +4736,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. - const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count()); - log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); @@ -4747,67 +4744,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("Then branch: {}", .{then_branch.fmtDebug()}); log.debug("Else branch: {}", .{else_branch.fmtDebug()}); - const else_slice = else_branch.inst_table.entries.slice(); - const else_keys = else_slice.items(.key); - const else_values = else_slice.items(.value); - for (else_keys) |else_key, else_idx| { - const else_value = else_values[else_idx]; - const canon_mcv = if (then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { - // The instruction's MCValue is overridden in both branches. - parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value); - if (else_value == .dead) { - assert(then_entry.value == .dead); - continue; - } - break :blk then_entry.value; - } else blk: { - if (else_value == .dead) - continue; - // The instruction is only overridden in the else branch. - var i: usize = self.branch_stack.items.len - 1; - while (true) { - i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? - if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. 
- try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); - // TODO track the new register / stack allocation - } - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, then_branch.inst_table.count()); - const then_slice = then_branch.inst_table.entries.slice(); - const then_keys = then_slice.items(.key); - const then_values = then_slice.items(.value); - for (then_keys) |then_key, then_idx| { - const then_value = then_values[then_idx]; - // We already deleted the items from this table that matched the else_branch. - // So these are all instructions that are only overridden in the then branch. - parent_branch.inst_table.putAssumeCapacity(then_key, then_value); - log.debug("then_value = {}", .{then_value}); - if (then_value == .dead) - continue; - const parent_mcv = blk: { - log.debug("{d}", .{self.branch_stack.items.len}); - var i: usize = self.branch_stack.items.len - 1; - while (true) { - i -= 1; - if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. - try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); - // TODO track the new register / stack allocation - } + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try self.canonicaliseBranches(parent_branch, &then_branch, &else_branch); // We already took care of pl_op.operand earlier, so we're going // to pass .none here From 1d57b347e9aeddc2de33b1b77b331d36e4900425 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 20:49:37 +0200 Subject: [PATCH 24/68] x86_64: clean up logging --- src/arch/x86_64/CodeGen.zig | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1164df4b19..bb6191af06 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -222,6 +222,14 @@ const Branch = struct { try writer.writeAll("}"); } + fn format(branch: Branch, comptime unused_format_string: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = branch; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Branch directly; use ty.fmtDebug()"); + } + fn fmtDebug(self: @This()) std.fmt.Formatter(fmt) { return .{ .data = .{ .insts = self.inst_table.keys(), @@ -823,7 +831,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. - log.debug(" (processing death of %{d})", .{inst}); + log.debug("%{d} => {}", .{ inst, MCValue.dead }); // When editing this function, note that the logic must synchronize with `reuseOperand`. 
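The `format` stub added to `Branch` in this commit is a handy Zig idiom: declaring `format` with a `@compileError` body turns any accidental `{}` on the type into a compile error, while `fmtDebug` stays the one sanctioned way to print it. A self-contained version of the pattern (hypothetical `Thing` type):

    const std = @import("std");

    const Thing = struct {
        id: u32,

        pub fn format(t: Thing, comptime f: []const u8, o: std.fmt.FormatOptions, w: anytype) !void {
            _ = t;
            _ = f;
            _ = o;
            _ = w;
            @compileError("do not format Thing directly; use thing.fmtDebug()");
        }

        fn fmtValue(t: Thing, comptime f: []const u8, o: std.fmt.FormatOptions, w: anytype) @TypeOf(w).Error!void {
            _ = f;
            _ = o;
            try w.print("Thing#{d}", .{t.id});
        }

        pub fn fmtDebug(t: Thing) std.fmt.Formatter(fmtValue) {
            return .{ .data = t };
        }
    };

    test "explicit formatter still works" {
        var buf: [32]u8 = undefined;
        const s = try std.fmt.bufPrint(&buf, "{}", .{(Thing{ .id = 7 }).fmtDebug()});
        try std.testing.expectEqualStrings("Thing#7", s);
    }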
const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -851,10 +859,8 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { - log.debug("finishAir: %{d}, {}, {any}", .{ inst, result, operands }); var tomb_bits = self.liveness.getTombBits(inst); for (operands) |op| { - log.debug(" (processing {})", .{op}); const dies = @truncate(u1, tomb_bits) != 0; tomb_bits >>= 1; if (!dies) continue; @@ -865,7 +871,7 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live } const is_used = @truncate(u1, tomb_bits) == 0; if (is_used) { - log.debug(" (saving %{d} => {})", .{ inst, result }); + log.debug("%{d} => {}", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -4678,8 +4684,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const reloc = try self.genCondBrMir(cond_ty, cond); - log.debug("airCondBr: %{d}", .{inst}); - // If the condition dies here in this condbr instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -4736,6 +4740,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. + log.debug("airCondBr: %{d}", .{inst}); log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); @@ -5119,8 +5124,6 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { ); defer self.gpa.free(liveness.deaths); - log.debug("airSwitch: %{d}", .{inst}); - // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -5207,6 +5210,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // Consolidate returned MCValues between prongs and else branch like we do // in airCondBr. 
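The consolidation referenced above folds the recorded case branches right to left: each pass canonicalises branch i-1 against branch i-2, so by the end prong 0's view of every value's location is authoritative. The loop's shape, reduced to a toy merge step:

    const std = @import("std");

    fn canonicalise(canon: *u32, target: u32) void {
        canon.* = @minimum(canon.*, target); // stand-in for moving values to one home
    }

    test "right-to-left fold leaves prong 0 canonical" {
        var branches = [_]u32{ 5, 9, 3 };
        var i: usize = branches.len;
        while (i > 1) : (i -= 1) {
            canonicalise(&branches[i - 2], branches[i - 1]);
        }
        try std.testing.expectEqual(@as(u32, 3), branches[0]);
    }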
+ log.debug("airSwitch: %{d}", .{inst}); log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); From 28f525baa4ff0480043a16dd2467f231c8d6526a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 21:51:49 +0200 Subject: [PATCH 25/68] x86_64: handle ptr_stack_offset for blocks --- src/arch/x86_64/CodeGen.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index bb6191af06..63f6b48d1e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5328,7 +5328,7 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { block_data.mcv = switch (operand_mcv) { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, - .eflags, .immediate => blk: { + .eflags, .immediate, .ptr_stack_offset => blk: { const new_mcv = try self.allocRegOrMem(block, true); try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; From 619d82234ecbb8a130e827e9de6f545288649a58 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 15:12:30 +0200 Subject: [PATCH 26/68] x86_64: clean up return registers for unspecified cc --- src/arch/x86_64/CodeGen.zig | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 63f6b48d1e..0269a44be8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -439,16 +439,17 @@ fn gen(self: *Self) InnerError!void { }); if (self.ret_mcv == .stack_offset) { - // The address where to store the return value for the caller is in `.rdi` + // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8); self.next_stack_offset = stack_offset; self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); - try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{}); + const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = ret_reg }, .{}); self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; - log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset}); + log.debug("gen: spilling {s} to stack at offset {}", .{ @tagName(ret_reg), stack_offset }); } _ = try self.addInst(.{ @@ -831,7 +832,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. - log.debug("%{d} => {}", .{ inst, MCValue.dead }); + log.debug("%{d} => {}", .{ inst, MCValue{ .dead = {} } }); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -3960,7 +3961,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
try self.register_manager.getReg(reg, null);
     }
 
-    const rdi_lock: ?RegisterLock = blk: {
+    const ret_reg_lock: ?RegisterLock = blk: {
         if (info.return_value == .stack_offset) {
             const ret_ty = fn_ty.fnReturnType();
             const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*));
@@ -3968,17 +3969,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
             const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align));
             log.debug("airCall: return value on stack at offset {}", .{stack_offset});
 
-            try self.register_manager.getReg(.rdi, null);
-            try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset });
-            const rdi_lock = self.register_manager.lockRegAssumeUnused(.rdi);
+            const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
+            try self.register_manager.getReg(ret_reg, null);
+            try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset });
+            const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg);
 
             info.return_value.stack_offset = stack_offset;
 
-            break :blk rdi_lock;
+            break :blk ret_reg_lock;
         }
         break :blk null;
     };
-    defer if (rdi_lock) |lock| self.register_manager.unlockReg(lock);
+    defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock);
 
     for (args) |arg, arg_i| {
         const mc_arg = info.args[arg_i];
@@ -7292,11 +7294,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
                     assert(ret_ty.isError());
                     result.return_value = .{ .immediate = 0 };
                 } else if (ret_ty_size <= 8) {
-                    result.return_value = .{ .register = .rdi };
+                    const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size);
+                    result.return_value = .{ .register = aliased_reg };
                 } else {
                     // We simply make the return MCValue a stack offset. However, the actual value
                     // for the offset will be populated later. We will also push the stack offset
-                    // value into .rdi register when we resolve the offset.
+                    // value into an appropriate register when we resolve the offset.
                     result.return_value = .{ .stack_offset = 0 };
                 }
             }

From e0167ae0e3f0e6c6a667b372b21c322d47ccd92d Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 3 Sep 2022 20:36:01 +0200
Subject: [PATCH 27/68] x86_64: allow for any index register in complex SIB
 encodings

This relieves register pressure and reduces generated code size (since
we can now use the same index register for both `mov_scale_src` and
`mov_scale_dst` MIR instructions).

Fix lowering of ModRM + SIB encodings where the index register is
extended: previously, we would carelessly ignore that case, generating
incorrect encodings.
---
 src/arch/x86_64/CodeGen.zig | 104 +++++++++++++-----------------------
 src/arch/x86_64/Emit.zig    |  69 +++++++++++++-----------
 src/arch/x86_64/Mir.zig     | 101 ++++++++++++++++++++++++++++------
 3 files changed, 161 insertions(+), 113 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 0269a44be8..b03acfb848 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -5932,7 +5932,6 @@ const InlineMemcpyOpts = struct {
     dest_stack_base: ?Register = null,
 };
 
-/// Spills .rax and .rcx.
 fn genInlineMemcpy(
     self: *Self,
     dst_ptr: MCValue,
@@ -5940,19 +5939,6 @@ fn genInlineMemcpy(
     len: MCValue,
     opts: InlineMemcpyOpts,
 ) InnerError!void {
-    // TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore
-    // How can we do this without context if the value inside .rax or .rcx we preserve contains
-    // value needed to perform the memcpy in the first place?
- // I think we should have an accumulator-based context that we pass with each subsequent helper - // call until we resolve the entire instruction. - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rcx }); - defer for (reg_locks) |lock| { - self.register_manager.unlockReg(lock); - }; - const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| self.register_manager.lockReg(reg) else @@ -5965,7 +5951,13 @@ fn genInlineMemcpy( null; defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - const dst_addr_reg = try self.register_manager.allocReg(null, gp); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); + const dst_addr_reg = regs[0]; + const src_addr_reg = regs[1]; + const index_reg = regs[2].to64(); + const count_reg = regs[3].to64(); + const tmp_reg = regs[4].to8(); + switch (dst_ptr) { .memory, .got_load, @@ -5998,10 +5990,7 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); - defer self.register_manager.unlockReg(dst_addr_reg_lock); - const src_addr_reg = try self.register_manager.allocReg(null, gp); switch (src_ptr) { .memory, .got_load, @@ -6034,26 +6023,13 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); - defer self.register_manager.unlockReg(src_addr_reg_lock); - - const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); - const count_reg = regs[0].to64(); - const tmp_reg = regs[1].to8(); try self.genSetReg(Type.usize, count_reg, len); - // mov rcx, 0 + // mov index_reg, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 0 }, - }); - - // mov rax, 0 - _ = try self.addInst(.{ - .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 0 }, }); @@ -6075,37 +6051,30 @@ fn genInlineMemcpy( } }, }); - // mov tmp, [addr + rcx] + // mov tmp, [addr + index_reg] _ = try self.addInst(.{ .tag = .mov_scale_src, .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .reg2 = src_addr_reg, }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // mov [stack_offset + rax], tmp + // mov [stack_offset + index_reg], tmp _ = try self.addInst(.{ .tag = .mov_scale_dst, .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg, .reg2 = tmp_reg.to8(), }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // add rcx, 1 + // add index_reg, 1 _ = try self.addInst(.{ .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 1 }, - }); - - // add rax, 1 - _ = try self.addInst(.{ - .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); @@ -6127,7 +6096,6 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } -/// Spills .rax register. 
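Stripped of MIR plumbing, the loop this refactor emits is just a byte copy driven by one shared index register and a count register. Plain Zig standing in for the generated machine code (comments name the MIR steps shown above):

    const std = @import("std");

    fn inlineMemcpy(dst: [*]u8, src: [*]const u8, len: usize) void {
        var index: usize = 0; // mov index_reg, 0
        var count: usize = len; // count_reg
        while (count != 0) : (count -= 1) { // cmp count_reg, 0; je end
            const tmp = src[index]; // mov tmp, [addr + index_reg]
            dst[index] = tmp; // mov [stack_offset + index_reg], tmp
            index += 1; // add index_reg, 1
        }
    }

    test "copies len bytes" {
        const src = "hello";
        var dst: [5]u8 = undefined;
        inlineMemcpy(&dst, src, src.len);
        try std.testing.expectEqualStrings("hello", &dst);
    }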
fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -6135,12 +6103,22 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and then restore - try self.register_manager.getReg(.rax, null); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); + const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg); + + const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); + + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const addr_reg = regs[0]; + const index_reg = regs[1].to64(); - const addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, @@ -6173,17 +6151,15 @@ fn genInlineMemset( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, .rax, len); - try self.genBinOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 }); + try self.genSetReg(Type.usize, index_reg, len); + try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 }); // loop: - // cmp rax, -1 + // cmp index_reg, -1 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = @bitCast(u32, @as(i32, -1)) }, }); @@ -6202,24 +6178,20 @@ fn genInlineMemset( if (x > math.maxInt(i32)) { return self.fail("TODO inline memset for value immediate larger than 32bits", .{}); } - // mov byte ptr [rbp + rax + stack_offset], imm - const payload = try self.addExtra(Mir.ImmPair{ - .dest_off = 0, - .operand = @truncate(u32, x), - }); + // mov byte ptr [rbp + index_reg + stack_offset], imm _ = try self.addInst(.{ .tag = .mov_mem_index_imm, .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }), - .data = .{ .payload = payload }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDispImm.encode(index_reg, 0, @truncate(u32, x))) }, }); }, else => return self.fail("TODO inline memset for value of type {}", .{value}), } - // sub rax, 1 + // sub index_reg, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 66e603aab0..61ae772794 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -615,14 +615,15 @@ inline fn immOpSize(u_imm: u32) u6 { fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; - // OP reg1, [reg2 + scale*rcx + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); + // OP reg1, [reg2 + scale*index + imm32] const scale_index = ScaleIndex{ .scale = scale, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc(tag, ops.reg1, 
RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg2, .scale_index = scale_index, }), emit.code); @@ -631,22 +632,16 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp.index, }; - if (ops.reg2 == .none) { - // OP qword ptr [reg1 + scale*rax + 0], imm32 - return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = 0, - .base = ops.reg1, - .scale_index = scale_index, - }), imm, emit.code); - } - // OP [reg1 + scale*rax + imm32], reg2 + assert(ops.reg2 != .none); + // OP [reg1 + scale*index + imm32], reg2 return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg1, .scale_index = scale_index, }), ops.reg2, emit.code); @@ -656,24 +651,24 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP qword ptr [reg1 + scale*rax + imm32], imm32 + // OP qword ptr [reg1 + scale*index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const ptr_size: Memory.PtrSize = switch (ops.flags) { 0b00 => .byte_ptr, 0b01 => .word_ptr, @@ -682,14 +677,14 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v }; const scale_index = ScaleIndex{ .scale = 0, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP ptr [reg1 + rax*1 + imm32], imm32 + // OP ptr [reg1 + index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(ptr_size, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { @@ -957,18 +952,19 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp); }, 0b10 => { - // lea reg, [rbp + rcx + imm32] - const imm = 
emit.mir.instructions.items(.data)[inst].imm; + // lea reg, [rbp + index + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; const scale_index = ScaleIndex{ .scale = 0, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc( .lea, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = src_reg, .scale_index = scale_index, }), @@ -2255,6 +2251,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st encoder.rex(.{ .w = wide, .b = base.isExtended(), + .x = if (mem_op.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2360,10 +2357,12 @@ fn lowerToMiXEnc( encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2415,11 +2414,13 @@ fn lowerToRmEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2460,11 +2461,13 @@ fn lowerToMrEnc( .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2504,11 +2507,13 @@ fn lowerToRmiEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2545,10 +2550,12 @@ fn lowerToVmEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); @@ -2585,10 +2592,12 @@ fn lowerToMvEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index ca19847042..182f3267a6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -44,25 +44,28 @@ pub const Inst = struct { /// 0b01 word ptr [reg1 + imm32], imm16 /// 0b10 dword ptr [reg1 + imm32], imm32 /// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `ImmPair` as payload adc_mem_imm, - /// form: reg1, [reg2 + scale*rcx + imm32] - /// ops flags scale - /// 0b00 1 - /// 0b01 2 - /// 0b10 4 - /// 0b11 8 - adc_scale_src, - - /// 
form: [reg1 + scale*rax + imm32], reg2 - /// form: [reg1 + scale*rax + 0], imm32 + /// form: reg1, [reg2 + scale*index + imm32] /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 /// Notes: - /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * Uses `IndexRegisterDisp` as payload + adc_scale_src, + + /// form: [reg1 + scale*index + imm32], reg2 + /// ops flags scale + /// 0b00 1 + /// 0b01 2 + /// 0b10 4 + /// 0b11 8 + /// Notes: + /// * Uses `IndexRegisterDisp` payload. adc_scale_dst, /// form: [reg1 + scale*rax + imm32], imm32 @@ -72,14 +75,16 @@ pub const Inst = struct { /// 0b10 4 /// 0b11 8 /// Notes: - /// * Data field `payload` points at `ImmPair`. + /// * Uses `IndexRegisterDispImm` payload. adc_scale_imm, /// ops flags: form: - /// 0b00 byte ptr [reg1 + rax + imm32], imm8 - /// 0b01 word ptr [reg1 + rax + imm32], imm16 - /// 0b10 dword ptr [reg1 + rax + imm32], imm32 - /// 0b11 qword ptr [reg1 + rax + imm32], imm32 (sign-extended to imm64) + /// 0b00 byte ptr [reg1 + index + imm32], imm8 + /// 0b01 word ptr [reg1 + index + imm32], imm16 + /// 0b10 dword ptr [reg1 + index + imm32], imm32 + /// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `IndexRegisterDispImm` payload. adc_mem_index_imm, // The following instructions all have the same encoding as `adc`. @@ -174,7 +179,9 @@ pub const Inst = struct { /// 0b00 reg1, [reg2 + imm32] /// 0b00 reg1, [ds:imm32] /// 0b01 reg1, [rip + imm32] - /// 0b10 reg1, [reg2 + rcx + imm32] + /// 0b10 reg1, [reg2 + index + imm32] + /// Notes: + /// * 0b10 uses `IndexRegisterDisp` payload lea, /// ops flags: form: @@ -461,6 +468,66 @@ pub const Inst = struct { } }; +pub const IndexRegisterDisp = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + pub fn encode(index: Register, disp: u32) IndexRegisterDisp { + return .{ + .index = @enumToInt(index), + .disp = disp, + }; + } + + pub fn decode(this: IndexRegisterDisp) struct { + index: Register, + disp: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + }; + } +}; + +/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list +/// instead of having two structs, one a superset of the other one? +pub const IndexRegisterDispImm = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + /// Immediate + imm: u32, + + pub fn encode(index: Register, disp: u32, imm: u32) IndexRegisterDispImm { + return .{ + .index = @enumToInt(index), + .disp = disp, + .imm = imm, + }; + } + + pub fn decode(this: IndexRegisterDispImm) struct { + index: Register, + disp: u32, + imm: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + .imm = this.imm, + }; + } +}; + +/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers +/// in a compact manner. 
pub const RegisterList = struct { bitset: BitSet = BitSet.initEmpty(), From 423f424c278f8ba41e36096a1603403eb33c6284 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 20:59:58 +0200 Subject: [PATCH 28/68] libstd: use windows.GetStdHandle() with stage2_x86_64 backend for now --- lib/std/io.zig | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/std/io.zig b/lib/std/io.zig index 50d134b856..d878afd3ae 100644 --- a/lib/std/io.zig +++ b/lib/std/io.zig @@ -36,6 +36,10 @@ pub const default_mode: ModeOverride = if (is_async) Mode.evented else .blocking fn getStdOutHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. + return os.windows.GetStdHandle(os.windows.STD_OUTPUT_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdOutput; } @@ -58,6 +62,10 @@ pub fn getStdOut() File { fn getStdErrHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. + return os.windows.GetStdHandle(os.windows.STD_ERROR_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdError; } @@ -80,6 +88,10 @@ pub fn getStdErr() File { fn getStdInHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. + return os.windows.GetStdHandle(os.windows.STD_INPUT_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdInput; } From 1e2a2d6fad8d83329de1b65338d741c1c6cd2e7d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 22:53:56 +0200 Subject: [PATCH 29/68] coff: fix bug in lowerUnnamedConst --- src/link/Coff.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index b5670ce5a1..17ec6e5f9c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -485,7 +485,7 @@ fn populateMissingMetadata(self: *Coff) !void { try self.locals.append(gpa, .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, @@ -988,7 +988,7 @@ pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.In }; defer gpa.free(sym_name); try self.setSymbolName(sym, sym_name); - sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.?); + sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.? 
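        // COFF symbol section numbers are 1-based (0 is reserved for UNDEFINED), hence the + 1.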
+ 1); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); @@ -1334,7 +1334,7 @@ pub fn deleteExport(self: *Coff, exp: Export) void { sym.* = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, From 66bad3eaaf82b21363c2212eeb1eaa4c171f2625 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Sep 2022 10:22:21 +0200 Subject: [PATCH 30/68] coff: mark relocations dirty when target atoms change --- src/arch/x86_64/Emit.zig | 2 -- src/link/Coff.zig | 56 +++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 61ae772794..45e58be972 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1029,7 +1029,6 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .addend = 0, .pcrel = true, .length = 2, - .prev_vaddr = atom.getSymbol(coff_file).value, }); } else { return emit.fail("TODO implement lea reg, [rip + reloc] for linking backends different than MachO", .{}); @@ -1165,7 +1164,6 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .addend = 0, .pcrel = true, .length = 2, - .prev_vaddr = atom.getSymbol(coff_file).value, }); } else { return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 17ec6e5f9c..74d6ce372e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -114,11 +114,6 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, -/// A table of bindings indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -bindings: BindingTable = .{}, - pub const Reloc = struct { @"type": enum { got, @@ -130,12 +125,11 @@ pub const Reloc = struct { addend: u32, pcrel: bool, length: u2, - prev_vaddr: u32, + dirty: bool = true, }; const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); const BaseRelocationTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(u32)); -const BindingTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(SymbolWithLoc)); const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); const default_file_alignment: u16 = 0x200; @@ -192,6 +186,16 @@ pub const SymbolWithLoc = struct { // null means it's a synthetic global or Zig source. file: ?u32 = null, + + pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { + if (this.file == null and other.file == null) { + return this.sym_index == other.sym_index; + } + if (this.file != null and other.file != null) { + return this.sym_index == other.sym_index and this.file.? 
== other.file.?; + } + return false; + } }; /// When allocating, the ideal_capacity is calculated by @@ -314,14 +318,6 @@ pub fn deinit(self: *Coff) void { } self.base_relocs.deinit(gpa); } - - { - var it = self.bindings.valueIterator(); - while (it.next()) |bindings| { - bindings.deinit(gpa); - } - self.bindings.deinit(gpa); - } } fn populateMissingMetadata(self: *Coff) !void { @@ -720,7 +716,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { .addend = 0, .pcrel = false, .length = 3, - .prev_vaddr = sym.value, }); const target_sym = self.getSymbol(target); @@ -753,10 +748,6 @@ fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { log.debug("allocated import atom at 0x{x}", .{sym.value}); - const target_sym = self.getSymbol(target); - assert(target_sym.section_number == .UNDEFINED); - try atom.addBinding(self, target); - return atom; } @@ -798,6 +789,17 @@ fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { } } +fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { + // TODO: reverse-lookup might come in handy here + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + if (!reloc.target.eql(target)) continue; + reloc.dirty = true; + } + } +} + fn resolveRelocs(self: *Coff, atom: *Atom) !void { const relocs = self.relocs.get(atom) orelse return; const source_sym = atom.getSymbol(self); @@ -807,6 +809,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { log.debug("relocating '{s}'", .{atom.getName(self)}); for (relocs.items) |*reloc| { + if (!reloc.dirty) continue; + const target_vaddr = switch (reloc.@"type") { .got => blk: { const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; @@ -821,7 +825,6 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; - if (target_vaddr_with_addend == reloc.prev_vaddr) continue; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ source_sym.value + reloc.offset, @@ -830,6 +833,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { @tagName(reloc.@"type"), }); + reloc.dirty = false; + if (reloc.pcrel) { const source_vaddr = source_sym.value + reloc.offset; const disp = target_vaddr_with_addend - source_vaddr - 4; @@ -854,8 +859,6 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { else => unreachable, }, } - - reloc.prev_vaddr = target_vaddr_with_addend; } } @@ -1131,7 +1134,9 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, if (vaddr != sym.value) { sym.value = vaddr; log.debug(" (updating GOT entry)", .{}); - const got_atom = self.getGotAtomForSymbol(.{ .sym_index = atom.sym_index, .file = null }).?; + const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; + const got_atom = self.getGotAtomForSymbol(got_target).?; + self.markRelocsDirty(got_target); try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { @@ -1156,6 +1161,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writePtrWidthAtom(got_atom); } + self.markRelocsDirty(atom.getSymbolWithLoc()); try self.writeAtom(atom, code); } @@ -1457,7 +1463,6 @@ pub fn getDeclVAddr( const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; - const target_sym = self.getSymbol(target); try atom.addRelocation(self, .{ .@"type" = .direct, .target = target, @@ -1465,7 +1470,6 @@ pub fn getDeclVAddr( .addend = reloc_info.addend, 
.pcrel = false, .length = 3, - .prev_vaddr = target_sym.value, }); try atom.addBaseRelocation(self, @intCast(u32, reloc_info.offset)); From 56a131d27a45af793b6adc0795bd240ed74d3634 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Sep 2022 14:55:02 +0200 Subject: [PATCH 31/68] test-cases: fix compiler error locations for hello world with updates --- test/cases/aarch64-macos/hello_world_with_updates.0.zig | 2 +- test/cases/x86_64-linux/hello_world_with_updates.0.zig | 2 +- test/cases/x86_64-macos/hello_world_with_updates.0.zig | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cases/aarch64-macos/hello_world_with_updates.0.zig b/test/cases/aarch64-macos/hello_world_with_updates.0.zig index 0de742bdec..3c7a494180 100644 --- a/test/cases/aarch64-macos/hello_world_with_updates.0.zig +++ b/test/cases/aarch64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=aarch64-macos // -// :107:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-linux/hello_world_with_updates.0.zig b/test/cases/x86_64-linux/hello_world_with_updates.0.zig index 4816ec1b26..c9c94442d0 100644 --- a/test/cases/x86_64-linux/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-linux/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=x86_64-linux // -// :107:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-macos/hello_world_with_updates.0.zig b/test/cases/x86_64-macos/hello_world_with_updates.0.zig index 998b2f13eb..5860c9c0f6 100644 --- a/test/cases/x86_64-macos/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=x86_64-macos // -// :107:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here From 467d69c68aac0459b6a0c2876083d7295e43134d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Sep 2022 21:27:26 +0200 Subject: [PATCH 32/68] x86_64: fix SystemV calling convention --- src/arch/x86_64/CodeGen.zig | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b03acfb848..25e8695e82 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7216,19 +7216,23 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { for (param_types) |ty, i| { assert(ty.hasRuntimeBits()); - if (self.target.os.tag != .windows) { - return self.fail("TODO SysV calling convention", .{}); + const classes: []const abi.Class = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, + else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*), .none), + }; + if (classes.len > 1) { + return self.fail("TODO handle multiple classes per type", .{}); } - - switch (abi.classifyWindows(ty, self.target.*)) { + switch (classes[0]) { .integer => blk: { if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough result.args[i] = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; continue; }, - .sse => return self.fail("TODO float/vector via SSE on Windows", .{}), .memory => {}, // fallthrough - else => unreachable, + 
else => |class| return self.fail("TODO handle calling convention class {s}", .{ + @tagName(class), + }), } const param_size = @intCast(u32, ty.abiSize(self.target.*)); @@ -7237,7 +7241,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; next_stack_offset = offset; } - // Align the stack to 16bytes before allocating shadow stack space. + + // Align the stack to 16bytes before allocating shadow stack space (if any). const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); const padding = aligned_next_stack_offset - next_stack_offset; if (padding > 0) { @@ -7247,11 +7252,13 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push - // the args onto the stack so that there is no padding between the first argument and - // the standard preamble. - // alignment padding | ret value (if > 8) | args ... | shadow stack space | $rbp | - result.stack_byte_count = aligned_next_stack_offset + 4 * @sizeOf(u64); + const shadow_stack_space: u32 = switch (self.target.os.tag) { + .windows => @intCast(u32, 4 * @sizeOf(u64)), + else => 0, + }; + + // alignment padding | args ... | shadow stack space (if any) | ret addr | $rbp | + result.stack_byte_count = aligned_next_stack_offset + shadow_stack_space; result.stack_align = 16; }, .Unspecified => { From f1bdf3f62f05388b75d6816b65c9cc5caec71cd9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 09:25:47 +0200 Subject: [PATCH 33/68] coff: fix writing strtab to PE image file --- src/link/Coff.zig | 50 +++++++++++++++++++++++++++++++-------------- src/link/strtab.zig | 4 ++++ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 74d6ce372e..a85f8c3396 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -324,6 +324,19 @@ fn populateMissingMetadata(self: *Coff) !void { assert(self.llvm_object == null); const gpa = self.base.allocator; + try self.strtab.buffer.ensureUnusedCapacity(gpa, @sizeOf(u32)); + self.strtab.buffer.appendNTimesAssumeCapacity(0, @sizeOf(u32)); + + // Index 0 is always a null symbol. + try self.locals.append(gpa, .{ + .name = [_]u8{0} ** 8, + .value = 0, + .section_number = .UNDEFINED, + .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, + .storage_class = .NULL, + .number_of_aux_symbols = 0, + }); + if (self.text_section_index == null) { self.text_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.program_code_size_hint); @@ -472,21 +485,11 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.strtab_offset == null) { - try self.strtab.buffer.append(gpa, 0); - self.strtab_offset = self.findFreeSpace(@intCast(u32, self.strtab.len()), 1); - log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + self.strtab.len() }); + const file_size = @intCast(u32, self.strtab.len()); + self.strtab_offset = self.findFreeSpace(file_size, @alignOf(u32)); // 4bytes aligned seems like a good idea here + log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + file_size }); } - // Index 0 is always a null symbol. 
- try self.locals.append(gpa, .{ - .name = [_]u8{0} ** 8, - .value = 0, - .section_number = .UNDEFINED, - .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, - .storage_class = .NULL, - .number_of_aux_symbols = 0, - }); - { // We need to find out what the max file offset is according to section headers. // Otherwise, we may end up with an COFF binary with file size not matching the final section's @@ -1672,11 +1675,20 @@ fn writeStrtab(self: *Coff) !void { if (needed_size > allocated_size) { self.strtab_offset = null; - self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, 1)); + self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, @alignOf(u32))); } log.debug("writing strtab from 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + needed_size }); - try self.base.file.?.pwriteAll(self.strtab.buffer.items, self.strtab_offset.?); + + var buffer = std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(self.strtab.items()); + // Here, we do a trick in that we do not commit the size of the strtab to strtab buffer, instead + // we write the length of the strtab to a temporary buffer that goes to file. + mem.writeIntLittle(u32, buffer.items[0..4], @intCast(u32, self.strtab.len())); + + try self.base.file.?.pwriteAll(buffer.items, self.strtab_offset.?); } fn writeSectionHeaders(self: *Coff) !void { @@ -1984,6 +1996,14 @@ fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !v mem.set(u8, header.name[name_offset.len..], 0); } +fn getSectionName(self: *const Coff, header: *const coff.SectionHeader) []const u8 { + if (header.getName()) |name| { + return name; + } + const offset = header.getNameOffset().?; + return self.strtab.get(offset).?; +} + fn setSymbolName(self: *Coff, symbol: *coff.Symbol, name: []const u8) !void { if (name.len <= 8) { mem.copy(u8, &symbol.name, name); diff --git a/src/link/strtab.zig b/src/link/strtab.zig index 8e314f189f..abb58defef 100644 --- a/src/link/strtab.zig +++ b/src/link/strtab.zig @@ -110,6 +110,10 @@ pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { return self.get(off) orelse unreachable; } + pub fn items(self: Self) []const u8 { + return self.buffer.items; + } + pub fn len(self: Self) usize { return self.buffer.items.len; } From 08f6546c8405dd7d9da80f857122f43f4d627a22 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 10:34:16 +0200 Subject: [PATCH 34/68] coff: create a helper for allocating sections --- src/link/Coff.zig | 171 +++++++++++++--------------------------------- 1 file changed, 48 insertions(+), 123 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index a85f8c3396..23f0b7ea9d 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -338,150 +338,54 @@ fn populateMissingMetadata(self: *Coff) !void { }); if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.program_code_size_hint); - const off = self.findFreeSpace(file_size, self.page_size); // TODO we are over-aligning in file; we should track both in file and in memory pointers - log.debug("found .text free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - 
.pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_CODE = 1, - .MEM_EXECUTE = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".text"); - try self.sections.append(gpa, .{ .header = header }); + self.text_section_index = try self.allocateSection(".text", file_size, .{ + .CNT_CODE = 1, + .MEM_EXECUTE = 1, + .MEM_READ = 1, + }); } if (self.got_section_index == null) { - self.got_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .got free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".got"); - try self.sections.append(gpa, .{ .header = header }); + self.got_section_index = try self.allocateSection(".got", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.rdata_section_index == null) { - self.rdata_section_index = @intCast(u16, self.sections.slice().len); const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .rdata free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".rdata"); - try self.sections.append(gpa, .{ .header = header }); + self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, self.sections.slice().len); const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .data free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - .MEM_WRITE = 1, - }, - }; - try self.setSectionName(&header, ".data"); - try self.sections.append(gpa, .{ .header = header }); + self.data_section_index = try self.allocateSection(".data", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + .MEM_WRITE = 1, + }); } if (self.reloc_section_index == null) { - self.reloc_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .reloc free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - 
.virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_DISCARDABLE = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".reloc"); - try self.sections.append(gpa, .{ .header = header }); + self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_DISCARDABLE = 1, + .MEM_READ = 1, + }); } if (self.idata_section_index == null) { - self.idata_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .idata free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".idata"); - try self.sections.append(gpa, .{ .header = header }); + self.idata_section_index = try self.allocateSection(".idata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.strtab_offset == null) { @@ -505,6 +409,27 @@ fn populateMissingMetadata(self: *Coff) !void { } } +fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { + const index = @intCast(u16, self.sections.slice().len); + const off = self.findFreeSpace(size, self.page_size); // TODO: we overalign here + log.debug("found {s} free space 0x{x} to 0x{x}", .{ name, off, off + size }); + var header = coff.SectionHeader{ + .name = undefined, + .virtual_size = size, + .virtual_address = off, + .size_of_raw_data = size, + .pointer_to_raw_data = off, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + .number_of_linenumbers = 0, + .flags = flags, + }; + try self.setSectionName(&header, name); + try self.sections.append(self.base.allocator, .{ .header = header }); + return index; +} + pub fn allocateDeclIndexes(self: *Coff, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); From 79e51c5e4b5f9701676079ea23e67cc355a8d42c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 16:06:58 +0200 Subject: [PATCH 35/68] coff: differentiate between file space and VM space for alloc --- src/link/Coff.zig | 106 +++++++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 29 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 23f0b7ea9d..3ffa86d6f8 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -411,12 +411,19 @@ fn populateMissingMetadata(self: *Coff) !void { fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { const index = @intCast(u16, self.sections.slice().len); - const off = self.findFreeSpace(size, self.page_size); // TODO: we overalign here - log.debug("found {s} free space 0x{x} to 0x{x}", .{ name, off, off + size }); + const off = self.findFreeSpace(size, default_file_alignment); + const vaddr = self.findFreeSpaceVM(size, 
self.page_size); + log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + name, + off, + off + size, + vaddr, + vaddr + size, + }); var header = coff.SectionHeader{ .name = undefined, .virtual_size = size, - .virtual_address = off, + .virtual_address = vaddr, .size_of_raw_data = size, .pointer_to_raw_data = off, .pointer_to_relocations = 0, @@ -513,16 +520,22 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; if (needed_size > sect_capacity) { + // const new_offset = self.findFreeSpace(needed_size, self.page_size); + // const current_size = if (last_atom) |atom| blk: { + // const sym = last_atom.getSymbol(self); + // break :blk (sym.value + atom.size) - header.virtual_address; + // } else 0; + // log.debug("moving {s} from 0x{x} to 0x{x}", .{ header.pointer_to_raw_data, new_offset }); + // const amt = try self.base.file.?.copyRangeAll(header.pointer_to_raw_data, self.base.file.?, new_offset, current_size); + // if (amt != current_size) return error.InputOutput; + @panic("TODO move section"); + // header.virtual_size = needed_size; + // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); } maybe_last_atom.* = atom; - // header.virtual_size = needed_size; - // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); } - // if (header.getAlignment().? < alignment) { - // header.setAlignment(alignment); - // } atom.size = new_atom_size; atom.alignment = alignment; @@ -1778,7 +1791,57 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = self.getSizeOfHeaders(); + const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); + if (start < headers_size) + return headers_size; + + const end = start + padToIdeal(size); + + if (self.strtab_offset) |off| { + const tight_size = @intCast(u32, self.strtab.len()); + const increased_size = padToIdeal(tight_size); + const test_end = off + increased_size; + if (end > off and start < test_end) { + return test_end; + } + } + + for (self.sections.items(.header)) |header| { + const tight_size = header.size_of_raw_data; + const increased_size = padToIdeal(tight_size); + const test_end = header.pointer_to_raw_data + increased_size; + if (end > header.pointer_to_raw_data and start < test_end) { + return test_end; + } + } + + return null; +} + +fn allocatedSize(self: *Coff, start: u32) u32 { + if (start == 0) + return 0; + var min_pos: u32 = std.math.maxInt(u32); + if (self.strtab_offset) |off| { + if (off > start and off < min_pos) min_pos = off; + } + for (self.sections.items(.header)) |header| { + if (header.pointer_to_raw_data <= start) continue; + if (header.pointer_to_raw_data < min_pos) min_pos = header.pointer_to_raw_data; + } + return min_pos - start; +} + +fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { + var start: u32 = 0; + while (self.detectAllocCollision(start, object_size)) |item_end| { + start = mem.alignForwardGeneric(u32, item_end, min_alignment); + } + return start; +} + +fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { + const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); if (start < headers_size) return headers_size; @@ -1793,33 +1856,18 @@ fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { } for 
(self.sections.items(.header)) |header| { - const increased_size = header.size_of_raw_data; - const test_end = header.pointer_to_raw_data + increased_size; - if (end > header.pointer_to_raw_data and start < test_end) { + const increased_size = header.virtual_size; + const test_end = header.virtual_address + increased_size; + if (end > header.virtual_address and start < test_end) { return test_end; } } - return null; } -pub fn allocatedSize(self: *Coff, start: u32) u32 { - if (start == 0) - return 0; - var min_pos: u32 = std.math.maxInt(u32); - if (self.strtab_offset) |off| { - if (off > start and off < min_pos) min_pos = off; - } - for (self.sections.items(.header)) |header| { - if (header.pointer_to_raw_data <= start) continue; - if (header.pointer_to_raw_data < min_pos) min_pos = header.pointer_to_raw_data; - } - return min_pos - start; -} - -pub fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { +fn findFreeSpaceVM(self: *Coff, object_size: u32, min_alignment: u32) u32 { var start: u32 = 0; - while (self.detectAllocCollision(start, object_size)) |item_end| { + while (self.detectAllocCollisionVM(start, object_size)) |item_end| { start = mem.alignForwardGeneric(u32, item_end, min_alignment); } return start; From 9116e0f7464c65912d758ebd58aad35d9b07cabc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 09:59:42 +0200 Subject: [PATCH 36/68] coff: find new file space for a section (file offsets) --- src/link/Coff.zig | 81 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 3ffa86d6f8..d31344a23e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -520,19 +520,36 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; if (needed_size > sect_capacity) { - // const new_offset = self.findFreeSpace(needed_size, self.page_size); - // const current_size = if (last_atom) |atom| blk: { - // const sym = last_atom.getSymbol(self); - // break :blk (sym.value + atom.size) - header.virtual_address; - // } else 0; - // log.debug("moving {s} from 0x{x} to 0x{x}", .{ header.pointer_to_raw_data, new_offset }); - // const amt = try self.base.file.?.copyRangeAll(header.pointer_to_raw_data, self.base.file.?, new_offset, current_size); - // if (amt != current_size) return error.InputOutput; - - @panic("TODO move section"); - // header.virtual_size = needed_size; - // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + const current_size = if (maybe_last_atom.*) |last_atom| blk: { + const sym = last_atom.getSymbol(self); + break :blk (sym.value + last_atom.size) - header.virtual_address; + } else 0; + log.debug("moving {s} from (0x{x} - 0x{x}) to (0x{x} - 0x{x})", .{ + self.getSectionName(header), + header.pointer_to_raw_data, + header.pointer_to_raw_data + current_size, + new_offset, + new_offset + current_size, + }); + const amt = try self.base.file.?.copyRangeAll( + header.pointer_to_raw_data, + self.base.file.?, + new_offset, + current_size, + ); + if (amt != current_size) return error.InputOutput; + header.pointer_to_raw_data = new_offset; } + + const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + if (needed_size > sect_vm_capacity) { + log.err("needed {x}, 
available {x}", .{ needed_size, sect_vm_capacity }); + @panic("TODO expand section in virtual address space"); + } + + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; maybe_last_atom.* = atom; } @@ -778,8 +795,9 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { if (reloc.pcrel) { const source_vaddr = source_sym.value + reloc.offset; - const disp = target_vaddr_with_addend - source_vaddr - 4; - try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset); + const disp = + @intCast(i32, target_vaddr_with_addend) - @intCast(i32, source_vaddr) - 4; + try self.base.file.?.pwriteAll(mem.asBytes(&disp), file_offset + reloc.offset); continue; } @@ -1515,7 +1533,24 @@ fn writeBaseRelocations(self: *Coff) !void { const header = &self.sections.items(.header)[self.reloc_section_index.?]; const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size = @intCast(u32, buffer.items.len); - assert(needed_size < sect_capacity); // TODO expand .reloc section + if (needed_size > sect_capacity) { + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + log.debug("writing {s} at 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + self.getSectionName(header), + header.pointer_to_raw_data, + header.pointer_to_raw_data + needed_size, + new_offset, + new_offset + needed_size, + }); + header.pointer_to_raw_data = new_offset; + + const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + if (needed_size > sect_vm_capacity) { + @panic("TODO expand section in virtual address space"); + } + } + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; try self.base.file.?.pwriteAll(buffer.items, header.pointer_to_raw_data); @@ -1608,6 +1643,8 @@ fn writeImportTable(self: *Coff) !void { } fn writeStrtab(self: *Coff) !void { + if (self.strtab_offset == null) return; + const allocated_size = self.allocatedSize(self.strtab_offset.?); const needed_size = @intCast(u32, self.strtab.len()); @@ -1840,6 +1877,20 @@ fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } +fn allocatedSizeVM(self: *Coff, start: u32) u32 { + if (start == 0) + return 0; + var min_pos: u32 = std.math.maxInt(u32); + if (self.strtab_offset) |off| { + if (off > start and off < min_pos) min_pos = off; + } + for (self.sections.items(.header)) |header| { + if (header.virtual_address <= start) continue; + if (header.virtual_address < min_pos) min_pos = header.virtual_address; + } + return min_pos - start; +} + fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); if (start < headers_size) From 2b373b05793d617f308c7f99b35be7ad1ca0321f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 14:12:01 +0200 Subject: [PATCH 37/68] coff: grow section in virtual address space when required --- src/link/Coff.zig | 122 +++++++++++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 39 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index d31344a23e..433355b54d 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -126,6 +126,15 @@ pub const Reloc = struct { pcrel: bool, length: u2, dirty: bool = true, + + /// Returns an Atom which is the target node of this relocation edge (if any). 
+ fn getTargetAtom(self: Reloc, coff_file: *Coff) ?*Atom { + switch (self.@"type") { + .got => return coff_file.getGotAtomForSymbol(self.target), + .direct => return coff_file.getAtomForSymbol(self.target), + .imports => return coff_file.getImportAtomForSymbol(self.target), + } + } }; const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); @@ -355,7 +364,7 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.rdata_section_index == null) { - const file_size: u32 = 1024; + const file_size: u32 = self.page_size; self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{ .CNT_INITIALIZED_DATA = 1, .MEM_READ = 1, @@ -363,7 +372,7 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.data_section_index == null) { - const file_size: u32 = 1024; + const file_size: u32 = self.page_size; self.data_section_index = try self.allocateSection(".data", file_size, .{ .CNT_INITIALIZED_DATA = 1, .MEM_READ = 1, @@ -371,6 +380,14 @@ fn populateMissingMetadata(self: *Coff) !void { }); } + if (self.idata_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + self.idata_section_index = try self.allocateSection(".idata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); + } + if (self.reloc_section_index == null) { const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ @@ -380,14 +397,6 @@ fn populateMissingMetadata(self: *Coff) !void { }); } - if (self.idata_section_index == null) { - const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - self.idata_section_index = try self.allocateSection(".idata", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }); - } - if (self.strtab_offset == null) { const file_size = @intCast(u32, self.strtab.len()); self.strtab_offset = self.findFreeSpace(file_size, @alignOf(u32)); // 4bytes aligned seems like a good idea here @@ -437,6 +446,35 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section return index; } +fn growSectionVM(self: *Coff, sect_id: u32, needed_size: u32) !void { + const header = &self.sections.items(.header)[sect_id]; + const increased_size = padToIdeal(needed_size); + const old_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, header.virtual_size, self.page_size); + const new_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, increased_size, self.page_size); + const diff = new_aligned_end - old_aligned_end; + + // TODO: enforce order by increasing VM addresses in self.sections container. + // This is required by the loader anyhow as far as I can tell. 
+ for (self.sections.items(.header)[sect_id + 1 ..]) |*next_header, next_sect_id| { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id + 1 + next_sect_id]; + next_header.virtual_address += diff; + + if (maybe_last_atom.*) |last_atom| { + var atom = last_atom; + while (true) { + const sym = atom.getSymbolPtr(self); + sym.value += diff; + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + } + + header.virtual_size = increased_size; +} + pub fn allocateDeclIndexes(self: *Coff, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); @@ -525,13 +563,7 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sym = last_atom.getSymbol(self); break :blk (sym.value + last_atom.size) - header.virtual_address; } else 0; - log.debug("moving {s} from (0x{x} - 0x{x}) to (0x{x} - 0x{x})", .{ - self.getSectionName(header), - header.pointer_to_raw_data, - header.pointer_to_raw_data + current_size, - new_offset, - new_offset + current_size, - }); + log.debug("moving {s} from 0x{x} to 0x{x}", .{ self.getSectionName(header), header.pointer_to_raw_data, new_offset }); const amt = try self.base.file.?.copyRangeAll( header.pointer_to_raw_data, self.base.file.?, @@ -544,8 +576,8 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); if (needed_size > sect_vm_capacity) { - log.err("needed {x}, available {x}", .{ needed_size, sect_vm_capacity }); - @panic("TODO expand section in virtual address space"); + try self.growSectionVM(sect_id, needed_size); + self.markRelocsDirtyByAddress(header.virtual_address + needed_size); } header.virtual_size = @maximum(header.virtual_size, needed_size); @@ -747,7 +779,7 @@ fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { } } -fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { +fn markRelocsDirtyByTarget(self: *Coff, target: SymbolWithLoc) void { // TODO: reverse-lookup might come in handy here var it = self.relocs.valueIterator(); while (it.next()) |relocs| { @@ -758,6 +790,18 @@ fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { } } +fn markRelocsDirtyByAddress(self: *Coff, addr: u32) void { + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_sym = target_atom.getSymbol(self); + if (target_sym.value < addr) continue; + reloc.dirty = true; + } + } +} + fn resolveRelocs(self: *Coff, atom: *Atom) !void { const relocs = self.relocs.get(atom) orelse return; const source_sym = atom.getSymbol(self); @@ -769,19 +813,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { for (relocs.items) |*reloc| { if (!reloc.dirty) continue; - const target_vaddr = switch (reloc.@"type") { - .got => blk: { - const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; - break :blk got_atom.getSymbol(self).value; - }, - .direct => blk: { - break :blk self.getSymbol(reloc.target).value; - }, - .imports => blk: { - const import_atom = self.getImportAtomForSymbol(reloc.target) orelse continue; - break :blk import_atom.getSymbol(self).value; - }, - }; + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_vaddr = target_atom.getSymbol(self).value; const target_vaddr_with_addend = target_vaddr + reloc.addend; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ @@ -1095,7 +1128,7 @@ 
fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, log.debug(" (updating GOT entry)", .{}); const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; const got_atom = self.getGotAtomForSymbol(got_target).?; - self.markRelocsDirty(got_target); + self.markRelocsDirtyByTarget(got_target); try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { @@ -1120,7 +1153,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writePtrWidthAtom(got_atom); } - self.markRelocsDirty(atom.getSymbolWithLoc()); + self.markRelocsDirtyByTarget(atom.getSymbolWithLoc()); try self.writeAtom(atom, code); } @@ -1546,7 +1579,8 @@ fn writeBaseRelocations(self: *Coff) !void { const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); if (needed_size > sect_vm_capacity) { - @panic("TODO expand section in virtual address space"); + // TODO: we want to enforce .reloc after every alloc section. + try self.growSectionVM(self.reloc_section_index.?, needed_size); } } header.virtual_size = @maximum(header.virtual_size, needed_size); @@ -1881,9 +1915,6 @@ fn allocatedSizeVM(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); - if (self.strtab_offset) |off| { - if (off > start and off < min_pos) min_pos = off; - } for (self.sections.items(.header)) |header| { if (header.virtual_address <= start) continue; if (header.virtual_address < min_pos) min_pos = header.virtual_address; @@ -2116,3 +2147,16 @@ fn logSymtab(self: *Coff) void { } } } + +fn logSections(self: *Coff) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |*header| { + log.debug(" {s}: VM({x}, {x}) FILE({x}, {x})", .{ + self.getSectionName(header), + header.virtual_address, + header.virtual_address + header.virtual_size, + header.pointer_to_raw_data, + header.pointer_to_raw_data + header.size_of_raw_data, + }); + } +} From 16ca47b9b81c67c88d678b6230dd02ce9dad7f07 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 15:13:06 +0200 Subject: [PATCH 38/68] coff: remove redundant bits and clean up --- src/link/Coff.zig | 60 +++++++++++------------------------------- src/link/Coff/Atom.zig | 7 ----- 2 files changed, 15 insertions(+), 52 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 433355b54d..e594810ee7 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -30,7 +30,6 @@ const TypedValue = @import("../TypedValue.zig"); pub const base_tag: link.File.Tag = .coff; const msdos_stub = @embedFile("msdos-stub.bin"); -const N_DATA_DIRS: u5 = 16; /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. 
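/// When this is set, the self-hosted incremental COFF linker below is bypassed entirely.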
llvm_object: ?*LlvmObject = null, @@ -44,7 +43,7 @@ page_size: u32, objects: std.ArrayListUnmanaged(Object) = .{}, sections: std.MultiArrayList(Section) = .{}, -data_directories: [N_DATA_DIRS]coff.ImageDataDirectory, +data_directories: [coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory, text_section_index: ?u16 = null, got_section_index: ?u16 = null, @@ -259,7 +258,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Coff { }, .ptr_width = ptr_width, .page_size = page_size, - .data_directories = comptime mem.zeroes([N_DATA_DIRS]coff.ImageDataDirectory), + .data_directories = comptime mem.zeroes([coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory), }; const use_llvm = build_options.have_llvm and options.use_llvm; @@ -421,7 +420,12 @@ fn populateMissingMetadata(self: *Coff) !void { fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { const index = @intCast(u16, self.sections.slice().len); const off = self.findFreeSpace(size, default_file_alignment); - const vaddr = self.findFreeSpaceVM(size, self.page_size); + // Memory is always allocated in sequence + const vaddr = blk: { + if (index == 0) break :blk self.page_size; + const prev_header = self.sections.items(.header)[index - 1]; + break :blk mem.alignForwardGeneric(u32, prev_header.virtual_address + prev_header.virtual_size, self.page_size); + }; log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ name, off, @@ -574,7 +578,7 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u header.pointer_to_raw_data = new_offset; } - const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + const sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); if (needed_size > sect_vm_capacity) { try self.growSectionVM(sect_id, needed_size); self.markRelocsDirtyByAddress(header.virtual_address + needed_size); @@ -857,10 +861,9 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); - // TODO hashmap - for (self.managed_atoms.items) |owned| { - if (owned == atom) break; - } else atom.deinit(self.base.allocator); + // Remove any relocs and base relocs associated with this Atom + _ = self.relocs.remove(atom); + _ = self.base_relocs.remove(atom); const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; @@ -1577,7 +1580,7 @@ fn writeBaseRelocations(self: *Coff) !void { }); header.pointer_to_raw_data = new_offset; - const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + const sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); if (needed_size > sect_vm_capacity) { // TODO: we want to enforce .reloc after every alloc section. 
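            // growSectionVM (see its definition earlier in this series) grows .reloc in
            // place and shifts every section that follows it, including the symbol
            // values of that section's atoms, up by the page-aligned growth delta.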
try self.growSectionVM(self.reloc_section_index.?, needed_size); @@ -1862,7 +1865,7 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); + const headers_size = @maximum(self.getSizeOfHeaders(), self.page_size); if (start < headers_size) return headers_size; @@ -1911,7 +1914,7 @@ fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } -fn allocatedSizeVM(self: *Coff, start: u32) u32 { +fn allocatedVirtualSize(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); @@ -1922,39 +1925,6 @@ fn allocatedSizeVM(self: *Coff, start: u32) u32 { return min_pos - start; } -fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); - if (start < headers_size) - return headers_size; - - const end = start + size; - - if (self.strtab_offset) |off| { - const increased_size = @intCast(u32, self.strtab.len()); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - } - - for (self.sections.items(.header)) |header| { - const increased_size = header.virtual_size; - const test_end = header.virtual_address + increased_size; - if (end > header.virtual_address and start < test_end) { - return test_end; - } - } - return null; -} - -fn findFreeSpaceVM(self: *Coff, object_size: u32, min_alignment: u32) u32 { - var start: u32 = 0; - while (self.detectAllocCollisionVM(start, object_size)) |item_end| { - start = mem.alignForwardGeneric(u32, item_end, min_alignment); - } - return start; -} - inline fn getSizeOfHeaders(self: Coff) u32 { const msdos_hdr_size = msdos_stub.len + 4; return @intCast(u32, msdos_hdr_size + @sizeOf(coff.CoffHeader) + self.getOptionalHeaderSize() + diff --git a/src/link/Coff/Atom.zig b/src/link/Coff/Atom.zig index 1d6e511f3b..ffd8fe45e6 100644 --- a/src/link/Coff/Atom.zig +++ b/src/link/Coff/Atom.zig @@ -4,8 +4,6 @@ const std = @import("std"); const coff = std.coff; const log = std.log.scoped(.link); -const Allocator = std.mem.Allocator; - const Coff = @import("../Coff.zig"); const Reloc = Coff.Reloc; const SymbolWithLoc = Coff.SymbolWithLoc; @@ -41,11 +39,6 @@ pub const empty = Atom{ .next = null, }; -pub fn deinit(self: *Atom, gpa: Allocator) void { - _ = self; - _ = gpa; -} - /// Returns symbol referencing this atom. pub fn getSymbol(self: Atom, coff_file: *const Coff) *const coff.Symbol { return coff_file.getSymbol(.{ From 7b8cc599d997759201a945d05b91c24f5cfe29d7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 16:56:26 +0200 Subject: [PATCH 39/68] coff: use more generous initial memory sizes for sections --- src/link/Coff.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e594810ee7..cdb0f9a9cc 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -421,11 +421,14 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section const index = @intCast(u16, self.sections.slice().len); const off = self.findFreeSpace(size, default_file_alignment); // Memory is always allocated in sequence + // TODO: investigate if we can allocate .text last; this way it would never need to grow in memory! 
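+    // Worked example (hypothetical numbers): with page_size = 0x1000, a previous
+    // section at virtual_address 0x2000 with virtual_size 0x1200 ends at 0x3200,
+    // so the new section lands at alignForwardGeneric(u32, 0x3200, 0x1000) = 0x4000.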
const vaddr = blk: { if (index == 0) break :blk self.page_size; const prev_header = self.sections.items(.header)[index - 1]; break :blk mem.alignForwardGeneric(u32, prev_header.virtual_address + prev_header.virtual_size, self.page_size); }; + // We commit more memory than needed upfront so that we don't have to reallocate too soon. + const memsz = mem.alignForwardGeneric(u32, size, self.page_size) * 100; log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ name, off, @@ -435,7 +438,7 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section }); var header = coff.SectionHeader{ .name = undefined, - .virtual_size = size, + .virtual_size = memsz, .virtual_address = vaddr, .size_of_raw_data = size, .pointer_to_raw_data = off, @@ -456,6 +459,7 @@ fn growSectionVM(self: *Coff, sect_id: u32, needed_size: u32) !void { const old_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, header.virtual_size, self.page_size); const new_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, increased_size, self.page_size); const diff = new_aligned_end - old_aligned_end; + log.debug("growing {s} in virtual memory by {x}", .{ self.getSectionName(header), diff }); // TODO: enforce order by increasing VM addresses in self.sections container. // This is required by the loader anyhow as far as I can tell. From 945111ae92c28f8ad642a64af4dc83acd3bef4a5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 17:53:21 +0200 Subject: [PATCH 40/68] enable testing of x86_64-windows-gnu using self-hosted backend and linker --- lib/std/build.zig | 9 +++++++++ test/tests.zig | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/lib/std/build.zig b/lib/std/build.zig index 4c05586159..76744b12d2 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1623,6 +1623,7 @@ pub const LibExeObjStep = struct { /// Overrides the default stack size stack_size: ?u64 = null, + use_unwind_tables: ?bool = null, want_lto: ?bool = null, use_stage1: ?bool = null, use_llvm: ?bool = null, @@ -2505,6 +2506,14 @@ pub const LibExeObjStep = struct { } } + if (self.use_unwind_tables) |use_unwind_tables| { + if (use_unwind_tables) { + try zig_args.append("-funwind-tables"); + } else { + try zig_args.append("-fno-unwind-tables"); + } + } + if (self.ofmt) |ofmt| { try zig_args.append(try std.fmt.allocPrint(builder.allocator, "-ofmt={s}", .{@tagName(ofmt)})); } diff --git a/test/tests.zig b/test/tests.zig index a329233199..f46a3acbb5 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -108,6 +108,14 @@ const test_targets = blk: { }, .backend = .stage2_x86_64, }, + .{ + .target = .{ + .cpu_arch = .x86_64, + .os_tag = .windows, + .abi = .gnu, + }, + .backend = .stage2_x86_64, + }, .{ .target = .{ @@ -693,6 +701,13 @@ pub fn addPkgTests( else => { these_tests.use_stage1 = false; these_tests.use_llvm = false; + + if (test_target.target.getOsTag() == .windows) { + // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because + // our own COFF linker doesn't yet support these options. 
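+                    // (Presumably: -femit-implib requires writing a .lib import library,
+                    // and unwind tables require .pdata/.xdata sections, neither of which
+                    // the self-hosted COFF linker handles yet.)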
+ these_tests.emit_implib = .no_emit; + these_tests.use_unwind_tables = false; + } }, }; From f3e4e44a2b8de8ee860c2c9d11ee1a770e625e0e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 20:16:39 +0200 Subject: [PATCH 41/68] coff: fix memory leak in incorrectly disposing of globals table --- src/link/Coff.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index cdb0f9a9cc..cd529ddab0 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -292,7 +292,12 @@ pub fn deinit(self: *Coff) void { self.managed_atoms.deinit(gpa); self.locals.deinit(gpa); + + for (self.globals.keys()) |key| { + gpa.free(key); + } self.globals.deinit(gpa); + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); From 99c2cb72e850ffdfd83abcc941c84a0053f8494e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 22:34:32 +0200 Subject: [PATCH 42/68] coff: track globals in contiguous array to allow for tombstones --- src/link/Coff.zig | 119 ++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 42 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index cd529ddab0..2abfc78d1e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -53,10 +53,12 @@ reloc_section_index: ?u16 = null, idata_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(coff.Symbol) = .{}, -globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, +resolver: std.StringHashMapUnmanaged(u32) = .{}, unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, +globals_free_list: std.ArrayListUnmanaged(u32) = .{}, strtab: StringTable(.strtab) = .{}, strtab_offset: ?u32 = null, @@ -292,12 +294,16 @@ pub fn deinit(self: *Coff) void { self.managed_atoms.deinit(gpa); self.locals.deinit(gpa); - - for (self.globals.keys()) |key| { - gpa.free(key); - } self.globals.deinit(gpa); + { + var it = self.resolver.keyIterator(); + while (it.next()) |key_ptr| { + gpa.free(key_ptr.*); + } + self.resolver.deinit(gpa); + } + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); @@ -651,6 +657,30 @@ fn allocateSymbol(self: *Coff) !u32 { return index; } +fn allocateGlobal(self: *Coff) !u32 { + const gpa = self.base.allocator; + try self.globals.ensureUnusedCapacity(gpa, 1); + + const index = blk: { + if (self.globals_free_list.popOrNull()) |index| { + log.debug(" (reusing global index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating global index {d})", .{self.globals.items.len}); + const index = @intCast(u32, self.globals.items.len); + _ = self.globals.addOneAssumeCapacity(); + break :blk index; + } + }; + + self.globals.items[index] = .{ + .sym_index = 0, + .file = null, + }; + + return index; +} + pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; try self.got_entries.ensureUnusedCapacity(gpa, 1); @@ -1340,7 +1370,7 @@ pub fn deleteExport(self: *Coff, exp: Export) void { const sym = self.getSymbolPtr(sym_loc); const sym_name = self.getSymbolName(sym_loc); log.debug("deleting export '{s}'", .{sym_name}); - assert(sym.storage_class == .EXTERNAL); + assert(sym.storage_class == .EXTERNAL and sym.section_number != .UNDEFINED); sym.* = .{ .name = [_]u8{0} ** 8, .value = 0, @@ -1351,33 +1381,38 @@ pub fn deleteExport(self: *Coff, exp: Export) void { }; self.locals_free_list.append(gpa, sym_index) catch {}; - if 
(self.globals.get(sym_name)) |global| blk: { - if (global.sym_index != sym_index) break :blk; - if (global.file != null) break :blk; - const kv = self.globals.fetchSwapRemove(sym_name); - gpa.free(kv.?.key); + if (self.resolver.fetchRemove(sym_name)) |entry| { + defer gpa.free(entry.key); + self.globals_free_list.append(gpa, entry.value) catch {}; + self.globals.items[entry.value] = .{ + .sym_index = 0, + .file = null, + }; } } fn resolveGlobalSymbol(self: *Coff, current: SymbolWithLoc) !void { const gpa = self.base.allocator; const sym = self.getSymbol(current); - _ = sym; const sym_name = self.getSymbolName(current); - const name = try gpa.dupe(u8, sym_name); - const global_index = @intCast(u32, self.globals.values().len); - _ = global_index; - const gop = try self.globals.getOrPut(gpa, name); - defer if (gop.found_existing) gpa.free(name); - - if (!gop.found_existing) { - gop.value_ptr.* = current; - // TODO undef + tentative + const global_index = self.resolver.get(sym_name) orelse { + const name = try gpa.dupe(u8, sym_name); + const global_index = try self.allocateGlobal(); + self.globals.items[global_index] = current; + try self.resolver.putNoClobber(gpa, name, global_index); + if (sym.section_number == .UNDEFINED) { + try self.unresolved.putNoClobber(gpa, global_index, false); + } return; - } + }; log.debug("TODO finish resolveGlobalSymbols implementation", .{}); + + if (sym.section_number == .UNDEFINED) return; + + _ = self.unresolved.swapRemove(global_index); + self.globals.items[global_index] = current; } pub fn flush(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -1415,7 +1450,7 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod while (self.unresolved.popOrNull()) |entry| { assert(entry.value); // We only expect imports generated by the incremental linker for now. - const global = self.globals.values()[entry.key]; + const global = self.globals.items[entry.key]; if (self.imports_table.contains(global)) continue; _ = try self.allocateImportEntry(global); @@ -1481,24 +1516,22 @@ pub fn getDeclVAddr( } pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { - const gpa = self.base.allocator; - const sym_name = try gpa.dupe(u8, name); - const global_index = @intCast(u32, self.globals.values().len); - const gop = try self.globals.getOrPut(gpa, sym_name); - defer if (gop.found_existing) gpa.free(sym_name); - - if (gop.found_existing) { - // TODO audit this: can we ever reference anything from outside the Zig module? 
- assert(gop.value_ptr.file == null); - return gop.value_ptr.sym_index; + if (self.resolver.get(name)) |global_index| { + return self.globals.items[global_index].sym_index; } + const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); + const global_index = try self.allocateGlobal(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + self.globals.items[global_index] = sym_loc; + + const sym_name = try gpa.dupe(u8, name); const sym = self.getSymbolPtr(sym_loc); try self.setSymbolName(sym, sym_name); sym.storage_class = .EXTERNAL; - gop.value_ptr.* = sym_loc; + + try self.resolver.putNoClobber(gpa, sym_name, global_index); try self.unresolved.putNoClobber(gpa, global_index, true); return sym_index; @@ -1607,14 +1640,15 @@ fn writeBaseRelocations(self: *Coff) !void { } fn writeImportTable(self: *Coff) !void { + if (self.idata_section_index == null) return; + const gpa = self.base.allocator; const section = self.sections.get(self.idata_section_index.?); + const last_atom = section.last_atom orelse return; + const iat_rva = section.header.virtual_address; - const iat_size = blk: { - const last_atom = section.last_atom.?; - break :blk last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer - }; + const iat_size = last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer const dll_name = "KERNEL32.dll"; @@ -1975,7 +2009,8 @@ inline fn getSizeOfImage(self: Coff) u32 { /// Returns symbol location corresponding to the set entrypoint (if any). pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { const entry_name = self.base.options.entry orelse "wWinMainCRTStartup"; // TODO this is incomplete - return self.globals.get(entry_name); + const global_index = self.resolver.get(entry_name) orelse return null; + return self.globals.items[global_index]; } /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. 
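// A minimal sketch (names taken from this diff; error handling elided) of how the
// tombstone scheme introduced here fits together: deleteExport releases a global
// slot into globals_free_list, and allocateGlobal reuses it later.
//
//     // deleteExport: drop the name mapping and leave a tombstone behind.
//     if (self.resolver.fetchRemove(sym_name)) |entry| {
//         self.globals_free_list.append(gpa, entry.value) catch {};
//         self.globals.items[entry.value] = .{ .sym_index = 0, .file = null };
//     }
//
//     // allocateGlobal: prefer a tombstoned slot over growing the array.
//     const index = self.globals_free_list.popOrNull() orelse
//         @intCast(u32, self.globals.items.len); // append path, capacity ensured first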
@@ -2100,9 +2135,9 @@ fn logSymtab(self: *Coff) void { } log.debug("globals table:", .{}); - for (self.globals.keys()) |name, id| { - const value = self.globals.values()[id]; - log.debug(" {s} => %{d} in object({?d})", .{ name, value.sym_index, value.file }); + for (self.globals.items) |sym_loc| { + const sym_name = self.getSymbolName(sym_loc); + log.debug(" {s} => %{d} in object({?d})", .{ sym_name, sym_loc.sym_index, sym_loc.file }); } log.debug("GOT entries:", .{}); From 215fce8c51662970d34ae1f4bf1cd043071fea8a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 12:46:51 +0200 Subject: [PATCH 43/68] coff: fix tracking of got and import entries; free relocs in update* fns --- src/link/Coff.zig | 132 ++++++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 51 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 2abfc78d1e..49263df225 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -63,11 +63,13 @@ globals_free_list: std.ArrayListUnmanaged(u32) = .{}, strtab: StringTable(.strtab) = .{}, strtab_offset: ?u32 = null, -got_entries: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -imports_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -imports_table_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports: std.ArrayListUnmanaged(Entry) = .{}, +imports_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, /// Virtual address of the entry point procedure relative to image base. entry_addr: ?u32 = null, @@ -115,6 +117,12 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, +const Entry = struct { + target: SymbolWithLoc, + // Index into the synthetic symbol table (i.e., file == null). 
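+    // A sym_index of 0 means the entry's atom has not been created yet
+    // (see createGotAtom/createImportAtom) or the entry has been freed.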
+ sym_index: u32, +}; + pub const Reloc = struct { @"type": enum { got, @@ -309,8 +317,10 @@ pub fn deinit(self: *Coff) void { self.strtab.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); + self.got_entries_table.deinit(gpa); + self.imports.deinit(gpa); + self.imports_free_list.deinit(gpa); self.imports_table.deinit(gpa); - self.imports_table_free_list.deinit(gpa); self.decls.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -684,42 +694,44 @@ fn allocateGlobal(self: *Coff) !u32 { pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; try self.got_entries.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { if (self.got_entries_free_list.popOrNull()) |index| { log.debug(" (reusing GOT entry index {d})", .{index}); - if (self.got_entries.getIndex(target)) |existing| { - assert(existing == index); - } break :blk index; } else { - log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.keys().len}); - const index = @intCast(u32, self.got_entries.keys().len); - self.got_entries.putAssumeCapacityNoClobber(target, 0); + log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.items.len}); + const index = @intCast(u32, self.got_entries.items.len); + _ = self.got_entries.addOneAssumeCapacity(); break :blk index; } }; - self.got_entries.keys()[index] = target; + + self.got_entries.items[index] = .{ .target = target, .sym_index = 0 }; + try self.got_entries_table.putNoClobber(gpa, target, index); + return index; } pub fn allocateImportEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; - try self.imports_table.ensureUnusedCapacity(gpa, 1); + try self.imports.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { - if (self.imports_table_free_list.popOrNull()) |index| { + if (self.imports_free_list.popOrNull()) |index| { log.debug(" (reusing import entry index {d})", .{index}); - if (self.imports_table.getIndex(target)) |existing| { - assert(existing == index); - } break :blk index; } else { - log.debug(" (allocating import entry at index {d})", .{self.imports_table.keys().len}); - const index = @intCast(u32, self.imports_table.keys().len); - self.imports_table.putAssumeCapacityNoClobber(target, 0); + log.debug(" (allocating import entry at index {d})", .{self.imports.items.len}); + const index = @intCast(u32, self.imports.items.len); + _ = self.imports.addOneAssumeCapacity(); break :blk index; } }; - self.imports_table.keys()[index] = target; + + self.imports.items[index] = .{ .target = target, .sym_index = 0 }; + try self.imports_table.putNoClobber(gpa, target, index); + return index; } @@ -734,7 +746,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); - self.got_entries.getPtr(target).?.* = atom.sym_index; const sym = atom.getSymbolPtr(self); sym.section_number = @intToEnum(coff.SectionNumber, self.got_section_index.? 
+ 1); @@ -762,7 +773,7 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { return atom; } -fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { +fn createImportAtom(self: *Coff) !*Atom { const gpa = self.base.allocator; const atom = try gpa.create(Atom); errdefer gpa.destroy(atom); @@ -773,7 +784,6 @@ fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); - self.imports_table.getPtr(target).?.* = atom.sym_index; const sym = atom.getSymbolPtr(self); sym.section_number = @intToEnum(coff.SectionNumber, self.idata_section_index.? + 1); @@ -804,7 +814,7 @@ fn writeAtom(self: *Coff, atom: *Atom, code: []const u8) !void { const sym = atom.getSymbol(self); const section = self.sections.get(@enumToInt(sym.section_number) - 1); const file_offset = section.header.pointer_to_raw_data + sym.value - section.header.virtual_address; - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); + log.debug("writing atom for symbol {s} at file offset 0x{x} to 0x{x}", .{ atom.getName(self), file_offset, file_offset + code.len }); try self.base.file.?.pwriteAll(code, file_offset); try self.resolveRelocs(atom); } @@ -860,11 +870,12 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const target_vaddr = target_atom.getSymbol(self).value; const target_vaddr_with_addend = target_vaddr + reloc.addend; - log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ + log.debug(" ({x}: [() => 0x{x} ({s})) ({s}) (in file at 0x{x})", .{ source_sym.value + reloc.offset, target_vaddr_with_addend, self.getSymbolName(reloc.target), @tagName(reloc.@"type"), + file_offset + reloc.offset, }); reloc.dirty = false; @@ -901,8 +912,7 @@ fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); // Remove any relocs and base relocs associated with this Atom - _ = self.relocs.remove(atom); - _ = self.base_relocs.remove(atom); + self.freeRelocationsForAtom(atom); const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; @@ -966,11 +976,14 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live const tracy = trace(@src()); defer tracy.end(); + const decl_index = func.owner_decl; + const decl = module.declPtr(decl_index); + self.freeUnnamedConsts(decl_index); + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); - const decl_index = func.owner_decl; - const decl = module.declPtr(decl_index); const res = try codegen.generateFunction( &self.base, decl.srcLoc(), @@ -1082,6 +1095,8 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! 
} } + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -1190,8 +1205,9 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, sym.value = vaddr; const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; - _ = try self.allocateGotEntry(got_target); + const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); + self.got_entries.items[got_index].sym_index = got_atom.sym_index; try self.writePtrWidthAtom(got_atom); } @@ -1199,6 +1215,11 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writeAtom(atom, code); } +fn freeRelocationsForAtom(self: *Coff, atom: *Atom) void { + _ = self.relocs.remove(atom); + _ = self.base_relocs.remove(atom); +} + fn freeUnnamedConsts(self: *Coff, decl_index: Module.Decl.Index) void { const gpa = self.base.allocator; const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; @@ -1237,14 +1258,20 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { // Try freeing GOT atom if this decl had one const got_target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - if (self.got_entries.getIndex(got_target)) |got_index| { + if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(gpa, @intCast(u32, got_index)) catch {}; - self.got_entries.values()[got_index] = 0; + self.got_entries.items[got_index] = .{ + .target = .{ .sym_index = 0, .file = null }, + .sym_index = 0, + }; + _ = self.got_entries_table.remove(got_target); + log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } self.locals.items[sym_index].section_number = .UNDEFINED; _ = self.atom_by_index_table.remove(sym_index); + log.debug(" adding local symbol index {d} to free list", .{sym_index}); decl.link.coff.sym_index = 0; } } @@ -1453,8 +1480,9 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod const global = self.globals.items[entry.key]; if (self.imports_table.contains(global)) continue; - _ = try self.allocateImportEntry(global); - const import_atom = try self.createImportAtom(global); + const import_index = try self.allocateImportEntry(global); + const import_atom = try self.createImportAtom(); + self.imports.items[import_index].sym_index = import_atom.sym_index; try self.writePtrWidthAtom(import_atom); } @@ -1668,8 +1696,8 @@ fn writeImportTable(self: *Coff) !void { defer names_table.deinit(); // TODO: check if import is still valid - for (self.imports_table.keys()) |target| { - const target_name = self.getSymbolName(target); + for (self.imports.items) |entry| { + const target_name = self.getSymbolName(entry.target); const start = names_table.items.len; mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL try names_table.appendSlice(target_name); @@ -2013,19 +2041,19 @@ pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { return self.globals.items[global_index]; } -/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +/// Returns pointer-to-symbol described by `sym_loc` descriptor. pub fn getSymbolPtr(self: *Coff, sym_loc: SymbolWithLoc) *coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns symbol described by `sym_with_loc` descriptor. 
+/// Returns symbol described by `sym_loc` descriptor. pub fn getSymbol(self: *const Coff, sym_loc: SymbolWithLoc) *const coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns name of the symbol described by `sym_with_loc` descriptor. +/// Returns name of the symbol described by `sym_loc` descriptor. pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { assert(sym_loc.file == null); // TODO linking object files const sym = self.getSymbol(sym_loc); @@ -2033,25 +2061,27 @@ pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { return self.strtab.get(offset).?; } -/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns atom if there is an atom referenced by the symbol described by `sym_loc` descriptor. /// Returns null on failure. pub fn getAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { assert(sym_loc.file == null); // TODO linking with object files return self.atom_by_index_table.get(sym_loc.sym_index); } -/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns GOT atom that references `sym_loc` if one exists. /// Returns null otherwise. pub fn getGotAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { - const got_index = self.got_entries.get(sym_loc) orelse return null; - return self.atom_by_index_table.get(got_index); + const got_index = self.got_entries_table.get(sym_loc) orelse return null; + const got_entry = self.got_entries.items[got_index]; + return self.getAtomForSymbol(.{ .sym_index = got_entry.sym_index, .file = null }); } -/// Returns import atom that references `sym_with_loc` if one exists. +/// Returns import atom that references `sym_loc` if one exists. /// Returns null otherwise. 
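/// The import atom lives in the .idata section and serves as the IAT slot
/// that relocations of type `.imports` resolve to.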
pub fn getImportAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { const imports_index = self.imports_table.get(sym_loc) orelse return null; - return self.atom_by_index_table.get(imports_index); + const imports_entry = self.imports.items[imports_index]; + return self.getAtomForSymbol(.{ .sym_index = imports_entry.sym_index, .file = null }); } fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !void { @@ -2141,21 +2171,21 @@ fn logSymtab(self: *Coff) void { } log.debug("GOT entries:", .{}); - for (self.got_entries.keys()) |target, i| { - const got_sym = self.getSymbol(.{ .sym_index = self.got_entries.values()[i], .file = null }); - const target_sym = self.getSymbol(target); + for (self.got_entries.items) |entry, i| { + const got_sym = self.getSymbol(.{ .sym_index = entry.sym_index, .file = null }); + const target_sym = self.getSymbol(entry.target); if (target_sym.section_number == .UNDEFINED) { log.debug(" {d}@{x} => import('{s}')", .{ i, got_sym.value, - self.getSymbolName(target), + self.getSymbolName(entry.target), }); } else { log.debug(" {d}@{x} => local(%{d}) in object({?d}) {s}", .{ i, got_sym.value, - target.sym_index, - target.file, + entry.target.sym_index, + entry.target.file, logSymAttributes(target_sym, &buf), }); } From 8ef1c62f2efc4a43e8b38d3cacf4fd930add7f46 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 15:29:21 +0200 Subject: [PATCH 44/68] macho: properly close file handles owned by the linker in deinit() --- src/link/MachO.zig | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index af25441066..bceaa55d8a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3013,6 +3013,7 @@ pub fn deinit(self: *MachO) void { } if (self.d_sym) |*d_sym| { + d_sym.file.close(); d_sym.deinit(gpa); } @@ -3041,6 +3042,7 @@ pub fn deinit(self: *MachO) void { self.objects.deinit(gpa); for (self.archives.items) |*archive| { + archive.file.close(); archive.deinit(gpa); } self.archives.deinit(gpa); @@ -3086,15 +3088,6 @@ pub fn deinit(self: *MachO) void { self.atom_by_index_table.deinit(gpa); } -pub fn closeFiles(self: MachO) void { - for (self.archives.items) |archive| { - archive.file.close(); - } - if (self.d_sym) |ds| { - ds.file.close(); - } -} - fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { From 639237c7b4655d7d4c38c24d9b36a145bbfb1e1c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 16:10:19 +0200 Subject: [PATCH 45/68] macho: set file instance in linkOneShot only if not already set --- src/link/MachO.zig | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index bceaa55d8a..e1392d8903 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -793,11 +793,13 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } else { const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); + if (self.base.file == null) { + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + } // Index 0 is always a null symbol. 
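        // (n_strx == 0 means the symbol has no name; the entry appended below is
        // that null symbol.)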
try self.locals.append(gpa, .{ .n_strx = 0, From 678e07b924a717e79f412b78895ead1136a722bc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 19:16:30 +0200 Subject: [PATCH 46/68] macho+wasm: unify and clean up closing file handles --- src/link/MachO.zig | 3 --- src/link/MachO/Archive.zig | 1 + src/link/MachO/DebugSymbols.zig | 1 + src/link/Wasm.zig | 2 -- src/link/Wasm/Archive.zig | 1 + src/link/Wasm/Object.zig | 3 +++ 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e1392d8903..ef180ab032 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1437,7 +1437,6 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { if (force_load) { defer archive.deinit(gpa); - defer file.close(); // Get all offsets from the ToC var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); @@ -3015,7 +3014,6 @@ pub fn deinit(self: *MachO) void { } if (self.d_sym) |*d_sym| { - d_sym.file.close(); d_sym.deinit(gpa); } @@ -3044,7 +3042,6 @@ pub fn deinit(self: *MachO) void { self.objects.deinit(gpa); for (self.archives.items) |*archive| { - archive.file.close(); archive.deinit(gpa); } self.archives.deinit(gpa); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 054f75fff3..59a956534e 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -88,6 +88,7 @@ const ar_hdr = extern struct { }; pub fn deinit(self: *Archive, allocator: Allocator) void { + self.file.close(); for (self.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a7dc6391c2..ffff0fe5f8 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -306,6 +306,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { + self.file.close(); self.segments.deinit(allocator); self.sections.deinit(allocator); self.dwarf.deinit(); diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 050d9287a5..8c73336e9f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -648,12 +648,10 @@ pub fn deinit(self: *Wasm) void { gpa.free(segment_info.name); } for (self.objects.items) |*object| { - object.file.?.close(); object.deinit(gpa); } for (self.archives.items) |*archive| { - archive.file.close(); archive.deinit(gpa); } diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index c80d26d17d..b1cce15b1d 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -95,6 +95,7 @@ const ar_hdr = extern struct { }; pub fn deinit(archive: *Archive, allocator: Allocator) void { + archive.file.close(); for (archive.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 50827ca9fb..b182fdfcae 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -141,6 +141,9 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz /// Frees all memory of `Object` at once. The given `Allocator` must be /// the same allocator that was used when `init` was called. 
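+/// Also closes the object's file handle, if the object still owns one.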
pub fn deinit(self: *Object, gpa: Allocator) void { + if (self.file) |file| { + file.close(); + } for (self.func_types) |func_ty| { gpa.free(func_ty.params); gpa.free(func_ty.returns); From a226aef36cc441be4af675e24b3e672fe3fe2d5a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 19:16:54 +0200 Subject: [PATCH 47/68] test-cases: enable stage2 tests on Windows --- ci/azure/pipelines.yml | 3 +-- src/test.zig | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ci/azure/pipelines.yml b/ci/azure/pipelines.yml index 633c6389d0..86f92f5ef6 100644 --- a/ci/azure/pipelines.yml +++ b/ci/azure/pipelines.yml @@ -73,8 +73,7 @@ jobs: & "$ZIGINSTALLDIR\bin\zig.exe" build test docs ` --search-prefix "$ZIGPREFIXPATH" ` -Dstatic-llvm ` - -Dskip-non-native ` - -Dskip-stage2-tests + -Dskip-non-native CheckLastExitCode name: test displayName: 'Test' diff --git a/src/test.zig b/src/test.zig index babded13f9..dfe5f5c866 100644 --- a/src/test.zig +++ b/src/test.zig @@ -177,6 +177,8 @@ const TestManifestConfigDefaults = struct { inline for (&[_][]const u8{ "x86_64", "aarch64" }) |arch| { defaults = defaults ++ arch ++ "-macos" ++ ","; } + // Windows + defaults = defaults ++ "x86_64-windows" ++ ","; // Wasm defaults = defaults ++ "wasm32-wasi"; return defaults; @@ -1546,6 +1548,13 @@ pub const TestContext = struct { .self_exe_path = std.testing.zig_exe_path, // TODO instead of turning off color, pass in a std.Progress.Node .color = .off, + // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because + // our own COFF linker doesn't yet support these options. + .want_unwind_tables = switch (case.backend) { + .stage2 => if (target.os.tag == .windows) false else null, + else => null, + }, + .emit_implib = null, }); defer comp.destroy(); From c4d297b1af832739a487fa511d16b902dd4fca99 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 20:37:37 +0200 Subject: [PATCH 48/68] test-cases: add missing incremental x86_64-windows test cases --- .../hello_world_with_updates.0.zig | 6 ++++++ .../hello_world_with_updates.1.zig | 6 ++++++ .../hello_world_with_updates.2.zig | 16 ++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 test/cases/x86_64-windows/hello_world_with_updates.0.zig create mode 100644 test/cases/x86_64-windows/hello_world_with_updates.1.zig create mode 100644 test/cases/x86_64-windows/hello_world_with_updates.2.zig diff --git a/test/cases/x86_64-windows/hello_world_with_updates.0.zig b/test/cases/x86_64-windows/hello_world_with_updates.0.zig new file mode 100644 index 0000000000..142699b9da --- /dev/null +++ b/test/cases/x86_64-windows/hello_world_with_updates.0.zig @@ -0,0 +1,6 @@ +// error +// output_mode=Exe +// target=x86_64-windows +// +// :130:9: error: struct 'tmp.tmp' has no member named 'main' +// :7:1: note: struct declared here diff --git a/test/cases/x86_64-windows/hello_world_with_updates.1.zig b/test/cases/x86_64-windows/hello_world_with_updates.1.zig new file mode 100644 index 0000000000..e18a4c6a1e --- /dev/null +++ b/test/cases/x86_64-windows/hello_world_with_updates.1.zig @@ -0,0 +1,6 @@ +pub export fn main() noreturn {} + +// error +// +// :1:32: error: function declared 'noreturn' returns +// :1:22: note: 'noreturn' declared here diff --git a/test/cases/x86_64-windows/hello_world_with_updates.2.zig b/test/cases/x86_64-windows/hello_world_with_updates.2.zig new file mode 100644 index 0000000000..6c2fd5b24e --- /dev/null +++ 
b/test/cases/x86_64-windows/hello_world_with_updates.2.zig @@ -0,0 +1,16 @@ +const std = @import("std"); + +pub fn main() void { + print(); +} + +fn print() void { + const msg = "Hello, World!\n"; + const stdout = std.io.getStdOut(); + stdout.writeAll(msg) catch unreachable; +} + +// run +// +// Hello, World! +// From 0e152b76ac0da0f8132091202eba9f6cebd0e616 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 23:16:42 +0200 Subject: [PATCH 49/68] tests: force LLD off for stage2 backends until auto-select deems worthy --- src/test.zig | 9 ++++----- test/tests.zig | 9 ++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/test.zig b/src/test.zig index dfe5f5c866..e824b59066 100644 --- a/src/test.zig +++ b/src/test.zig @@ -1548,13 +1548,12 @@ pub const TestContext = struct { .self_exe_path = std.testing.zig_exe_path, // TODO instead of turning off color, pass in a std.Progress.Node .color = .off, - // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because - // our own COFF linker doesn't yet support these options. - .want_unwind_tables = switch (case.backend) { - .stage2 => if (target.os.tag == .windows) false else null, + // TODO: force self-hosted linkers with stage2 backend to avoid LLD creeping in + // until the auto-select mechanism deems them worthy + .use_lld = switch (case.backend) { + .stage2 => false, else => null, }, - .emit_implib = null, }); defer comp.destroy(); diff --git a/test/tests.zig b/test/tests.zig index f46a3acbb5..53e58156a4 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -701,13 +701,8 @@ pub fn addPkgTests( else => { these_tests.use_stage1 = false; these_tests.use_llvm = false; - - if (test_target.target.getOsTag() == .windows) { - // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because - // our own COFF linker doesn't yet support these options. 
- these_tests.emit_implib = .no_emit; - these_tests.use_unwind_tables = false; - } + // TODO: force self-hosted linkers to avoid LLD creeping in until the auto-select mechanism deems them worthy + these_tests.use_lld = false; }, }; From b98b3252beea98522c25b88eb29b5e2d8a65adfe Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 23:18:57 +0200 Subject: [PATCH 50/68] revert part of 945111ae92c28f8ad642a64af4dc83acd3bef4a5 --- lib/std/build.zig | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lib/std/build.zig b/lib/std/build.zig index 76744b12d2..4c05586159 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1623,7 +1623,6 @@ pub const LibExeObjStep = struct { /// Overrides the default stack size stack_size: ?u64 = null, - use_unwind_tables: ?bool = null, want_lto: ?bool = null, use_stage1: ?bool = null, use_llvm: ?bool = null, @@ -2506,14 +2505,6 @@ pub const LibExeObjStep = struct { } } - if (self.use_unwind_tables) |use_unwind_tables| { - if (use_unwind_tables) { - try zig_args.append("-funwind-tables"); - } else { - try zig_args.append("-fno-unwind-tables"); - } - } - if (self.ofmt) |ofmt| { try zig_args.append(try std.fmt.allocPrint(builder.allocator, "-ofmt={s}", .{@tagName(ofmt)})); } From 0fa80e66b7e995e560ac1c3da7069c603e9a0538 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Wed, 7 Sep 2022 19:11:26 +0300 Subject: [PATCH 51/68] Sema: correct types in `@memset` and `@memcpy` Closes #12750 --- src/Sema.zig | 50 ++++++++----------- .../incorrect_type_to_memset_memcpy.zig | 19 +++++++ 2 files changed, 40 insertions(+), 29 deletions(-) create mode 100644 test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig diff --git a/src/Sema.zig b/src/Sema.zig index fb1638bc2a..417455ae5a 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -18076,8 +18076,8 @@ fn bitOffsetOf(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!u6 const target = sema.mod.getTarget(); try sema.resolveTypeLayout(block, lhs_src, ty); - switch (ty.tag()) { - .@"struct", .tuple, .anon_struct => {}, + switch (ty.zigTypeTag()) { + .Struct => {}, else => { const msg = msg: { const msg = try sema.errMsg(block, lhs_src, "expected struct type, found '{}'", .{ty.fmt(sema.mod)}); @@ -19617,28 +19617,19 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void const dest_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; const src_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; const len_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; - const dest_ptr = try sema.resolveInst(extra.dest); - const dest_ptr_ty = sema.typeOf(dest_ptr); + const uncasted_dest_ptr = try sema.resolveInst(extra.dest); - try sema.checkPtrOperand(block, dest_src, dest_ptr_ty); - if (dest_ptr_ty.isConstPtr()) { - return sema.fail(block, dest_src, "cannot store through const pointer '{}'", .{dest_ptr_ty.fmt(sema.mod)}); - } + // TODO AstGen's coerced_ty cannot handle volatile here + var dest_ptr_info = Type.initTag(.manyptr_u8).ptrInfo().data; + dest_ptr_info.@"volatile" = sema.typeOf(uncasted_dest_ptr).isVolatilePtr(); + const dest_ptr_ty = try Type.ptr(sema.arena, sema.mod, dest_ptr_info); + const dest_ptr = try sema.coerce(block, dest_ptr_ty, uncasted_dest_ptr, dest_src); const uncasted_src_ptr = try sema.resolveInst(extra.source); - const uncasted_src_ptr_ty = sema.typeOf(uncasted_src_ptr); - try sema.checkPtrOperand(block, src_src, uncasted_src_ptr_ty); - const src_ptr_info = 
uncasted_src_ptr_ty.ptrInfo().data; - const wanted_src_ptr_ty = try Type.ptr(sema.arena, sema.mod, .{ - .pointee_type = dest_ptr_ty.elemType2(), - .@"align" = src_ptr_info.@"align", - .@"addrspace" = src_ptr_info.@"addrspace", - .mutable = false, - .@"allowzero" = src_ptr_info.@"allowzero", - .@"volatile" = src_ptr_info.@"volatile", - .size = .Many, - }); - const src_ptr = try sema.coerce(block, wanted_src_ptr_ty, uncasted_src_ptr, src_src); + var src_ptr_info = Type.initTag(.manyptr_const_u8).ptrInfo().data; + src_ptr_info.@"volatile" = sema.typeOf(uncasted_src_ptr).isVolatilePtr(); + const src_ptr_ty = try Type.ptr(sema.arena, sema.mod, src_ptr_info); + const src_ptr = try sema.coerce(block, src_ptr_ty, uncasted_src_ptr, src_src); const len = try sema.coerce(block, Type.usize, try sema.resolveInst(extra.byte_count), len_src); const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |dest_ptr_val| rs: { @@ -19674,14 +19665,15 @@ fn zirMemset(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void const dest_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; const value_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; const len_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; - const dest_ptr = try sema.resolveInst(extra.dest); - const dest_ptr_ty = sema.typeOf(dest_ptr); - try sema.checkPtrOperand(block, dest_src, dest_ptr_ty); - if (dest_ptr_ty.isConstPtr()) { - return sema.fail(block, dest_src, "cannot store through const pointer '{}'", .{dest_ptr_ty.fmt(sema.mod)}); - } - const elem_ty = dest_ptr_ty.elemType2(); - const value = try sema.coerce(block, elem_ty, try sema.resolveInst(extra.byte), value_src); + const uncasted_dest_ptr = try sema.resolveInst(extra.dest); + + // TODO AstGen's coerced_ty cannot handle volatile here + var ptr_info = Type.initTag(.manyptr_u8).ptrInfo().data; + ptr_info.@"volatile" = sema.typeOf(uncasted_dest_ptr).isVolatilePtr(); + const dest_ptr_ty = try Type.ptr(sema.arena, sema.mod, ptr_info); + const dest_ptr = try sema.coerce(block, dest_ptr_ty, uncasted_dest_ptr, dest_src); + + const value = try sema.coerce(block, Type.u8, try sema.resolveInst(extra.byte), value_src); const len = try sema.coerce(block, Type.usize, try sema.resolveInst(extra.byte_count), len_src); const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |ptr_val| rs: { diff --git a/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig b/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig new file mode 100644 index 0000000000..d3a6b7cc4b --- /dev/null +++ b/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig @@ -0,0 +1,19 @@ +pub export fn entry() void { + var buf: [5]u8 = .{ 1, 2, 3, 4, 5 }; + var slice: []u8 = &buf; + const a: u32 = 1234; + @memcpy(slice, @ptrCast([*]const u8, &a), 4); +} +pub export fn entry1() void { + var buf: [5]u8 = .{ 1, 2, 3, 4, 5 }; + var ptr: *u8 = &buf[0]; + @memcpy(ptr, 0, 4); +} + +// error +// backend=stage2 +// target=native +// +// :5:13: error: expected type '[*]u8', found '[]u8' +// :10:13: error: expected type '[*]u8', found '*u8' +// :10:13: note: a single pointer cannot cast into a many pointer From 37afab2addab5809e1419a09e3be5ea4f3ee5501 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Wed, 7 Sep 2022 19:21:12 +0300 Subject: [PATCH 52/68] Sema: preserve alignment of const decl pointers Closes #12769 --- src/Sema.zig | 1 + test/behavior/pointers.zig | 11 +++++++++++ 2 files changed, 
12 insertions(+) diff --git a/src/Sema.zig b/src/Sema.zig index 417455ae5a..8d25a7c93e 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -26005,6 +26005,7 @@ fn analyzeDeclRef(sema: *Sema, decl_index: Decl.Index) CompileError!Air.Inst.Ref .pointee_type = decl_tv.ty, .mutable = false, .@"addrspace" = decl.@"addrspace", + .@"align" = decl.@"align", }), try Value.Tag.decl_ref.create(sema.arena, decl_index), ); diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig index adbc308742..dcdea1ff80 100644 --- a/test/behavior/pointers.zig +++ b/test/behavior/pointers.zig @@ -486,3 +486,14 @@ test "array slicing to slice" { try S.doTheTest(); comptime try S.doTheTest(); } + +test "pointer to constant decl preserves alignment" { + const S = struct { + a: u8, + b: u8, + const aligned align(8) = @This(){ .a = 3, .b = 4 }; + }; + + const alignment = @typeInfo(@TypeOf(&S.aligned)).Pointer.alignment; + try std.testing.expect(alignment == 8); +} From 99826a2ba89ccd80caaa4eeb47c59a71ddfe76b6 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Wed, 7 Sep 2022 22:05:01 +0300 Subject: [PATCH 53/68] Sema: fix UAF in zirClosureGet Previously if a decl failed its capture scope would be deallocated and set to undefined which would then lead to invalid dereference in `zirClosureGet`. To avoid this set the capture scope to a special failed state and fail the current decl with dependency failure if the failed state is encountered in `zirClosureGet`. Closes #12433 Closes #12530 Closes #12593 --- src/Module.zig | 10 +++++++ src/Sema.zig | 11 +++++++- .../closure_get_depends_on_failed_decl.zig | 26 +++++++++++++++++++ ...et_in_param_ty_instantiate_incorrectly.zig | 24 +++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 test/cases/compile_errors/closure_get_depends_on_failed_decl.zig create mode 100644 test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig diff --git a/src/Module.zig b/src/Module.zig index 3ae6c48edd..ea89225537 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -345,6 +345,15 @@ pub const CaptureScope = struct { /// During sema, this map is backed by the gpa. Once sema completes, /// it is reallocated using the value_arena. captures: std.AutoHashMapUnmanaged(Zir.Inst.Index, TypedValue) = .{}, + + pub fn failed(noalias self: *const @This()) bool { + return self.captures.available == 0 and self.captures.size == std.math.maxInt(u32); + } + + pub fn fail(noalias self: *@This()) void { + self.captures.available = 0; + self.captures.size = std.math.maxInt(u32); + } }; pub const WipCaptureScope = struct { @@ -383,6 +392,7 @@ pub const WipCaptureScope = struct { pub fn deinit(noalias self: *@This()) void { if (!self.finalized) { self.scope.captures.deinit(self.gpa); + self.scope.fail(); } self.* = undefined; } diff --git a/src/Sema.zig b/src/Sema.zig index 8d25a7c93e..15e891ef87 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -5956,7 +5956,6 @@ fn analyzeCall( error.NeededSourceLocation => { _ = sema.inst_map.remove(inst); const decl = sema.mod.declPtr(block.src_decl); - child_block.src_decl = block.src_decl; try sema.analyzeInlineCallArg( block, &child_block, @@ -13740,6 +13739,16 @@ fn zirClosureGet( const tv = while (true) { // Note: We don't need to add a dependency here, because // decls always depend on their lexical parents. + + // Fail this decl if a scope it depended on failed. 
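+        // (failed() detects the sentinel state that WipCaptureScope.deinit now
+        // installs via fail(): available == 0 and size == maxInt(u32).)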
+ if (scope.failed()) { + if (sema.owner_func) |owner_func| { + owner_func.state = .dependency_failure; + } else { + sema.owner_decl.analysis = .dependency_failure; + } + return error.AnalysisFail; + } if (scope.captures.getPtr(inst_data.inst)) |tv| { break tv; } diff --git a/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig b/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig new file mode 100644 index 0000000000..ccdbf67713 --- /dev/null +++ b/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig @@ -0,0 +1,26 @@ +pub inline fn instanceRequestAdapter() void {} + +pub inline fn requestAdapter( + comptime callbackArg: fn () callconv(.Inline) void, +) void { + _ = (struct { + pub fn callback() callconv(.C) void { + callbackArg(); + } + }).callback; + instanceRequestAdapter(undefined); // note wrong number of arguments here +} + +inline fn foo() void {} + +pub export fn entry() void { + requestAdapter(foo); +} + +// error +// backend=stage2 +// target=native +// +// :11:5: error: expected 0 argument(s), found 1 +// :1:12: note: function declared here +// :17:19: note: called from here diff --git a/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig b/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig new file mode 100644 index 0000000000..dc533442fb --- /dev/null +++ b/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig @@ -0,0 +1,24 @@ +fn Observable(comptime T: type) type { + return struct { + fn map(Src: T, Dst: anytype, function: fn (T) Dst) Dst { + _ = Src; + _ = function; + return Observable(Dst); + } + }; +} + +fn u32Tou64(x: u32) u64 { + _ = x; + return 0; +} + +pub export fn entry() void { + Observable(u32).map(u32, u64, u32Tou64(0)); +} + +// error +// backend=stage2 +// target=native +// +// :17:25: error: expected type 'u32', found 'type' From 44b9a1d031cb72cd64909b3a7bd0c65b2e16815b Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 8 Sep 2022 01:00:19 +0300 Subject: [PATCH 54/68] translate-c: use correct name for failDecl Closes #12737 --- src/translate_c.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translate_c.zig b/src/translate_c.zig index e62f5b4568..014f6b1934 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -1167,7 +1167,7 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD } if (!c.zig_is_stage1 and is_packed) { - return failDecl(c, record_loc, bare_name, "cannot translate packed record union", .{}); + return failDecl(c, record_loc, name, "cannot translate packed record union", .{}); } const record_payload = try c.arena.create(ast.Payload.Record); From a7661f115dccf26b141557c923171f325cdc2757 Mon Sep 17 00:00:00 2001 From: Martin Stuurwold Date: Tue, 6 Sep 2022 20:48:31 +0200 Subject: [PATCH 55/68] NativeTargetInfo.zig: fix typo --- lib/std/zig/system/NativeTargetInfo.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 824a1a26b6..3aeab0fdba 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -390,7 +390,7 @@ fn detectAbiAndDynamicLinker( error.FileTooBig, error.Unexpected, => |e| { - std.log.warn("Encoutered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); + std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); return 
defaultAbiAndDynamicLinker(cpu, os, cross_target); }, @@ -447,7 +447,7 @@ fn detectAbiAndDynamicLinker( error.NameTooLong, // Finally, we fall back on the standard path. => |e| { - std.log.warn("Encoutered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); + std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); return defaultAbiAndDynamicLinker(cpu, os, cross_target); }, }; From c7e45aebafef0372fe231816eeffd18198240f14 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 8 Sep 2022 12:51:12 +0300 Subject: [PATCH 56/68] llvm: handle pointers in packed structs in more places Closes #12776 --- src/codegen/llvm.zig | 11 +++++++++- test/behavior.zig | 1 + test/behavior/bugs/12776.zig | 42 ++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 test/behavior/bugs/12776.zig diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 80ffd7a665..043f0bbdc7 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -9204,6 +9204,12 @@ pub const FuncGen = struct { return self.builder.buildBitCast(truncated_int, elem_llvm_ty, ""); } + if (info.pointee_type.isPtrAtRuntime()) { + const same_size_int = self.context.intType(elem_bits); + const truncated_int = self.builder.buildTrunc(shifted_value, same_size_int, ""); + return self.builder.buildIntToPtr(truncated_int, elem_llvm_ty, ""); + } + return self.builder.buildTrunc(shifted_value, elem_llvm_ty, ""); } @@ -9235,7 +9241,10 @@ pub const FuncGen = struct { // Convert to equally-sized integer type in order to perform the bit // operations on the value to store const value_bits_type = self.context.intType(elem_bits); - const value_bits = self.builder.buildBitCast(elem, value_bits_type, ""); + const value_bits = if (elem_ty.isPtrAtRuntime()) + self.builder.buildPtrToInt(elem, value_bits_type, "") + else + self.builder.buildBitCast(elem, value_bits_type, ""); var mask_val = value_bits_type.constAllOnes(); mask_val = mask_val.constZExt(containing_int_ty); diff --git a/test/behavior.zig b/test/behavior.zig index 4b55913af5..db107bcbb1 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -86,6 +86,7 @@ test { _ = @import("behavior/bugs/12430.zig"); _ = @import("behavior/bugs/12486.zig"); _ = @import("behavior/bugs/12680.zig"); + _ = @import("behavior/bugs/12776.zig"); _ = @import("behavior/byteswap.zig"); _ = @import("behavior/byval_arg_var.zig"); _ = @import("behavior/call.zig"); diff --git a/test/behavior/bugs/12776.zig b/test/behavior/bugs/12776.zig new file mode 100644 index 0000000000..e8fe106ac7 --- /dev/null +++ b/test/behavior/bugs/12776.zig @@ -0,0 +1,42 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +const RAM = struct { + data: [0xFFFF + 1]u8, + fn new() !RAM { + return RAM{ .data = [_]u8{0} ** 0x10000 }; + } + fn get(self: *RAM, addr: u16) u8 { + return self.data[addr]; + } +}; + +const CPU = packed struct { + interrupts: bool, + ram: *RAM, + fn new(ram: *RAM) !CPU { + return CPU{ + .ram = ram, + .interrupts = false, + }; + } + fn tick(self: *CPU) !void { + var queued_interrupts = self.ram.get(0xFFFF) & self.ram.get(0xFF0F); + if (self.interrupts and queued_interrupts != 0) { + self.interrupts = false; + } + } +}; + +test { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) 
return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + + var ram = try RAM.new(); + var cpu = try CPU.new(&ram); + try cpu.tick(); + try std.testing.expect(cpu.interrupts == false); +} From 0ae2ea671b867e5ecd0bc779405c175f33316559 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Sep 2022 14:29:51 +0200 Subject: [PATCH 57/68] wasm: temporarily save curr file pointer before pwriting on Win This is a temporary workaround to an unclear platform-dependence behavior we have in libstd for `std.fs.File` abstraction. See https://github.com/ziglang/zig/issues/12783 for more information. --- lib/std/fs/file.zig | 16 ++++++++++++++++ src/link/Wasm.zig | 16 ++++++++++++++-- test/link.zig | 22 +++++++++++----------- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 5de746150b..3792e1c1f2 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -990,6 +990,8 @@ pub const File = struct { return index; } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize { if (is_windows) { return windows.ReadFile(self.handle, buffer, offset, self.intended_io_mode); @@ -1004,6 +1006,8 @@ pub const File = struct { /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it /// means the file reached the end. Reaching the end of a file is not an error condition. + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadAll(self: File, buffer: []u8, offset: u64) PReadError!usize { var index: usize = 0; while (index != buffer.len) { @@ -1058,6 +1062,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) PReadError!usize { if (is_windows) { // TODO improve this to use ReadFileScatter @@ -1079,6 +1085,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial reads from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadvAll(self: File, iovecs: []os.iovec, offset: u64) PReadError!usize { if (iovecs.len == 0) return 0; @@ -1122,6 +1130,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize { if (is_windows) { return windows.WriteFile(self.handle, bytes, offset, self.intended_io_mode); @@ -1134,6 +1144,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwriteAll(self: File, bytes: []const u8, offset: u64) PWriteError!void { var index: usize = 0; while (index < bytes.len) { @@ -1179,6 +1191,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. 
+ /// https://github.com/ziglang/zig/issues/12783 pub fn pwritev(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!usize { if (is_windows) { // TODO improve this to use WriteFileScatter @@ -1197,6 +1211,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial writes from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwritevAll(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!void { if (iovecs.len == 0) return; diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 8c73336e9f..f0f50049b8 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -3055,14 +3055,26 @@ fn writeVecSectionHeader(file: fs.File, offset: u64, section: wasm.Section, size buf[0] = @enumToInt(section); leb.writeUnsignedFixed(5, buf[1..6], size); leb.writeUnsignedFixed(5, buf[6..], items); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn writeCustomSectionHeader(file: fs.File, offset: u64, size: u32) !void { var buf: [1 + 5]u8 = undefined; buf[0] = 0; // 0 = 'custom' section leb.writeUnsignedFixed(5, buf[1..6], size); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn emitLinkSection(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *std.AutoArrayHashMap(SymbolLoc, u32)) !void { diff --git a/test/link.zig b/test/link.zig index b68353122c..d1dcbbc292 100644 --- a/test/link.zig +++ b/test/link.zig @@ -28,11 +28,22 @@ pub fn addCases(cases: *tests.StandaloneContext) void { } fn addWasmCases(cases: *tests.StandaloneContext) void { + cases.addBuildFile("test/link/wasm/archive/build.zig", .{ + .build_modes = true, + .requires_stage2 = true, + }); + cases.addBuildFile("test/link/wasm/bss/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); + cases.addBuildFile("test/link/wasm/extern/build.zig", .{ + .build_modes = true, + .requires_stage2 = true, + .use_emulation = true, + }); + cases.addBuildFile("test/link/wasm/segments/build.zig", .{ .build_modes = true, .requires_stage2 = true, @@ -47,17 +58,6 @@ fn addWasmCases(cases: *tests.StandaloneContext) void { .build_modes = true, .requires_stage2 = true, }); - - cases.addBuildFile("test/link/wasm/archive/build.zig", .{ - .build_modes = true, - .requires_stage2 = true, - }); - - cases.addBuildFile("test/link/wasm/extern/build.zig", .{ - .build_modes = true, - .requires_stage2 = true, - .use_emulation = true, - }); } fn addMachOCases(cases: *tests.StandaloneContext) void { From 8378cde74369ddb1cc618d444970e963a4ab1110 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 9 Sep 2022 00:01:20 +0200 Subject: [PATCH 58/68] macho: prefill any space between __DATA and __LINKEDIT with 0s if required If there are zerofill sections, the loader may copy the contents of the physical space in file directly into memory and attach that to the zerofill section. 
This is a performance optimisation in the loader, but it requires us, the
linker, to properly zero out any space between the __DATA and __LINKEDIT
segments in the file. This is of course completely skipped if there are no
zerofill sections present.
---
 src/link/MachO.zig | 28 ++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index ef180ab032..1ab0202b44 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1157,6 +1157,28 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node)
     var ncmds: u32 = 0;

     try self.writeLinkeditSegmentData(&ncmds, lc_writer);
+
+    // If the last section of __DATA segment is zerofill section, we need to ensure
+    // that the free space between the end of the last non-zerofill section of __DATA
+    // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will
+    // copy-paste this space into memory for quicker zerofill operation.
+    if (self.data_segment_cmd_index) |data_seg_id| blk: {
+        var physical_zerofill_start: u64 = 0;
+        const section_indexes = self.getSectionIndexes(data_seg_id);
+        for (self.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| {
+            if (header.isZerofill() and header.size > 0) break;
+            physical_zerofill_start = header.offset + header.size;
+        } else break :blk;
+        const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?];
+        const physical_zerofill_size = linkedit.fileoff - physical_zerofill_start;
+        if (physical_zerofill_size > 0) {
+            var padding = try self.base.allocator.alloc(u8, physical_zerofill_size);
+            defer self.base.allocator.free(padding);
+            mem.set(u8, padding, 0);
+            try self.base.file.?.pwriteAll(padding, physical_zerofill_start);
+        }
+    }
+
     try writeDylinkerLC(&ncmds, lc_writer);
     try self.writeMainLC(&ncmds, lc_writer);
     try self.writeDylibIdLC(&ncmds, lc_writer);
@@ -5690,8 +5712,10 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void {
         else => unreachable,
     }

-    if (self.getSectionByName("__DATA", "__thread_vars")) |_| {
-        header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+    if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| {
+        if (self.sections.items(.header)[sect_id].size > 0) {
+            header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+        }
     }

     header.ncmds = ncmds;

From a833bdcd7e6fcfee6e9cc33a3f7de78b16a36941 Mon Sep 17 00:00:00 2001
From: Motiejus Jakštys
Date: Mon, 9 May 2022 09:44:14 +0300
Subject: [PATCH 59/68] [ld] add --print-* for diagnostics

This adds the following flags for passthrough to lld:

- `--print-gc-sections`
- `--print-icf-sections`
- `--print-map`

I am not adding these to the cache manifest, since they do not change the
produced artifacts.

Tested with an example from #11398: it successfully prints the resulting
map and the GC'd sections.
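
As a rough usage illustration (a hypothetical invocation, not taken from
this patch; the source file name is made up, but --gc-sections and the
three new flags are the real CLI options wired up below):

    zig build-exe main.zig --gc-sections --print-gc-sections --print-map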
--- src/Compilation.zig | 6 ++++++ src/link.zig | 3 +++ src/link/Elf.zig | 12 ++++++++++++ src/main.zig | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/src/Compilation.zig b/src/Compilation.zig index c1321e40cf..597f5cffff 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -878,6 +878,9 @@ pub const InitOptions = struct { linker_shared_memory: bool = false, linker_global_base: ?u64 = null, linker_export_symbol_names: []const []const u8 = &.{}, + linker_print_gc_sections: bool = false, + linker_print_icf_sections: bool = false, + linker_print_map: bool = false, each_lib_rpath: ?bool = null, build_id: ?bool = null, disable_c_depfile: bool = false, @@ -1727,6 +1730,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .shared_memory = options.linker_shared_memory, .global_base = options.linker_global_base, .export_symbol_names = options.linker_export_symbol_names, + .print_gc_sections = options.linker_print_gc_sections, + .print_icf_sections = options.linker_print_icf_sections, + .print_map = options.linker_print_map, .z_nodelete = options.linker_z_nodelete, .z_notext = options.linker_z_notext, .z_defs = options.linker_z_defs, diff --git a/src/link.zig b/src/link.zig index a8845a0d57..7439f8d59e 100644 --- a/src/link.zig +++ b/src/link.zig @@ -166,6 +166,9 @@ pub const Options = struct { version_script: ?[]const u8, soname: ?[]const u8, llvm_cpu_features: ?[*:0]const u8, + print_gc_sections: bool, + print_icf_sections: bool, + print_map: bool, objects: []Compilation.LinkObject, framework_dirs: []const []const u8, diff --git a/src/link/Elf.zig b/src/link/Elf.zig index e63fa07187..1d49198937 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1482,6 +1482,18 @@ fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !v try argv.append("--gc-sections"); } + if (self.base.options.print_gc_sections) { + try argv.append("--print-gc-sections"); + } + + if (self.base.options.print_icf_sections) { + try argv.append("--print-icf-sections"); + } + + if (self.base.options.print_map) { + try argv.append("--print-map"); + } + if (self.base.options.eh_frame_hdr) { try argv.append("--eh-frame-hdr"); } diff --git a/src/main.zig b/src/main.zig index 6263a6a402..039dacc877 100644 --- a/src/main.zig +++ b/src/main.zig @@ -691,6 +691,9 @@ fn buildOutputType( var linker_max_memory: ?u64 = null; var linker_shared_memory: bool = false; var linker_global_base: ?u64 = null; + var linker_print_gc_sections: bool = false; + var linker_print_icf_sections: bool = false; + var linker_print_map: bool = false; var linker_z_nodelete = false; var linker_z_notext = false; var linker_z_defs = false; @@ -1816,6 +1819,12 @@ fn buildOutputType( linker_gc_sections = true; } else if (mem.eql(u8, arg, "--no-gc-sections")) { linker_gc_sections = false; + } else if (mem.eql(u8, arg, "--print-gc-sections")) { + linker_print_gc_sections = true; + } else if (mem.eql(u8, arg, "--print-icf-sections")) { + linker_print_icf_sections = true; + } else if (mem.eql(u8, arg, "--print-map")) { + linker_print_map = true; } else if (mem.eql(u8, arg, "--allow-shlib-undefined") or mem.eql(u8, arg, "-allow-shlib-undefined")) { @@ -2911,6 +2920,9 @@ fn buildOutputType( .linker_initial_memory = linker_initial_memory, .linker_max_memory = linker_max_memory, .linker_shared_memory = linker_shared_memory, + .linker_print_gc_sections = linker_print_gc_sections, + .linker_print_icf_sections = linker_print_icf_sections, + .linker_print_map = linker_print_map, .linker_global_base = 
linker_global_base, .linker_export_symbol_names = linker_export_symbol_names.items, .linker_z_nodelete = linker_z_nodelete, From fa940bafa2720f49ee249eda1ee4cf26a247172a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 8 Sep 2022 18:02:38 -0700 Subject: [PATCH 60/68] std.zig.system.NativeTargetInfo: improve glibc version detection Previously, this code would fail to detect glibc version because it relied on libc.so.6 being a symlink which revealed the answer. On modern distros, this is no longer the case. This new strategy finds the path to libc.so.6 from /usr/bin/env, then inspects the .dynstr section of libc.so.6, looking for symbols that start with "GLIBC_2.". It then parses those as semantic versions and takes the maximum value as the system-native glibc version. closes #6469 see #11137 closes #12567 --- lib/std/zig/system/NativeTargetInfo.zig | 228 ++++++++++++++++++++---- 1 file changed, 192 insertions(+), 36 deletions(-) diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 3aeab0fdba..67092e17a9 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -28,6 +28,7 @@ pub const DetectError = error{ SystemFdQuotaExceeded, DeviceBusy, OSVersionDetectionFail, + Unexpected, }; /// Given a `CrossTarget`, which specifies in detail which parts of the target should be detected @@ -332,9 +333,7 @@ fn detectAbiAndDynamicLinker( { for (lib_paths) |lib_path| { if (std.mem.endsWith(u8, lib_path, glibc_so_basename)) { - os_adjusted.version_range.linux.glibc = glibcVerFromSO(lib_path) catch |err| switch (err) { - error.UnrecognizedGnuLibCFileName => continue, - error.InvalidGnuLibCVersion => continue, + os_adjusted.version_range.linux.glibc = glibcVerFromSo(lib_path) catch |err| switch (err) { error.GnuLibCVersionUnavailable => continue, else => |e| return e, }; @@ -369,7 +368,7 @@ fn detectAbiAndDynamicLinker( // #! 
(2) + 255 (max length of shebang line since Linux 5.1) + \n (1) var buffer: [258]u8 = undefined; while (true) { - const file = std.fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) { + const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) { error.NoSpaceLeft => unreachable, error.NameTooLong => unreachable, error.PathAlreadyExists => unreachable, @@ -396,6 +395,7 @@ fn detectAbiAndDynamicLinker( else => |e| return e, }; + errdefer file.close(); const line = file.reader().readUntilDelimiter(&buffer, '\n') catch |err| switch (err) { error.IsDir => unreachable, // Handled before @@ -413,15 +413,12 @@ fn detectAbiAndDynamicLinker( error.NotOpenForReading, => break :blk file, - else => |e| { - file.close(); - return e; - }, + else => |e| return e, }; if (!mem.startsWith(u8, line, "#!")) break :blk file; var it = std.mem.tokenize(u8, line[2..], " "); - file.close(); file_name = it.next() orelse return defaultAbiAndDynamicLinker(cpu, os, cross_target); + file.close(); } }; defer elf_file.close(); @@ -455,23 +452,158 @@ fn detectAbiAndDynamicLinker( const glibc_so_basename = "libc.so.6"; -fn glibcVerFromSO(so_path: [:0]const u8) !std.builtin.Version { - var link_buf: [std.os.PATH_MAX]u8 = undefined; - const link_name = std.os.readlinkZ(so_path.ptr, &link_buf) catch |err| switch (err) { - error.AccessDenied => return error.GnuLibCVersionUnavailable, - error.FileSystem => return error.FileSystem, - error.SymLinkLoop => return error.SymLinkLoop, +fn glibcVerFromSo(so_path: [:0]const u8) !std.builtin.Version { + const file = fs.openFileAbsolute(so_path, .{}) catch |err| switch (err) { + // Contextually impossible errors. + error.NoSpaceLeft => unreachable, error.NameTooLong => unreachable, - error.NotLink => return error.GnuLibCVersionUnavailable, - error.FileNotFound => return error.GnuLibCVersionUnavailable, + error.PathAlreadyExists => unreachable, + error.SharingViolation => unreachable, + error.InvalidUtf8 => unreachable, + error.BadPathName => unreachable, + error.PipeBusy => unreachable, + error.FileLocksNotSupported => unreachable, + error.WouldBlock => unreachable, + error.FileBusy => unreachable, // opened without write permissions + error.NoDevice => unreachable, // not accessing special device + error.InvalidHandle => unreachable, // should not be in the error set + error.DeviceBusy => unreachable, // read-only + + // Errors that indicate a false negative may occur if we treat this as + // not a libc shared object. + error.ProcessFdQuotaExceeded => return error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded => return error.SystemFdQuotaExceeded, error.SystemResources => return error.SystemResources, + error.Unexpected => return error.Unexpected, + + // Errors that indicate this file is not a libc shared object. 
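+        // They all collapse into error.GnuLibCVersionUnavailable, which the
+        // caller treats as "keep looking" rather than as a hard failure.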
+ error.SymLinkLoop => return error.GnuLibCVersionUnavailable, + error.IsDir => return error.GnuLibCVersionUnavailable, + error.AccessDenied => return error.GnuLibCVersionUnavailable, + error.FileNotFound => return error.GnuLibCVersionUnavailable, + error.FileTooBig => return error.GnuLibCVersionUnavailable, error.NotDir => return error.GnuLibCVersionUnavailable, - error.Unexpected => return error.GnuLibCVersionUnavailable, - error.InvalidUtf8 => unreachable, // Windows only - error.BadPathName => unreachable, // Windows only - error.UnsupportedReparsePointType => unreachable, // Windows only }; - return glibcVerFromLinkName(link_name, "libc-"); + defer file.close(); + + return glibcVerFromSoFile(file) catch |err| switch (err) { + error.InvalidElfMagic => return error.GnuLibCVersionUnavailable, + error.InvalidElfEndian => return error.GnuLibCVersionUnavailable, + error.InvalidElfClass => return error.GnuLibCVersionUnavailable, + error.InvalidElfFile => return error.GnuLibCVersionUnavailable, + error.InvalidElfVersion => return error.GnuLibCVersionUnavailable, + error.InvalidGnuLibCVersion => return error.GnuLibCVersionUnavailable, + error.UnexpectedEndOfFile => return error.GnuLibCVersionUnavailable, + error.UnableToReadElfFile => return error.GnuLibCVersionUnavailable, + + error.SystemResources => return error.SystemResources, + error.FileSystem => return error.FileSystem, + error.Unexpected => return error.Unexpected, + }; +} + +fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { + var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 align(@alignOf(elf.Elf64_Ehdr)) = undefined; + _ = try preadMin(file, &hdr_buf, 0, hdr_buf.len); + const hdr32 = @ptrCast(*elf.Elf32_Ehdr, &hdr_buf); + const hdr64 = @ptrCast(*elf.Elf64_Ehdr, &hdr_buf); + if (!mem.eql(u8, hdr32.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + const elf_endian: std.builtin.Endian = switch (hdr32.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .Little, + elf.ELFDATA2MSB => .Big, + else => return error.InvalidElfEndian, + }; + const need_bswap = elf_endian != native_endian; + if (hdr32.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const is_64 = switch (hdr32.e_ident[elf.EI_CLASS]) { + elf.ELFCLASS32 => false, + elf.ELFCLASS64 => true, + else => return error.InvalidElfClass, + }; + const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx); + var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff); + const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize); + const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx); + var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined; + if (sh_buf.len < shentsize) return error.InvalidElfFile; + + _ = try preadMin(file, &sh_buf, str_section_off, shentsize); + const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf)); + const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf)); + const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset); + const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size); + var strtab_buf: [4096:0]u8 = undefined; + const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len); + const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len); + const shstrtab = strtab_buf[0..shstrtab_read_len]; + const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum); + var sh_i: u16 = 0; + const dynstr: 
struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) { + // Reserve some bytes so that we can deref the 64-bit struct fields + // even when the ELF file is 32-bits. + const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr); + const sh_read_byte_len = try preadMin( + file, + sh_buf[0 .. sh_buf.len - sh_reserve], + shoff, + shentsize, + ); + var sh_buf_i: usize = 0; + while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({ + sh_i += 1; + shoff += shentsize; + sh_buf_i += shentsize; + }) { + const sh32 = @ptrCast( + *elf.Elf32_Shdr, + @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]), + ); + const sh64 = @ptrCast( + *elf.Elf64_Shdr, + @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]), + ); + const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name); + // TODO this pointer cast should not be necessary + const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0); + if (mem.eql(u8, sh_name, ".dynstr")) { + break :find_dyn_str .{ + .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset), + .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size), + }; + } + } + } else return error.InvalidGnuLibCVersion; + + // Here we loop over all the strings in the dynstr string table, assuming that any + // strings that start with "GLIBC_2." indicate the existence of such a glibc version, + // and furthermore, that the system-installed glibc is at minimum that version. + + // Empirically, glibc 2.34 libc.so .dynstr section is 32441 bytes on my system. + // Here I use this value plus some headroom. This makes it only need + // a single read syscall here. + var buf: [40000]u8 = undefined; + if (buf.len < dynstr.size) return error.InvalidGnuLibCVersion; + + const dynstr_bytes = buf[0..dynstr.size]; + _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr.size); + var it = mem.split(u8, dynstr_bytes, &.{0}); + var max_ver: std.builtin.Version = .{ .major = 2, .minor = 2, .patch = 5 }; + while (it.next()) |s| { + if (mem.startsWith(u8, s, "GLIBC_2.")) { + const chopped = s["GLIBC_".len..]; + const ver = std.builtin.Version.parse(chopped) catch |err| switch (err) { + error.Overflow => return error.InvalidGnuLibCVersion, + error.InvalidCharacter => return error.InvalidGnuLibCVersion, + error.InvalidVersion => return error.InvalidGnuLibCVersion, + }; + switch (ver.order(max_ver)) { + .gt => max_ver = ver, + .lt, .eq => continue, + } + } + } + return max_ver; } fn glibcVerFromLinkName(link_name: []const u8, prefix: []const u8) !std.builtin.Version { @@ -735,36 +867,60 @@ pub fn abiAndDynamicLinkerFromFile( }; defer dir.close(); - var link_buf: [std.os.PATH_MAX]u8 = undefined; - const link_name = std.os.readlinkatZ( - dir.fd, - glibc_so_basename, - &link_buf, - ) catch |err| switch (err) { + // Now we have a candidate for the path to libc shared object. In + // the past, we used readlink() here because the link name would + // reveal the glibc version. However, in more recent GNU/Linux + // installations, there is no symlink. Thus we instead use a more + // robust check of opening the libc shared object and looking at the + // .dynstr section, and finding the max version number of symbols + // that start with "GLIBC_2.". 
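+            // For instance, on x86_64 a glibc 2.34 libc.so.6 carries
+            // "GLIBC_2.2.5" through "GLIBC_2.34" entries in .dynstr,
+            // and the largest of those is taken as the installed version.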
+ var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) { error.NameTooLong => unreachable, error.InvalidUtf8 => unreachable, // Windows only error.BadPathName => unreachable, // Windows only - error.UnsupportedReparsePointType => unreachable, // Windows only + error.PipeBusy => unreachable, // Windows-only + error.SharingViolation => unreachable, // Windows-only + error.FileLocksNotSupported => unreachable, // No lock requested. + error.NoSpaceLeft => unreachable, // read-only + error.PathAlreadyExists => unreachable, // read-only + error.DeviceBusy => unreachable, // read-only + error.FileBusy => unreachable, // read-only + error.InvalidHandle => unreachable, // should not be in the error set + error.WouldBlock => unreachable, // not using O_NONBLOCK + error.NoDevice => unreachable, // not asking for a special device error.AccessDenied, error.FileNotFound, - error.NotLink, error.NotDir, => continue, + error.IsDir => return error.InvalidElfFile, + error.FileTooBig => return error.Unexpected, + + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, error.SystemResources, - error.FileSystem, error.SymLinkLoop, error.Unexpected, => |e| return e, }; - result.target.os.version_range.linux.glibc = glibcVerFromLinkName( - link_name, - "libc-", - ) catch |err| switch (err) { - error.UnrecognizedGnuLibCFileName, + defer f.close(); + + result.target.os.version_range.linux.glibc = glibcVerFromSoFile(f) catch |err| switch (err) { + error.InvalidElfMagic, + error.InvalidElfEndian, + error.InvalidElfClass, + error.InvalidElfFile, + error.InvalidElfVersion, error.InvalidGnuLibCVersion, + error.UnexpectedEndOfFile, => continue, + + error.SystemResources, + error.UnableToReadElfFile, + error.Unexpected, + error.FileSystem, + => |e| return e, }; break; } From 3ee01c14ee7ba42b484f15daeacb67da90a81c9e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 8 Sep 2022 18:27:26 -0700 Subject: [PATCH 61/68] std.zig.system.NativeTargetInfo: detection ignores self exe Before, native glibc and dynamic linker detection attempted to use the executable's own binary if it was dynamically linked to answer both the C ABI question and the dynamic linker question. However, this could be problematic on a system that uses a RUNPATH for the compiler binary, locking it to an older glibc version, while system binaries such as /usr/bin/env use a newer glibc version. The problem is that libc.so.6 glibc version will match that of the system while the dynamic linker will match that of the compiler binary. Executables with these versions mismatching will fail to run. Therefore, this commit changes the logic to be the same regardless of whether the compiler binary is dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the answer to these questions, or if there is a shebang line, then it chases the referenced file recursively. If that does not provide the answer, then the function falls back to defaults. This commit also solves a TODO to remove an Allocator parameter to the detect() function. 
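
A minimal sketch of the resulting call-site change (mirroring the callers
updated below; the only API change is the dropped allocator parameter):

    const std = @import("std");

    // Before: try std.zig.system.NativeTargetInfo.detect(allocator, .{});
    const host = try std.zig.system.NativeTargetInfo.detect(.{});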
--- doc/docgen.zig | 3 +- lib/std/build.zig | 4 +- lib/std/build/EmulatableRunStep.zig | 2 +- lib/std/zig/system/NativeTargetInfo.zig | 134 +++--------------------- src/main.zig | 17 ++- src/test.zig | 4 +- 6 files changed, 31 insertions(+), 133 deletions(-) diff --git a/doc/docgen.zig b/doc/docgen.zig index 0f0e212e3c..50000da44c 100644 --- a/doc/docgen.zig +++ b/doc/docgen.zig @@ -1210,7 +1210,7 @@ fn genHtml( var env_map = try process.getEnvMap(allocator); try env_map.put("ZIG_DEBUG_COLOR", "1"); - const host = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); + const host = try std.zig.system.NativeTargetInfo.detect(.{}); const builtin_code = try getBuiltinCode(allocator, &env_map, zig_exe); for (toc.nodes) |node| { @@ -1474,7 +1474,6 @@ fn genHtml( .arch_os_abi = triple, }); const target_info = try std.zig.system.NativeTargetInfo.detect( - allocator, cross_target, ); switch (host.getExternalExecutor(target_info, .{ diff --git a/lib/std/build.zig b/lib/std/build.zig index 4c05586159..f11dba717d 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -171,7 +171,7 @@ pub const Builder = struct { const env_map = try allocator.create(EnvMap); env_map.* = try process.getEnvMap(allocator); - const host = try NativeTargetInfo.detect(allocator, .{}); + const host = try NativeTargetInfo.detect(.{}); const self = try allocator.create(Builder); self.* = Builder{ @@ -1798,7 +1798,7 @@ pub const LibExeObjStep = struct { } fn computeOutFileNames(self: *LibExeObjStep) void { - self.target_info = NativeTargetInfo.detect(self.builder.allocator, self.target) catch + self.target_info = NativeTargetInfo.detect(self.target) catch unreachable; const target = self.target_info.target; diff --git a/lib/std/build/EmulatableRunStep.zig b/lib/std/build/EmulatableRunStep.zig index 0479d3a2f0..23bdf5e595 100644 --- a/lib/std/build/EmulatableRunStep.zig +++ b/lib/std/build/EmulatableRunStep.zig @@ -158,7 +158,7 @@ fn warnAboutForeignBinaries(step: *EmulatableRunStep) void { const host_name = builder.host.target.zigTriple(builder.allocator) catch unreachable; const foreign_name = artifact.target.zigTriple(builder.allocator) catch unreachable; - const target_info = std.zig.system.NativeTargetInfo.detect(builder.allocator, artifact.target) catch unreachable; + const target_info = std.zig.system.NativeTargetInfo.detect(artifact.target) catch unreachable; const need_cross_glibc = artifact.target.isGnuLibC() and artifact.is_linking_libc; switch (builder.host.getExternalExecutor(target_info, .{ .qemu_fixes_dl = need_cross_glibc and builder.glibc_runtimes_dir != null, diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 67092e17a9..5ed4a02f74 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -37,8 +37,7 @@ pub const DetectError = error{ /// relative to that. /// Any resources this function allocates are released before returning, and so there is no /// deinitialization method. -/// TODO Remove the Allocator requirement from this function. 
-pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!NativeTargetInfo { +pub fn detect(cross_target: CrossTarget) DetectError!NativeTargetInfo { var os = cross_target.getOsTag().defaultVersionRange(cross_target.getCpuArch()); if (cross_target.os_tag == null) { switch (builtin.target.os.tag) { @@ -199,7 +198,7 @@ pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!Nativ } orelse backup_cpu_detection: { break :backup_cpu_detection Target.Cpu.baseline(cpu_arch); }; - var result = try detectAbiAndDynamicLinker(allocator, cpu, os, cross_target); + var result = try detectAbiAndDynamicLinker(cpu, os, cross_target); // For x86, we need to populate some CPU feature flags depending on architecture // and mode: // * 16bit_mode => if the abi is code16 @@ -236,13 +235,20 @@ pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!Nativ return result; } -/// First we attempt to use the executable's own binary. If it is dynamically -/// linked, then it should answer both the C ABI question and the dynamic linker question. -/// If it is statically linked, then we try /usr/bin/env (or the file it references in shebang). If that does not provide the answer, then -/// we fall back to the defaults. -/// TODO Remove the Allocator requirement from this function. +/// In the past, this function attempted to use the executable's own binary if it was dynamically +/// linked to answer both the C ABI question and the dynamic linker question. However, this +/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking +/// it to an older glibc version, while system binaries such as /usr/bin/env use a newer glibc +/// version. The problem is that libc.so.6 glibc version will match that of the system while +/// the dynamic linker will match that of the compiler binary. Executables with these versions +/// mismatching will fail to run. +/// +/// Therefore, this function works the same regardless of whether the compiler binary is +/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the +/// answer to these questions, or if there is a shebang line, then it chases the referenced +/// file recursively. If that does not provide the answer, then the function falls back to +/// defaults. fn detectAbiAndDynamicLinker( - allocator: Allocator, cpu: Target.Cpu, os: Target.Os, cross_target: CrossTarget, @@ -280,8 +286,8 @@ fn detectAbiAndDynamicLinker( const ofmt = cross_target.ofmt orelse Target.ObjectFormat.default(os.tag, cpu.arch); for (all_abis) |abi| { - // This may be a nonsensical parameter. We detect this with error.UnknownDynamicLinkerPath and - // skip adding it to `ld_info_list`. + // This may be a nonsensical parameter. We detect this with + // error.UnknownDynamicLinkerPath and skip adding it to `ld_info_list`. const target: Target = .{ .cpu = cpu, .os = os, @@ -301,62 +307,6 @@ fn detectAbiAndDynamicLinker( // Best case scenario: the executable is dynamically linked, and we can iterate // over our own shared objects and find a dynamic linker. - self_exe: { - const lib_paths = try std.process.getSelfExeSharedLibPaths(allocator); - defer { - for (lib_paths) |lib_path| { - allocator.free(lib_path); - } - allocator.free(lib_paths); - } - - var found_ld_info: LdInfo = undefined; - var found_ld_path: [:0]const u8 = undefined; - - // Look for dynamic linker. - // This is O(N^M) but typical case here is N=2 and M=10. 
- find_ld: for (lib_paths) |lib_path| { - for (ld_info_list) |ld_info| { - const standard_ld_basename = fs.path.basename(ld_info.ld.get().?); - if (std.mem.endsWith(u8, lib_path, standard_ld_basename)) { - found_ld_info = ld_info; - found_ld_path = lib_path; - break :find_ld; - } - } - } else break :self_exe; - - // Look for glibc version. - var os_adjusted = os; - if (builtin.target.os.tag == .linux and found_ld_info.abi.isGnu() and - cross_target.glibc_version == null) - { - for (lib_paths) |lib_path| { - if (std.mem.endsWith(u8, lib_path, glibc_so_basename)) { - os_adjusted.version_range.linux.glibc = glibcVerFromSo(lib_path) catch |err| switch (err) { - error.GnuLibCVersionUnavailable => continue, - else => |e| return e, - }; - break; - } - } - } - - var result: NativeTargetInfo = .{ - .target = .{ - .cpu = cpu, - .os = os_adjusted, - .abi = cross_target.abi orelse found_ld_info.abi, - .ofmt = cross_target.ofmt orelse Target.ObjectFormat.default(os_adjusted.tag, cpu.arch), - }, - .dynamic_linker = if (cross_target.dynamic_linker.get() == null) - DynamicLinker.init(found_ld_path) - else - cross_target.dynamic_linker, - }; - return result; - } - const elf_file = blk: { // This block looks for a shebang line in /usr/bin/env, // if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead, @@ -452,56 +402,6 @@ fn detectAbiAndDynamicLinker( const glibc_so_basename = "libc.so.6"; -fn glibcVerFromSo(so_path: [:0]const u8) !std.builtin.Version { - const file = fs.openFileAbsolute(so_path, .{}) catch |err| switch (err) { - // Contextually impossible errors. - error.NoSpaceLeft => unreachable, - error.NameTooLong => unreachable, - error.PathAlreadyExists => unreachable, - error.SharingViolation => unreachable, - error.InvalidUtf8 => unreachable, - error.BadPathName => unreachable, - error.PipeBusy => unreachable, - error.FileLocksNotSupported => unreachable, - error.WouldBlock => unreachable, - error.FileBusy => unreachable, // opened without write permissions - error.NoDevice => unreachable, // not accessing special device - error.InvalidHandle => unreachable, // should not be in the error set - error.DeviceBusy => unreachable, // read-only - - // Errors that indicate a false negative may occur if we treat this as - // not a libc shared object. - error.ProcessFdQuotaExceeded => return error.ProcessFdQuotaExceeded, - error.SystemFdQuotaExceeded => return error.SystemFdQuotaExceeded, - error.SystemResources => return error.SystemResources, - error.Unexpected => return error.Unexpected, - - // Errors that indicate this file is not a libc shared object. 
- error.SymLinkLoop => return error.GnuLibCVersionUnavailable, - error.IsDir => return error.GnuLibCVersionUnavailable, - error.AccessDenied => return error.GnuLibCVersionUnavailable, - error.FileNotFound => return error.GnuLibCVersionUnavailable, - error.FileTooBig => return error.GnuLibCVersionUnavailable, - error.NotDir => return error.GnuLibCVersionUnavailable, - }; - defer file.close(); - - return glibcVerFromSoFile(file) catch |err| switch (err) { - error.InvalidElfMagic => return error.GnuLibCVersionUnavailable, - error.InvalidElfEndian => return error.GnuLibCVersionUnavailable, - error.InvalidElfClass => return error.GnuLibCVersionUnavailable, - error.InvalidElfFile => return error.GnuLibCVersionUnavailable, - error.InvalidElfVersion => return error.GnuLibCVersionUnavailable, - error.InvalidGnuLibCVersion => return error.GnuLibCVersionUnavailable, - error.UnexpectedEndOfFile => return error.GnuLibCVersionUnavailable, - error.UnableToReadElfFile => return error.GnuLibCVersionUnavailable, - - error.SystemResources => return error.SystemResources, - error.FileSystem => return error.FileSystem, - error.Unexpected => return error.Unexpected, - }; -} - fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 align(@alignOf(elf.Elf64_Ehdr)) = undefined; _ = try preadMin(file, &hdr_buf, 0, hdr_buf.len); diff --git a/src/main.zig b/src/main.zig index 039dacc877..aaea682c7b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -268,7 +268,7 @@ pub fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi } else if (mem.eql(u8, cmd, "init-lib")) { return cmdInit(gpa, arena, cmd_args, .Lib); } else if (mem.eql(u8, cmd, "targets")) { - const info = try detectNativeTargetInfo(arena, .{}); + const info = try detectNativeTargetInfo(.{}); const stdout = io.getStdOut().writer(); return @import("print_targets.zig").cmdTargets(arena, cmd_args, stdout, info.target); } else if (mem.eql(u8, cmd, "version")) { @@ -2267,7 +2267,7 @@ fn buildOutputType( } const cross_target = try parseCrossTargetOrReportFatalError(arena, target_parse_options); - const target_info = try detectNativeTargetInfo(gpa, cross_target); + const target_info = try detectNativeTargetInfo(cross_target); if (target_info.target.os.tag != .freestanding) { if (ensure_libc_on_non_freestanding) @@ -3283,7 +3283,7 @@ fn runOrTest( if (std.process.can_execv and arg_mode == .run and !watch) { // execv releases the locks; no need to destroy the Compilation here. 
const err = std.process.execv(gpa, argv.items); - try warnAboutForeignBinaries(gpa, arena, arg_mode, target_info, link_libc); + try warnAboutForeignBinaries(arena, arg_mode, target_info, link_libc); const cmd = try std.mem.join(arena, " ", argv.items); fatal("the following command failed to execve with '{s}':\n{s}", .{ @errorName(err), cmd }); } else if (std.process.can_spawn) { @@ -3300,7 +3300,7 @@ fn runOrTest( } const term = child.spawnAndWait() catch |err| { - try warnAboutForeignBinaries(gpa, arena, arg_mode, target_info, link_libc); + try warnAboutForeignBinaries(arena, arg_mode, target_info, link_libc); const cmd = try std.mem.join(arena, " ", argv.items); fatal("the following command failed with '{s}':\n{s}", .{ @errorName(err), cmd }); }; @@ -3914,7 +3914,7 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi gimmeMoreOfThoseSweetSweetFileDescriptors(); const cross_target: std.zig.CrossTarget = .{}; - const target_info = try detectNativeTargetInfo(gpa, cross_target); + const target_info = try detectNativeTargetInfo(cross_target); const exe_basename = try std.zig.binNameAlloc(arena, .{ .root_name = "build", @@ -4956,8 +4956,8 @@ test "fds" { gimmeMoreOfThoseSweetSweetFileDescriptors(); } -fn detectNativeTargetInfo(gpa: Allocator, cross_target: std.zig.CrossTarget) !std.zig.system.NativeTargetInfo { - return std.zig.system.NativeTargetInfo.detect(gpa, cross_target); +fn detectNativeTargetInfo(cross_target: std.zig.CrossTarget) !std.zig.system.NativeTargetInfo { + return std.zig.system.NativeTargetInfo.detect(cross_target); } /// Indicate that we are now terminating with a successful exit code. @@ -5320,14 +5320,13 @@ fn parseIntSuffix(arg: []const u8, prefix_len: usize) u64 { } fn warnAboutForeignBinaries( - gpa: Allocator, arena: Allocator, arg_mode: ArgMode, target_info: std.zig.system.NativeTargetInfo, link_libc: bool, ) !void { const host_cross_target: std.zig.CrossTarget = .{}; - const host_target_info = try detectNativeTargetInfo(gpa, host_cross_target); + const host_target_info = try detectNativeTargetInfo(host_cross_target); switch (host_target_info.getExternalExecutor(target_info, .{ .link_libc = link_libc })) { .native => return, diff --git a/src/test.zig b/src/test.zig index babded13f9..358b783148 100644 --- a/src/test.zig +++ b/src/test.zig @@ -1211,7 +1211,7 @@ pub const TestContext = struct { } fn run(self: *TestContext) !void { - const host = try std.zig.system.NativeTargetInfo.detect(self.gpa, .{}); + const host = try std.zig.system.NativeTargetInfo.detect(.{}); var progress = std.Progress{}; const root_node = progress.start("compiler", self.cases.items.len); @@ -1300,7 +1300,7 @@ pub const TestContext = struct { global_cache_directory: Compilation.Directory, host: std.zig.system.NativeTargetInfo, ) !void { - const target_info = try std.zig.system.NativeTargetInfo.detect(allocator, case.target); + const target_info = try std.zig.system.NativeTargetInfo.detect(case.target); const target = target_info.target; var arena_allocator = std.heap.ArenaAllocator.init(allocator); From 1b6fa1965a5472d9bc9e3140d052bde0fb949fe1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 8 Sep 2022 19:03:28 -0700 Subject: [PATCH 62/68] stage2: fix building for 32-bit targets --- lib/std/zig/system/NativeTargetInfo.zig | 5 +++-- src/codegen/llvm.zig | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 5ed4a02f74..0129df3020 100644 --- 
a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -485,8 +485,9 @@ fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { var buf: [40000]u8 = undefined; if (buf.len < dynstr.size) return error.InvalidGnuLibCVersion; - const dynstr_bytes = buf[0..dynstr.size]; - _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr.size); + const dynstr_size = @intCast(usize, dynstr.size); + const dynstr_bytes = buf[0..dynstr_size]; + _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr_size); var it = mem.split(u8, dynstr_bytes, &.{0}); var max_ver: std.builtin.Version = .{ .major = 2, .minor = 2, .patch = 5 }; while (it.next()) |s| { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 043f0bbdc7..3603713195 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3870,7 +3870,7 @@ pub const DeclGen = struct { var b: usize = 0; for (parent_ty.structFields().values()[0..field_index]) |field| { if (field.is_comptime or !field.ty.hasRuntimeBitsIgnoreComptime()) continue; - b += field.ty.bitSize(target); + b += @intCast(usize, field.ty.bitSize(target)); } break :b b; }; From c668396941f19a5c015014822d980bbe9f2bc585 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 8 Sep 2022 20:26:58 -0700 Subject: [PATCH 63/68] std.zig.system.NativeTargetInfo: handle missing DT_RUNPATH This commit removes the check that takes advantage of when the dynamic linker is a symlink. Instead, it falls back on the same directory as the dynamic linker as a de facto runpath. Empirically, this gives correct results on Gentoo and NixOS. Unfortunately it is still falling short for Debian, which has libc.so.6 in a different directory as the dynamic linker. --- lib/std/zig/system/NativeTargetInfo.zig | 328 +++++++++++------------- 1 file changed, 150 insertions(+), 178 deletions(-) diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 0129df3020..7d626be994 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -400,7 +400,86 @@ fn detectAbiAndDynamicLinker( }; } -const glibc_so_basename = "libc.so.6"; +fn glibcVerFromRPath(rpath: []const u8) !std.builtin.Version { + var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) { + error.NameTooLong => unreachable, + error.InvalidUtf8 => unreachable, + error.BadPathName => unreachable, + error.DeviceBusy => unreachable, + + error.FileNotFound, + error.NotDir, + error.InvalidHandle, + error.AccessDenied, + error.NoDevice, + => return error.GLibCNotFound, + + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SystemResources, + error.SymLinkLoop, + error.Unexpected, + => |e| return e, + }; + defer dir.close(); + + // Now we have a candidate for the path to libc shared object. In + // the past, we used readlink() here because the link name would + // reveal the glibc version. However, in more recent GNU/Linux + // installations, there is no symlink. Thus we instead use a more + // robust check of opening the libc shared object and looking at the + // .dynstr section, and finding the max version number of symbols + // that start with "GLIBC_2.". 
+ const glibc_so_basename = "libc.so.6"; + var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) { + error.NameTooLong => unreachable, + error.InvalidUtf8 => unreachable, // Windows only + error.BadPathName => unreachable, // Windows only + error.PipeBusy => unreachable, // Windows-only + error.SharingViolation => unreachable, // Windows-only + error.FileLocksNotSupported => unreachable, // No lock requested. + error.NoSpaceLeft => unreachable, // read-only + error.PathAlreadyExists => unreachable, // read-only + error.DeviceBusy => unreachable, // read-only + error.FileBusy => unreachable, // read-only + error.InvalidHandle => unreachable, // should not be in the error set + error.WouldBlock => unreachable, // not using O_NONBLOCK + error.NoDevice => unreachable, // not asking for a special device + + error.AccessDenied, + error.FileNotFound, + error.NotDir, + error.IsDir, + => return error.GLibCNotFound, + + error.FileTooBig => return error.Unexpected, + + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SystemResources, + error.SymLinkLoop, + error.Unexpected, + => |e| return e, + }; + defer f.close(); + + return glibcVerFromSoFile(f) catch |err| switch (err) { + error.InvalidElfMagic, + error.InvalidElfEndian, + error.InvalidElfClass, + error.InvalidElfFile, + error.InvalidElfVersion, + error.InvalidGnuLibCVersion, + error.UnexpectedEndOfFile, + => return error.GLibCNotFound, + + error.SystemResources, + error.UnableToReadElfFile, + error.Unexpected, + error.FileSystem, + => |e| return e, + }; +} fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 align(@alignOf(elf.Elf64_Ehdr)) = undefined; @@ -507,23 +586,6 @@ fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { return max_ver; } -fn glibcVerFromLinkName(link_name: []const u8, prefix: []const u8) !std.builtin.Version { - // example: "libc-2.3.4.so" - // example: "libc-2.27.so" - // example: "ld-2.33.so" - const suffix = ".so"; - if (!mem.startsWith(u8, link_name, prefix) or !mem.endsWith(u8, link_name, suffix)) { - return error.UnrecognizedGnuLibCFileName; - } - // chop off "libc-" and ".so" - const link_name_chopped = link_name[prefix.len .. 
link_name.len - suffix.len];
-    return std.builtin.Version.parse(link_name_chopped) catch |err| switch (err) {
-        error.Overflow => return error.InvalidGnuLibCVersion,
-        error.InvalidCharacter => return error.InvalidGnuLibCVersion,
-        error.InvalidVersion => return error.InvalidGnuLibCVersion,
-    };
-}
-
 pub const AbiAndDynamicLinkerFromFileError = error{
     FileSystem,
     SystemResources,
@@ -674,65 +736,65 @@ pub fn abiAndDynamicLinkerFromFile(
     if (builtin.target.os.tag == .linux and
         result.target.isGnuLibC() and
         cross_target.glibc_version == null)
     {
-        if (rpath_offset) |rpoff| {
-            const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx);
+        const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx);

-            var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff);
-            const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize);
-            const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx);
+        var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff);
+        const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize);
+        const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx);

-            var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined;
-            if (sh_buf.len < shentsize) return error.InvalidElfFile;
+        var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined;
+        if (sh_buf.len < shentsize) return error.InvalidElfFile;

-            _ = try preadMin(file, &sh_buf, str_section_off, shentsize);
-            const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf));
-            const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf));
-            const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset);
-            const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size);
-            var strtab_buf: [4096:0]u8 = undefined;
-            const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len);
-            const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len);
-            const shstrtab = strtab_buf[0..shstrtab_read_len];
+        _ = try preadMin(file, &sh_buf, str_section_off, shentsize);
+        const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf));
+        const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf));
+        const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset);
+        const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size);
+        var strtab_buf: [4096:0]u8 = undefined;
+        const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len);
+        const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len);
+        const shstrtab = strtab_buf[0..shstrtab_read_len];

-            const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum);
-            var sh_i: u16 = 0;
-            const dynstr: ?struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) {
-                // Reserve some bytes so that we can deref the 64-bit struct fields
-                // even when the ELF file is 32-bits.
-                const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr);
-                const sh_read_byte_len = try preadMin(
-                    file,
-                    sh_buf[0 .. sh_buf.len - sh_reserve],
-                    shoff,
-                    shentsize,
+        const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum);
+        var sh_i: u16 = 0;
+        const dynstr: ?struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) {
+            // Reserve some bytes so that we can deref the 64-bit struct fields
+            // even when the ELF file is 32-bits.
+            const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr);
+            const sh_read_byte_len = try preadMin(
+                file,
+                sh_buf[0 .. sh_buf.len - sh_reserve],
+                shoff,
+                shentsize,
+            );
+            var sh_buf_i: usize = 0;
+            while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({
+                sh_i += 1;
+                shoff += shentsize;
+                sh_buf_i += shentsize;
+            }) {
+                const sh32 = @ptrCast(
+                    *elf.Elf32_Shdr,
+                    @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]),
                 );
-                var sh_buf_i: usize = 0;
-                while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({
-                    sh_i += 1;
-                    shoff += shentsize;
-                    sh_buf_i += shentsize;
-                }) {
-                    const sh32 = @ptrCast(
-                        *elf.Elf32_Shdr,
-                        @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]),
-                    );
-                    const sh64 = @ptrCast(
-                        *elf.Elf64_Shdr,
-                        @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]),
-                    );
-                    const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name);
-                    // TODO this pointer cast should not be necessary
-                    const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0);
-                    if (mem.eql(u8, sh_name, ".dynstr")) {
-                        break :find_dyn_str .{
-                            .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset),
-                            .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size),
-                        };
-                    }
+                const sh64 = @ptrCast(
+                    *elf.Elf64_Shdr,
+                    @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]),
+                );
+                const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name);
+                // TODO this pointer cast should not be necessary
+                const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0);
+                if (mem.eql(u8, sh_name, ".dynstr")) {
+                    break :find_dyn_str .{
+                        .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset),
+                        .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size),
+                    };
                 }
-            } else null;
+            }
+        } else null;

-            if (dynstr) |ds| {
+        if (dynstr) |ds| {
+            if (rpath_offset) |rpoff| {
                 // TODO this pointer cast should not be necessary
                 const rpoff_usize = std.math.cast(usize, rpoff) orelse return error.InvalidElfFile;
                 if (rpoff_usize > ds.size) return error.InvalidElfFile;
@@ -746,116 +808,26 @@ pub fn abiAndDynamicLinkerFromFile(
                 const rpath_list = mem.sliceTo(std.meta.assumeSentinel(strtab.ptr, 0), 0);
                 var it = mem.tokenize(u8, rpath_list, ":");
                 while (it.next()) |rpath| {
-                    var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) {
-                        error.NameTooLong => unreachable,
-                        error.InvalidUtf8 => unreachable,
-                        error.BadPathName => unreachable,
-                        error.DeviceBusy => unreachable,
-
-                        error.FileNotFound,
-                        error.NotDir,
-                        error.InvalidHandle,
-                        error.AccessDenied,
-                        error.NoDevice,
-                        => continue,
-
-                        error.ProcessFdQuotaExceeded,
-                        error.SystemFdQuotaExceeded,
-                        error.SystemResources,
-                        error.SymLinkLoop,
-                        error.Unexpected,
-                        => |e| return e,
-                    };
-                    defer dir.close();
-
-                    // Now we have a candidate for the path to libc shared object. In
-                    // the past, we used readlink() here because the link name would
-                    // reveal the glibc version. However, in more recent GNU/Linux
-                    // installations, there is no symlink. Thus we instead use a more
-                    // robust check of opening the libc shared object and looking at the
-                    // .dynstr section, and finding the max version number of symbols
-                    // that start with "GLIBC_2.".
-                    var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) {
-                        error.NameTooLong => unreachable,
-                        error.InvalidUtf8 => unreachable, // Windows only
-                        error.BadPathName => unreachable, // Windows only
-                        error.PipeBusy => unreachable, // Windows-only
-                        error.SharingViolation => unreachable, // Windows-only
-                        error.FileLocksNotSupported => unreachable, // No lock requested.
-                        error.NoSpaceLeft => unreachable, // read-only
-                        error.PathAlreadyExists => unreachable, // read-only
-                        error.DeviceBusy => unreachable, // read-only
-                        error.FileBusy => unreachable, // read-only
-                        error.InvalidHandle => unreachable, // should not be in the error set
-                        error.WouldBlock => unreachable, // not using O_NONBLOCK
-                        error.NoDevice => unreachable, // not asking for a special device
-
-                        error.AccessDenied,
-                        error.FileNotFound,
-                        error.NotDir,
-                        => continue,
-
-                        error.IsDir => return error.InvalidElfFile,
-                        error.FileTooBig => return error.Unexpected,
-
-                        error.ProcessFdQuotaExceeded,
-                        error.SystemFdQuotaExceeded,
-                        error.SystemResources,
-                        error.SymLinkLoop,
-                        error.Unexpected,
-                        => |e| return e,
-                    };
-                    defer f.close();
-
-                    result.target.os.version_range.linux.glibc = glibcVerFromSoFile(f) catch |err| switch (err) {
-                        error.InvalidElfMagic,
-                        error.InvalidElfEndian,
-                        error.InvalidElfClass,
-                        error.InvalidElfFile,
-                        error.InvalidElfVersion,
-                        error.InvalidGnuLibCVersion,
-                        error.UnexpectedEndOfFile,
-                        => continue,
-
-                        error.SystemResources,
-                        error.UnableToReadElfFile,
-                        error.Unexpected,
-                        error.FileSystem,
-                        => |e| return e,
-                    };
-                    break;
+                    if (glibcVerFromRPath(rpath)) |ver| {
+                        result.target.os.version_range.linux.glibc = ver;
+                        break;
+                    } else |err| switch (err) {
+                        error.GLibCNotFound => continue,
+                        else => |e| return e,
+                    }
                 }
+            } else if (result.dynamic_linker.get()) |dl_path| {
+                // There is no DT_RUNPATH so we try to find libc.so.6 inside the same
+                // directory as the dynamic linker.
+                if (fs.path.dirname(dl_path)) |rpath| {
+                    if (glibcVerFromRPath(rpath)) |ver| {
+                        result.target.os.version_range.linux.glibc = ver;
+                    } else |err| switch (err) {
+                        error.GLibCNotFound => {},
+                        else => |e| return e,
+                    }
+                }
             }
         }
-        } else if (result.dynamic_linker.get()) |dl_path| glibc_ver: {
-            // There is no DT_RUNPATH but we can try to see if the information is
-            // present in the symlink data for the dynamic linker path.
-            var link_buf: [std.os.PATH_MAX]u8 = undefined;
-            const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
-                error.NameTooLong => unreachable,
-                error.InvalidUtf8 => unreachable, // Windows only
-                error.BadPathName => unreachable, // Windows only
-                error.UnsupportedReparsePointType => unreachable, // Windows only
-
-                error.AccessDenied,
-                error.FileNotFound,
-                error.NotLink,
-                error.NotDir,
-                => break :glibc_ver,
-
-                error.SystemResources,
-                error.FileSystem,
-                error.SymLinkLoop,
-                error.Unexpected,
-                => |e| return e,
-            };
-            result.target.os.version_range.linux.glibc = glibcVerFromLinkName(
-                fs.path.basename(link_name),
-                "ld-",
-            ) catch |err| switch (err) {
-                error.UnrecognizedGnuLibCFileName,
-                error.InvalidGnuLibCVersion,
-                => break :glibc_ver,
-            };
         }
     }

From 9f40f34501ef086d18e4570416c078a7ad508628 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Thu, 8 Sep 2022 20:49:16 -0700
Subject: [PATCH 64/68] std.zig.system.NativeTargetInfo: restore symlink logic

This is a partial revert of the previous commit, fixing a regression on
Debian. However, the commit additionally improves the
detectAbiAndDynamicLinker function to read more than 1 byte at a time
when detecting a shebang line.
---
 lib/std/zig/system/NativeTargetInfo.zig | 73 +++++++++++++++++++------
 1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig
index 7d626be994..11b5ec4191 100644
--- a/lib/std/zig/system/NativeTargetInfo.zig
+++ b/lib/std/zig/system/NativeTargetInfo.zig
@@ -347,26 +347,17 @@ fn detectAbiAndDynamicLinker(
         };
         errdefer file.close();

-        const line = file.reader().readUntilDelimiter(&buffer, '\n') catch |err| switch (err) {
-            error.IsDir => unreachable, // Handled before
-            error.AccessDenied => unreachable,
-            error.WouldBlock => unreachable, // Did not request blocking mode
-            error.OperationAborted => unreachable, // Windows-only
-            error.BrokenPipe => unreachable,
-            error.ConnectionResetByPeer => unreachable,
-            error.ConnectionTimedOut => unreachable,
-            error.InputOutput => unreachable,
-            error.Unexpected => unreachable,
-
-            error.StreamTooLong,
-            error.EndOfStream,
-            error.NotOpenForReading,
+        const len = preadMin(file, &buffer, 0, buffer.len) catch |err| switch (err) {
+            error.UnexpectedEndOfFile,
+            error.UnableToReadElfFile,
             => break :blk file,

             else => |e| return e,
         };
+        const newline = mem.indexOfScalar(u8, buffer[0..len], '\n') orelse break :blk file;
+        const line = buffer[0..newline];
         if (!mem.startsWith(u8, line, "#!")) break :blk file;
-        var it = std.mem.tokenize(u8, line[2..], " ");
+        var it = mem.tokenize(u8, line[2..], " ");
         file_name = it.next() orelse return defaultAbiAndDynamicLinker(cpu, os, cross_target);
         file.close();
     }
@@ -375,6 +366,8 @@ fn detectAbiAndDynamicLinker(

     // If Zig is statically linked, such as via distributed binary static builds, the above
     // trick (block self_exe) won't work. The next thing we fall back to is the same thing, but for elf_file.
+    // TODO: inline this function and combine the buffer we already read above to find
+    // the possible shebang line with the buffer we use for the ELF header.
     return abiAndDynamicLinkerFromFile(elf_file, cpu, os, ld_info_list, cross_target) catch |err| switch (err) {
         error.FileSystem,
         error.SystemResources,
@@ -586,6 +579,23 @@ fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version {
     return max_ver;
 }

+fn glibcVerFromLinkName(link_name: []const u8, prefix: []const u8) !std.builtin.Version {
+    // example: "libc-2.3.4.so"
+    // example: "libc-2.27.so"
+    // example: "ld-2.33.so"
+    const suffix = ".so";
+    if (!mem.startsWith(u8, link_name, prefix) or !mem.endsWith(u8, link_name, suffix)) {
+        return error.UnrecognizedGnuLibCFileName;
+    }
+    // chop off "libc-" and ".so"
+    const link_name_chopped = link_name[prefix.len .. link_name.len - suffix.len];
+    return std.builtin.Version.parse(link_name_chopped) catch |err| switch (err) {
+        error.Overflow => return error.InvalidGnuLibCVersion,
+        error.InvalidCharacter => return error.InvalidGnuLibCVersion,
+        error.InvalidVersion => return error.InvalidGnuLibCVersion,
+    };
+}
+
 pub const AbiAndDynamicLinkerFromFileError = error{
     FileSystem,
     SystemResources,
@@ -816,17 +826,48 @@ pub fn abiAndDynamicLinkerFromFile(
                         else => |e| return e,
                     }
                 }
-            } else if (result.dynamic_linker.get()) |dl_path| {
+            } else if (result.dynamic_linker.get()) |dl_path| glibc_ver: {
                 // There is no DT_RUNPATH so we try to find libc.so.6 inside the same
                 // directory as the dynamic linker.
                 if (fs.path.dirname(dl_path)) |rpath| {
                     if (glibcVerFromRPath(rpath)) |ver| {
                         result.target.os.version_range.linux.glibc = ver;
+                        break :glibc_ver;
                     } else |err| switch (err) {
                         error.GLibCNotFound => {},
                         else => |e| return e,
                     }
                 }
+
+                // So far, no luck. Next we try to see if the information is
+                // present in the symlink data for the dynamic linker path.
+                var link_buf: [std.os.PATH_MAX]u8 = undefined;
+                const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
+                    error.NameTooLong => unreachable,
+                    error.InvalidUtf8 => unreachable, // Windows only
+                    error.BadPathName => unreachable, // Windows only
+                    error.UnsupportedReparsePointType => unreachable, // Windows only
+
+                    error.AccessDenied,
+                    error.FileNotFound,
+                    error.NotLink,
+                    error.NotDir,
+                    => break :glibc_ver,
+
+                    error.SystemResources,
+                    error.FileSystem,
+                    error.SymLinkLoop,
+                    error.Unexpected,
+                    => |e| return e,
+                };
+                result.target.os.version_range.linux.glibc = glibcVerFromLinkName(
+                    fs.path.basename(link_name),
+                    "ld-",
+                ) catch |err| switch (err) {
+                    error.UnrecognizedGnuLibCFileName,
+                    error.InvalidGnuLibCVersion,
+                    => break :glibc_ver,
+                };
             }
         }
     }

From 5006fb6846ccaa7edb1547588cf1aa08c8decf2b Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Fri, 9 Sep 2022 08:30:27 +0200
Subject: [PATCH 65/68] macho: fix compilation for 32bit targets

---
 src/link/MachO.zig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 1ab0202b44..429bf64eb2 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1170,7 +1170,8 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node)
             physical_zerofill_start = header.offset + header.size;
         } else break :blk;
         const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?];
-        const physical_zerofill_size = linkedit.fileoff - physical_zerofill_start;
+        const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse
+            return error.Overflow;
         if (physical_zerofill_size > 0) {
             var padding = try self.base.allocator.alloc(u8, physical_zerofill_size);
             defer self.base.allocator.free(padding);

From c7d6048081053c2852d4d3af5d549d83473f808c Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 9 Sep 2022 00:03:53 -0700
Subject: [PATCH 66/68] std.zig.system.NativeTargetInfo: add fallback check

After failing to find RUNPATH in the ELF of /usr/bin/env, not finding
the answer in a symlink of the dynamic interpreter, and not finding
libc.so.6 in the same directory as the dynamic interpreter, Zig will
check `/lib/$triple`.

This fixes the incorrect native glibc version detected on Debian
bookworm.
---
 lib/std/zig/system/NativeTargetInfo.zig | 118 +++++++++++++++---------
 1 file changed, 75 insertions(+), 43 deletions(-)

diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig
index 11b5ec4191..a4d29c17b4 100644
--- a/lib/std/zig/system/NativeTargetInfo.zig
+++ b/lib/std/zig/system/NativeTargetInfo.zig
@@ -820,56 +820,88 @@ pub fn abiAndDynamicLinkerFromFile(
                 while (it.next()) |rpath| {
                     if (glibcVerFromRPath(rpath)) |ver| {
                         result.target.os.version_range.linux.glibc = ver;
-                        break;
+                        return result;
                     } else |err| switch (err) {
                         error.GLibCNotFound => continue,
                         else => |e| return e,
                     }
                 }
-            } else if (result.dynamic_linker.get()) |dl_path| glibc_ver: {
-                // There is no DT_RUNPATH so we try to find libc.so.6 inside the same
-                // directory as the dynamic linker.
-                if (fs.path.dirname(dl_path)) |rpath| {
-                    if (glibcVerFromRPath(rpath)) |ver| {
-                        result.target.os.version_range.linux.glibc = ver;
-                        break :glibc_ver;
-                    } else |err| switch (err) {
-                        error.GLibCNotFound => {},
-                        else => |e| return e,
-                    }
-                }
-
-                // So far, no luck. Next we try to see if the information is
-                // present in the symlink data for the dynamic linker path.
-                var link_buf: [std.os.PATH_MAX]u8 = undefined;
-                const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
-                    error.NameTooLong => unreachable,
-                    error.InvalidUtf8 => unreachable, // Windows only
-                    error.BadPathName => unreachable, // Windows only
-                    error.UnsupportedReparsePointType => unreachable, // Windows only
-
-                    error.AccessDenied,
-                    error.FileNotFound,
-                    error.NotLink,
-                    error.NotDir,
-                    => break :glibc_ver,
-
-                    error.SystemResources,
-                    error.FileSystem,
-                    error.SymLinkLoop,
-                    error.Unexpected,
-                    => |e| return e,
-                };
-                result.target.os.version_range.linux.glibc = glibcVerFromLinkName(
-                    fs.path.basename(link_name),
-                    "ld-",
-                ) catch |err| switch (err) {
-                    error.UnrecognizedGnuLibCFileName,
-                    error.InvalidGnuLibCVersion,
-                    => break :glibc_ver,
-                };
             }
         }
+
+        if (result.dynamic_linker.get()) |dl_path| glibc_ver: {
+            // There is no DT_RUNPATH so we try to find libc.so.6 inside the same
+            // directory as the dynamic linker.
+            if (fs.path.dirname(dl_path)) |rpath| {
+                if (glibcVerFromRPath(rpath)) |ver| {
+                    result.target.os.version_range.linux.glibc = ver;
+                    return result;
+                } else |err| switch (err) {
+                    error.GLibCNotFound => {},
+                    else => |e| return e,
+                }
+            }
+
+            // So far, no luck. Next we try to see if the information is
+            // present in the symlink data for the dynamic linker path.
+            var link_buf: [std.os.PATH_MAX]u8 = undefined;
+            const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
+                error.NameTooLong => unreachable,
+                error.InvalidUtf8 => unreachable, // Windows only
+                error.BadPathName => unreachable, // Windows only
+                error.UnsupportedReparsePointType => unreachable, // Windows only
+
+                error.AccessDenied,
+                error.FileNotFound,
+                error.NotLink,
+                error.NotDir,
+                => break :glibc_ver,
+
+                error.SystemResources,
+                error.FileSystem,
+                error.SymLinkLoop,
+                error.Unexpected,
+                => |e| return e,
+            };
+            result.target.os.version_range.linux.glibc = glibcVerFromLinkName(
+                fs.path.basename(link_name),
+                "ld-",
+            ) catch |err| switch (err) {
+                error.UnrecognizedGnuLibCFileName,
+                error.InvalidGnuLibCVersion,
+                => break :glibc_ver,
+            };
+            return result;
+        }
+
+        // Nothing worked so far. Finally we fall back to hard-coded search paths.
+        // Some distros such as Debian keep their libc.so.6 in `/lib/$triple/`.
+        var path_buf: [std.os.PATH_MAX]u8 = undefined;
+        var index: usize = 0;
+        const prefix = "/lib/";
+        const cpu_arch = @tagName(result.target.cpu.arch);
+        const os_tag = @tagName(result.target.os.tag);
+        const abi = @tagName(result.target.abi);
+        mem.copy(u8, path_buf[index..], prefix);
+        index += prefix.len;
+        mem.copy(u8, path_buf[index..], cpu_arch);
+        index += cpu_arch.len;
+        path_buf[index] = '-';
+        index += 1;
+        mem.copy(u8, path_buf[index..], os_tag);
+        index += os_tag.len;
+        path_buf[index] = '-';
+        index += 1;
+        mem.copy(u8, path_buf[index..], abi);
+        index += abi.len;
+        const rpath = path_buf[0..index];
+        if (glibcVerFromRPath(rpath)) |ver| {
+            result.target.os.version_range.linux.glibc = ver;
+            return result;
+        } else |err| switch (err) {
+            error.GLibCNotFound => {},
+            else => |e| return e,
+        }
     }

     return result;

From c9f145a50b11e54b72b2fccd04384bbb856446cf Mon Sep 17 00:00:00 2001
From: Yusuf Bham
Date: Fri, 9 Sep 2022 05:35:32 -0400
Subject: [PATCH 67/68] std.os.uefi: mark BlockIoProtocol and EfiBlockMedia as public

---
 lib/std/os/uefi/protocols/block_io_protocol.zig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/std/os/uefi/protocols/block_io_protocol.zig b/lib/std/os/uefi/protocols/block_io_protocol.zig
index 938eb930da..45b60eb59e 100644
--- a/lib/std/os/uefi/protocols/block_io_protocol.zig
+++ b/lib/std/os/uefi/protocols/block_io_protocol.zig
@@ -2,7 +2,7 @@ const std = @import("std");
 const uefi = std.os.uefi;
 const Status = uefi.Status;

-const EfiBlockMedia = extern struct {
+pub const EfiBlockMedia = extern struct {
     /// The current media ID. If the media changes, this value is changed.
     media_id: u32,

@@ -38,7 +38,7 @@ const EfiBlockMedia = extern struct {
     optimal_transfer_length_granularity: u32,
 };

-const BlockIoProtocol = extern struct {
+pub const BlockIoProtocol = extern struct {
     const Self = @This();

     revision: u64,

From 68e61bbc0c3b896d5d549168ff8b88fe04e2269f Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 9 Sep 2022 09:27:02 -0700
Subject: [PATCH 68/68] std.zig.system.NativeTargetInfo: more headroom for libc.so.6 .dynstr

---
 lib/std/zig/system/NativeTargetInfo.zig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig
index a4d29c17b4..73f76b11b7 100644
--- a/lib/std/zig/system/NativeTargetInfo.zig
+++ b/lib/std/zig/system/NativeTargetInfo.zig
@@ -552,14 +552,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version {
     // and furthermore, that the system-installed glibc is at minimum that version.

     // Empirically, glibc 2.34 libc.so .dynstr section is 32441 bytes on my system.
-    // Here I use this value plus some headroom. This makes it only need
+    // Here I use double this value plus some headroom. This makes it only need
     // a single read syscall here.
-    var buf: [40000]u8 = undefined;
+    var buf: [80000]u8 = undefined;
     if (buf.len < dynstr.size) return error.InvalidGnuLibCVersion;

     const dynstr_size = @intCast(usize, dynstr.size);
     const dynstr_bytes = buf[0..dynstr_size];
-    _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr_size);
+    _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr_bytes.len);
     var it = mem.split(u8, dynstr_bytes, &.{0});
     var max_ver: std.builtin.Version = .{ .major = 2, .minor = 2, .patch = 5 };
     while (it.next()) |s| {