From a3dfe36ca1dac946f507c8b69241a93891bf7da5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 22 Apr 2020 23:42:58 -0400
Subject: [PATCH 01/10] zir-to-elf skeleton

---
 lib/std/fs.zig           |    6 +-
 lib/std/fs/file.zig      |    2 +-
 lib/std/mem.zig          |   16 +-
 src-self-hosted/ir.zig   |   28 +-
 src-self-hosted/link.zig | 1105 +++++++++++++++++++-------------------
 5 files changed, 596 insertions(+), 561 deletions(-)

diff --git a/lib/std/fs.zig b/lib/std/fs.zig
index ef6f75601f..a4a5a3a56a 100644
--- a/lib/std/fs.zig
+++ b/lib/std/fs.zig
@@ -1345,8 +1345,10 @@ pub const Dir = struct {
         mode: File.Mode = File.default_mode,
     };
 
-    /// `dest_path` must remain valid for the lifetime of `AtomicFile`.
-    /// Call `AtomicFile.finish` to atomically replace `dest_path` with contents.
+    /// Directly access the `.file` field, and then call `AtomicFile.finish`
+    /// to atomically replace `dest_path` with contents.
+    /// Always call `AtomicFile.deinit` to clean up, regardless of whether `AtomicFile.finish` succeeded.
+    /// `dest_path` must remain valid until `AtomicFile.deinit` is called.
     pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions) !AtomicFile {
         if (path.dirname(dest_path)) |dirname| {
             const dir = try self.openDir(dirname, .{});
diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig
index 2a6ad875c5..63adfc9648 100644
--- a/lib/std/fs/file.zig
+++ b/lib/std/fs/file.zig
@@ -93,7 +93,7 @@ pub const File = struct {
     /// This means that a process that does not respect the locking API can still get access
     /// to the file, despite the lock.
     ///
-    /// Windows' file locks are mandatory, and any process attempting to access the file will
+    /// Windows's file locks are mandatory, and any process attempting to access the file will
     /// receive an error.
     ///
     /// [1]: https://www.kernel.org/doc/Documentation/filesystems/mandatory-locking.txt
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 1900a36dfe..5966f8bc91 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -2027,7 +2027,13 @@ test "sliceAsBytes and bytesAsSlice back" {
 /// Round an address up to the nearest aligned address
 /// The alignment must be a power of 2 and greater than 0.
 pub fn alignForward(addr: usize, alignment: usize) usize {
-    return alignBackward(addr + (alignment - 1), alignment);
+    return alignForwardGeneric(usize, addr, alignment);
+}
+
+/// Round an address up to the nearest aligned address
+/// The alignment must be a power of 2 and greater than 0.
+pub fn alignForwardGeneric(comptime T: type, addr: T, alignment: T) T {
+    return alignBackwardGeneric(T, addr + (alignment - 1), alignment);
 }
 
 test "alignForward" {
@@ -2048,7 +2054,13 @@ test "alignForward" {
 /// Round an address down to the previous aligned address
 /// The alignment must be a power of 2 and greater than 0.
 pub fn alignBackward(addr: usize, alignment: usize) usize {
-    assert(@popCount(usize, alignment) == 1);
+    return alignBackwardGeneric(usize, addr, alignment);
+}
+
+/// Round an address down to the previous aligned address
+/// The alignment must be a power of 2 and greater than 0.
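+/// For example, with alignment 8: alignBackwardGeneric(u64, 17, 8) returns 16,
+/// while alignForwardGeneric(u64, 17, 8) returns 24.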
+pub fn alignBackwardGeneric(comptime T: type, addr: T, alignment: T) T { + assert(@popCount(T, alignment) == 1); // 000010000 // example addr // 000001111 // subtract 1 // 111110000 // binary not diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index e33d468a26..cea7729642 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -96,6 +96,7 @@ pub const Module = struct { errors: []ErrorMsg, arena: std.heap.ArenaAllocator, fns: []Fn, + target: Target, pub const Export = struct { name: []const u8, @@ -122,9 +123,7 @@ pub const ErrorMsg = struct { msg: []const u8, }; -pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module { - const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); - +pub fn analyze(allocator: *Allocator, old_module: text.Module, target: Target) !Module { var ctx = Analyze{ .allocator = allocator, .arena = std.heap.ArenaAllocator.init(allocator), @@ -133,7 +132,7 @@ pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module { .decl_table = std.AutoHashMap(*text.Inst, Analyze.NewDecl).init(allocator), .exports = std.ArrayList(Module.Export).init(allocator), .fns = std.ArrayList(Module.Fn).init(allocator), - .target = native_info.target, + .target = target, }; defer ctx.errors.deinit(); defer ctx.decl_table.deinit(); @@ -152,6 +151,7 @@ pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module { .errors = ctx.errors.toOwnedSlice(), .fns = ctx.fns.toOwnedSlice(), .arena = ctx.arena, + .target = target, }; } @@ -699,7 +699,9 @@ pub fn main() anyerror!void { std.process.exit(1); } - var analyzed_module = try analyze(allocator, zir_module); + const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); + + var analyzed_module = try analyze(allocator, zir_module, native_info.target); defer analyzed_module.deinit(allocator); if (analyzed_module.errors.len != 0) { @@ -711,12 +713,18 @@ pub fn main() anyerror!void { std.process.exit(1); } - var new_zir_module = try text.emit_zir(allocator, analyzed_module); - defer new_zir_module.deinit(allocator); + const output_zir = false; + if (output_zir) { + var new_zir_module = try text.emit_zir(allocator, analyzed_module); + defer new_zir_module.deinit(allocator); - var bos = std.io.bufferedOutStream(std.io.getStdOut().outStream()); - try new_zir_module.writeToStream(allocator, bos.outStream()); - try bos.flush(); + var bos = std.io.bufferedOutStream(std.io.getStdOut().outStream()); + try new_zir_module.writeToStream(allocator, bos.outStream()); + try bos.flush(); + } + + const link = @import("link.zig"); + try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out"); } fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } { diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 013a6248cc..f358a4b64d 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -1,576 +1,589 @@ const std = @import("std"); const mem = std.mem; -const c = @import("c.zig"); -const Compilation = @import("compilation.zig").Compilation; -const Target = std.Target; -const ObjectFormat = Target.ObjectFormat; -const LibCInstallation = @import("libc_installation.zig").LibCInstallation; const assert = std.debug.assert; -const util = @import("util.zig"); +const Allocator = std.mem.Allocator; +const ir = @import("ir.zig"); +const fs = std.fs; +const elf = std.elf; -const Context = struct { - comp: *Compilation, - arena: std.heap.ArenaAllocator, - args: std.ArrayList([*:0]const 
u8), - link_in_crt: bool, +const executable_mode = 0o755; +const default_entry_addr = 0x8000000; - link_err: error{OutOfMemory}!void, - link_msg: std.ArrayListSentineled(u8, 0), +/// Attempts incremental linking, if the file already exists. +/// If incremental linking fails, falls back to truncating the file and rewriting it. +/// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. +/// This operation is not atomic. +pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { + const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = executable_mode }); + defer file.close(); - libc: *LibCInstallation, - out_file_path: std.ArrayListSentineled(u8, 0), -}; - -pub fn link(comp: *Compilation) !void { - var ctx = Context{ - .comp = comp, - .arena = std.heap.ArenaAllocator.init(comp.gpa()), - .args = undefined, - .link_in_crt = comp.haveLibC() and comp.kind == .Exe, - .link_err = {}, - .link_msg = undefined, - .libc = undefined, - .out_file_path = undefined, - }; - defer ctx.arena.deinit(); - ctx.args = std.ArrayList([*:0]const u8).init(&ctx.arena.allocator); - ctx.link_msg = std.ArrayListSentineled(u8, 0).initNull(&ctx.arena.allocator); - - ctx.out_file_path = try std.ArrayListSentineled(u8, 0).init(&ctx.arena.allocator, comp.name.span()); - switch (comp.kind) { - .Exe => { - try ctx.out_file_path.append(comp.target.exeFileExt()); - }, - .Lib => { - try ctx.out_file_path.append(if (comp.is_static) comp.target.staticLibSuffix() else comp.target.dynamicLibSuffix()); - }, - .Obj => { - try ctx.out_file_path.append(comp.target.oFileExt()); - }, - } - - // even though we're calling LLD as a library it thinks the first - // argument is its own exe name - try ctx.args.append("lld"); - - if (comp.haveLibC()) { - // TODO https://github.com/ziglang/zig/issues/3190 - var libc = ctx.comp.override_libc orelse blk: { - @panic("this code has bitrotted"); - //switch (comp.target) { - // Target.Native => { - // break :blk comp.zig_compiler.getNativeLibC() catch return error.LibCRequiredButNotProvidedOrFound; - // }, - // else => return error.LibCRequiredButNotProvidedOrFound, - //} - }; - ctx.libc = libc; - } - - try constructLinkerArgs(&ctx); - - if (comp.verbose_link) { - for (ctx.args.span()) |arg, i| { - const space = if (i == 0) "" else " "; - std.debug.warn("{}{s}", .{ space, arg }); - } - std.debug.warn("\n", .{}); - } - - const extern_ofmt = toExternObjectFormatType(comp.target.getObjectFormat()); - const args_slice = ctx.args.span(); - - { - // LLD is not thread-safe, so we grab a global lock. - const held = comp.zig_compiler.lld_lock.acquire(); - defer held.release(); - - // Not evented I/O. LLD does its own multithreading internally. - if (!ZigLLDLink(extern_ofmt, args_slice.ptr, args_slice.len, linkDiagCallback, @ptrCast(*c_void, &ctx))) { - if (!ctx.link_msg.isNull()) { - // TODO capture these messages and pass them through the system, reporting them through the - // event system instead of printing them directly here. - // perhaps try to parse and understand them. - std.debug.warn("{}\n", .{ctx.link_msg.span()}); - } - return error.LinkFailed; - } - } + return updateExecutableFile(allocator, module, file); } -extern fn ZigLLDLink( - oformat: c.ZigLLVM_ObjectFormatType, - args: [*]const [*]const u8, - arg_count: usize, - append_diagnostic: extern fn (*c_void, [*]const u8, usize) void, - context: *c_void, -) bool; +/// Atomically overwrites the old file, if present. 
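+/// Implemented with `Dir.atomicFile`: contents go to a temporary file first, and
+/// `AtomicFile.finish` then atomically renames it over `sub_path`.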
+pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { + const af = try dir.atomicFile(sub_path, .{ .mode = executable_mode }); + defer af.deinit(); -fn linkDiagCallback(context: *c_void, ptr: [*]const u8, len: usize) callconv(.C) void { - const ctx = @ptrCast(*Context, @alignCast(@alignOf(Context), context)); - ctx.link_err = linkDiagCallbackErrorable(ctx, ptr[0..len]); + try writeExecutableFile(allocator, module, af.file); + try af.finish(); } -fn linkDiagCallbackErrorable(ctx: *Context, msg: []const u8) !void { - if (ctx.link_msg.isNull()) { - try ctx.link_msg.resize(0); - } - try ctx.link_msg.append(msg); -} - -fn toExternObjectFormatType(ofmt: ObjectFormat) c.ZigLLVM_ObjectFormatType { - return switch (ofmt) { - .unknown => .ZigLLVM_UnknownObjectFormat, - .coff => .ZigLLVM_COFF, - .elf => .ZigLLVM_ELF, - .macho => .ZigLLVM_MachO, - .wasm => .ZigLLVM_Wasm, +/// Attempts incremental linking, if the file already exists. +/// If incremental linking fails, falls back to truncating the file and rewriting it. +/// Returns an error if `file` is not already open with +read +write +seek abilities. +/// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. +/// This operation is not atomic. +pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { + updateExecutableFileInner(allocator, module, file) catch |err| switch (err) { + error.IncrFailed => { + return writeExecutableFile(allocator, module, file); + }, + else => |e| return e, }; } -fn constructLinkerArgs(ctx: *Context) !void { - switch (ctx.comp.target.getObjectFormat()) { - .unknown => unreachable, - .coff => return constructLinkerArgsCoff(ctx), - .elf => return constructLinkerArgsElf(ctx), - .macho => return constructLinkerArgsMachO(ctx), - .wasm => return constructLinkerArgsWasm(ctx), - } -} +const Update = struct { + file: fs.File, + module: *const ir.Module, -fn constructLinkerArgsElf(ctx: *Context) !void { - // TODO commented out code in this function - //if (g->linker_script) { - // lj->args.append("-T"); - // lj->args.append(g->linker_script); - //} - try ctx.args.append("--gc-sections"); - if (ctx.comp.link_eh_frame_hdr) { - try ctx.args.append("--eh-frame-hdr"); + /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. + /// Same order as in the file. + sections: std.ArrayList(elf.Elf64_Shdr), + shdr_table_offset: ?u64, + + /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. + /// Same order as in the file. + program_headers: std.ArrayList(elf.Elf64_Phdr), + phdr_table_offset: ?u64, + /// The index into the program headers of a PT_LOAD program header with Read and Execute flags + phdr_load_re_index: ?u16, + entry_addr: ?u64, + + shstrtab: std.ArrayList(u8), + shstrtab_index: ?u16, + + text_section_index: ?u16, + symtab_section_index: ?u16, + + /// Key: index into strtab. Value: index into symbols. 
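+    /// (For example, an entry mapping 5 to 3 means the symbol whose name begins
+    /// at string table offset 5 is stored at `symbols.items[3]`.)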
+ symbol_table: std.AutoHashMap(usize, usize), + /// The same order as in the file + symbols: std.ArrayList(elf.Elf64_Sym), + /// Sorted by address, index into symbols + symbols_by_addr: std.ArrayList(usize), + + fn deinit(self: *Update) void { + self.sections.deinit(); + self.program_headers.deinit(); + self.shstrtab.deinit(); + self.symbol_table.deinit(); + self.symbols.deinit(); + self.symbols_by_addr.deinit(); } - //lj->args.append("-m"); - //lj->args.append(getLDMOption(&g->zig_target)); + // `expand_num / expand_den` is the factor of padding when allocation + const alloc_num = 4; + const alloc_den = 3; - //bool is_lib = g->out_type == OutTypeLib; - //bool shared = !g->is_static && is_lib; - //Buf *soname = nullptr; - if (ctx.comp.is_static) { - //if (util.isArmOrThumb(ctx.comp.target)) { - // try ctx.args.append("-Bstatic"); - //} else { - // try ctx.args.append("-static"); - //} - } - //} else if (shared) { - // lj->args.append("-shared"); + /// Returns end pos of collision, if any. + fn detectAllocCollision(self: *Update, start: u64, size: u64) ?u64 { + const small_ptr = self.module.target.cpu.arch.ptrBitWidth() == 32; + const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr); + if (start < ehdr_size) + return ehdr_size; - // if (buf_len(&lj->out_file) == 0) { - // buf_appendf(&lj->out_file, "lib%s.so.%" ZIG_PRI_usize ".%" ZIG_PRI_usize ".%" ZIG_PRI_usize "", - // buf_ptr(g->root_out_name), g->version_major, g->version_minor, g->version_patch); - // } - // soname = buf_sprintf("lib%s.so.%" ZIG_PRI_usize "", buf_ptr(g->root_out_name), g->version_major); - //} + const end = start + satMul(size, alloc_num) / alloc_den; - try ctx.args.append("-o"); - try ctx.args.append(ctx.out_file_path.span()); - - if (ctx.link_in_crt) { - const crt1o = if (ctx.comp.is_static) "crt1.o" else "Scrt1.o"; - try addPathJoin(ctx, ctx.libc.crt_dir.?, crt1o); - try addPathJoin(ctx, ctx.libc.crt_dir.?, "crti.o"); - } - - if (ctx.comp.haveLibC()) { - try ctx.args.append("-L"); - // TODO addNullByte should probably return [:0]u8 - try ctx.args.append(@ptrCast([*:0]const u8, (try std.cstr.addNullByte(&ctx.arena.allocator, ctx.libc.crt_dir.?)).ptr)); - - //if (!ctx.comp.is_static) { - // const dl = blk: { - // //if (ctx.libc.dynamic_linker_path) |dl| break :blk dl; - // //if (util.getDynamicLinkerPath(ctx.comp.target)) |dl| break :blk dl; - // return error.LibCMissingDynamicLinker; - // }; - // try ctx.args.append("-dynamic-linker"); - // try ctx.args.append(@ptrCast([*:0]const u8, (try std.cstr.addNullByte(&ctx.arena.allocator, dl)).ptr)); - //} - } - - //if (shared) { - // lj->args.append("-soname"); - // lj->args.append(buf_ptr(soname)); - //} - - // .o files - for (ctx.comp.link_objects) |link_object| { - const link_obj_with_null = try std.cstr.addNullByte(&ctx.arena.allocator, link_object); - try ctx.args.append(@ptrCast([*:0]const u8, link_obj_with_null.ptr)); - } - try addFnObjects(ctx); - - //if (g->out_type == OutTypeExe || g->out_type == OutTypeLib) { - // if (g->libc_link_lib == nullptr) { - // Buf *builtin_o_path = build_o(g, "builtin"); - // lj->args.append(buf_ptr(builtin_o_path)); - // } - - // // sometimes libgcc is missing stuff, so we still build compiler_rt and rely on weak linkage - // Buf *compiler_rt_o_path = build_compiler_rt(g); - // lj->args.append(buf_ptr(compiler_rt_o_path)); - //} - - //for (size_t i = 0; i < g->link_libs_list.length; i += 1) { - // LinkLib *link_lib = g->link_libs_list.at(i); - // if (buf_eql_str(link_lib->name, "c")) { - // 
continue; - // } - // Buf *arg; - // if (buf_starts_with_str(link_lib->name, "/") || buf_ends_with_str(link_lib->name, ".a") || - // buf_ends_with_str(link_lib->name, ".so")) - // { - // arg = link_lib->name; - // } else { - // arg = buf_sprintf("-l%s", buf_ptr(link_lib->name)); - // } - // lj->args.append(buf_ptr(arg)); - //} - - // libc dep - if (ctx.comp.haveLibC()) { - if (ctx.comp.is_static) { - try ctx.args.append("--start-group"); - try ctx.args.append("-lgcc"); - try ctx.args.append("-lgcc_eh"); - try ctx.args.append("-lc"); - try ctx.args.append("-lm"); - try ctx.args.append("--end-group"); - } else { - try ctx.args.append("-lgcc"); - try ctx.args.append("--as-needed"); - try ctx.args.append("-lgcc_s"); - try ctx.args.append("--no-as-needed"); - try ctx.args.append("-lc"); - try ctx.args.append("-lm"); - try ctx.args.append("-lgcc"); - try ctx.args.append("--as-needed"); - try ctx.args.append("-lgcc_s"); - try ctx.args.append("--no-as-needed"); - } - } - - // crt end - if (ctx.link_in_crt) { - try addPathJoin(ctx, ctx.libc.crt_dir.?, "crtn.o"); - } - - //if (ctx.comp.target != Target.Native) { - // try ctx.args.append("--allow-shlib-undefined"); - //} -} - -fn addPathJoin(ctx: *Context, dirname: []const u8, basename: []const u8) !void { - const full_path = try std.fs.path.join(&ctx.arena.allocator, &[_][]const u8{ dirname, basename }); - const full_path_with_null = try std.cstr.addNullByte(&ctx.arena.allocator, full_path); - try ctx.args.append(@ptrCast([*:0]const u8, full_path_with_null.ptr)); -} - -fn constructLinkerArgsCoff(ctx: *Context) !void { - try ctx.args.append("-NOLOGO"); - - if (!ctx.comp.strip) { - try ctx.args.append("-DEBUG"); - } - - switch (ctx.comp.target.cpu.arch) { - .i386 => try ctx.args.append("-MACHINE:X86"), - .x86_64 => try ctx.args.append("-MACHINE:X64"), - .aarch64 => try ctx.args.append("-MACHINE:ARM"), - else => return error.UnsupportedLinkArchitecture, - } - - const is_library = ctx.comp.kind == .Lib; - - const out_arg = try std.fmt.allocPrint(&ctx.arena.allocator, "-OUT:{}\x00", .{ctx.out_file_path.span()}); - try ctx.args.append(@ptrCast([*:0]const u8, out_arg.ptr)); - - if (ctx.comp.haveLibC()) { - try ctx.args.append(@ptrCast([*:0]const u8, (try std.fmt.allocPrint(&ctx.arena.allocator, "-LIBPATH:{}\x00", .{ctx.libc.msvc_lib_dir.?})).ptr)); - try ctx.args.append(@ptrCast([*:0]const u8, (try std.fmt.allocPrint(&ctx.arena.allocator, "-LIBPATH:{}\x00", .{ctx.libc.kernel32_lib_dir.?})).ptr)); - try ctx.args.append(@ptrCast([*:0]const u8, (try std.fmt.allocPrint(&ctx.arena.allocator, "-LIBPATH:{}\x00", .{ctx.libc.crt_dir.?})).ptr)); - } - - if (ctx.link_in_crt) { - const lib_str = if (ctx.comp.is_static) "lib" else ""; - const d_str = if (ctx.comp.build_mode == .Debug) "d" else ""; - - if (ctx.comp.is_static) { - const cmt_lib_name = try std.fmt.allocPrint(&ctx.arena.allocator, "libcmt{}.lib\x00", .{d_str}); - try ctx.args.append(@ptrCast([*:0]const u8, cmt_lib_name.ptr)); - } else { - const msvcrt_lib_name = try std.fmt.allocPrint(&ctx.arena.allocator, "msvcrt{}.lib\x00", .{d_str}); - try ctx.args.append(@ptrCast([*:0]const u8, msvcrt_lib_name.ptr)); - } - - const vcruntime_lib_name = try std.fmt.allocPrint(&ctx.arena.allocator, "{}vcruntime{}.lib\x00", .{ - lib_str, - d_str, - }); - try ctx.args.append(@ptrCast([*:0]const u8, vcruntime_lib_name.ptr)); - - const crt_lib_name = try std.fmt.allocPrint(&ctx.arena.allocator, "{}ucrt{}.lib\x00", .{ lib_str, d_str }); - try ctx.args.append(@ptrCast([*:0]const u8, crt_lib_name.ptr)); - - // Visual C++ 2015 
Conformance Changes - // https://msdn.microsoft.com/en-us/library/bb531344.aspx - try ctx.args.append("legacy_stdio_definitions.lib"); - - // msvcrt depends on kernel32 - try ctx.args.append("kernel32.lib"); - } else { - try ctx.args.append("-NODEFAULTLIB"); - if (!is_library) { - try ctx.args.append("-ENTRY:WinMainCRTStartup"); - } - } - - if (is_library and !ctx.comp.is_static) { - try ctx.args.append("-DLL"); - } - - for (ctx.comp.link_objects) |link_object| { - const link_obj_with_null = try std.cstr.addNullByte(&ctx.arena.allocator, link_object); - try ctx.args.append(@ptrCast([*:0]const u8, link_obj_with_null.ptr)); - } - try addFnObjects(ctx); - - switch (ctx.comp.kind) { - .Exe, .Lib => { - if (!ctx.comp.haveLibC()) { - @panic("TODO"); - } - }, - .Obj => {}, - } -} - -fn constructLinkerArgsMachO(ctx: *Context) !void { - try ctx.args.append("-demangle"); - - if (ctx.comp.linker_rdynamic) { - try ctx.args.append("-export_dynamic"); - } - - const is_lib = ctx.comp.kind == .Lib; - const shared = !ctx.comp.is_static and is_lib; - if (ctx.comp.is_static) { - try ctx.args.append("-static"); - } else { - try ctx.args.append("-dynamic"); - } - - try ctx.args.append("-arch"); - try ctx.args.append(util.getDarwinArchString(ctx.comp.target)); - - const platform = try DarwinPlatform.get(ctx.comp); - switch (platform.kind) { - .MacOS => try ctx.args.append("-macosx_version_min"), - .IPhoneOS => try ctx.args.append("-iphoneos_version_min"), - .IPhoneOSSimulator => try ctx.args.append("-ios_simulator_version_min"), - } - const ver_str = try std.fmt.allocPrint(&ctx.arena.allocator, "{}.{}.{}\x00", .{ - platform.major, - platform.minor, - platform.micro, - }); - try ctx.args.append(@ptrCast([*:0]const u8, ver_str.ptr)); - - if (ctx.comp.kind == .Exe) { - if (ctx.comp.is_static) { - try ctx.args.append("-no_pie"); - } else { - try ctx.args.append("-pie"); - } - } - - try ctx.args.append("-o"); - try ctx.args.append(ctx.out_file_path.span()); - - if (shared) { - try ctx.args.append("-headerpad_max_install_names"); - } else if (ctx.comp.is_static) { - try ctx.args.append("-lcrt0.o"); - } else { - switch (platform.kind) { - .MacOS => { - if (platform.versionLessThan(10, 5)) { - try ctx.args.append("-lcrt1.o"); - } else if (platform.versionLessThan(10, 6)) { - try ctx.args.append("-lcrt1.10.5.o"); - } else if (platform.versionLessThan(10, 8)) { - try ctx.args.append("-lcrt1.10.6.o"); - } - }, - .IPhoneOS => { - if (ctx.comp.target.cpu.arch == .aarch64) { - // iOS does not need any crt1 files for arm64 - } else if (platform.versionLessThan(3, 1)) { - try ctx.args.append("-lcrt1.o"); - } else if (platform.versionLessThan(6, 0)) { - try ctx.args.append("-lcrt1.3.1.o"); - } - }, - .IPhoneOSSimulator => {}, // no crt1.o needed - } - } - - for (ctx.comp.link_objects) |link_object| { - const link_obj_with_null = try std.cstr.addNullByte(&ctx.arena.allocator, link_object); - try ctx.args.append(@ptrCast([*:0]const u8, link_obj_with_null.ptr)); - } - try addFnObjects(ctx); - - // TODO - //if (ctx.comp.target == Target.Native) { - // for (ctx.comp.link_libs_list.span()) |lib| { - // if (mem.eql(u8, lib.name, "c")) { - // // on Darwin, libSystem has libc in it, but also you have to use it - // // to make syscalls because the syscall numbers are not documented - // // and change between versions. 
- // // so we always link against libSystem - // try ctx.args.append("-lSystem"); - // } else { - // if (mem.indexOfScalar(u8, lib.name, '/') == null) { - // const arg = try std.fmt.allocPrint(&ctx.arena.allocator, "-l{}\x00", .{lib.name}); - // try ctx.args.append(@ptrCast([*:0]const u8, arg.ptr)); - // } else { - // const arg = try std.cstr.addNullByte(&ctx.arena.allocator, lib.name); - // try ctx.args.append(@ptrCast([*:0]const u8, arg.ptr)); - // } - // } - // } - //} else { - // try ctx.args.append("-undefined"); - // try ctx.args.append("dynamic_lookup"); - //} - - if (platform.kind == .MacOS) { - if (platform.versionLessThan(10, 5)) { - try ctx.args.append("-lgcc_s.10.4"); - } else if (platform.versionLessThan(10, 6)) { - try ctx.args.append("-lgcc_s.10.5"); - } - } else { - @panic("TODO"); - } -} - -fn constructLinkerArgsWasm(ctx: *Context) void { - @panic("TODO"); -} - -fn addFnObjects(ctx: *Context) !void { - const held = ctx.comp.fn_link_set.acquire(); - defer held.release(); - - var it = held.value.first; - while (it) |node| { - const fn_val = node.data orelse { - // handle the tombstone. See Value.Fn.destroy. - it = node.next; - held.value.remove(node); - ctx.comp.gpa().destroy(node); - continue; - }; - try ctx.args.append(fn_val.containing_object.span()); - it = node.next; - } -} - -const DarwinPlatform = struct { - kind: Kind, - major: u32, - minor: u32, - micro: u32, - - const Kind = enum { - MacOS, - IPhoneOS, - IPhoneOSSimulator, - }; - - fn get(comp: *Compilation) !DarwinPlatform { - var result: DarwinPlatform = undefined; - const ver_str = switch (comp.darwin_version_min) { - .MacOS => |ver| blk: { - result.kind = .MacOS; - break :blk ver; - }, - .Ios => |ver| blk: { - result.kind = .IPhoneOS; - break :blk ver; - }, - .None => blk: { - assert(comp.target.os.tag == .macosx); - result.kind = .MacOS; - break :blk "10.14"; - }, - }; - - var had_extra: bool = undefined; - try darwinGetReleaseVersion( - ver_str, - &result.major, - &result.minor, - &result.micro, - &had_extra, - ); - if (had_extra or result.major != 10 or result.minor >= 100 or result.micro >= 100) { - return error.InvalidDarwinVersionString; - } - - if (result.kind == .IPhoneOS) { - switch (comp.target.cpu.arch) { - .i386, - .x86_64, - => result.kind = .IPhoneOSSimulator, - else => {}, + if (self.shdr_table_offset) |off| { + const shdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Shdr) else @sizeOf(elf.Elf64_Shdr); + const tight_size = self.sections.items.len * shdr_size; + const increased_size = satMul(tight_size, alloc_num) / alloc_den; + const test_end = off + increased_size; + if (end > off and start < test_end) { + return test_end; } } - return result; + + if (self.phdr_table_offset) |off| { + const phdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Phdr) else @sizeOf(elf.Elf64_Phdr); + const tight_size = self.sections.items.len * phdr_size; + const increased_size = satMul(tight_size, alloc_num) / alloc_den; + const test_end = off + increased_size; + if (end > off and start < test_end) { + return test_end; + } + } + + for (self.sections.items) |section| { + const increased_size = satMul(section.sh_size, alloc_num) / alloc_den; + const test_end = section.sh_offset + increased_size; + if (end > section.sh_offset and start < test_end) { + return test_end; + } + } + for (self.program_headers.items) |program_header| { + const increased_size = satMul(program_header.p_filesz, alloc_num) / alloc_den; + const test_end = program_header.p_offset + increased_size; + if (end > program_header.p_offset and start < 
test_end) { + return test_end; + } + } + return null; } - fn versionLessThan(self: DarwinPlatform, major: u32, minor: u32) bool { - if (self.major < major) - return true; - if (self.major > major) - return false; - if (self.minor < minor) - return true; - return false; + fn allocatedSize(self: *Update, start: u64) u64 { + var min_pos: u64 = std.math.maxInt(u64); + if (self.shdr_table_offset) |off| { + if (off > start and off < min_pos) min_pos = off; + } + if (self.phdr_table_offset) |off| { + if (off > start and off < min_pos) min_pos = off; + } + for (self.sections.items) |section| { + if (section.sh_offset <= start) continue; + if (section.sh_offset < min_pos) min_pos = section.sh_offset; + } + for (self.program_headers.items) |program_header| { + if (program_header.p_offset <= start) continue; + if (program_header.p_offset < min_pos) min_pos = program_header.p_offset; + } + return min_pos; + } + + fn findFreeSpace(self: *Update, object_size: u64, min_alignment: u16) u64 { + var start: u64 = 0; + while (self.detectAllocCollision(start, object_size)) |item_end| { + start = mem.alignForwardGeneric(u64, item_end, min_alignment); + } + return start; + } + + fn makeString(self: *Update, bytes: []const u8) !u32 { + const result = self.shstrtab.items.len; + try self.shstrtab.appendSlice(bytes); + return @intCast(u32, result); + } + + fn perform(self: *Update) !void { + const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedArchitecture, + }; + const small_ptr = switch (ptr_width) { + .p32 => true, + .p64 => false, + }; + // This means the entire read-only executable program code needs to be rewritten. + var phdr_load_re_dirty = false; + var phdr_table_dirty = false; + var shdr_table_dirty = false; + var shstrtab_dirty = false; + var symtab_dirty = false; + + if (self.phdr_load_re_index == null) { + self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len); + const file_size = 256 * 1024; + const p_align = 0x1000; + const off = self.findFreeSpace(file_size, p_align); + try self.program_headers.append(.{ + .p_type = elf.PT_LOAD, + .p_offset = off, + .p_filesz = file_size, + .p_vaddr = default_entry_addr, + .p_paddr = default_entry_addr, + .p_memsz = 0, + .p_align = 0x1000, + .p_flags = elf.PF_X | elf.PF_R, + }); + self.entry_addr = default_entry_addr; + phdr_load_re_dirty = true; + phdr_table_dirty = true; + } + if (self.sections.items.len == 0) { + // There must always be a null section in index 0 + try self.sections.append(.{ + .sh_name = 0, + .sh_type = 0, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 0, + .sh_entsize = 0, + }); + shdr_table_dirty = true; + } + if (self.shstrtab_index == null) { + self.shstrtab_index = @intCast(u16, self.sections.items.len); + const off = self.findFreeSpace(self.shstrtab.items.len, 1); + try self.sections.append(.{ + .sh_name = try self.makeString(".shstrtab"), + .sh_type = elf.SHT_STRTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = self.shstrtab.items.len, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }); + shstrtab_dirty = true; + shdr_table_dirty = true; + } + if (self.text_section_index == null) { + self.text_section_index = @intCast(u16, self.sections.items.len); + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + + try self.sections.append(.{ + .sh_name = try self.makeString(".text"), + .sh_type = 
elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC | elf.SHF_EXECINSTR, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = phdr.p_align, + .sh_entsize = 0, + }); + shdr_table_dirty = true; + } + if (self.symtab_section_index == null) { + self.symtab_section_index = @intCast(u16, self.sections.items.len); + const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); + const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); + const file_size = self.module.exports.len * each_size; + const off = self.findFreeSpace(file_size, min_align); + + try self.sections.append(.{ + .sh_name = try self.makeString(".symtab"), + .sh_type = elf.SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size, + // The section header index of the associated string table. + .sh_link = self.shstrtab_index.?, + // One greater than the symbol table index of the last local symbol (binding STB_LOCAL). + .sh_info = @intCast(u32, self.module.exports.len), + .sh_addralign = min_align, + .sh_entsize = each_size, + }); + symtab_dirty = true; + shdr_table_dirty = true; + } + const shsize: u64 = switch (ptr_width) { + .p32 => @sizeOf(elf.Elf32_Shdr), + .p64 => @sizeOf(elf.Elf64_Shdr), + }; + const shalign: u16 = switch (ptr_width) { + .p32 => @alignOf(elf.Elf32_Shdr), + .p64 => @alignOf(elf.Elf64_Shdr), + }; + if (self.shdr_table_offset == null) { + self.shdr_table_offset = self.findFreeSpace(self.sections.items.len * shsize, shalign); + shdr_table_dirty = true; + } + const phsize: u64 = switch (ptr_width) { + .p32 => @sizeOf(elf.Elf32_Phdr), + .p64 => @sizeOf(elf.Elf64_Phdr), + }; + const phalign: u16 = switch (ptr_width) { + .p32 => @alignOf(elf.Elf32_Phdr), + .p64 => @alignOf(elf.Elf64_Phdr), + }; + if (self.phdr_table_offset == null) { + self.phdr_table_offset = self.findFreeSpace(self.program_headers.items.len * phsize, phalign); + phdr_table_dirty = true; + } + const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + if (phdr_table_dirty) { + const allocated_size = self.allocatedSize(self.phdr_table_offset.?); + const needed_size = self.program_headers.items.len * phsize; + + if (needed_size > allocated_size) { + self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); + } + + const allocator = self.program_headers.allocator; + switch (ptr_width) { + .p32 => { + const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); + defer allocator.free(buf); + + for (buf) |*phdr, i| { + phdr.* = .{ + .p_type = self.program_headers.items[i].p_type, + .p_flags = self.program_headers.items[i].p_flags, + .p_offset = @intCast(u32, self.program_headers.items[i].p_offset), + .p_vaddr = @intCast(u32, self.program_headers.items[i].p_vaddr), + .p_paddr = @intCast(u32, self.program_headers.items[i].p_paddr), + .p_filesz = @intCast(u32, self.program_headers.items[i].p_filesz), + .p_memsz = @intCast(u32, self.program_headers.items[i].p_memsz), + .p_align = @intCast(u32, self.program_headers.items[i].p_align), + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Phdr, phdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + }, + .p64 => { + const buf = try allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); + defer allocator.free(buf); + + for (buf) |*phdr, i| { + phdr.* = .{ + .p_type = self.program_headers.items[i].p_type, + .p_flags = 
self.program_headers.items[i].p_flags, + .p_offset = self.program_headers.items[i].p_offset, + .p_vaddr = self.program_headers.items[i].p_vaddr, + .p_paddr = self.program_headers.items[i].p_paddr, + .p_filesz = self.program_headers.items[i].p_filesz, + .p_memsz = self.program_headers.items[i].p_memsz, + .p_align = self.program_headers.items[i].p_align, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Phdr, phdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + }, + } + } + if (shdr_table_dirty) { + const allocated_size = self.allocatedSize(self.shdr_table_offset.?); + const needed_size = self.sections.items.len * phsize; + + if (needed_size > allocated_size) { + self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); + } + + const allocator = self.sections.allocator; + switch (ptr_width) { + .p32 => { + const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); + defer allocator.free(buf); + + for (buf) |*shdr, i| { + shdr.* = .{ + .sh_name = self.sections.items[i].sh_name, + .sh_type = self.sections.items[i].sh_type, + .sh_flags = @intCast(u32, self.sections.items[i].sh_flags), + .sh_addr = @intCast(u32, self.sections.items[i].sh_addr), + .sh_offset = @intCast(u32, self.sections.items[i].sh_offset), + .sh_size = @intCast(u32, self.sections.items[i].sh_size), + .sh_link = self.sections.items[i].sh_link, + .sh_info = self.sections.items[i].sh_info, + .sh_addralign = @intCast(u32, self.sections.items[i].sh_addralign), + .sh_entsize = @intCast(u32, self.sections.items[i].sh_entsize), + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Shdr, shdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + }, + .p64 => { + const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); + defer allocator.free(buf); + + for (buf) |*shdr, i| { + shdr.* = .{ + .sh_name = self.sections.items[i].sh_name, + .sh_type = self.sections.items[i].sh_type, + .sh_flags = self.sections.items[i].sh_flags, + .sh_addr = self.sections.items[i].sh_addr, + .sh_offset = self.sections.items[i].sh_offset, + .sh_size = self.sections.items[i].sh_size, + .sh_link = self.sections.items[i].sh_link, + .sh_info = self.sections.items[i].sh_info, + .sh_addralign = self.sections.items[i].sh_addralign, + .sh_entsize = self.sections.items[i].sh_entsize, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Shdr, shdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + }, + } + } + if (shstrtab_dirty) { + try self.file.pwriteAll(self.shstrtab.items, self.sections.items[self.shstrtab_index.?].sh_offset); + } + try self.writeCodeAndSymbols(); + try self.writeElfHeader(); + // TODO find end pos and truncate + } + + fn writeElfHeader(self: *Update) !void { + var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; + + var index: usize = 0; + hdr_buf[0..4].* = "\x7fELF".*; + index += 4; + + const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedArchitecture, + }; + hdr_buf[index] = switch (ptr_width) { + .p32 => elf.ELFCLASS32, + .p64 => elf.ELFCLASS64, + }; + index += 1; + + const endian = self.module.target.cpu.arch.endian(); + hdr_buf[index] = switch (endian) { + .Little => elf.ELFDATA2LSB, + .Big => elf.ELFDATA2MSB, + }; + index += 1; + + hdr_buf[index] = 1; // ELF version + index += 1; + + // OS ABI, often set to 0 regardless of target platform + // ABI Version, possibly used by glibc but 
not by static executables + // padding + mem.set(u8, hdr_buf[index..][0..9], 0); + index += 9; + + assert(index == 16); + + mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(elf.ET.EXEC), endian); + index += 2; + + const machine = self.module.target.cpu.arch.toElfMachine(); + mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(machine), endian); + index += 2; + + // ELF Version, again + mem.writeInt(u32, hdr_buf[index..][0..4], 1, endian); + index += 4; + + switch (ptr_width) { + .p32 => { + // e_entry + mem.writeInt(u32, hdr_buf[index..][0..4], @intCast(u32, self.entry_addr.?), endian); + index += 4; + + // e_phoff + mem.writeInt(u32, hdr_buf[index..][0..4], @intCast(u32, self.phdr_table_offset.?), endian); + index += 4; + + // e_shoff + mem.writeInt(u32, hdr_buf[index..][0..4], @intCast(u32, self.shdr_table_offset.?), endian); + index += 4; + }, + .p64 => { + // e_entry + mem.writeInt(u64, hdr_buf[index..][0..8], self.entry_addr.?, endian); + index += 8; + + // e_phoff + mem.writeInt(u64, hdr_buf[index..][0..8], self.phdr_table_offset.?, endian); + index += 8; + + // e_shoff + mem.writeInt(u64, hdr_buf[index..][0..8], self.shdr_table_offset.?, endian); + index += 8; + }, + } + + const e_flags = 0; + mem.writeInt(u32, hdr_buf[index..][0..4], e_flags, endian); + index += 4; + + const e_ehsize: u16 = switch (ptr_width) { + .p32 => @sizeOf(elf.Elf32_Ehdr), + .p64 => @sizeOf(elf.Elf64_Ehdr), + }; + mem.writeInt(u16, hdr_buf[index..][0..2], e_ehsize, endian); + index += 2; + + const e_phentsize: u16 = switch (ptr_width) { + .p32 => @sizeOf(elf.Elf32_Phdr), + .p64 => @sizeOf(elf.Elf64_Phdr), + }; + mem.writeInt(u16, hdr_buf[index..][0..2], e_phentsize, endian); + index += 2; + + const e_phnum = @intCast(u16, self.program_headers.items.len); + mem.writeInt(u16, hdr_buf[index..][0..2], e_phnum, endian); + index += 2; + + const e_shentsize: u16 = switch (ptr_width) { + .p32 => @sizeOf(elf.Elf32_Shdr), + .p64 => @sizeOf(elf.Elf64_Shdr), + }; + mem.writeInt(u16, hdr_buf[index..][0..2], e_shentsize, endian); + index += 2; + + const e_shnum = @intCast(u16, self.sections.items.len); + mem.writeInt(u16, hdr_buf[index..][0..2], e_shnum, endian); + index += 2; + + mem.writeInt(u16, hdr_buf[index..][0..2], self.shstrtab_index.?, endian); + index += 2; + + assert(index == e_ehsize); + + try self.file.pwriteAll(hdr_buf[0..index], 0); + } + + fn writeCodeAndSymbols(self: *Update) !void { + @panic("TODO writeCodeAndSymbols"); } }; -/// Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the -/// grouped values as integers. Numbers which are not provided are set to 0. -/// return true if the entire string was parsed (9.2), or all groups were -/// parsed (10.3.5extrastuff). -fn darwinGetReleaseVersion(str: []const u8, major: *u32, minor: *u32, micro: *u32, had_extra: *bool) !void { - major.* = 0; - minor.* = 0; - micro.* = 0; - had_extra.* = false; +/// Truncates the existing file contents and overwrites the contents. +/// Returns an error if `file` is not already open with +read +write +seek abilities. 
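+/// A suitable handle can be obtained the same way `updateExecutableFilePath` does it,
+/// e.g. `dir.createFile(sub_path, .{ .truncate = false, .read = true })`.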
+pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { + var update = Update{ + .file = file, + .module = &module, + .sections = std.ArrayList(elf.Elf64_Shdr).init(allocator), + .shdr_table_offset = null, + .program_headers = std.ArrayList(elf.Elf64_Phdr).init(allocator), + .phdr_table_offset = null, + .phdr_load_re_index = null, + .entry_addr = null, + .shstrtab = std.ArrayList(u8).init(allocator), + .shstrtab_index = null, + .text_section_index = null, + .symtab_section_index = null, - if (str.len == 0) - return error.InvalidDarwinVersionString; + .symbol_table = std.AutoHashMap(usize, usize).init(allocator), + .symbols = std.ArrayList(elf.Elf64_Sym).init(allocator), + .symbols_by_addr = std.ArrayList(usize).init(allocator), + }; + defer update.deinit(); - var start_pos: usize = 0; - for ([_]*u32{ major, minor, micro }) |v| { - const dot_pos = mem.indexOfScalarPos(u8, str, start_pos, '.'); - const end_pos = dot_pos orelse str.len; - v.* = std.fmt.parseUnsigned(u32, str[start_pos..end_pos], 10) catch return error.InvalidDarwinVersionString; - start_pos = (dot_pos orelse return) + 1; - if (start_pos == str.len) return; - } - had_extra.* = true; + return update.perform(); +} + +/// Returns error.IncrFailed if incremental update could not be performed. +fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !void { + //var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; + + // TODO implement incremental linking + return error.IncrFailed; +} + +/// Saturating multiplication +fn satMul(a: var, b: var) @TypeOf(a, b) { + const T = @TypeOf(a, b); + return std.math.mul(T, a, b) catch std.math.maxInt(T); +} + +fn bswapAllFields(comptime S: type, ptr: *S) void { + @panic("TODO implement bswapAllFields"); } From 24a01eed90b60a1e57172ae5a5305bc437bfeaba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 16:41:20 -0400 Subject: [PATCH 02/10] basics of writing ELF and machine code generation --- src-self-hosted/codegen.zig | 552 ++++++++---------------------------- src-self-hosted/ir.zig | 11 +- src-self-hosted/link.zig | 209 ++++++++++++-- src-self-hosted/value.zig | 50 ++++ 4 files changed, 367 insertions(+), 455 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 585ba6c51a..f18b138440 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1,447 +1,137 @@ const std = @import("std"); -const Compilation = @import("compilation.zig").Compilation; -const llvm = @import("llvm.zig"); -const c = @import("c.zig"); -const ir = @import("ir.zig"); -const Value = @import("value.zig").Value; -const Type = @import("type.zig").Type; -const Scope = @import("scope.zig").Scope; -const util = @import("util.zig"); -const event = std.event; +const mem = std.mem; const assert = std.debug.assert; -const DW = std.dwarf; -const maxInt = std.math.maxInt; +const ir = @import("ir.zig"); +const Type = @import("type.zig").Type; +const Value = @import("value.zig").Value; -pub async fn renderToLlvm(comp: *Compilation, fn_val: *Value.Fn, code: *ir.Code) Compilation.BuildError!void { - fn_val.base.ref(); - defer fn_val.base.deref(comp); - defer code.destroy(comp.gpa()); +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; - var output_path = try comp.createRandomOutputPath(comp.target.oFileExt()); - errdefer output_path.deinit(); +pub const Symbol = struct { + errors: []ErrorMsg, - const llvm_handle = try comp.zig_compiler.getAnyLlvmContext(); - defer 
llvm_handle.release(comp.zig_compiler); - - const context = llvm_handle.node.data; - - const module = llvm.ModuleCreateWithNameInContext(comp.name.span(), context) orelse return error.OutOfMemory; - defer llvm.DisposeModule(module); - - llvm.SetTarget(module, comp.llvm_triple.span()); - llvm.SetDataLayout(module, comp.target_layout_str); - - if (comp.target.getObjectFormat() == .coff) { - llvm.AddModuleCodeViewFlag(module); - } else { - llvm.AddModuleDebugInfoFlag(module); - } - - const builder = llvm.CreateBuilderInContext(context) orelse return error.OutOfMemory; - defer llvm.DisposeBuilder(builder); - - const dibuilder = llvm.CreateDIBuilder(module, true) orelse return error.OutOfMemory; - defer llvm.DisposeDIBuilder(dibuilder); - - // Don't use ZIG_VERSION_STRING here. LLVM misparses it when it includes - // the git revision. - const producer = try std.fmt.allocPrintZ(&code.arena.allocator, "zig {}.{}.{}", .{ - @as(u32, c.ZIG_VERSION_MAJOR), - @as(u32, c.ZIG_VERSION_MINOR), - @as(u32, c.ZIG_VERSION_PATCH), - }); - const flags = ""; - const runtime_version = 0; - const compile_unit_file = llvm.CreateFile( - dibuilder, - comp.name.span(), - comp.root_package.root_src_dir.span(), - ) orelse return error.OutOfMemory; - const is_optimized = comp.build_mode != .Debug; - const compile_unit = llvm.CreateCompileUnit( - dibuilder, - DW.LANG_C99, - compile_unit_file, - producer, - is_optimized, - flags, - runtime_version, - "", - 0, - !comp.strip, - ) orelse return error.OutOfMemory; - - var ofile = ObjectFile{ - .comp = comp, - .module = module, - .builder = builder, - .dibuilder = dibuilder, - .context = context, - .lock = event.Lock.init(), - .arena = &code.arena.allocator, - }; - - try renderToLlvmModule(&ofile, fn_val, code); - - // TODO module level assembly - //if (buf_len(&g->global_asm) != 0) { - // LLVMSetModuleInlineAsm(g->module, buf_ptr(&g->global_asm)); - //} - - llvm.DIBuilderFinalize(dibuilder); - - if (comp.verbose_llvm_ir) { - std.debug.warn("raw module:\n", .{}); - llvm.DumpModule(ofile.module); - } - - // verify the llvm module when safety is on - if (std.debug.runtime_safety) { - var error_ptr: ?[*:0]u8 = null; - _ = llvm.VerifyModule(ofile.module, llvm.AbortProcessAction, &error_ptr); - } - - const is_small = comp.build_mode == .ReleaseSmall; - const is_debug = comp.build_mode == .Debug; - - var err_msg: [*:0]u8 = undefined; - // TODO integrate this with evented I/O - if (llvm.TargetMachineEmitToFile( - comp.target_machine, - module, - output_path.span(), - llvm.EmitBinary, - &err_msg, - is_debug, - is_small, - )) { - if (std.debug.runtime_safety) { - std.debug.panic("unable to write object file {}: {s}\n", .{ output_path.span(), err_msg }); + pub fn deinit(self: *Symbol, allocator: *mem.Allocator) void { + for (self.errors) |err| { + allocator.free(err.msg); } - return error.WritingObjectFileFailed; - } - //validate_inline_fns(g); TODO - fn_val.containing_object = output_path; - if (comp.verbose_llvm_ir) { - std.debug.warn("optimized module:\n", .{}); - llvm.DumpModule(ofile.module); - } - if (comp.verbose_link) { - std.debug.warn("created {}\n", .{output_path.span()}); - } -} - -pub const ObjectFile = struct { - comp: *Compilation, - module: *llvm.Module, - builder: *llvm.Builder, - dibuilder: *llvm.DIBuilder, - context: *llvm.Context, - lock: event.Lock, - arena: *std.mem.Allocator, - - fn gpa(self: *ObjectFile) *std.mem.Allocator { - return self.comp.gpa(); + allocator.free(self.errors); + self.* = undefined; } }; -pub fn renderToLlvmModule(ofile: *ObjectFile, fn_val: 
*Value.Fn, code: *ir.Code) !void { - // TODO audit more of codegen.cpp:fn_llvm_value and port more logic - const llvm_fn_type = try fn_val.base.typ.getLlvmType(ofile.arena, ofile.context); - const llvm_fn = llvm.AddFunction( - ofile.module, - fn_val.symbol_name.span(), - llvm_fn_type, - ) orelse return error.OutOfMemory; +pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !Symbol { + switch (typed_value.ty.zigTypeTag()) { + .Fn => { + const index = typed_value.val.cast(Value.Payload.Function).?.index; + const module_fn = module.fns[index]; - const want_fn_safety = fn_val.block_scope.?.safety.get(ofile.comp); - if (want_fn_safety and ofile.comp.haveLibC()) { - try addLLVMFnAttr(ofile, llvm_fn, "sspstrong"); - try addLLVMFnAttrStr(ofile, llvm_fn, "stack-protector-buffer-size", "4"); - } + var function = Function{ + .module = &module, + .mod_fn = &module_fn, + .code = code, + .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), + .errors = std.ArrayList(ErrorMsg).init(code.allocator), + .constants = std.ArrayList(ir.TypedValue).init(code.allocator), + }; + defer function.inst_table.deinit(); + defer function.errors.deinit(); - // TODO - //if (fn_val.align_stack) |align_stack| { - // try addLLVMFnAttrInt(ofile, llvm_fn, "alignstack", align_stack); - //} - - const fn_type = fn_val.base.typ.cast(Type.Fn).?; - const fn_type_normal = &fn_type.key.data.Normal; - - try addLLVMFnAttr(ofile, llvm_fn, "nounwind"); - //add_uwtable_attr(g, fn_table_entry->llvm_value); - try addLLVMFnAttr(ofile, llvm_fn, "nobuiltin"); - - //if (g->build_mode == BuildModeDebug && fn_table_entry->fn_inline != FnInlineAlways) { - // ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim", "true"); - // ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim-non-leaf", nullptr); - //} - - //if (fn_table_entry->section_name) { - // LLVMSetSection(fn_table_entry->llvm_value, buf_ptr(fn_table_entry->section_name)); - //} - //if (fn_table_entry->align_bytes > 0) { - // LLVMSetAlignment(fn_table_entry->llvm_value, (unsigned)fn_table_entry->align_bytes); - //} else { - // // We'd like to set the best alignment for the function here, but on Darwin LLVM gives - // // "Cannot getTypeInfo() on a type that is unsized!" assertion failure when calling - // // any of the functions for getting alignment. Not specifying the alignment should - // // use the ABI alignment, which is fine. 
- //} - - //if (!type_has_bits(return_type)) { - // // nothing to do - //} else if (type_is_codegen_pointer(return_type)) { - // addLLVMAttr(fn_table_entry->llvm_value, 0, "nonnull"); - //} else if (handle_is_ptr(return_type) && - // calling_convention_does_first_arg_return(fn_type->data.fn.fn_type_id.cc)) - //{ - // addLLVMArgAttr(fn_table_entry->llvm_value, 0, "sret"); - // addLLVMArgAttr(fn_table_entry->llvm_value, 0, "nonnull"); - //} - - // TODO set parameter attributes - - // TODO - //uint32_t err_ret_trace_arg_index = get_err_ret_trace_arg_index(g, fn_table_entry); - //if (err_ret_trace_arg_index != UINT32_MAX) { - // addLLVMArgAttr(fn_table_entry->llvm_value, (unsigned)err_ret_trace_arg_index, "nonnull"); - //} - - const cur_ret_ptr = if (fn_type_normal.return_type.handleIsPtr()) llvm.GetParam(llvm_fn, 0) else null; - - // build all basic blocks - for (code.basic_block_list.span()) |bb| { - bb.llvm_block = llvm.AppendBasicBlockInContext( - ofile.context, - llvm_fn, - bb.name_hint, - ) orelse return error.OutOfMemory; - } - const entry_bb = code.basic_block_list.at(0); - llvm.PositionBuilderAtEnd(ofile.builder, entry_bb.llvm_block); - - llvm.ClearCurrentDebugLocation(ofile.builder); - - // TODO set up error return tracing - // TODO allocate temporary stack values - - const var_list = fn_type.non_key.Normal.variable_list.span(); - // create debug variable declarations for variables and allocate all local variables - for (var_list) |var_scope, i| { - const var_type = switch (var_scope.data) { - .Const => unreachable, - .Param => |param| param.typ, - }; - // if (!type_has_bits(var->value->type)) { - // continue; - // } - // if (ir_get_var_is_comptime(var)) - // continue; - // if (type_requires_comptime(var->value->type)) - // continue; - // if (var->src_arg_index == SIZE_MAX) { - // var->value_ref = build_alloca(g, var->value->type, buf_ptr(&var->name), var->align_bytes); - - // var->di_loc_var = ZigLLVMCreateAutoVariable(g->dbuilder, get_di_scope(g, var->parent_scope), - // buf_ptr(&var->name), import->di_file, (unsigned)(var->decl_node->line + 1), - // var->value->type->di_type, !g->strip_debug_symbols, 0); - - // } else { - // it's a parameter - // assert(var->gen_arg_index != SIZE_MAX); - // TypeTableEntry *gen_type; - // FnGenParamInfo *gen_info = &fn_table_entry->type_entry->data.fn.gen_param_info[var->src_arg_index]; - - if (var_type.handleIsPtr()) { - // if (gen_info->is_byval) { - // gen_type = var->value->type; - // } else { - // gen_type = gen_info->type; - // } - var_scope.data.Param.llvm_value = llvm.GetParam(llvm_fn, @intCast(c_uint, i)); - } else { - // gen_type = var->value->type; - var_scope.data.Param.llvm_value = try renderAlloca(ofile, var_type, var_scope.name, .Abi); - } - // if (var->decl_node) { - // var->di_loc_var = ZigLLVMCreateParameterVariable(g->dbuilder, get_di_scope(g, var->parent_scope), - // buf_ptr(&var->name), import->di_file, - // (unsigned)(var->decl_node->line + 1), - // gen_type->di_type, !g->strip_debug_symbols, 0, (unsigned)(var->gen_arg_index + 1)); - // } - - // } - } - - // TODO finishing error return trace setup. we have to do this after all the allocas. - - // create debug variable declarations for parameters - // rely on the first variables in the variable_list being parameters. 
- //size_t next_var_i = 0; - for (fn_type.key.data.Normal.params) |param, i| { - //FnGenParamInfo *info = &fn_table_entry->type_entry->data.fn.gen_param_info[param_i]; - //if (info->gen_index == SIZE_MAX) - // continue; - const scope_var = var_list[i]; - //assert(variable->src_arg_index != SIZE_MAX); - //next_var_i += 1; - //assert(variable); - //assert(variable->value_ref); - - if (!param.typ.handleIsPtr()) { - //clear_debug_source_node(g); - const llvm_param = llvm.GetParam(llvm_fn, @intCast(c_uint, i)); - _ = try renderStoreUntyped( - ofile, - llvm_param, - scope_var.data.Param.llvm_value, - .Abi, - .Non, - ); - } - - //if (variable->decl_node) { - // gen_var_debug_decl(g, variable); - //} - } - - for (code.basic_block_list.span()) |current_block| { - llvm.PositionBuilderAtEnd(ofile.builder, current_block.llvm_block); - for (current_block.instruction_list.span()) |instruction| { - if (instruction.ref_count == 0 and !instruction.hasSideEffects()) continue; - - instruction.llvm_value = try instruction.render(ofile, fn_val); - } - current_block.llvm_exit_block = llvm.GetInsertBlock(ofile.builder); + for (module_fn.body) |inst| { + const new_inst = function.genFuncInst(inst) catch |err| switch (err) { + error.CodegenFail => { + assert(function.errors.items.len != 0); + break; + }, + else => |e| return e, + }; + try function.inst_table.putNoClobber(inst, new_inst); + } + return Symbol{ .errors = function.errors.toOwnedSlice() }; + }, + else => @panic("TODO implement generateSymbol for non-function types"), } } -fn addLLVMAttr( - ofile: *ObjectFile, - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, -) !void { - const kind_id = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len); - assert(kind_id != 0); - const llvm_attr = llvm.CreateEnumAttribute(ofile.context, kind_id, 0) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} +const Function = struct { + module: *const ir.Module, + mod_fn: *const ir.Module.Fn, + code: *std.ArrayList(u8), + inst_table: std.AutoHashMap(*ir.Inst, MCValue), + /// Constants are embedded within functions (at the end, after `ret`) + /// so that they are independently updateable. + /// This is a list of constants that must be appended to the symbol after `ret`. 
+ constants: std.ArrayList(ir.TypedValue), + errors: std.ArrayList(ErrorMsg), -fn addLLVMAttrStr( - ofile: *ObjectFile, - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, - attr_val: []const u8, -) !void { - const llvm_attr = llvm.CreateStringAttribute( - ofile.context, - attr_name.ptr, - @intCast(c_uint, attr_name.len), - attr_val.ptr, - @intCast(c_uint, attr_val.len), - ) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} - -fn addLLVMAttrInt( - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, - attr_val: u64, -) !void { - const kind_id = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len); - assert(kind_id != 0); - const llvm_attr = llvm.CreateEnumAttribute(ofile.context, kind_id, attr_val) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} - -fn addLLVMFnAttr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8) !void { - return addLLVMAttr(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name); -} - -fn addLLVMFnAttrStr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: []const u8) !void { - return addLLVMAttrStr(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name, attr_val); -} - -fn addLLVMFnAttrInt(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: u64) !void { - return addLLVMAttrInt(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name, attr_val); -} - -fn renderLoadUntyped( - ofile: *ObjectFile, - ptr: *llvm.Value, - alignment: Type.Pointer.Align, - vol: Type.Pointer.Vol, - name: [*:0]const u8, -) !*llvm.Value { - const result = llvm.BuildLoad(ofile.builder, ptr, name) orelse return error.OutOfMemory; - switch (vol) { - .Non => {}, - .Volatile => llvm.SetVolatile(result, 1), - } - llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm.GetElementType(llvm.TypeOf(ptr)))); - return result; -} - -fn renderLoad(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer, name: [*:0]const u8) !*llvm.Value { - return renderLoadUntyped(ofile, ptr, ptr_type.key.alignment, ptr_type.key.vol, name); -} - -pub fn getHandleValue(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer) !?*llvm.Value { - const child_type = ptr_type.key.child_type; - if (!child_type.hasBits()) { - return null; - } - if (child_type.handleIsPtr()) { - return ptr; - } - return try renderLoad(ofile, ptr, ptr_type, ""); -} - -pub fn renderStoreUntyped( - ofile: *ObjectFile, - value: *llvm.Value, - ptr: *llvm.Value, - alignment: Type.Pointer.Align, - vol: Type.Pointer.Vol, -) !*llvm.Value { - const result = llvm.BuildStore(ofile.builder, value, ptr) orelse return error.OutOfMemory; - switch (vol) { - .Non => {}, - .Volatile => llvm.SetVolatile(result, 1), - } - llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm.TypeOf(value))); - return result; -} - -pub fn renderStore( - ofile: *ObjectFile, - value: *llvm.Value, - ptr: *llvm.Value, - ptr_type: *Type.Pointer, -) !*llvm.Value { - return renderStoreUntyped(ofile, value, ptr, ptr_type.key.alignment, ptr_type.key.vol); -} - -pub fn renderAlloca( - ofile: *ObjectFile, - var_type: *Type, - name: []const u8, - alignment: Type.Pointer.Align, -) !*llvm.Value { - const llvm_var_type = try var_type.getLlvmType(ofile.arena, ofile.context); - const name_with_null = try std.cstr.addNullByte(ofile.arena, name); - const result = llvm.BuildAlloca(ofile.builder, llvm_var_type, @ptrCast([*:0]const u8, name_with_null.ptr)) orelse return error.OutOfMemory; - 
llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm_var_type)); - return result; -} - -pub fn resolveAlign(ofile: *ObjectFile, alignment: Type.Pointer.Align, llvm_type: *llvm.Type) u32 { - return switch (alignment) { - .Abi => return llvm.ABIAlignmentOfType(ofile.comp.target_data_ref, llvm_type), - .Override => |a| a, + const MCValue = union(enum) { + none, + unreach, + /// A pointer-sized integer that fits in a register. + immediate: u64, + /// Refers to the index into `constants` field of `Function`. + local_const_ptr: usize, }; -} + + fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { + switch (inst.tag) { + .unreach => return self.genPanic(inst.src), + .constant => unreachable, // excluded from function bodies + .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?), + .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?), + } + } + + fn genPanic(self: *Function, src: usize) !MCValue { + // TODO change this to call the panic function + switch (self.module.target.cpu.arch) { + .i386, .x86_64 => { + try self.code.append(0xcc); // x86 int3 + }, + else => return self.fail(src, "TODO implement panic for {}", .{self.module.target.cpu.arch}), + } + return .unreach; + } + + fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { + return self.fail(inst.base.src, "TODO machine code gen assembly", .{}); + } + + fn genPtrToInt(self: *Function, inst: *ir.Inst.PtrToInt) !MCValue { + // no-op + return self.resolveInst(inst.args.ptr); + } + + fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { + if (inst.cast(ir.Inst.Constant)) |const_inst| { + switch (inst.ty.zigTypeTag()) { + .Int => { + const info = inst.ty.intInfo(self.module.target); + const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); + if (info.bits > ptr_bits or info.signed) { + return self.fail(inst.src, "TODO const int bigger than ptr and signed int", .{}); + } + return MCValue{ .immediate = const_inst.val.toUnsignedInt() }; + }, + else => return self.fail(inst.src, "TODO implement const of type '{}'", .{inst.ty}), + } + } else { + return self.inst_table.getValue(inst).?; + } + } + + fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { + @setCold(true); + const msg = try std.fmt.allocPrint(self.errors.allocator, format, args); + { + errdefer self.errors.allocator.free(msg); + (try self.errors.addOne()).* = .{ + .byte_offset = src, + .msg = msg, + }; + } + return error.CodegenFail; + } +}; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index cea7729642..f3051df088 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -724,7 +724,16 @@ pub fn main() anyerror!void { } const link = @import("link.zig"); - try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out"); + var result = try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out"); + defer result.deinit(allocator); + if (result.errors.len != 0) { + for (result.errors) |err_msg| { + const loc = findLineColumn(source, err_msg.byte_offset); + std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); + } + if (debug_error_trace) return error.ParseFailure; + std.process.exit(1); + } } fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } { diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index f358a4b64d..4f89786b3c 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -5,15 +5,38 
@@ const Allocator = std.mem.Allocator; const ir = @import("ir.zig"); const fs = std.fs; const elf = std.elf; +const codegen = @import("codegen.zig"); const executable_mode = 0o755; const default_entry_addr = 0x8000000; +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; + +pub const Result = struct { + errors: []ErrorMsg, + + pub fn deinit(self: *Result, allocator: *mem.Allocator) void { + for (self.errors) |err| { + allocator.free(err.msg); + } + allocator.free(self.errors); + self.* = undefined; + } +}; + /// Attempts incremental linking, if the file already exists. /// If incremental linking fails, falls back to truncating the file and rewriting it. /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { +pub fn updateExecutableFilePath( + allocator: *Allocator, + module: ir.Module, + dir: fs.Dir, + sub_path: []const u8, +) !Result { const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = executable_mode }); defer file.close(); @@ -21,12 +44,18 @@ pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: f } /// Atomically overwrites the old file, if present. -pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { +pub fn writeExecutableFilePath( + allocator: *Allocator, + module: ir.Module, + dir: fs.Dir, + sub_path: []const u8, +) !Result { const af = try dir.atomicFile(sub_path, .{ .mode = executable_mode }); defer af.deinit(); - try writeExecutableFile(allocator, module, af.file); + const result = try writeExecutableFile(allocator, module, af.file); try af.finish(); + return result; } /// Attempts incremental linking, if the file already exists. @@ -34,8 +63,8 @@ pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs /// Returns an error if `file` is not already open with +read +write +seek abilities. /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { - updateExecutableFileInner(allocator, module, file) catch |err| switch (err) { +pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { + return updateExecutableFileInner(allocator, module, file) catch |err| switch (err) { error.IncrFailed => { return writeExecutableFile(allocator, module, file); }, @@ -66,20 +95,17 @@ const Update = struct { text_section_index: ?u16, symtab_section_index: ?u16, - /// Key: index into strtab. Value: index into symbols. 
- symbol_table: std.AutoHashMap(usize, usize), /// The same order as in the file symbols: std.ArrayList(elf.Elf64_Sym), - /// Sorted by address, index into symbols - symbols_by_addr: std.ArrayList(usize), + + errors: std.ArrayList(ErrorMsg), fn deinit(self: *Update) void { self.sections.deinit(); self.program_headers.deinit(); self.shstrtab.deinit(); - self.symbol_table.deinit(); self.symbols.deinit(); - self.symbols_by_addr.deinit(); + self.errors.deinit(); } // `expand_num / expand_den` is the factor of padding when allocation @@ -162,6 +188,7 @@ const Update = struct { fn makeString(self: *Update, bytes: []const u8) !u32 { const result = self.shstrtab.items.len; try self.shstrtab.appendSlice(bytes); + try self.shstrtab.append(0); return @intCast(u32, result); } @@ -187,6 +214,7 @@ const Update = struct { const file_size = 256 * 1024; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); + //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); try self.program_headers.append(.{ .p_type = elf.PT_LOAD, .p_offset = off, @@ -194,10 +222,10 @@ const Update = struct { .p_vaddr = default_entry_addr, .p_paddr = default_entry_addr, .p_memsz = 0, - .p_align = 0x1000, + .p_align = p_align, .p_flags = elf.PF_X | elf.PF_R, }); - self.entry_addr = default_entry_addr; + self.entry_addr = null; phdr_load_re_dirty = true; phdr_table_dirty = true; } @@ -220,6 +248,7 @@ const Update = struct { if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); const off = self.findFreeSpace(self.shstrtab.items.len, 1); + //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len }); try self.sections.append(.{ .sh_name = try self.makeString(".shstrtab"), .sh_type = elf.SHT_STRTAB, @@ -259,6 +288,7 @@ const Update = struct { const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); const file_size = self.module.exports.len * each_size; const off = self.findFreeSpace(file_size, min_align); + //std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); try self.sections.append(.{ .sh_name = try self.makeString(".symtab"), @@ -307,6 +337,7 @@ const Update = struct { const needed_size = self.program_headers.items.len * phsize; if (needed_size > allocated_size) { + self.phdr_table_offset = null; // free the space self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); } @@ -361,6 +392,7 @@ const Update = struct { const needed_size = self.sections.items.len * phsize; if (needed_size > allocated_size) { + self.shdr_table_offset = null; // free the space self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); } @@ -414,11 +446,30 @@ const Update = struct { }, } } - if (shstrtab_dirty) { - try self.file.pwriteAll(self.shstrtab.items, self.sections.items[self.shstrtab_index.?].sh_offset); - } try self.writeCodeAndSymbols(); - try self.writeElfHeader(); + + const shstrtab_sect = &self.sections.items[self.shstrtab_index.?]; + if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) { + const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset); + const needed_size = self.shstrtab.items.len; + + if (needed_size > allocated_size) { + shstrtab_sect.sh_size = 0; // free the space + shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1); + shstrtab_sect.sh_size = needed_size; + } + try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); + } + if (self.entry_addr == null) { + 
const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{}); + errdefer self.errors.allocator.free(msg); + try self.errors.append(.{ + .byte_offset = 0, + .msg = msg, + }); + } else { + try self.writeElfHeader(); + } // TODO find end pos and truncate } @@ -540,13 +591,122 @@ const Update = struct { } fn writeCodeAndSymbols(self: *Update) !void { - @panic("TODO writeCodeAndSymbols"); + // index 0 is always a null symbol + try self.symbols.resize(1); + self.symbols.items[0] = .{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, + }; + + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + var vaddr: u64 = phdr.p_vaddr; + + var code = std.ArrayList(u8).init(self.sections.allocator); + defer code.deinit(); + + for (self.module.exports) |exp| { + code.shrink(0); + var symbol = try codegen.generateSymbol(exp.typed_value, self.module.*, &code); + defer symbol.deinit(code.allocator); + if (symbol.errors.len != 0) { + for (symbol.errors) |err| { + const msg = try mem.dupe(self.errors.allocator, u8, err.msg); + errdefer self.errors.allocator.free(msg); + try self.errors.append(.{ + .byte_offset = err.byte_offset, + .msg = msg, + }); + } + continue; + } + + if (mem.eql(u8, exp.name, "_start")) { + self.entry_addr = vaddr; + } + (try self.symbols.addOne()).* = .{ + .st_name = try self.makeString(exp.name), + .st_info = (elf.STB_LOCAL << 4) | elf.STT_FUNC, + .st_other = 0, + .st_shndx = self.text_section_index.?, + .st_value = vaddr, + .st_size = code.items.len, + }; + vaddr += code.items.len; + } + + return self.writeSymbols(); + } + + fn writeSymbols(self: *Update) !void { + const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedArchitecture, + }; + const small_ptr = ptr_width == .p32; + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); + const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); + + const allocated_size = self.allocatedSize(syms_sect.sh_offset); + const needed_size = self.symbols.items.len * sym_size; + if (needed_size > allocated_size) { + syms_sect.sh_size = 0; // free the space + syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align); + syms_sect.sh_size = needed_size; + } + const allocator = self.symbols.allocator; + const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + switch (ptr_width) { + .p32 => { + const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); + defer allocator.free(buf); + + for (buf) |*sym, i| { + sym.* = .{ + .st_name = self.symbols.items[i].st_name, + .st_value = @intCast(u32, self.symbols.items[i].st_value), + .st_size = @intCast(u32, self.symbols.items[i].st_size), + .st_info = self.symbols.items[i].st_info, + .st_other = self.symbols.items[i].st_other, + .st_shndx = self.symbols.items[i].st_shndx, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Sym, sym); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + }, + .p64 => { + const buf = try allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); + defer allocator.free(buf); + + for (buf) |*sym, i| { + sym.* = .{ + .st_name = self.symbols.items[i].st_name, + .st_value = self.symbols.items[i].st_value, + .st_size = self.symbols.items[i].st_size, + .st_info = 
self.symbols.items[i].st_info, + .st_other = self.symbols.items[i].st_other, + .st_shndx = self.symbols.items[i].st_shndx, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Sym, sym); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + }, + } } }; /// Truncates the existing file contents and overwrites the contents. /// Returns an error if `file` is not already open with +read +write +seek abilities. -pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { +pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { var update = Update{ .file = file, .module = &module, @@ -561,17 +721,20 @@ pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.Fi .text_section_index = null, .symtab_section_index = null, - .symbol_table = std.AutoHashMap(usize, usize).init(allocator), .symbols = std.ArrayList(elf.Elf64_Sym).init(allocator), - .symbols_by_addr = std.ArrayList(usize).init(allocator), + + .errors = std.ArrayList(ErrorMsg).init(allocator), }; defer update.deinit(); - return update.perform(); + try update.perform(); + return Result{ + .errors = update.errors.toOwnedSlice(), + }; } /// Returns error.IncrFailed if incremental update could not be performed. -fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !void { +fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { //var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; // TODO implement incremental linking diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 03cda71387..aaa6f0867a 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -264,6 +264,56 @@ pub const Value = extern union { } } + /// Asserts the value is an integer and it fits in a u64 + pub fn toUnsignedInt(self: Value) u64 { + switch (self.tag()) { + .ty, + .u8_type, + .i8_type, + .isize_type, + .usize_type, + .c_short_type, + .c_ushort_type, + .c_int_type, + .c_uint_type, + .c_long_type, + .c_ulong_type, + .c_longlong_type, + .c_ulonglong_type, + .c_longdouble_type, + .f16_type, + .f32_type, + .f64_type, + .f128_type, + .c_void_type, + .bool_type, + .void_type, + .type_type, + .anyerror_type, + .comptime_int_type, + .comptime_float_type, + .noreturn_type, + .fn_naked_noreturn_no_args_type, + .single_const_pointer_to_comptime_int_type, + .const_slice_u8_type, + .void_value, + .noreturn_value, + .bool_true, + .bool_false, + .function, + .ref, + .ref_val, + .bytes, + => unreachable, + + .zero => return 0, + + .int_u64 => return self.cast(Payload.Int_u64).?.int, + .int_i64 => return @intCast(u64, self.cast(Payload.Int_u64).?.int), + .int_big => return self.cast(Payload.IntBig).?.big_int.to(u64) catch unreachable, + } + } + /// Asserts the value is an integer, and the destination type is ComptimeInt or Int. 
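+ /// (The target matters because the bit width of pointer-sized integer
+ /// types such as `usize` depends on it.)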
pub fn intFitsInType(self: Value, ty: Type, target: Target) bool { switch (self.tag()) { From 99ec614b717d2e36d4dc712ac479be1df5ac62b2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 17:46:01 -0400 Subject: [PATCH 03/10] codegen for const ints and string literals --- src-self-hosted/codegen.zig | 95 +++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index f18b138440..91f03aa932 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -34,7 +34,6 @@ pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std. .code = code, .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), .errors = std.ArrayList(ErrorMsg).init(code.allocator), - .constants = std.ArrayList(ir.TypedValue).init(code.allocator), }; defer function.inst_table.deinit(); defer function.errors.deinit(); @@ -49,6 +48,7 @@ pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std. }; try function.inst_table.putNoClobber(inst, new_inst); } + return Symbol{ .errors = function.errors.toOwnedSlice() }; }, else => @panic("TODO implement generateSymbol for non-function types"), @@ -60,10 +60,6 @@ const Function = struct { mod_fn: *const ir.Module.Fn, code: *std.ArrayList(u8), inst_table: std.AutoHashMap(*ir.Inst, MCValue), - /// Constants are embedded within functions (at the end, after `ret`) - /// so that they are independently updateable. - /// This is a list of constants that must be appended to the symbol after `ret`. - constants: std.ArrayList(ir.TypedValue), errors: std.ArrayList(ErrorMsg), const MCValue = union(enum) { @@ -71,8 +67,8 @@ const Function = struct { unreach, /// A pointer-sized integer that fits in a register. immediate: u64, - /// Refers to the index into `constants` field of `Function`. - local_const_ptr: usize, + /// The constant was emitted into the code, at this offset. 
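+ /// For example, a string literal's bytes are appended to the function's
+ /// code (behind a forward jump), and this offset records where they live.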
+ embedded_in_code: usize, }; fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { @@ -88,13 +84,46 @@ const Function = struct { // TODO change this to call the panic function switch (self.module.target.cpu.arch) { .i386, .x86_64 => { - try self.code.append(0xcc); // x86 int3 + try self.code.append(0xcc); // int3 }, else => return self.fail(src, "TODO implement panic for {}", .{self.module.target.cpu.arch}), } return .unreach; } + fn genRet(self: *Function, src: usize) !void { + // TODO change this to call the panic function + switch (self.module.target.cpu.arch) { + .i386, .x86_64 => { + try self.code.append(0xc3); // ret + }, + else => return self.fail(src, "TODO implement ret for {}", .{self.module.target.cpu.arch}), + } + } + + fn genRelativeFwdJump(self: *Function, src: usize, amount: u32) !void { + switch (self.module.target.cpu.arch) { + .i386, .x86_64 => { + if (amount <= std.math.maxInt(u8)) { + try self.code.resize(self.code.items.len + 2); + self.code.items[self.code.items.len - 2] = 0xeb; + self.code.items[self.code.items.len - 1] = @intCast(u8, amount); + } else if (amount <= std.math.maxInt(u16)) { + try self.code.resize(self.code.items.len + 3); + self.code.items[self.code.items.len - 3] = 0xe9; // jmp rel16 + const imm_ptr = self.code.items[self.code.items.len - 2 ..][0..2]; + mem.writeIntLittle(u16, imm_ptr, @intCast(u16, amount)); + } else { + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xea; // jmp rel32 + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, amount); + } + }, + else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.module.target.cpu.arch}), + } + } + fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { return self.fail(inst.base.src, "TODO machine code gen assembly", .{}); } @@ -105,23 +134,51 @@ const Function = struct { } fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { + if (self.inst_table.getValue(inst)) |mcv| { + return mcv; + } if (inst.cast(ir.Inst.Constant)) |const_inst| { - switch (inst.ty.zigTypeTag()) { - .Int => { - const info = inst.ty.intInfo(self.module.target); - const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); - if (info.bits > ptr_bits or info.signed) { - return self.fail(inst.src, "TODO const int bigger than ptr and signed int", .{}); - } - return MCValue{ .immediate = const_inst.val.toUnsignedInt() }; - }, - else => return self.fail(inst.src, "TODO implement const of type '{}'", .{inst.ty}), - } + const mcvalue = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val }); + try self.inst_table.putNoClobber(inst, mcvalue); + return mcvalue; } else { return self.inst_table.getValue(inst).?; } } + fn genTypedValue(self: *Function, src: usize, typed_value: ir.TypedValue) !MCValue { + switch (typed_value.ty.zigTypeTag()) { + .Pointer => { + const ptr_elem_type = typed_value.ty.elemType(); + switch (ptr_elem_type.zigTypeTag()) { + .Array => { + // TODO more checks to make sure this can be emitted as a string literal + const bytes = try typed_value.val.toAllocatedBytes(self.code.allocator); + defer self.code.allocator.free(bytes); + const smaller_len = std.math.cast(u32, bytes.len) catch + return self.fail(src, "TODO handle a larger string constant", .{}); + + // Emit the string literal directly into the code; jump over it. 
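+ // Resulting layout, assuming the literal is small enough for the rel8
+ // form emitted by genRelativeFwdJump:
+ //   eb <len>   jmp over the data
+ //   <bytes>    the string literal itself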
+ const offset = self.code.items.len; + try self.genRelativeFwdJump(src, smaller_len); + try self.code.appendSlice(bytes); + return MCValue{ .embedded_in_code = offset }; + }, + else => |t| return self.fail(src, "TODO implement emitTypedValue for pointer to '{}'", .{@tagName(t)}), + } + }, + .Int => { + const info = typed_value.ty.intInfo(self.module.target); + const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); + if (info.bits > ptr_bits or info.signed) { + return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); + } + return MCValue{ .immediate = typed_value.val.toUnsignedInt() }; + }, + else => return self.fail(src, "TODO implement const of type '{}'", .{typed_value.ty}), + } + } + fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { @setCold(true); const msg = try std.fmt.allocPrint(self.errors.allocator, format, args); From 63b54bcf5173931270724c284696b747c5c6761a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 18:58:47 -0400 Subject: [PATCH 04/10] codegen for inline assembly --- src-self-hosted/codegen.zig | 219 ++++++++++++++++++++++++++++++++++-- 1 file changed, 212 insertions(+), 7 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 91f03aa932..6d401b92f5 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -4,6 +4,7 @@ const assert = std.debug.assert; const ir = @import("ir.zig"); const Type = @import("type.zig").Type; const Value = @import("value.zig").Value; +const Target = std.Target; pub const ErrorMsg = struct { byte_offset: usize, @@ -69,6 +70,9 @@ const Function = struct { immediate: u64, /// The constant was emitted into the code, at this offset. embedded_in_code: usize, + /// The value is in a target-specific register. The value can + /// be @intToEnum casted to the respective Reg enum. 
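+ /// For example, an x86_64 asm output constraint like "={rax}" yields
+ /// `MCValue{ .register = @enumToInt(Reg(.x86_64).rax) }`.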
+ register: usize, }; fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { @@ -108,14 +112,9 @@ const Function = struct { try self.code.resize(self.code.items.len + 2); self.code.items[self.code.items.len - 2] = 0xeb; self.code.items[self.code.items.len - 1] = @intCast(u8, amount); - } else if (amount <= std.math.maxInt(u16)) { - try self.code.resize(self.code.items.len + 3); - self.code.items[self.code.items.len - 3] = 0xe9; // jmp rel16 - const imm_ptr = self.code.items[self.code.items.len - 2 ..][0..2]; - mem.writeIntLittle(u16, imm_ptr, @intCast(u16, amount)); } else { try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xea; // jmp rel32 + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; mem.writeIntLittle(u32, imm_ptr, amount); } @@ -125,7 +124,104 @@ const Function = struct { } fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { - return self.fail(inst.base.src, "TODO machine code gen assembly", .{}); + // TODO convert to inline function + switch (self.module.target.cpu.arch) { + .arm => return self.genAsmArch(.arm, inst), + .armeb => return self.genAsmArch(.armeb, inst), + .aarch64 => return self.genAsmArch(.aarch64, inst), + .aarch64_be => return self.genAsmArch(.aarch64_be, inst), + .aarch64_32 => return self.genAsmArch(.aarch64_32, inst), + .arc => return self.genAsmArch(.arc, inst), + .avr => return self.genAsmArch(.avr, inst), + .bpfel => return self.genAsmArch(.bpfel, inst), + .bpfeb => return self.genAsmArch(.bpfeb, inst), + .hexagon => return self.genAsmArch(.hexagon, inst), + .mips => return self.genAsmArch(.mips, inst), + .mipsel => return self.genAsmArch(.mipsel, inst), + .mips64 => return self.genAsmArch(.mips64, inst), + .mips64el => return self.genAsmArch(.mips64el, inst), + .msp430 => return self.genAsmArch(.msp430, inst), + .powerpc => return self.genAsmArch(.powerpc, inst), + .powerpc64 => return self.genAsmArch(.powerpc64, inst), + .powerpc64le => return self.genAsmArch(.powerpc64le, inst), + .r600 => return self.genAsmArch(.r600, inst), + .amdgcn => return self.genAsmArch(.amdgcn, inst), + .riscv32 => return self.genAsmArch(.riscv32, inst), + .riscv64 => return self.genAsmArch(.riscv64, inst), + .sparc => return self.genAsmArch(.sparc, inst), + .sparcv9 => return self.genAsmArch(.sparcv9, inst), + .sparcel => return self.genAsmArch(.sparcel, inst), + .s390x => return self.genAsmArch(.s390x, inst), + .tce => return self.genAsmArch(.tce, inst), + .tcele => return self.genAsmArch(.tcele, inst), + .thumb => return self.genAsmArch(.thumb, inst), + .thumbeb => return self.genAsmArch(.thumbeb, inst), + .i386 => return self.genAsmArch(.i386, inst), + .x86_64 => return self.genAsmArch(.x86_64, inst), + .xcore => return self.genAsmArch(.xcore, inst), + .nvptx => return self.genAsmArch(.nvptx, inst), + .nvptx64 => return self.genAsmArch(.nvptx64, inst), + .le32 => return self.genAsmArch(.le32, inst), + .le64 => return self.genAsmArch(.le64, inst), + .amdil => return self.genAsmArch(.amdil, inst), + .amdil64 => return self.genAsmArch(.amdil64, inst), + .hsail => return self.genAsmArch(.hsail, inst), + .hsail64 => return self.genAsmArch(.hsail64, inst), + .spir => return self.genAsmArch(.spir, inst), + .spir64 => return self.genAsmArch(.spir64, inst), + .kalimba => return self.genAsmArch(.kalimba, inst), + .shave => return self.genAsmArch(.shave, inst), + .lanai => return self.genAsmArch(.lanai, inst), + .wasm32 => return 
self.genAsmArch(.wasm32, inst), + .wasm64 => return self.genAsmArch(.wasm64, inst), + .renderscript32 => return self.genAsmArch(.renderscript32, inst), + .renderscript64 => return self.genAsmArch(.renderscript64, inst), + .ve => return self.genAsmArch(.ve, inst), + } + } + + fn genAsmArch(self: *Function, comptime arch: Target.Cpu.Arch, inst: *ir.Inst.Assembly) !MCValue { + if (arch != .x86_64 and arch != .i386) { + return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{}); + } + if (!mem.eql(u8, inst.args.asm_source, "syscall")) { + return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); + } + for (inst.args.inputs) |input, i| { + if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); + } + const reg_name = input[1 .. input.len - 1]; + const reg = parseRegName(arch, reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + const arg = try self.resolveInst(inst.args.args[i]); + try self.genSetReg(inst.base.src, arch, reg, arg); + } + + if (inst.args.output) |output| { + if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); + } + const reg_name = output[2 .. output.len - 1]; + const reg = parseRegName(arch, reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + return MCValue{ .register = @enumToInt(reg) }; + } else { + return MCValue.none; + } + } + + fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) !void { + switch (arch) { + .x86_64 => switch (reg) { + .rax => return self.fail(src, "TODO implement genSetReg for x86_64 'rax'", .{}), + .rdi => return self.fail(src, "TODO implement genSetReg for x86_64 'rdi'", .{}), + .rsi => return self.fail(src, "TODO implement genSetReg for x86_64 'rsi'", .{}), + .rdx => return self.fail(src, "TODO implement genSetReg for x86_64 'rdx'", .{}), + else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), + }, + else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), + } } fn genPtrToInt(self: *Function, inst: *ir.Inst.PtrToInt) !MCValue { @@ -192,3 +288,112 @@ const Function = struct { return error.CodegenFail; } }; + +fn Reg(comptime arch: Target.Cpu.Arch) type { + return switch (arch) { + .i386 => enum { + eax, + ebx, + ecx, + edx, + ebp, + esp, + esi, + edi, + + ax, + bx, + cx, + dx, + bp, + sp, + si, + di, + + ah, + bh, + ch, + dh, + + al, + bl, + cl, + dl, + }, + .x86_64 => enum { + rax, + rbx, + rcx, + rdx, + rbp, + rsp, + rsi, + rdi, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, + + eax, + ebx, + ecx, + edx, + ebp, + esp, + esi, + edi, + r8d, + r9d, + r10d, + r11d, + r12d, + r13d, + r14d, + r15d, + + ax, + bx, + cx, + dx, + bp, + sp, + si, + di, + r8w, + r9w, + r10w, + r11w, + r12w, + r13w, + r14w, + r15w, + + ah, + bh, + ch, + dh, + + al, + bl, + cl, + dl, + r8b, + r9b, + r10b, + r11b, + r12b, + r13b, + r14b, + r15b, + }, + else => @compileError("TODO add more register enums"), + }; +} + +fn parseRegName(comptime arch: Target.Cpu.Arch, name: []const u8) ?Reg(arch) { + return std.meta.stringToEnum(Reg(arch), name); +} From 0ac502f37257c607fc835840d02f0401c5e59442 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 
19:40:17 -0400 Subject: [PATCH 05/10] codegen for setting rax and rdi registers --- src-self-hosted/codegen.zig | 87 ++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6d401b92f5..a9cfc678b0 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -184,9 +184,6 @@ const Function = struct { if (arch != .x86_64 and arch != .i386) { return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{}); } - if (!mem.eql(u8, inst.args.asm_source, "syscall")) { - return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); - } for (inst.args.inputs) |input, i| { if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); @@ -198,6 +195,12 @@ const Function = struct { try self.genSetReg(inst.base.src, arch, reg, arg); } + if (mem.eql(u8, inst.args.asm_source, "syscall")) { + try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); + } else { + return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); + } + if (inst.args.output) |output| { if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); @@ -214,10 +217,80 @@ const Function = struct { fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) !void { switch (arch) { .x86_64 => switch (reg) { - .rax => return self.fail(src, "TODO implement genSetReg for x86_64 'rax'", .{}), - .rdi => return self.fail(src, "TODO implement genSetReg for x86_64 'rdi'", .{}), - .rsi => return self.fail(src, "TODO implement genSetReg for x86_64 'rsi'", .{}), - .rdx => return self.fail(src, "TODO implement genSetReg for x86_64 'rdx'", .{}), + .rax => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + // Setting the eax register zeroes the upper part of rax, so if the number is small + // enough, that is preferable. + // Best case: zero + // 31 c0 xor eax,eax + if (x == 0) { + return self.code.appendSlice(&[_]u8{ 0x31, 0xc0 }); + } + // Next best case: set eax with 4 bytes + // b8 04 03 02 01 mov eax,0x01020304 + if (x <= std.math.maxInt(u32)) { + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xb8; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: set rax with 8 bytes + // 48 b8 08 07 06 05 04 03 02 01 movabs rax,0x0102030405060708 + try self.code.resize(self.code.items.len + 10); + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xb8; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + return; + }, + .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), + .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), + }, + .rdi => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + // Setting the edi register zeroes the upper part of rdi, so if the number is small + // enough, that is preferable. 
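+ // (Encoding notes: 0x31 is xor r/m32,r32; 0xbf is mov edi,imm32, i.e.
+ // 0xb8 plus the register's encoding index; the 0x48 REX.W prefix below
+ // selects the 64-bit movabs form.)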
+ // Best case: zero
+ // 31 ff xor edi,edi
+ if (x == 0) {
+ return self.code.appendSlice(&[_]u8{ 0x31, 0xff });
+ }
+ // Next best case: set edi with 4 bytes
+ // bf 04 03 02 01 mov edi,0x1020304
+ if (x <= std.math.maxInt(u32)) {
+ try self.code.resize(self.code.items.len + 5);
+ self.code.items[self.code.items.len - 5] = 0xbf;
+ const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
+ mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x));
+ return;
+ }
+ // Worst case: set rdi with 8 bytes
+ // 48 bf 08 07 06 05 04 03 02 01 movabs rdi,0x0102030405060708
+ try self.code.resize(self.code.items.len + 10);
+ self.code.items[self.code.items.len - 10] = 0x48;
+ self.code.items[self.code.items.len - 9] = 0xbf;
+ const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
+ mem.writeIntLittle(u64, imm_ptr, x);
+ return;
+ },
+ .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}),
+ .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}),
+ },
+ .rsi => switch (mcv) {
+ .none, .unreach => unreachable,
+ .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}),
+ .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = embedded_in_code", .{}),
+ .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}),
+ },
+ .rdx => switch (mcv) {
+ .none, .unreach => unreachable,
+ .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = immediate", .{}),
+ .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}),
+ .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}),
+ },
 else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}),
 },
 else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}),
From 757d13d7843879de618247733f22f4fc515cc467 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Thu, 23 Apr 2020 20:23:16 -0400
Subject: [PATCH 06/10] codegen supports embedded-in-code constants

also coerce no longer requires a bitcast
---
 src-self-hosted/codegen.zig | 29 +++++++++++++++++++++++-
 src-self-hosted/ir.zig      | 45 ++++++++++++++++++++++++++++---------
 src-self-hosted/ir/text.zig | 27 ++++++++++++++++++++++
 3 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index a9cfc678b0..b3011081da 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -81,6 +81,7 @@ const Function = struct {
 .constant => unreachable, // excluded from function bodies
 .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?),
 .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?),
+ .bitcast => return self.genBitCast(inst.cast(ir.Inst.BitCast).?),
 }
 }
@@ -282,7 +283,26 @@ const Function = struct {
 .rsi => switch (mcv) {
 .none, .unreach => unreachable,
 .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}),
- .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = embedded_in_code", .{}),
+ .embedded_in_code => |code_offset| {
+ // Examples:
+ // lea rsi, [rip + 0x01020304]
+ // lea rsi, [rip - 7]
+ // f: 48 8d 35 04 03 02 01 lea rsi,[rip+0x1020304] # 102031a <_start+0x102031a>
+ // 16: 48 8d 35 f9 ff ff ff lea rsi,[rip+0xfffffffffffffff9] # 16 <_start+0x16>
+ //
+ // We need the offset from RIP in a signed
i32 twos complement. + // The instruction is 7 bytes long and RIP points to the next instruction. + try self.code.resize(self.code.items.len + 7); + const rip = self.code.items.len; + const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); + const offset = @intCast(i32, big_offset); + self.code.items[self.code.items.len - 7] = 0x48; + self.code.items[self.code.items.len - 6] = 0x8d; + self.code.items[self.code.items.len - 5] = 0x35; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(i32, imm_ptr, offset); + return; + }, .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), }, .rdx => switch (mcv) { @@ -302,6 +322,11 @@ const Function = struct { return self.resolveInst(inst.args.ptr); } + fn genBitCast(self: *Function, inst: *ir.Inst.BitCast) !MCValue { + const operand = try self.resolveInst(inst.args.operand); + return operand; + } + fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { if (self.inst_table.getValue(inst)) |mcv| { return mcv; @@ -344,6 +369,8 @@ const Function = struct { } return MCValue{ .immediate = typed_value.val.toUnsignedInt() }; }, + .ComptimeInt => unreachable, // semantic analysis prevents this + .ComptimeFloat => unreachable, // semantic analysis prevents this else => return self.fail(src, "TODO implement const of type '{}'", .{typed_value.ty}), } } diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index f3051df088..9a609b63ae 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -24,6 +24,7 @@ pub const Inst = struct { constant, assembly, ptrtoint, + bitcast, }; pub fn cast(base: *Inst, comptime T: type) ?*T { @@ -45,6 +46,7 @@ pub const Inst = struct { .assembly, .ptrtoint, + .bitcast, => null, }; } @@ -84,6 +86,15 @@ pub const Inst = struct { ptr: *Inst, }, }; + + pub const BitCast = struct { + pub const base_tag = Tag.bitcast; + + base: Inst, + args: struct { + operand: *Inst, + }, + }; }; pub const TypedValue = struct { @@ -234,7 +245,7 @@ const Analyze = struct { fn resolveConstString(self: *Analyze, func: ?*Fn, old_inst: *text.Inst) ![]u8 { const new_inst = try self.resolveInst(func, old_inst); const wanted_type = Type.initTag(.const_slice_u8); - const coerced_inst = try self.coerce(wanted_type, new_inst); + const coerced_inst = try self.coerce(func, wanted_type, new_inst); const val = try self.resolveConstValue(coerced_inst); return val.toAllocatedBytes(&self.arena.allocator); } @@ -242,7 +253,7 @@ const Analyze = struct { fn resolveType(self: *Analyze, func: ?*Fn, old_inst: *text.Inst) !Type { const new_inst = try self.resolveInst(func, old_inst); const wanted_type = Type.initTag(.@"type"); - const coerced_inst = try self.coerce(wanted_type, new_inst); + const coerced_inst = try self.coerce(func, wanted_type, new_inst); const val = try self.resolveConstValue(coerced_inst); return val.toType(); } @@ -409,6 +420,7 @@ const Analyze = struct { .primitive => return self.analyzeInstPrimitive(func, old_inst.cast(text.Inst.Primitive).?), .fntype => return self.analyzeInstFnType(func, old_inst.cast(text.Inst.FnType).?), .intcast => return self.analyzeInstIntCast(func, old_inst.cast(text.Inst.IntCast).?), + .bitcast => return self.analyzeInstBitCast(func, old_inst.cast(text.Inst.BitCast).?), } } @@ -472,7 +484,7 @@ const Analyze = struct { fn analyzeInstAs(self: *Analyze, func: ?*Fn, as: *text.Inst.As) InnerError!*Inst { const dest_type = try self.resolveType(func, as.positionals.dest_type); const new_inst = try self.resolveInst(func, 
as.positionals.value); - return self.coerce(dest_type, new_inst); + return self.coerce(func, dest_type, new_inst); } fn analyzeInstPtrToInt(self: *Analyze, func: ?*Fn, ptrtoint: *text.Inst.PtrToInt) InnerError!*Inst { @@ -545,12 +557,18 @@ const Analyze = struct { } if (dest_is_comptime_int or new_inst.value() != null) { - return self.coerce(dest_type, new_inst); + return self.coerce(func, dest_type, new_inst); } return self.fail(intcast.base.src, "TODO implement analyze widen or shorten int", .{}); } + fn analyzeInstBitCast(self: *Analyze, func: ?*Fn, inst: *text.Inst.BitCast) InnerError!*Inst { + const dest_type = try self.resolveType(func, inst.positionals.dest_type); + const operand = try self.resolveInst(func, inst.positionals.operand); + return self.bitcast(func, dest_type, operand); + } + fn analyzeInstDeref(self: *Analyze, func: ?*Fn, deref: *text.Inst.Deref) InnerError!*Inst { const ptr = try self.resolveInst(func, deref.positionals.ptr); const elem_ty = switch (ptr.ty.zigTypeTag()) { @@ -583,7 +601,8 @@ const Analyze = struct { elem.* = try self.resolveConstString(func, assembly.kw_args.clobbers[i]); } for (args) |*elem, i| { - elem.* = try self.resolveInst(func, assembly.kw_args.args[i]); + const arg = try self.resolveInst(func, assembly.kw_args.args[i]); + elem.* = try self.coerce(func, Type.initTag(.usize), arg); } const f = try self.requireFunctionBody(func, assembly.base.src); @@ -602,10 +621,14 @@ const Analyze = struct { return self.addNewInstArgs(f, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); } - fn coerce(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst { + fn coerce(self: *Analyze, func: ?*Fn, dest_type: Type, inst: *Inst) !*Inst { + // If the types are the same, we can return the operand. + if (dest_type.eql(inst.ty)) + return inst; + const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty); if (in_memory_result == .ok) { - return self.bitcast(dest_type, inst); + return self.bitcast(func, dest_type, inst); } // *[N]T to []T @@ -634,12 +657,14 @@ const Analyze = struct { return self.fail(inst.src, "TODO implement type coercion", .{}); } - fn bitcast(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst { + fn bitcast(self: *Analyze, func: ?*Fn, dest_type: Type, inst: *Inst) !*Inst { if (inst.value()) |val| { // Keep the comptime Value representation; take the new type. 
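// E.g. bitcasting a comptime-known integer to `usize` just re-wraps the
// same Value at the new type; only runtime operands reach the branch below.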
return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); } - return self.fail(inst.src, "TODO implement runtime bitcast", .{}); + // TODO validate the type size and other compile errors + const f = try self.requireFunctionBody(func, inst.src); + return self.addNewInstArgs(f, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); } fn coerceArrayPtrToSlice(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst { @@ -713,7 +738,7 @@ pub fn main() anyerror!void { std.process.exit(1); } - const output_zir = false; + const output_zir = true; if (output_zir) { var new_zir_module = try text.emit_zir(allocator, analyzed_module); defer new_zir_module.deinit(allocator); diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index d8606656b7..94b1de03de 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -31,6 +31,7 @@ pub const Inst = struct { primitive, fntype, intcast, + bitcast, }; pub fn TagToType(tag: Tag) type { @@ -48,6 +49,7 @@ pub const Inst = struct { .primitive => Primitive, .fntype => FnType, .intcast => IntCast, + .bitcast => BitCast, }; } @@ -258,6 +260,17 @@ pub const Inst = struct { }, kw_args: struct {}, }; + + pub const BitCast = struct { + pub const base_tag = Tag.bitcast; + base: Inst, + + positionals: struct { + dest_type: *Inst, + operand: *Inst, + }, + kw_args: struct {}, + }; }; pub const ErrorMsg = struct { @@ -331,6 +344,7 @@ pub const Module = struct { .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table), .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table), .intcast => return self.writeInstToStreamGeneric(stream, .intcast, decl, inst_table), + .bitcast => return self.writeInstToStreamGeneric(stream, .bitcast, decl, inst_table), } } @@ -957,6 +971,19 @@ const EmitZIR = struct { }; break :blk &new_inst.base; }, + .bitcast => blk: { + const old_inst = inst.cast(ir.Inst.BitCast).?; + const new_inst = try self.arena.allocator.create(Inst.BitCast); + new_inst.* = .{ + .base = .{ .src = inst.src, .tag = Inst.BitCast.base_tag }, + .positionals = .{ + .dest_type = try self.emitType(inst.src, inst.ty), + .operand = try self.resolveInst(&inst_table, old_inst.args.operand), + }, + .kw_args = .{}, + }; + break :blk &new_inst.base; + }, }; try instructions.append(new_inst); try inst_table.putNoClobber(inst, new_inst); From 2d35f71fa9a9c18dc5679a6de58969bebafae17f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 20:30:20 -0400 Subject: [PATCH 07/10] codegen rdx set immediate --- src-self-hosted/codegen.zig | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index b3011081da..ce85781e9a 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -249,6 +249,37 @@ const Function = struct { .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), }, + .rdx => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + // Setting the edx register zeroes the upper part of rdx, so if the number is small + // enough, that is preferable. 
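+ // (Same pattern as the rax/rdi cases above, with 0xba/0x48 0xba for
+ // edx/rdx. These three could collapse into one helper once Reg carries
+ // its hardware encoding; the Reg enum in this file is not declared in
+ // that order.)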
+ // Best case: zero + // 31 d2 xor edx,edx + if (x == 0) { + return self.code.appendSlice(&[_]u8{ 0x31, 0xd2 }); + } + // Next best case: set edx with 4 bytes + // ba 04 03 02 01 mov edx,0x1020304 + if (x <= std.math.maxInt(u32)) { + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xba; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: set rdx with 8 bytes + // 48 ba 08 07 06 05 04 03 02 01 movabs rdx,0x0102030405060708 + try self.code.resize(self.code.items.len + 10); + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xba; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + return; + }, + .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), + .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), + }, .rdi => switch (mcv) { .none, .unreach => unreachable, .immediate => |x| { @@ -305,12 +336,6 @@ const Function = struct { }, .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), }, - .rdx => switch (mcv) { - .none, .unreach => unreachable, - .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = immediate", .{}), - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), - }, else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), }, else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), From 229e99ca37441c758155fca113aecc3c4529c3ea Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 23 Apr 2020 23:53:52 -0400 Subject: [PATCH 08/10] codegen: write the updated code size to PT_LOAD section header --- src-self-hosted/link.zig | 124 +++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 57 deletions(-) diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 4f89786b3c..dff163b458 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -332,61 +332,6 @@ const Update = struct { phdr_table_dirty = true; } const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); - if (phdr_table_dirty) { - const allocated_size = self.allocatedSize(self.phdr_table_offset.?); - const needed_size = self.program_headers.items.len * phsize; - - if (needed_size > allocated_size) { - self.phdr_table_offset = null; // free the space - self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); - } - - const allocator = self.program_headers.allocator; - switch (ptr_width) { - .p32 => { - const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); - defer allocator.free(buf); - - for (buf) |*phdr, i| { - phdr.* = .{ - .p_type = self.program_headers.items[i].p_type, - .p_flags = self.program_headers.items[i].p_flags, - .p_offset = @intCast(u32, self.program_headers.items[i].p_offset), - .p_vaddr = @intCast(u32, self.program_headers.items[i].p_vaddr), - .p_paddr = @intCast(u32, self.program_headers.items[i].p_paddr), - .p_filesz = @intCast(u32, self.program_headers.items[i].p_filesz), - .p_memsz = @intCast(u32, self.program_headers.items[i].p_memsz), - .p_align = @intCast(u32, 
self.program_headers.items[i].p_align), - }; - if (foreign_endian) { - bswapAllFields(elf.Elf32_Phdr, phdr); - } - } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); - }, - .p64 => { - const buf = try allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); - defer allocator.free(buf); - - for (buf) |*phdr, i| { - phdr.* = .{ - .p_type = self.program_headers.items[i].p_type, - .p_flags = self.program_headers.items[i].p_flags, - .p_offset = self.program_headers.items[i].p_offset, - .p_vaddr = self.program_headers.items[i].p_vaddr, - .p_paddr = self.program_headers.items[i].p_paddr, - .p_filesz = self.program_headers.items[i].p_filesz, - .p_memsz = self.program_headers.items[i].p_memsz, - .p_align = self.program_headers.items[i].p_align, - }; - if (foreign_endian) { - bswapAllFields(elf.Elf64_Phdr, phdr); - } - } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); - }, - } - } if (shdr_table_dirty) { const allocated_size = self.allocatedSize(self.shdr_table_offset.?); const needed_size = self.sections.items.len * phsize; @@ -446,7 +391,64 @@ const Update = struct { }, } } - try self.writeCodeAndSymbols(); + + try self.writeCodeAndSymbols(&phdr_table_dirty); + + if (phdr_table_dirty) { + const allocated_size = self.allocatedSize(self.phdr_table_offset.?); + const needed_size = self.program_headers.items.len * phsize; + + if (needed_size > allocated_size) { + self.phdr_table_offset = null; // free the space + self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); + } + + const allocator = self.program_headers.allocator; + switch (ptr_width) { + .p32 => { + const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); + defer allocator.free(buf); + + for (buf) |*phdr, i| { + phdr.* = .{ + .p_type = self.program_headers.items[i].p_type, + .p_flags = self.program_headers.items[i].p_flags, + .p_offset = @intCast(u32, self.program_headers.items[i].p_offset), + .p_vaddr = @intCast(u32, self.program_headers.items[i].p_vaddr), + .p_paddr = @intCast(u32, self.program_headers.items[i].p_paddr), + .p_filesz = @intCast(u32, self.program_headers.items[i].p_filesz), + .p_memsz = @intCast(u32, self.program_headers.items[i].p_memsz), + .p_align = @intCast(u32, self.program_headers.items[i].p_align), + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Phdr, phdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + }, + .p64 => { + const buf = try allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); + defer allocator.free(buf); + + for (buf) |*phdr, i| { + phdr.* = .{ + .p_type = self.program_headers.items[i].p_type, + .p_flags = self.program_headers.items[i].p_flags, + .p_offset = self.program_headers.items[i].p_offset, + .p_vaddr = self.program_headers.items[i].p_vaddr, + .p_paddr = self.program_headers.items[i].p_paddr, + .p_filesz = self.program_headers.items[i].p_filesz, + .p_memsz = self.program_headers.items[i].p_memsz, + .p_align = self.program_headers.items[i].p_align, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Phdr, phdr); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + }, + } + } const shstrtab_sect = &self.sections.items[self.shstrtab_index.?]; if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) { @@ -590,7 +592,7 @@ const Update = struct { try self.file.pwriteAll(hdr_buf[0..index], 0); } - fn writeCodeAndSymbols(self: *Update) !void { + fn writeCodeAndSymbols(self: *Update, 
phdr_table_dirty: *bool) !void { // index 0 is always a null symbol try self.symbols.resize(1); self.symbols.items[0] = .{ @@ -638,6 +640,14 @@ const Update = struct { vaddr += code.items.len; } + { + // Now that we know the code size, we need to update the program header for executable code + phdr.p_memsz = vaddr - phdr.p_vaddr; + phdr.p_filesz = phdr.p_memsz; + + phdr_table_dirty.* = true; + } + return self.writeSymbols(); } From 058937e44df7fe07c09ee9291170a74f58ec1562 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 24 Apr 2020 02:09:30 -0400 Subject: [PATCH 09/10] bug fixes to make it work --- src-self-hosted/codegen.zig | 2 +- src-self-hosted/link.zig | 246 ++++++++++++++++++++++-------------- 2 files changed, 153 insertions(+), 95 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index ce85781e9a..3a8d0e1282 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -378,8 +378,8 @@ const Function = struct { return self.fail(src, "TODO handle a larger string constant", .{}); // Emit the string literal directly into the code; jump over it. - const offset = self.code.items.len; try self.genRelativeFwdJump(src, smaller_len); + const offset = self.code.items.len; try self.code.appendSlice(bytes); return MCValue{ .embedded_in_code = offset }; }, diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index dff163b458..9ab462e323 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -174,7 +174,7 @@ const Update = struct { if (program_header.p_offset <= start) continue; if (program_header.p_offset < min_pos) min_pos = program_header.p_offset; } - return min_pos; + return min_pos - start; } fn findFreeSpace(self: *Update, object_size: u64, min_alignment: u16) u64 { @@ -233,7 +233,7 @@ const Update = struct { // There must always be a null section in index 0 try self.sections.append(.{ .sh_name = 0, - .sh_type = 0, + .sh_type = elf.SHT_NULL, .sh_flags = 0, .sh_addr = 0, .sh_offset = 0, @@ -247,6 +247,8 @@ const Update = struct { } if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); + assert(self.shstrtab.items.len == 0); + try self.shstrtab.append(0); // need a 0 at position 0 const off = self.findFreeSpace(self.shstrtab.items.len, 1); //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len }); try self.sections.append(.{ @@ -299,7 +301,6 @@ const Update = struct { .sh_size = file_size, // The section header index of the associated string table. .sh_link = self.shstrtab_index.?, - // One greater than the symbol table index of the last local symbol (binding STB_LOCAL). 
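+ // sh_info for SHT_SYMTAB must be one greater than the index of the last
+ // local (STB_LOCAL) symbol; writeSymbols now keeps it in sync with
+ // self.symbols.items.len, since every symbol emitted so far is local.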
.sh_info = @intCast(u32, self.module.exports.len), .sh_addralign = min_align, .sh_entsize = each_size, @@ -332,67 +333,8 @@ const Update = struct { phdr_table_dirty = true; } const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); - if (shdr_table_dirty) { - const allocated_size = self.allocatedSize(self.shdr_table_offset.?); - const needed_size = self.sections.items.len * phsize; - if (needed_size > allocated_size) { - self.shdr_table_offset = null; // free the space - self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); - } - - const allocator = self.sections.allocator; - switch (ptr_width) { - .p32 => { - const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); - defer allocator.free(buf); - - for (buf) |*shdr, i| { - shdr.* = .{ - .sh_name = self.sections.items[i].sh_name, - .sh_type = self.sections.items[i].sh_type, - .sh_flags = @intCast(u32, self.sections.items[i].sh_flags), - .sh_addr = @intCast(u32, self.sections.items[i].sh_addr), - .sh_offset = @intCast(u32, self.sections.items[i].sh_offset), - .sh_size = @intCast(u32, self.sections.items[i].sh_size), - .sh_link = self.sections.items[i].sh_link, - .sh_info = self.sections.items[i].sh_info, - .sh_addralign = @intCast(u32, self.sections.items[i].sh_addralign), - .sh_entsize = @intCast(u32, self.sections.items[i].sh_entsize), - }; - if (foreign_endian) { - bswapAllFields(elf.Elf32_Shdr, shdr); - } - } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); - }, - .p64 => { - const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); - defer allocator.free(buf); - - for (buf) |*shdr, i| { - shdr.* = .{ - .sh_name = self.sections.items[i].sh_name, - .sh_type = self.sections.items[i].sh_type, - .sh_flags = self.sections.items[i].sh_flags, - .sh_addr = self.sections.items[i].sh_addr, - .sh_offset = self.sections.items[i].sh_offset, - .sh_size = self.sections.items[i].sh_size, - .sh_link = self.sections.items[i].sh_link, - .sh_info = self.sections.items[i].sh_info, - .sh_addralign = self.sections.items[i].sh_addralign, - .sh_entsize = self.sections.items[i].sh_entsize, - }; - if (foreign_endian) { - bswapAllFields(elf.Elf64_Shdr, shdr); - } - } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); - }, - } - } - - try self.writeCodeAndSymbols(&phdr_table_dirty); + try self.writeCodeAndSymbols(phdr_table_dirty, shdr_table_dirty); if (phdr_table_dirty) { const allocated_size = self.allocatedSize(self.phdr_table_offset.?); @@ -410,16 +352,7 @@ const Update = struct { defer allocator.free(buf); for (buf) |*phdr, i| { - phdr.* = .{ - .p_type = self.program_headers.items[i].p_type, - .p_flags = self.program_headers.items[i].p_flags, - .p_offset = @intCast(u32, self.program_headers.items[i].p_offset), - .p_vaddr = @intCast(u32, self.program_headers.items[i].p_vaddr), - .p_paddr = @intCast(u32, self.program_headers.items[i].p_paddr), - .p_filesz = @intCast(u32, self.program_headers.items[i].p_filesz), - .p_memsz = @intCast(u32, self.program_headers.items[i].p_memsz), - .p_align = @intCast(u32, self.program_headers.items[i].p_align), - }; + phdr.* = progHeaderTo32(self.program_headers.items[i]); if (foreign_endian) { bswapAllFields(elf.Elf32_Phdr, phdr); } @@ -431,16 +364,7 @@ const Update = struct { defer allocator.free(buf); for (buf) |*phdr, i| { - phdr.* = .{ - .p_type = self.program_headers.items[i].p_type, - .p_flags = self.program_headers.items[i].p_flags, - .p_offset = 
@@ -233,7 +233,7 @@ const Update = struct {
         // There must always be a null section in index 0
         try self.sections.append(.{
             .sh_name = 0,
-            .sh_type = 0,
+            .sh_type = elf.SHT_NULL,
             .sh_flags = 0,
             .sh_addr = 0,
             .sh_offset = 0,
@@ -247,6 +247,8 @@ const Update = struct {
         }
         if (self.shstrtab_index == null) {
             self.shstrtab_index = @intCast(u16, self.sections.items.len);
+            assert(self.shstrtab.items.len == 0);
+            try self.shstrtab.append(0); // need a 0 at position 0
             const off = self.findFreeSpace(self.shstrtab.items.len, 1);
             //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len });
             try self.sections.append(.{
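Why the 0 byte matters: the ELF spec requires a string table's first byte
to be NUL so that a name offset of 0 always reads back as the empty
string, which is exactly what the null section's sh_name = 0 relies on. A
small illustration with a hypothetical table:

    const std = @import("std");

    test "offset 0 of a string table is the empty name" {
        const shstrtab = "\x00.text\x00.shstrtab\x00";
        std.testing.expect(shstrtab[0] == 0); // sh_name == 0 -> unnamed
        std.testing.expect(std.mem.eql(u8, shstrtab[1..6], ".text")); // sh_name == 1
    }
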
@@ -299,7 +301,6 @@ const Update = struct {
                 .sh_size = file_size,
                 // The section header index of the associated string table.
                 .sh_link = self.shstrtab_index.?,
-                // One greater than the symbol table index of the last local symbol (binding STB_LOCAL).
                 .sh_info = @intCast(u32, self.module.exports.len),
                 .sh_addralign = min_align,
                 .sh_entsize = each_size,
@@ -332,67 +333,8 @@ const Update = struct {
             phdr_table_dirty = true;
         }
         const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
-        if (shdr_table_dirty) {
-            const allocated_size = self.allocatedSize(self.shdr_table_offset.?);
-            const needed_size = self.sections.items.len * phsize;
-
-            if (needed_size > allocated_size) {
-                self.shdr_table_offset = null; // free the space
-                self.shdr_table_offset = self.findFreeSpace(needed_size, phalign);
-            }
-
-            const allocator = self.sections.allocator;
-            switch (ptr_width) {
-                .p32 => {
-                    const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len);
-                    defer allocator.free(buf);
-
-                    for (buf) |*shdr, i| {
-                        shdr.* = .{
-                            .sh_name = self.sections.items[i].sh_name,
-                            .sh_type = self.sections.items[i].sh_type,
-                            .sh_flags = @intCast(u32, self.sections.items[i].sh_flags),
-                            .sh_addr = @intCast(u32, self.sections.items[i].sh_addr),
-                            .sh_offset = @intCast(u32, self.sections.items[i].sh_offset),
-                            .sh_size = @intCast(u32, self.sections.items[i].sh_size),
-                            .sh_link = self.sections.items[i].sh_link,
-                            .sh_info = self.sections.items[i].sh_info,
-                            .sh_addralign = @intCast(u32, self.sections.items[i].sh_addralign),
-                            .sh_entsize = @intCast(u32, self.sections.items[i].sh_entsize),
-                        };
-                        if (foreign_endian) {
-                            bswapAllFields(elf.Elf32_Shdr, shdr);
-                        }
-                    }
-                    try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
-                },
-                .p64 => {
-                    const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len);
-                    defer allocator.free(buf);
-
-                    for (buf) |*shdr, i| {
-                        shdr.* = .{
-                            .sh_name = self.sections.items[i].sh_name,
-                            .sh_type = self.sections.items[i].sh_type,
-                            .sh_flags = self.sections.items[i].sh_flags,
-                            .sh_addr = self.sections.items[i].sh_addr,
-                            .sh_offset = self.sections.items[i].sh_offset,
-                            .sh_size = self.sections.items[i].sh_size,
-                            .sh_link = self.sections.items[i].sh_link,
-                            .sh_info = self.sections.items[i].sh_info,
-                            .sh_addralign = self.sections.items[i].sh_addralign,
-                            .sh_entsize = self.sections.items[i].sh_entsize,
-                        };
-                        if (foreign_endian) {
-                            bswapAllFields(elf.Elf64_Shdr, shdr);
-                        }
-                    }
-                    try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
-                },
-            }
-        }
-
-        try self.writeCodeAndSymbols(&phdr_table_dirty);
+        try self.writeCodeAndSymbols(phdr_table_dirty, shdr_table_dirty);
 
         if (phdr_table_dirty) {
             const allocated_size = self.allocatedSize(self.phdr_table_offset.?);
@@ -410,16 +352,7 @@ const Update = struct {
                     defer allocator.free(buf);
 
                     for (buf) |*phdr, i| {
-                        phdr.* = .{
-                            .p_type = self.program_headers.items[i].p_type,
-                            .p_flags = self.program_headers.items[i].p_flags,
-                            .p_offset = @intCast(u32, self.program_headers.items[i].p_offset),
-                            .p_vaddr = @intCast(u32, self.program_headers.items[i].p_vaddr),
-                            .p_paddr = @intCast(u32, self.program_headers.items[i].p_paddr),
-                            .p_filesz = @intCast(u32, self.program_headers.items[i].p_filesz),
-                            .p_memsz = @intCast(u32, self.program_headers.items[i].p_memsz),
-                            .p_align = @intCast(u32, self.program_headers.items[i].p_align),
-                        };
+                        phdr.* = progHeaderTo32(self.program_headers.items[i]);
                         if (foreign_endian) {
                             bswapAllFields(elf.Elf32_Phdr, phdr);
                         }
@@ -431,16 +364,7 @@ const Update = struct {
                     defer allocator.free(buf);
 
                     for (buf) |*phdr, i| {
-                        phdr.* = .{
-                            .p_type = self.program_headers.items[i].p_type,
-                            .p_flags = self.program_headers.items[i].p_flags,
-                            .p_offset = self.program_headers.items[i].p_offset,
-                            .p_vaddr = self.program_headers.items[i].p_vaddr,
-                            .p_paddr = self.program_headers.items[i].p_paddr,
-                            .p_filesz = self.program_headers.items[i].p_filesz,
-                            .p_memsz = self.program_headers.items[i].p_memsz,
-                            .p_align = self.program_headers.items[i].p_align,
-                        };
+                        phdr.* = self.program_headers.items[i];
                         if (foreign_endian) {
                             bswapAllFields(elf.Elf64_Phdr, phdr);
                         }
@@ -450,17 +374,63 @@ const Update = struct {
             }
         }
 
-        const shstrtab_sect = &self.sections.items[self.shstrtab_index.?];
-        if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) {
-            const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset);
-            const needed_size = self.shstrtab.items.len;
+        {
+            const shstrtab_sect = &self.sections.items[self.shstrtab_index.?];
+            if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) {
+                const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset);
+                const needed_size = self.shstrtab.items.len;
+
+                if (needed_size > allocated_size) {
+                    shstrtab_sect.sh_size = 0; // free the space
+                    shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1);
+                }
+                shstrtab_sect.sh_size = needed_size;
+                //std.debug.warn("shstrtab start=0x{x} end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size });
+
+                try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset);
+                if (!shdr_table_dirty) {
+                    // Then it won't get written with the others and we need to do it.
+                    try self.writeSectHeader(self.shstrtab_index.?);
+                }
+            }
+        }
+        if (shdr_table_dirty) {
+            const allocated_size = self.allocatedSize(self.shdr_table_offset.?);
+            const needed_size = self.sections.items.len * phsize;
 
             if (needed_size > allocated_size) {
-                shstrtab_sect.sh_size = 0; // free the space
-                shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1);
-                shstrtab_sect.sh_size = needed_size;
+                self.shdr_table_offset = null; // free the space
+                self.shdr_table_offset = self.findFreeSpace(needed_size, phalign);
+            }
+
+            const allocator = self.sections.allocator;
+            switch (ptr_width) {
+                .p32 => {
+                    const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len);
+                    defer allocator.free(buf);
+
+                    for (buf) |*shdr, i| {
+                        shdr.* = sectHeaderTo32(self.sections.items[i]);
+                        if (foreign_endian) {
+                            bswapAllFields(elf.Elf32_Shdr, shdr);
+                        }
+                    }
+                    try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
+                },
+                .p64 => {
+                    const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len);
+                    defer allocator.free(buf);
+
+                    for (buf) |*shdr, i| {
+                        shdr.* = self.sections.items[i];
+                        //std.debug.warn("writing section {}\n", .{shdr.*});
+                        if (foreign_endian) {
+                            bswapAllFields(elf.Elf64_Shdr, shdr);
+                        }
+                    }
+                    try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
+                },
             }
-            try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset);
         }
         if (self.entry_addr == null) {
             const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{});
@@ -592,7 +562,7 @@ const Update = struct {
         try self.file.pwriteAll(hdr_buf[0..index], 0);
     }
 
-    fn writeCodeAndSymbols(self: *Update, phdr_table_dirty: *bool) !void {
+    fn writeCodeAndSymbols(self: *Update, phdr_table_dirty: bool, shdr_table_dirty: bool) !void {
         // index 0 is always a null symbol
         try self.symbols.resize(1);
         self.symbols.items[0] = .{
@@ -606,6 +576,7 @@ const Update = struct {
 
         const phdr = &self.program_headers.items[self.phdr_load_re_index.?];
         var vaddr: u64 = phdr.p_vaddr;
+        var file_off: u64 = phdr.p_offset;
 
         var code = std.ArrayList(u8).init(self.sections.allocator);
         defer code.deinit();
@@ -625,6 +596,7 @@ const Update = struct {
                 }
                 continue;
             }
+            try self.file.pwriteAll(code.items, file_off);
 
             if (mem.eql(u8, exp.name, "_start")) {
                 self.entry_addr = vaddr;
@@ -645,12 +617,67 @@ const Update = struct {
             phdr.p_memsz = vaddr - phdr.p_vaddr;
             phdr.p_filesz = phdr.p_memsz;
 
-            phdr_table_dirty.* = true;
+            const shdr = &self.sections.items[self.text_section_index.?];
+            shdr.sh_size = phdr.p_filesz;
+
+            if (!phdr_table_dirty) {
+                // Then it won't get written with the others and we need to do it.
+                try self.writeProgHeader(self.phdr_load_re_index.?);
+            }
+            if (!shdr_table_dirty) {
+                // Then it won't get written with the others and we need to do it.
+                try self.writeSectHeader(self.text_section_index.?);
+            }
         }
 
         return self.writeSymbols();
     }
 
+    fn writeProgHeader(self: *Update, index: usize) !void {
+        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+        const offset = self.program_headers.items[index].p_offset;
+        switch (self.module.target.cpu.arch.ptrBitWidth()) {
+            32 => {
+                var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])};
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf32_Phdr, &phdr[0]);
+                }
+                return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset);
+            },
+            64 => {
+                var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]};
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf64_Phdr, &phdr[0]);
+                }
+                return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset);
+            },
+            else => return error.UnsupportedArchitecture,
+        }
+    }
+
+    fn writeSectHeader(self: *Update, index: usize) !void {
+        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+        const offset = self.sections.items[index].sh_offset;
+        switch (self.module.target.cpu.arch.ptrBitWidth()) {
+            32 => {
+                var shdr: [1]elf.Elf32_Shdr = undefined;
+                shdr[0] = sectHeaderTo32(self.sections.items[index]);
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf32_Shdr, &shdr[0]);
+                }
+                return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset);
+            },
+            64 => {
+                var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]};
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf64_Shdr, &shdr[0]);
+                }
+                return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset);
+            },
            else => return error.UnsupportedArchitecture,
+        }
+    }
+
     fn writeSymbols(self: *Update) !void {
         const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) {
             32 => .p32,
@@ -667,8 +694,11 @@ const Update = struct {
             if (needed_size > allocated_size) {
                 syms_sect.sh_size = 0; // free the space
                 syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align);
-                syms_sect.sh_size = needed_size;
+                //std.debug.warn("moved symtab to 0x{x} to 0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size });
             }
+            //std.debug.warn("symtab start=0x{x} end=0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size });
+            syms_sect.sh_size = needed_size;
+            syms_sect.sh_info = @intCast(u32, self.symbols.items.len);
             const allocator = self.symbols.allocator;
             const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
             switch (ptr_width) {
@@ -760,3 +790,31 @@ fn satMul(a: var, b: var) @TypeOf(a, b) {
 fn bswapAllFields(comptime S: type, ptr: *S) void {
     @panic("TODO implement bswapAllFields");
 }
+
+fn progHeaderTo32(phdr: elf.Elf64_Phdr) elf.Elf32_Phdr {
+    return .{
+        .p_type = phdr.p_type,
+        .p_flags = phdr.p_flags,
+        .p_offset = @intCast(u32, phdr.p_offset),
+        .p_vaddr = @intCast(u32, phdr.p_vaddr),
+        .p_paddr = @intCast(u32, phdr.p_paddr),
+        .p_filesz = @intCast(u32, phdr.p_filesz),
+        .p_memsz = @intCast(u32, phdr.p_memsz),
+        .p_align = @intCast(u32, phdr.p_align),
+    };
+}
+
+fn sectHeaderTo32(shdr: elf.Elf64_Shdr) elf.Elf32_Shdr {
+    return .{
+        .sh_name = shdr.sh_name,
+        .sh_type = shdr.sh_type,
+        .sh_flags = @intCast(u32, shdr.sh_flags),
+        .sh_addr = @intCast(u32, shdr.sh_addr),
+        .sh_offset = @intCast(u32, shdr.sh_offset),
+        .sh_size = @intCast(u32, shdr.sh_size),
+        .sh_link = shdr.sh_link,
+        .sh_info = shdr.sh_info,
+        .sh_addralign = @intCast(u32, shdr.sh_addralign),
+        .sh_entsize = @intCast(u32, shdr.sh_entsize),
+    };
+}
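One property of progHeaderTo32/sectHeaderTo32 worth noting: @intCast is
safety-checked, so producing a 32-bit ELF file whose offsets, addresses,
or sizes exceed the u32 range trips a runtime panic (in safe build modes)
rather than silently truncating the headers. A sketch with a hypothetical
value:

    const std = @import("std");

    test "@intCast keeps header fields that fit in u32" {
        const file_off: u64 = 0x8000000; // hypothetical p_offset
        std.testing.expect(@intCast(u32, file_off) == 0x8000000);
        // A p_offset above 0xffffffff would panic here in Debug and
        // ReleaseSafe, so 32-bit output assumes everything stays under
        // 4 GiB.
    }
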
From 9ebf25d1458988b6aa75d4608062f18f802c6a38 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 24 Apr 2020 15:36:08 -0400
Subject: [PATCH 10/10] link: change default executable mode to 0o777

Jonathan S writes:

On common systems with a 022 umask, this will still result in a file
created with 755 permissions, but it works appropriately if the system
is configured more leniently. (As another data point, C's fopen seems to
open files with the 666 mode.)
---
 lib/std/mem.zig          | 2 +-
 src-self-hosted/link.zig | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 5966f8bc91..54b5a8a50e 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -2061,7 +2061,7 @@ pub fn alignBackward(addr: usize, alignment: usize) usize {
 /// The alignment must be a power of 2 and greater than 0.
 pub fn alignBackwardGeneric(comptime T: type, addr: T, alignment: T) T {
     assert(@popCount(T, alignment) == 1);
-    // 000010000 // example addr
+    // 000010000 // example alignment
     // 000001111 // subtract 1
     // 111110000 // binary not
     return addr & ~(alignment - 1);
diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig
index 9ab462e323..cb6aa40afe 100644
--- a/src-self-hosted/link.zig
+++ b/src-self-hosted/link.zig
@@ -7,7 +7,11 @@ const fs = std.fs;
 const elf = std.elf;
 const codegen = @import("codegen.zig");
 
-const executable_mode = 0o755;
+/// On common systems with a 0o022 umask, 0o777 will still result in a file created
+/// with 0o755 permissions, but it works appropriately if the system is configured
+/// more leniently. As another data point, C's fopen seems to open files with the
+/// 0o666 mode.
+const executable_mode = 0o777;
 const default_entry_addr = 0x8000000;
 
 pub const ErrorMsg = struct {
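The effective permissions are easy to compute by hand: at creation time
the kernel clears whatever bits are set in the process umask from the
requested mode. A sketch of that arithmetic:

    const std = @import("std");

    test "0o777 under a typical 0o022 umask yields 0o755" {
        const requested: u32 = 0o777;
        const umask: u32 = 0o022;
        std.testing.expect((requested & ~umask) == 0o755);
    }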