From fe3aa4ccd0bc157e1dd9d84b7c57b0cb2f0a77a9 Mon Sep 17 00:00:00 2001
From: Isaac Freund <ifreund@ifreund.xyz>
Date: Tue, 18 Aug 2020 18:30:48 +0200
Subject: [PATCH 1/2] stage2/wasm: do incremental compilation in-memory

Before this commit the wasm backend worked similarly to elf. As
functions were generated they were written directly to the output file
and existing code was shifted around in the file as necessary. This
approach had several disadvantages:

- Large amounts of padding in the output were necessary to avoid
expensive copying of data within the file.
- Function/type/global/etc indexes were required to be known at the time
of preforming codegen, which severely limited the flexibility of where
code could be placed in the binary
- Significant complexity to track the state of the output file through
incremental updates

This commit takes things in a different direction. Code is incrementally
compiled into in-memory buffers and the entire binary is rewritten using
these buffers on flush. This has several advantages:

- Significantly smaller resulting binaries
- More performant resulting binaries due to lack of indirection
- Significantly simpler compiler code
- Indexes no longer need to be known before codegen. We can track where
Decls must be referenced by index insert the proper indexes while
writing the code in the flush() function. This is not yet implemented
but is planned for the next commit.

The main disadvantage is of course increased memory usage in order to
store these buffers of generated code.
---
 src-self-hosted/link/Wasm.zig | 500 ++++++++++------------------------
 1 file changed, 137 insertions(+), 363 deletions(-)

diff --git a/src-self-hosted/link/Wasm.zig b/src-self-hosted/link/Wasm.zig
index a381cfad57..434c17372b 100644
--- a/src-self-hosted/link/Wasm.zig
+++ b/src-self-hosted/link/Wasm.zig
@@ -32,19 +32,19 @@ const spec = struct {
 pub const base_tag = link.File.Tag.wasm;
 
 pub const FnData = struct {
-    funcidx: u32,
+    /// Generated code for the type of the function
+    functype: std.ArrayListUnmanaged(u8) = .{},
+    /// Generated code for the body of the function
+    code: std.ArrayListUnmanaged(u8) = .{},
 };
 
 base: link.File,
 
-types: Types,
-funcs: Funcs,
-exports: Exports,
-
-/// Array over the section structs used in the various sections above to
-/// allow iteration when shifting sections to make space.
-/// TODO: this should eventually be size 11 when we use all the sections.
-sections: [4]*Section,
+/// List of all function Decls to be written to the output file. The index of
+/// each Decl in this list at the time of writing the binary is used as the
+/// function index.
+/// TODO: can/should we access some data structure in Module directly?
+funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
 
 pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File {
     assert(options.object_format == .wasm);
@@ -58,10 +58,6 @@ pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, option
 
     try file.writeAll(&(spec.magic ++ spec.version));
 
-    // TODO: this should vary depending on the section and be less arbitrary
-    const size = 1024;
-    const offset = @sizeOf(@TypeOf(spec.magic ++ spec.version));
-
     wasm.* = .{
         .base = .{
             .tag = .wasm,
@@ -69,52 +65,40 @@ pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, option
             .file = file,
             .allocator = allocator,
         },
-
-        .types = try Types.init(file, offset, size),
-        .funcs = try Funcs.init(file, offset + size, size, offset + 3 * size, size),
-        .exports = try Exports.init(file, offset + 2 * size, size),
-
-        // These must be ordered as they will appear in the output file
-        .sections = [_]*Section{
-            &wasm.types.typesec.section,
-            &wasm.funcs.funcsec,
-            &wasm.exports.exportsec,
-            &wasm.funcs.codesec.section,
-        },
     };
 
-    try file.setEndPos(offset + 4 * size);
-
     return &wasm.base;
 }
 
 pub fn deinit(self: *Wasm) void {
-    self.types.deinit();
-    self.funcs.deinit();
+    for (self.funcs.items) |decl| {
+        decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
+        decl.fn_link.wasm.?.code.deinit(self.base.allocator);
+    }
+    self.funcs.deinit(self.base.allocator);
 }
 
+// Generate code for the Decl, storing it in memory to be later written to
+// the file on flush().
 pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
     if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
         return error.TODOImplementNonFnDeclsForWasm;
 
-    if (decl.fn_link.wasm) |fn_data| {
-        self.funcs.free(fn_data.funcidx);
+    if (decl.fn_link.wasm) |*fn_data| {
+        fn_data.functype.items.len = 0;
+        fn_data.code.items.len = 0;
+    } else {
+        decl.fn_link.wasm = .{};
+        try self.funcs.append(self.base.allocator, decl);
     }
+    const fn_data = &decl.fn_link.wasm.?;
 
-    var buf = std.ArrayList(u8).init(self.base.allocator);
-    defer buf.deinit();
-
-    try codegen.genFunctype(&buf, decl);
-    const typeidx = try self.types.new(buf.items);
-    buf.items.len = 0;
-
-    try codegen.genCode(&buf, decl);
-    const funcidx = try self.funcs.new(typeidx, buf.items);
-
-    decl.fn_link.wasm = .{ .funcidx = funcidx };
-
-    // TODO: we should be more smart and set this only when needed
-    self.exports.dirty = true;
+    var managed_functype = fn_data.functype.toManaged(self.base.allocator);
+    var managed_code = fn_data.code.toManaged(self.base.allocator);
+    try codegen.genFunctype(&managed_functype, decl);
+    try codegen.genCode(&managed_code, decl);
+    fn_data.functype = managed_functype.toUnmanaged();
+    fn_data.code = managed_code.toUnmanaged();
 }
 
 pub fn updateDeclExports(
@@ -122,332 +106,122 @@ pub fn updateDeclExports(
     module: *Module,
     decl: *const Module.Decl,
     exports: []const *Module.Export,
-) !void {
-    self.exports.dirty = true;
-}
+) !void {}
 
 pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
     // TODO: remove this assert when non-function Decls are implemented
     assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);
-    if (decl.fn_link.wasm) |fn_data| {
-        self.funcs.free(fn_data.funcidx);
-        decl.fn_link.wasm = null;
-    }
+    _ = self.funcs.swapRemove(self.getFuncidx(decl).?);
+    decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
+    decl.fn_link.wasm.?.code.deinit(self.base.allocator);
+    decl.fn_link.wasm = null;
 }
 
 pub fn flush(self: *Wasm, module: *Module) !void {
-    if (self.exports.dirty) try self.exports.writeAll(module);
+    const file = self.base.file.?;
+    const header_size = 5 + 1;
+
+    // No need to rewrite the magic/version header
+    try file.setEndPos(@sizeOf(@TypeOf(spec.magic ++ spec.version)));
+    try file.seekTo(@sizeOf(@TypeOf(spec.magic ++ spec.version)));
+
+    // Type section
+    {
+        const header_offset = try reserveVecSectionHeader(file);
+        for (self.funcs.items) |decl| {
+            try file.writeAll(decl.fn_link.wasm.?.functype.items);
+        }
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            spec.types_id,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            @intCast(u32, self.funcs.items.len),
+        );
+    }
+
+    // Function section
+    {
+        const header_offset = try reserveVecSectionHeader(file);
+        const writer = file.writer();
+        for (self.funcs.items) |_, typeidx| try leb.writeULEB128(writer, @intCast(u32, typeidx));
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            spec.funcs_id,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            @intCast(u32, self.funcs.items.len),
+        );
+    }
+
+    // Export section
+    {
+        const header_offset = try reserveVecSectionHeader(file);
+        const writer = file.writer();
+        var count: u32 = 0;
+        for (module.decl_exports.entries.items) |entry| {
+            for (entry.value) |exprt| {
+                // Export name length + name
+                try leb.writeULEB128(writer, @intCast(u32, exprt.options.name.len));
+                try writer.writeAll(exprt.options.name);
+
+                switch (exprt.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
+                    .Fn => {
+                        // Type of the export
+                        try writer.writeByte(0x00);
+                        // Exported function index
+                        try leb.writeULEB128(writer, self.getFuncidx(exprt.exported_decl).?);
+                    },
+                    else => return error.TODOImplementNonFnDeclsForWasm,
+                }
+
+                count += 1;
+            }
+        }
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            spec.exports_id,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            count,
+        );
+    }
+
+    // Code section
+    {
+        const header_offset = try reserveVecSectionHeader(file);
+        for (self.funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.?.code.items);
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            spec.code_id,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            @intCast(u32, self.funcs.items.len),
+        );
+    }
 }
 
-/// This struct describes the location of a named section + custom section
-/// padding in the output file. This is all the data we need to allow for
-/// shifting sections around when padding runs out.
-const Section = struct {
-    /// The size of a section header: 1 byte section id + 5 bytes
-    /// for the fixed-width ULEB128 encoded contents size.
-    const header_size = 1 + 5;
-    /// Offset of the section id byte from the start of the file.
-    offset: u64,
-    /// Size of the section, including the header and directly
-    /// following custom section used for padding if any.
-    size: u64,
+/// Get the current index of a given Decl in the function list
+/// TODO: we could maintain a hash map to potentially make this
+fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {
+    return for (self.funcs.items) |func, idx| {
+        if (func == decl) break @intCast(u32, idx);
+    } else null;
+}
 
-    /// Resize the usable part of the section, handling the following custom
-    /// section used for padding. If there is not enough padding left, shift
-    /// all following sections to make space. Takes the current and target
-    /// contents sizes of the section as arguments.
-    fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void {
-        // Section header + target contents size + custom section header
-        // + custom section name + empty custom section > owned chunk of the file
-        if (header_size + target + header_size + 1 + 0 > self.size)
-            return error.TODOImplementSectionShifting;
+fn reserveVecSectionHeader(file: fs.File) !u64 {
+    // section id + fixed leb contents size + fixed leb vector length
+    const header_size = 1 + 5 + 5;
+    // TODO: this should be a single lseek(2) call, but fs.File does not
+    // currently provide a way to do this.
+    try file.seekBy(header_size);
+    return (try file.getPos()) - header_size;
+}
 
-        const new_custom_start = self.offset + header_size + target;
-        const new_custom_contents_size = self.size - target - 2 * header_size;
-        assert(new_custom_contents_size >= 1);
-        // +1 for the name of the custom section, which we set to an empty string
-        var custom_header: [header_size + 1]u8 = undefined;
-        custom_header[0] = spec.custom_id;
-        leb.writeUnsignedFixed(5, custom_header[1..header_size], @intCast(u32, new_custom_contents_size));
-        custom_header[header_size] = 0;
-        try file.pwriteAll(&custom_header, new_custom_start);
-    }
-};
-
-/// This can be used to manage the contents of any section which uses a vector
-/// of contents. This interface maintains index stability while allowing for
-/// reuse of "dead" indexes.
-const VecSection = struct {
-    /// Represents a single entry in the vector (e.g. a type in the type section)
-    const Entry = struct {
-        /// Offset from the start of the section contents in bytes
-        offset: u32,
-        /// Size in bytes of the entry
-        size: u32,
-    };
-    section: Section,
-    /// Size in bytes of the contents of the section. Does not include
-    /// the "header" containing the section id and this value.
-    contents_size: u32,
-    /// List of all entries in the contents of the section.
-    entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){},
-    /// List of indexes of unreferenced entries which may be
-    /// overwritten and reused.
-    dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},
-
-    /// Write the headers of the section and custom padding section
-    fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection {
-        // section id, section size, empty vector, custom section id,
-        // custom section size, empty custom section name
-        var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined;
-
-        assert(initial_size >= initial_data.len);
-
-        comptime var i = 0;
-        initial_data[i] = section_id;
-        i += 1;
-        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 5);
-        i += 5;
-        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 0);
-        i += 5;
-        initial_data[i] = spec.custom_id;
-        i += 1;
-        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], @intCast(u32, initial_size - @sizeOf(@TypeOf(initial_data))));
-        i += 5;
-        initial_data[i] = 0;
-
-        try file.pwriteAll(&initial_data, offset);
-
-        return VecSection{
-            .section = .{
-                .offset = offset,
-                .size = initial_size,
-            },
-            .contents_size = 5,
-        };
-    }
-
-    fn deinit(self: *VecSection, allocator: *Allocator) void {
-        self.entries.deinit(allocator);
-        self.dead_list.deinit(allocator);
-    }
-
-    /// Write a new entry into the file, returning the index used.
-    fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 {
-        // First look for a dead entry we can reuse
-        for (self.dead_list.items) |dead_idx, i| {
-            const dead_entry = &self.entries.items[dead_idx];
-            if (dead_entry.size == data.len) {
-                // Found a dead entry of the right length, overwrite it
-                try file.pwriteAll(data, self.section.offset + Section.header_size + dead_entry.offset);
-                _ = self.dead_list.swapRemove(i);
-                return dead_idx;
-            }
-        }
-
-        // TODO: We can be more efficient if we special-case one or
-        // more consecutive dead entries at the end of the vector.
-
-        // We failed to find a dead entry to reuse, so write the new
-        // entry to the end of the section.
-        try self.section.resize(file, self.contents_size, self.contents_size + @intCast(u32, data.len));
-        try file.pwriteAll(data, self.section.offset + Section.header_size + self.contents_size);
-        try self.entries.append(allocator, .{
-            .offset = self.contents_size,
-            .size = @intCast(u32, data.len),
-        });
-        self.contents_size += @intCast(u32, data.len);
-        // Make sure the dead list always has enough space to store all free'd
-        // entries. This makes it so that delEntry() cannot fail.
-        // TODO: figure out a better way that doesn't waste as much memory
-        try self.dead_list.ensureCapacity(allocator, self.entries.items.len);
-
-        // Update the size in the section header and the item count of
-        // the contents vector.
-        var size_and_count: [10]u8 = undefined;
-        leb.writeUnsignedFixed(5, size_and_count[0..5], self.contents_size);
-        leb.writeUnsignedFixed(5, size_and_count[5..], @intCast(u32, self.entries.items.len));
-        try file.pwriteAll(&size_and_count, self.section.offset + 1);
-
-        return @intCast(u32, self.entries.items.len - 1);
-    }
-
-    /// Mark the type referenced by the given index as dead.
-    fn delEntry(self: *VecSection, index: u32) void {
-        self.dead_list.appendAssumeCapacity(index);
-    }
-};
-
-const Types = struct {
-    typesec: VecSection,
-
-    fn init(file: fs.File, offset: u64, initial_size: u64) !Types {
-        return Types{ .typesec = try VecSection.init(spec.types_id, file, offset, initial_size) };
-    }
-
-    fn deinit(self: *Types) void {
-        const wasm = @fieldParentPtr(Wasm, "types", self);
-        self.typesec.deinit(wasm.base.allocator);
-    }
-
-    fn new(self: *Types, data: []const u8) !u32 {
-        const wasm = @fieldParentPtr(Wasm, "types", self);
-        return self.typesec.addEntry(wasm.base.file.?, wasm.base.allocator, data);
-    }
-
-    fn free(self: *Types, typeidx: u32) void {
-        self.typesec.delEntry(typeidx);
-    }
-};
-
-const Funcs = struct {
-    /// This section needs special handling to keep the indexes matching with
-    /// the codesec, so we cant just use a VecSection.
-    funcsec: Section,
-    /// The typeidx stored for each function, indexed by funcidx.
-    func_types: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},
-    codesec: VecSection,
-
-    fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs {
-        return Funcs{
-            .funcsec = (try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size)).section,
-            .codesec = try VecSection.init(spec.code_id, file, code_offset, code_size),
-        };
-    }
-
-    fn deinit(self: *Funcs) void {
-        const wasm = @fieldParentPtr(Wasm, "funcs", self);
-        self.func_types.deinit(wasm.base.allocator);
-        self.codesec.deinit(wasm.base.allocator);
-    }
-
-    /// Add a new function to the binary, first finding space for and writing
-    /// the code then writing the typeidx to the corresponding index in the
-    /// funcsec. Returns the function index used.
-    fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 {
-        const wasm = @fieldParentPtr(Wasm, "funcs", self);
-        const file = wasm.base.file.?;
-        const allocator = wasm.base.allocator;
-
-        assert(self.func_types.items.len == self.codesec.entries.items.len);
-
-        // TODO: consider nop-padding the code if there is a close but not perfect fit
-        const funcidx = try self.codesec.addEntry(file, allocator, code);
-
-        if (self.func_types.items.len < self.codesec.entries.items.len) {
-            // u32 vector length + funcs_count u32s in the vector
-            const current = 5 + @intCast(u32, self.func_types.items.len) * 5;
-            try self.funcsec.resize(file, current, current + 5);
-            try self.func_types.append(allocator, typeidx);
-
-            // Update the size in the section header and the item count of
-            // the contents vector.
-            const count = @intCast(u32, self.func_types.items.len);
-            var size_and_count: [10]u8 = undefined;
-            leb.writeUnsignedFixed(5, size_and_count[0..5], 5 + count * 5);
-            leb.writeUnsignedFixed(5, size_and_count[5..], count);
-            try file.pwriteAll(&size_and_count, self.funcsec.offset + 1);
-        } else {
-            // We are overwriting a dead function and may now free the type
-            wasm.types.free(self.func_types.items[funcidx]);
-        }
-
-        assert(self.func_types.items.len == self.codesec.entries.items.len);
-
-        var typeidx_leb: [5]u8 = undefined;
-        leb.writeUnsignedFixed(5, &typeidx_leb, typeidx);
-        try file.pwriteAll(&typeidx_leb, self.funcsec.offset + Section.header_size + 5 + funcidx * 5);
-
-        return funcidx;
-    }
-
-    fn free(self: *Funcs, funcidx: u32) void {
-        self.codesec.delEntry(funcidx);
-    }
-};
-
-/// Exports are tricky. We can't leave dead entries in the binary as they
-/// would obviously be visible from the execution environment. The simplest
-/// way to work around this is to re-emit the export section whenever
-/// something changes. This also makes it easier to ensure exported function
-/// and global indexes are updated as they change.
-const Exports = struct {
-    exportsec: Section,
-    /// Size in bytes of the contents of the section. Does not include
-    /// the "header" containing the section id and this value.
-    contents_size: u32,
-    /// If this is true, then exports will be rewritten on flush()
-    dirty: bool,
-
-    fn init(file: fs.File, offset: u64, initial_size: u64) !Exports {
-        return Exports{
-            .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section,
-            .contents_size = 5,
-            .dirty = false,
-        };
-    }
-
-    fn writeAll(self: *Exports, module: *Module) !void {
-        const wasm = @fieldParentPtr(Wasm, "exports", self);
-        const file = wasm.base.file.?;
-        var buf: [5]u8 = undefined;
-
-        // First ensure the section is the right size
-        var export_count: u32 = 0;
-        var new_contents_size: u32 = 5;
-        for (module.decl_exports.entries.items) |entry| {
-            for (entry.value) |e| {
-                export_count += 1;
-                new_contents_size += calcSize(e);
-            }
-        }
-        if (new_contents_size != self.contents_size) {
-            try self.exportsec.resize(file, self.contents_size, new_contents_size);
-            leb.writeUnsignedFixed(5, &buf, new_contents_size);
-            try file.pwriteAll(&buf, self.exportsec.offset + 1);
-        }
-
-        try file.seekTo(self.exportsec.offset + Section.header_size);
-        const writer = file.writer();
-
-        // Length of the exports vec
-        leb.writeUnsignedFixed(5, &buf, export_count);
-        try writer.writeAll(&buf);
-
-        for (module.decl_exports.entries.items) |entry|
-            for (entry.value) |e| try writeExport(writer, e);
-
-        self.dirty = false;
-    }
-
-    /// Return the total number of bytes an export will take.
-    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
-    /// be replaced with proper variable-length LEB128 as it is inefficient.
-    fn calcSize(e: *Module.Export) u32 {
-        // LEB128 name length + name bytes + export type + LEB128 index
-        return 5 + @intCast(u32, e.options.name.len) + 1 + 5;
-    }
-
-    /// Write the data for a single export to the given file at a given offset.
-    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
-    /// be replaced with proper variable-length LEB128 as it is inefficient.
-    fn writeExport(writer: anytype, e: *Module.Export) !void {
-        var buf: [5]u8 = undefined;
-
-        // Export name length + name
-        leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len));
-        try writer.writeAll(&buf);
-        try writer.writeAll(e.options.name);
-
-        switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
-            .Fn => {
-                // Type of the export
-                try writer.writeByte(0x00);
-                // Exported function index
-                leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx);
-                try writer.writeAll(&buf);
-            },
-            else => return error.TODOImplementNonFnDeclsForWasm,
-        }
-    }
-};
+fn writeVecSectionHeader(file: fs.File, offset: u64, section: u8, size: u32, items: u32) !void {
+    var buf: [1 + 5 + 5]u8 = undefined;
+    buf[0] = section;
+    leb.writeUnsignedFixed(5, buf[1..6], size);
+    leb.writeUnsignedFixed(5, buf[6..], items);
+    try file.pwriteAll(&buf, offset);
+}

From 6242ae35f37a6e67d9da514fa7a3508843515667 Mon Sep 17 00:00:00 2001
From: Isaac Freund <ifreund@ifreund.xyz>
Date: Wed, 19 Aug 2020 01:48:09 +0200
Subject: [PATCH 2/2] stage2/wasm: implement function calls

During codegen we do not yet know the indexes that will be used for
called functions. Therefore, we store the offset into the in-memory
code where the index is needed with a pointer to the Decl and use this
data to insert the proper indexes while writing the binary in the flush
function.
---
 src-self-hosted/codegen/wasm.zig | 96 ++++++++++++++++++++------------
 src-self-hosted/link/Wasm.zig    | 26 ++++++++-
 test/stage2/compare_output.zig   | 27 ++++++++-
 3 files changed, 110 insertions(+), 39 deletions(-)

diff --git a/src-self-hosted/codegen/wasm.zig b/src-self-hosted/codegen/wasm.zig
index 57eb002e82..e55e904934 100644
--- a/src-self-hosted/codegen/wasm.zig
+++ b/src-self-hosted/codegen/wasm.zig
@@ -62,58 +62,80 @@ pub fn genCode(buf: *ArrayList(u8), decl: *Decl) !void {
     // TODO: check for and handle death of instructions
     const tv = decl.typed_value.most_recent.typed_value;
     const mod_fn = tv.val.cast(Value.Payload.Function).?.func;
-    for (mod_fn.analysis.success.instructions) |inst| try genInst(writer, inst);
+    for (mod_fn.analysis.success.instructions) |inst| try genInst(buf, decl, inst);
 
     // Write 'end' opcode
     try writer.writeByte(0x0B);
 
     // Fill in the size of the generated code to the reserved space at the
     // beginning of the buffer.
-    leb.writeUnsignedFixed(5, buf.items[0..5], @intCast(u32, buf.items.len - 5));
+    const size = buf.items.len - 5 + decl.fn_link.wasm.?.idx_refs.items.len * 5;
+    leb.writeUnsignedFixed(5, buf.items[0..5], @intCast(u32, size));
 }
 
-fn genInst(writer: ArrayList(u8).Writer, inst: *Inst) !void {
+fn genInst(buf: *ArrayList(u8), decl: *Decl, inst: *Inst) !void {
     return switch (inst.tag) {
+        .call => genCall(buf, decl, inst.castTag(.call).?),
+        .constant => genConstant(buf, decl, inst.castTag(.constant).?),
         .dbg_stmt => {},
-        .ret => genRet(writer, inst.castTag(.ret).?),
+        .ret => genRet(buf, decl, inst.castTag(.ret).?),
+        .retvoid => {},
         else => error.TODOImplementMoreWasmCodegen,
     };
 }
 
-fn genRet(writer: ArrayList(u8).Writer, inst: *Inst.UnOp) !void {
-    switch (inst.operand.tag) {
-        .constant => {
-            const constant = inst.operand.castTag(.constant).?;
-            switch (inst.operand.ty.tag()) {
-                .u32 => {
-                    try writer.writeByte(0x41); // i32.const
-                    try leb.writeILEB128(writer, constant.val.toUnsignedInt());
-                },
-                .i32 => {
-                    try writer.writeByte(0x41); // i32.const
-                    try leb.writeILEB128(writer, constant.val.toSignedInt());
-                },
-                .u64 => {
-                    try writer.writeByte(0x42); // i64.const
-                    try leb.writeILEB128(writer, constant.val.toUnsignedInt());
-                },
-                .i64 => {
-                    try writer.writeByte(0x42); // i64.const
-                    try leb.writeILEB128(writer, constant.val.toSignedInt());
-                },
-                .f32 => {
-                    try writer.writeByte(0x43); // f32.const
-                    // TODO: enforce LE byte order
-                    try writer.writeAll(mem.asBytes(&constant.val.toFloat(f32)));
-                },
-                .f64 => {
-                    try writer.writeByte(0x44); // f64.const
-                    // TODO: enforce LE byte order
-                    try writer.writeAll(mem.asBytes(&constant.val.toFloat(f64)));
-                },
-                else => return error.TODOImplementMoreWasmCodegen,
-            }
+fn genConstant(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.Constant) !void {
+    const writer = buf.writer();
+    switch (inst.base.ty.tag()) {
+        .u32 => {
+            try writer.writeByte(0x41); // i32.const
+            try leb.writeILEB128(writer, inst.val.toUnsignedInt());
         },
+        .i32 => {
+            try writer.writeByte(0x41); // i32.const
+            try leb.writeILEB128(writer, inst.val.toSignedInt());
+        },
+        .u64 => {
+            try writer.writeByte(0x42); // i64.const
+            try leb.writeILEB128(writer, inst.val.toUnsignedInt());
+        },
+        .i64 => {
+            try writer.writeByte(0x42); // i64.const
+            try leb.writeILEB128(writer, inst.val.toSignedInt());
+        },
+        .f32 => {
+            try writer.writeByte(0x43); // f32.const
+            // TODO: enforce LE byte order
+            try writer.writeAll(mem.asBytes(&inst.val.toFloat(f32)));
+        },
+        .f64 => {
+            try writer.writeByte(0x44); // f64.const
+            // TODO: enforce LE byte order
+            try writer.writeAll(mem.asBytes(&inst.val.toFloat(f64)));
+        },
+        .void => {},
         else => return error.TODOImplementMoreWasmCodegen,
     }
 }
+
+fn genRet(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.UnOp) !void {
+    try genInst(buf, decl, inst.operand);
+}
+
+fn genCall(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.Call) !void {
+    const func_inst = inst.func.castTag(.constant).?;
+    const func_val = func_inst.val.cast(Value.Payload.Function).?;
+    const target = func_val.func.owner_decl;
+    const target_ty = target.typed_value.most_recent.typed_value.ty;
+
+    if (inst.args.len != 0) return error.TODOImplementMoreWasmCodegen;
+
+    try buf.append(0x10); // call
+
+    // The function index immediate argument will be filled in using this data
+    // in link.Wasm.flush().
+    try decl.fn_link.wasm.?.idx_refs.append(buf.allocator, .{
+        .offset = @intCast(u32, buf.items.len),
+        .decl = target,
+    });
+}
diff --git a/src-self-hosted/link/Wasm.zig b/src-self-hosted/link/Wasm.zig
index 434c17372b..d8f172f584 100644
--- a/src-self-hosted/link/Wasm.zig
+++ b/src-self-hosted/link/Wasm.zig
@@ -36,6 +36,9 @@ pub const FnData = struct {
     functype: std.ArrayListUnmanaged(u8) = .{},
     /// Generated code for the body of the function
     code: std.ArrayListUnmanaged(u8) = .{},
+    /// Locations in the generated code where function indexes must be filled in.
+    /// This must be kept ordered by offset.
+    idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: *Module.Decl }) = .{},
 };
 
 base: link.File,
@@ -74,6 +77,7 @@ pub fn deinit(self: *Wasm) void {
     for (self.funcs.items) |decl| {
         decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
         decl.fn_link.wasm.?.code.deinit(self.base.allocator);
+        decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
     }
     self.funcs.deinit(self.base.allocator);
 }
@@ -87,6 +91,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
     if (decl.fn_link.wasm) |*fn_data| {
         fn_data.functype.items.len = 0;
         fn_data.code.items.len = 0;
+        fn_data.idx_refs.items.len = 0;
     } else {
         decl.fn_link.wasm = .{};
         try self.funcs.append(self.base.allocator, decl);
@@ -114,6 +119,7 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
     _ = self.funcs.swapRemove(self.getFuncidx(decl).?);
     decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
     decl.fn_link.wasm.?.code.deinit(self.base.allocator);
+    decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
     decl.fn_link.wasm = null;
 }
 
@@ -190,7 +196,25 @@ pub fn flush(self: *Wasm, module: *Module) !void {
     // Code section
     {
         const header_offset = try reserveVecSectionHeader(file);
-        for (self.funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.?.code.items);
+        const writer = file.writer();
+        for (self.funcs.items) |decl| {
+            const fn_data = &decl.fn_link.wasm.?;
+
+            // Write the already generated code to the file, inserting
+            // function indexes where required.
+            var current: u32 = 0;
+            for (fn_data.idx_refs.items) |idx_ref| {
+                try writer.writeAll(fn_data.code.items[current..idx_ref.offset]);
+                current = idx_ref.offset;
+                // Use a fixed width here to make calculating the code size
+                // in codegen.wasm.genCode() simpler.
+                var buf: [5]u8 = undefined;
+                leb.writeUnsignedFixed(5, &buf, self.getFuncidx(idx_ref.decl).?);
+                try writer.writeAll(&buf);
+            }
+
+            try writer.writeAll(fn_data.code.items[current..]);
+        }
         try writeVecSectionHeader(
             file,
             header_offset,
diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig
index 4208cc3911..34e8f5e159 100644
--- a/test/stage2/compare_output.zig
+++ b/test/stage2/compare_output.zig
@@ -546,28 +546,53 @@ pub fn addCases(ctx: *TestContext) !void {
     }
 
     {
-        var case = ctx.exe("wasm returns", wasi);
+        var case = ctx.exe("wasm function calls", wasi);
 
         case.addCompareOutput(
             \\export fn _start() u32 {
+            \\    foo();
+            \\    bar();
             \\    return 42;
             \\}
+            \\fn foo() void {
+            \\    bar();
+            \\    bar();
+            \\}
+            \\fn bar() void {}
         ,
             "42\n",
         );
 
         case.addCompareOutput(
             \\export fn _start() i64 {
+            \\    bar();
+            \\    foo();
+            \\    foo();
+            \\    bar();
+            \\    foo();
+            \\    bar();
             \\    return 42;
             \\}
+            \\fn foo() void {
+            \\    bar();
+            \\}
+            \\fn bar() void {}
         ,
             "42\n",
         );
 
         case.addCompareOutput(
             \\export fn _start() f32 {
+            \\    bar();
+            \\    foo();
             \\    return 42.0;
             \\}
+            \\fn foo() void {
+            \\    bar();
+            \\    bar();
+            \\    bar();
+            \\}
+            \\fn bar() void {}
         ,
             // This is what you get when you take the bits of the IEE-754
             // representation of 42.0 and reinterpret them as an unsigned