diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index df7c1986f3..5e22058acf 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -1571,7 +1571,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { .macho => { // TODO Implement for MachO }, - .c => {}, + .c, .wasm => {}, } } } else { @@ -1781,11 +1781,13 @@ fn allocateNewDecl( .elf => .{ .elf = link.File.Elf.TextBlock.empty }, .macho => .{ .macho = link.File.MachO.TextBlock.empty }, .c => .{ .c = {} }, + .wasm => .{ .wasm = {} }, }, .fn_link = switch (self.bin_file.tag) { .elf => .{ .elf = link.File.Elf.SrcFn.empty }, .macho => .{ .macho = link.File.MachO.SrcFn.empty }, .c => .{ .c = {} }, + .wasm => .{ .wasm = null }, }, .generation = 0, }; diff --git a/src-self-hosted/codegen/wasm.zig b/src-self-hosted/codegen/wasm.zig new file mode 100644 index 0000000000..78d8d22ded --- /dev/null +++ b/src-self-hosted/codegen/wasm.zig @@ -0,0 +1,70 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const assert = std.debug.assert; +const leb = std.debug.leb; + +const Decl = @import("../Module.zig").Decl; +const Type = @import("../type.zig").Type; + +fn genValtype(ty: Type) u8 { + return switch (ty.tag()) { + .u32, .i32 => 0x7F, + .u64, .i64 => 0x7E, + .f32 => 0x7D, + .f64 => 0x7C, + else => @panic("TODO: Implement more types for wasm."), + }; +} + +pub fn genFunctype(buf: *ArrayList(u8), decl: *Decl) !void { + const ty = decl.typed_value.most_recent.typed_value.ty; + const writer = buf.writer(); + + // functype magic + try writer.writeByte(0x60); + + // param types + try leb.writeULEB128(writer, @intCast(u32, ty.fnParamLen())); + if (ty.fnParamLen() != 0) { + const params = try buf.allocator.alloc(Type, ty.fnParamLen()); + defer buf.allocator.free(params); + ty.fnParamTypes(params); + for (params) |param_type| try writer.writeByte(genValtype(param_type)); + } + + // return type + const return_type = 
ty.fnReturnType(); + switch (return_type.tag()) { + .void, .noreturn => try leb.writeULEB128(writer, @as(u32, 0)), + else => { + try leb.writeULEB128(writer, @as(u32, 1)); + try writer.writeByte(genValtype(return_type)); + }, + } +} + +pub fn genCode(buf: *ArrayList(u8), decl: *Decl) !void { + assert(buf.items.len == 0); + const writer = buf.writer(); + + // Reserve space to write the size after generating the code + try writer.writeAll(&([1]u8{undefined} ** 5)); + + // Write the size of the locals vec + // TODO: implement locals + try leb.writeULEB128(writer, @as(u32, 0)); + + // Write instructions + + // TODO: actually implement codegen + try writer.writeByte(0x41); // i32.const + try leb.writeILEB128(writer, @as(i32, 42)); + + // Write 'end' opcode + try writer.writeByte(0x0B); + + // Fill in the size of the generated code to the reserved space at the + // beginning of the buffer. + leb.writeUnsignedFixed(5, buf.items[0..5], @intCast(u32, buf.items.len - 5)); +} diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index a835cc6b7c..6a51138785 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -46,12 +46,14 @@ pub const File = struct { elf: Elf.TextBlock, macho: MachO.TextBlock, c: void, + wasm: void, }; pub const LinkFn = union { elf: Elf.SrcFn, macho: MachO.SrcFn, c: void, + wasm: ?Wasm.FnData, }; tag: Tag, @@ -69,7 +71,7 @@ pub const File = struct { .coff => return error.TODOImplementCoff, .elf => return Elf.openPath(allocator, dir, sub_path, options), .macho => return MachO.openPath(allocator, dir, sub_path, options), - .wasm => return error.TODOImplementWasm, + .wasm => return Wasm.openPath(allocator, dir, sub_path, options), .c => return C.openPath(allocator, dir, sub_path, options), .hex => return error.TODOImplementHex, .raw => return error.TODOImplementRaw, @@ -93,7 +95,7 @@ pub const File = struct { .mode = determineMode(base.options), }); }, - .c => {}, + .c, .wasm => {}, } } @@ -102,6 +104,7 @@ pub const File = struct 
{ if (base.file) |f| { f.close(); base.file = null; + } } @@ -110,6 +113,7 @@ pub const File = struct { .elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl), .macho => return @fieldParentPtr(MachO, "base", base).updateDecl(module, decl), .c => return @fieldParentPtr(C, "base", base).updateDecl(module, decl), + .wasm => return @fieldParentPtr(Wasm, "base", base).updateDecl(module, decl), } } @@ -117,7 +121,7 @@ pub const File = struct { switch (base.tag) { .elf => return @fieldParentPtr(Elf, "base", base).updateDeclLineNumber(module, decl), .macho => return @fieldParentPtr(MachO, "base", base).updateDeclLineNumber(module, decl), - .c => {}, + .c, .wasm => {}, } } @@ -125,7 +129,7 @@ pub const File = struct { switch (base.tag) { .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl), .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl), - .c => {}, + .c, .wasm => {}, } } @@ -135,6 +139,7 @@ pub const File = struct { .elf => @fieldParentPtr(Elf, "base", base).deinit(), .macho => @fieldParentPtr(MachO, "base", base).deinit(), .c => @fieldParentPtr(C, "base", base).deinit(), + .wasm => @fieldParentPtr(Wasm, "base", base).deinit(), } } @@ -155,6 +160,11 @@ pub const File = struct { parent.deinit(); base.allocator.destroy(parent); }, + .wasm => { + const parent = @fieldParentPtr(Wasm, "base", base); + parent.deinit(); + base.allocator.destroy(parent); + }, } } @@ -167,6 +177,7 @@ pub const File = struct { .elf => @fieldParentPtr(Elf, "base", base).flush(), .macho => @fieldParentPtr(MachO, "base", base).flush(), .c => @fieldParentPtr(C, "base", base).flush(), + .wasm => @fieldParentPtr(Wasm, "base", base).flush(), }; } @@ -175,6 +186,7 @@ pub const File = struct { .elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl), .macho => @fieldParentPtr(MachO, "base", base).freeDecl(decl), .c => unreachable, + .wasm => @fieldParentPtr(Wasm, "base", base).freeDecl(decl), } } @@ -183,6 +195,7 @@ pub const File = 
struct { .elf => @fieldParentPtr(Elf, "base", base).error_flags, .macho => @fieldParentPtr(MachO, "base", base).error_flags, .c => return .{ .no_entry_point_found = false }, + .wasm => return ErrorFlags{}, }; } @@ -197,6 +210,7 @@ pub const File = struct { .elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports), .macho => return @fieldParentPtr(MachO, "base", base).updateDeclExports(module, decl, exports), .c => return {}, + .wasm => return @fieldParentPtr(Wasm, "base", base).updateDeclExports(module, decl, exports), } } @@ -204,6 +218,7 @@ pub const File = struct { elf, macho, c, + wasm, }; pub const ErrorFlags = struct { @@ -2832,6 +2847,7 @@ pub const File = struct { }; pub const MachO = @import("link/MachO.zig"); + const Wasm = @import("link/Wasm.zig"); }; /// Saturating multiplication diff --git a/src-self-hosted/link/Wasm.zig b/src-self-hosted/link/Wasm.zig new file mode 100644 index 0000000000..a62cff15ee --- /dev/null +++ b/src-self-hosted/link/Wasm.zig @@ -0,0 +1,445 @@ +const Wasm = @This(); + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const fs = std.fs; +const leb = std.debug.leb; + +const Module = @import("../Module.zig"); +const codegen = @import("../codegen/wasm.zig"); +const link = @import("../link.zig"); + +/// Various magic numbers defined by the wasm spec +const spec = struct { + const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm + const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1 + + const custom_id = 0; + const types_id = 1; + const imports_id = 2; + const funcs_id = 3; + const tables_id = 4; + const memories_id = 5; + const globals_id = 6; + const exports_id = 7; + const start_id = 8; + const elements_id = 9; + const code_id = 10; + const data_id = 11; +}; + +pub const base_tag = link.File.Tag.wasm; + +pub const FnData = struct { + funcidx: u32, + typeidx: u32, +}; + +base: link.File, + +types: Types, +funcs: Funcs, +exports: Exports, 
+ +/// Array over the section structs used in the various sections above to +/// allow iteration when shifting sections to make space. +/// TODO: this should eventually be size 11 when we use all the sections. +sections: [4]*Section, + +pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File { + assert(options.object_format == .wasm); + + // TODO: read the file and keep valid parts instead of truncating + const file = try dir.createFile(sub_path, .{ .truncate = true, .read = true }); + errdefer file.close(); + + const wasm = try allocator.create(Wasm); + errdefer allocator.destroy(wasm); + + try file.writeAll(&(spec.magic ++ spec.version)); + + wasm.base = .{ + .tag = .wasm, + .options = options, + .file = file, + .allocator = allocator, + }; + + // TODO: this should vary depending on the section and be less arbitrary + const size = 1024; + const offset = @sizeOf(@TypeOf(spec.magic ++ spec.version)); + + wasm.types = try Types.init(file, offset, size); + wasm.funcs = try Funcs.init(file, offset + size, size, offset + 3 * size, size); + wasm.exports = try Exports.init(file, offset + 2 * size, size); + try file.setEndPos(offset + 4 * size); + + wasm.sections = [_]*Section{ + &wasm.types.typesec.section, + &wasm.funcs.funcsec, + &wasm.exports.exportsec, + &wasm.funcs.codesec.section, + }; + + return &wasm.base; +} + +pub fn deinit(self: *Wasm) void { + if (self.base.file) |f| f.close(); + self.types.deinit(); + self.funcs.deinit(); +} + +pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void { + if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn) + return error.TODOImplementNonFnDeclsForWasm; + + if (decl.fn_link.wasm) |fn_data| { + self.types.free(fn_data.typeidx); + self.funcs.free(fn_data.funcidx); + } + + var buf = std.ArrayList(u8).init(self.base.allocator); + defer buf.deinit(); + + try codegen.genFunctype(&buf, decl); + const typeidx = try self.types.new(buf.items); +
buf.items.len = 0; + + try codegen.genCode(&buf, decl); + const funcidx = try self.funcs.new(typeidx, buf.items); + + decl.fn_link.wasm = .{ .typeidx = typeidx, .funcidx = funcidx }; + + try self.exports.writeAll(module); +} + +pub fn updateDeclExports( + self: *Wasm, + module: *Module, + decl: *const Module.Decl, + exports: []const *Module.Export, +) !void { + // TODO: updateDeclExports() may currently be called before updateDecl, + // presumably due to a bug. For now just rely on the following call + // being made in updateDecl(). + + //try self.exports.writeAll(module); +} + +pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void { + // TODO: remove this assert when non-function Decls are implemented + assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn); + if (decl.fn_link.wasm) |fn_data| { + self.types.free(fn_data.typeidx); + self.funcs.free(fn_data.funcidx); + decl.fn_link.wasm = null; + } +} + +pub fn flush(self: *Wasm) !void {} + +/// This struct describes the location of a named section + custom section +/// padding in the output file. This is all the data we need to allow for +/// shifting sections around when padding runs out. +const Section = struct { + /// The size of a section header: 1 byte section id + 5 bytes + /// for the fixed-width ULEB128 encoded contents size. + const header_size = 1 + 5; + /// Offset of the section id byte from the start of the file. + offset: u64, + /// Size of the section, including the header and directly + /// following custom section used for padding if any. + size: u64, + + /// Resize the usable part of the section, handling the following custom + /// section used for padding. If there is not enough padding left, shift + /// all following sections to make space. Takes the current and target + /// contents sizes of the section as arguments. 
+ fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void { + // Section header + target contents size + custom section header + // + custom section name + empty custom section > owned chunk of the file + if (header_size + target + header_size + 1 + 0 > self.size) + return error.TODOImplementSectionShifting; + + const new_custom_start = self.offset + header_size + target; + const new_custom_contents_size = self.size - target - 2 * header_size; + assert(new_custom_contents_size >= 1); + // +1 for the name of the custom section, which we set to an empty string + var custom_header: [header_size + 1]u8 = undefined; + custom_header[0] = spec.custom_id; + leb.writeUnsignedFixed(5, custom_header[1..header_size], @intCast(u32, new_custom_contents_size)); + custom_header[header_size] = 0; + try file.pwriteAll(&custom_header, new_custom_start); + } +}; + +/// This can be used to manage the contents of any section which uses a vector +/// of contents. This interface maintains index stability while allowing for +/// reuse of "dead" indexes. +const VecSection = struct { + /// Represents a single entry in the vector (e.g. a type in the type section) + const Entry = struct { + /// Offset from the start of the section contents in bytes + offset: u32, + /// Size in bytes of the entry + size: u32, + }; + section: Section, + /// Size in bytes of the contents of the section. Does not include + /// the "header" containing the section id and this value. + contents_size: u32, + /// List of all entries in the contents of the section. + entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){}, + /// List of indexes of unreferenced entries which may be + /// overwritten and reused. 
+ dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){}, + + /// Write the headers of the section and custom padding section + fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection { + // section id, section size, empty vector, custom section id, + // custom section size, empty custom section name + var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined; + + assert(initial_size >= initial_data.len); + + comptime var i = 0; + initial_data[i] = section_id; + i += 1; + leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 5); + i += 5; + leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 0); + i += 5; + initial_data[i] = spec.custom_id; + i += 1; + leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], @intCast(u32, initial_size - @sizeOf(@TypeOf(initial_data)))); + i += 5; + initial_data[i] = 0; + + try file.pwriteAll(&initial_data, offset); + + return VecSection{ + .section = .{ + .offset = offset, + .size = initial_size, + }, + .contents_size = 5, + }; + } + + fn deinit(self: *VecSection, allocator: *Allocator) void { + self.entries.deinit(allocator); + self.dead_list.deinit(allocator); + } + + /// Write a new entry into the file, returning the index used. + fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 { + // First look for a dead entry we can reuse + for (self.dead_list.items) |dead_idx, i| { + const dead_entry = &self.entries.items[dead_idx]; + if (dead_entry.size == data.len) { + // Found a dead entry of the right length, overwrite it + try file.pwriteAll(data, self.section.offset + Section.header_size + dead_entry.offset); + _ = self.dead_list.swapRemove(i); + return dead_idx; + } + } + + // TODO: We can be more efficient if we special-case one or + // more consecutive dead entries at the end of the vector. + + // We failed to find a dead entry to reuse, so write the new + // entry to the end of the section. 
+ try self.section.resize(file, self.contents_size, self.contents_size + @intCast(u32, data.len)); + try file.pwriteAll(data, self.section.offset + Section.header_size + self.contents_size); + try self.entries.append(allocator, .{ + .offset = self.contents_size, + .size = @intCast(u32, data.len), + }); + self.contents_size += @intCast(u32, data.len); + // Make sure the dead list always has enough space to store all freed + // entries. This makes it so that delEntry() cannot fail. + // TODO: figure out a better way that doesn't waste as much memory + try self.dead_list.ensureCapacity(allocator, self.entries.items.len); + + // Update the size in the section header and the item count of + // the contents vector. + var size_and_count: [10]u8 = undefined; + leb.writeUnsignedFixed(5, size_and_count[0..5], self.contents_size); + leb.writeUnsignedFixed(5, size_and_count[5..], @intCast(u32, self.entries.items.len)); + try file.pwriteAll(&size_and_count, self.section.offset + 1); + + return @intCast(u32, self.entries.items.len - 1); + } + + /// Mark the entry referenced by the given index as dead. + fn delEntry(self: *VecSection, index: u32) void { + self.dead_list.appendAssumeCapacity(index); + } +}; + +const Types = struct { + typesec: VecSection, + + fn init(file: fs.File, offset: u64, initial_size: u64) !Types { + return Types{ .typesec = try VecSection.init(spec.types_id, file, offset, initial_size) }; + } + + fn deinit(self: *Types) void { + const wasm = @fieldParentPtr(Wasm, "types", self); + self.typesec.deinit(wasm.base.allocator); + } + + fn new(self: *Types, data: []const u8) !u32 { + const wasm = @fieldParentPtr(Wasm, "types", self); + return self.typesec.addEntry(wasm.base.file.?, wasm.base.allocator, data); + } + + fn free(self: *Types, typeidx: u32) void { + self.typesec.delEntry(typeidx); + } +}; + +const Funcs = struct { + /// This section needs special handling to keep the indexes matching with + /// the codesec, so we can't just use a VecSection.
+ funcsec: Section, + /// Number of functions listed in the funcsec. Must be kept in sync with + /// codesec.entries.items.len. + funcs_count: u32, + codesec: VecSection, + + fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs { + return Funcs{ + .funcsec = (try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size)).section, + .funcs_count = 0, + .codesec = try VecSection.init(spec.code_id, file, code_offset, code_size), + }; + } + + fn deinit(self: *Funcs) void { + const wasm = @fieldParentPtr(Wasm, "funcs", self); + self.codesec.deinit(wasm.base.allocator); + } + + /// Add a new function to the binary, first finding space for and writing + /// the code then writing the typeidx to the corresponding index in the + /// funcsec. Returns the function index used. + fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 { + const wasm = @fieldParentPtr(Wasm, "funcs", self); + const file = wasm.base.file.?; + const allocator = wasm.base.allocator; + + assert(self.funcs_count == self.codesec.entries.items.len); + + // TODO: consider nop-padding the code if there is a close but not perfect fit + const funcidx = try self.codesec.addEntry(file, allocator, code); + + if (self.funcs_count < self.codesec.entries.items.len) { + // u32 vector length + funcs_count u32s in the vector + const current = 5 + self.funcs_count * 5; + try self.funcsec.resize(file, current, current + 5); + self.funcs_count += 1; + + // Update the size in the section header and the item count of + // the contents vector. 
+ var size_and_count: [10]u8 = undefined; + leb.writeUnsignedFixed(5, size_and_count[0..5], 5 + self.funcs_count * 5); + leb.writeUnsignedFixed(5, size_and_count[5..], self.funcs_count); + try file.pwriteAll(&size_and_count, self.funcsec.offset + 1); + } + assert(self.funcs_count == self.codesec.entries.items.len); + + var typeidx_leb: [5]u8 = undefined; + leb.writeUnsignedFixed(5, &typeidx_leb, typeidx); + try file.pwriteAll(&typeidx_leb, self.funcsec.offset + Section.header_size + 5 + funcidx * 5); + + return funcidx; + } + + fn free(self: *Funcs, funcidx: u32) void { + self.codesec.delEntry(funcidx); + } +}; + +/// Exports are tricky. We can't leave dead entries in the binary as they +/// would obviously be visible from the execution environment. The simplest +/// way to work around this is to re-emit the export section whenever +/// something changes. This also makes it easier to ensure exported function +/// and global indexes are updated as they change. +const Exports = struct { + exportsec: Section, + /// Size in bytes of the contents of the section. Does not include + /// the "header" containing the section id and this value. 
+ contents_size: u32, + + fn init(file: fs.File, offset: u64, initial_size: u64) !Exports { + return Exports{ + .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section, + .contents_size = 5, + }; + } + + fn writeAll(self: *Exports, module: *Module) !void { + const wasm = @fieldParentPtr(Wasm, "exports", self); + const file = wasm.base.file.?; + var buf: [5]u8 = undefined; + + // First ensure the section is the right size + var export_count: u32 = 0; + var new_contents_size: u32 = 5; + for (module.decl_exports.entries.items) |entry| { + for (entry.value) |e| { + export_count += 1; + new_contents_size += calcSize(e); + } + } + if (new_contents_size != self.contents_size) { + try self.exportsec.resize(file, self.contents_size, new_contents_size); + leb.writeUnsignedFixed(5, &buf, new_contents_size); + try file.pwriteAll(&buf, self.exportsec.offset + 1); + } + + try file.seekTo(self.exportsec.offset + Section.header_size); + const writer = file.writer(); + + // Length of the exports vec + leb.writeUnsignedFixed(5, &buf, export_count); + try writer.writeAll(&buf); + + for (module.decl_exports.entries.items) |entry| + for (entry.value) |e| try writeExport(writer, e); + } + + /// Return the total number of bytes an export will take. + /// TODO: fixed-width LEB128 is currently used for simplicity, but should + /// be replaced with proper variable-length LEB128 as it is inefficient. + fn calcSize(e: *Module.Export) u32 { + // LEB128 name length + name bytes + export type + LEB128 index + return 5 + @intCast(u32, e.options.name.len) + 1 + 5; + } + + /// Write the data for a single export to the given file at a given offset. + /// TODO: fixed-width LEB128 is currently used for simplicity, but should + /// be replaced with proper variable-length LEB128 as it is inefficient. 
+ fn writeExport(writer: anytype, e: *Module.Export) !void { + var buf: [5]u8 = undefined; + + // Export name length + name + leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len)); + try writer.writeAll(&buf); + try writer.writeAll(e.options.name); + + switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) { + .Fn => { + // Type of the export + try writer.writeByte(0x00); + // Exported function index + leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx); + try writer.writeAll(&buf); + }, + else => return error.TODOImplementNonFnDeclsForWasm, + } + } +}; diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index d63ea3a757..52e6a3b651 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -150,6 +150,7 @@ const usage_build_generic = \\ -ofmt=[mode] Override target object format \\ elf Executable and Linking Format \\ c Compile to C source code + \\ wasm WebAssembly \\ coff (planned) Common Object File Format (Windows) \\ pe (planned) Portable Executable (Windows) \\ macho (planned) macOS relocatables