stage2/wasm: implement basic container generation

Thus far, we only generate the type, function, export, and code
sections. These are sufficient to generate and export simple functions.

Codegen is currently hardcoded to `i32.const 42`; the main goal of this
commit is to create infrastructure for the container format which will
work with incremental compilation.
This commit is contained in:
Isaac Freund 2020-08-07 00:53:55 +02:00
parent 96a27557e2
commit 3370b5f109
No known key found for this signature in database
GPG Key ID: 86DED400DDFD7A11
5 changed files with 539 additions and 5 deletions

View File

@ -1571,7 +1571,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void {
.macho => {
// TODO Implement for MachO
},
.c => {},
.c, .wasm => {},
}
}
} else {
@ -1781,11 +1781,13 @@ fn allocateNewDecl(
.elf => .{ .elf = link.File.Elf.TextBlock.empty },
.macho => .{ .macho = link.File.MachO.TextBlock.empty },
.c => .{ .c = {} },
.wasm => .{ .wasm = {} },
},
.fn_link = switch (self.bin_file.tag) {
.elf => .{ .elf = link.File.Elf.SrcFn.empty },
.macho => .{ .macho = link.File.MachO.SrcFn.empty },
.c => .{ .c = {} },
.wasm => .{ .wasm = null },
},
.generation = 0,
};

View File

@ -0,0 +1,70 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const assert = std.debug.assert;
const leb = std.debug.leb;
const Decl = @import("../Module.zig").Decl;
const Type = @import("../type.zig").Type;
/// Map a Zig type to the single byte wasm uses to encode its value type.
/// Only 32/64 bit integers and floats are handled; any other type panics.
fn genValtype(ty: Type) u8 {
    const tag = ty.tag();
    switch (tag) {
        // i32
        .i32, .u32 => return 0x7F,
        // i64
        .i64, .u64 => return 0x7E,
        // f32
        .f32 => return 0x7D,
        // f64
        .f64 => return 0x7C,
        else => @panic("TODO: Implement more types for wasm."),
    }
}
/// Append the wasm functype encoding of `decl`'s function type to `buf`:
/// the 0x60 marker byte, the vector of parameter value types, and then a
/// result vector holding either nothing (void/noreturn) or one value type.
/// Temporary storage for the parameter types comes from `buf.allocator`.
pub fn genFunctype(buf: *ArrayList(u8), decl: *Decl) !void {
    const fn_ty = decl.typed_value.most_recent.typed_value.ty;
    const out = buf.writer();

    // Marker byte that opens every functype
    try out.writeByte(0x60);

    // Parameter vector: element count, then one valtype byte per parameter
    const param_count = fn_ty.fnParamLen();
    try leb.writeULEB128(out, @intCast(u32, param_count));
    if (param_count != 0) {
        const param_types = try buf.allocator.alloc(Type, param_count);
        defer buf.allocator.free(param_types);
        fn_ty.fnParamTypes(param_types);
        for (param_types) |param_ty| {
            try out.writeByte(genValtype(param_ty));
        }
    }

    // Result vector: empty for void/noreturn, a single valtype otherwise
    const result_ty = fn_ty.fnReturnType();
    const result_tag = result_ty.tag();
    if (result_tag == .void or result_tag == .noreturn) {
        try leb.writeULEB128(out, @as(u32, 0));
    } else {
        try leb.writeULEB128(out, @as(u32, 1));
        try out.writeByte(genValtype(result_ty));
    }
}
/// Write one code section entry into `buf`, which must be empty on entry:
/// a 5 byte fixed-width ULEB128 size, an empty locals vector, the
/// instructions, and the terminating `end` opcode.
/// `decl` is not read yet as the instruction stream is still hardcoded.
pub fn genCode(buf: *ArrayList(u8), decl: *Decl) !void {
    assert(buf.items.len == 0);
    const out = buf.writer();

    // The size is only known once the body has been generated, so leave
    // room for it now and patch it in at the end.
    const size_width = 5;
    try out.writeAll(&([1]u8{undefined} ** size_width));

    // Locals vector
    // TODO: implement locals
    try leb.writeULEB128(out, @as(u32, 0));

    // Instructions
    // TODO: actually implement codegen
    try out.writeByte(0x41); // i32.const
    try leb.writeILEB128(out, @as(i32, 42));

    // 'end' opcode closing the function body
    try out.writeByte(0x0B);

    // Patch the reserved bytes with the number of bytes that follow them.
    const body_len = buf.items.len - size_width;
    leb.writeUnsignedFixed(size_width, buf.items[0..size_width], @intCast(u32, body_len));
}

View File

@ -46,12 +46,14 @@ pub const File = struct {
elf: Elf.TextBlock,
macho: MachO.TextBlock,
c: void,
wasm: void,
};
pub const LinkFn = union {
elf: Elf.SrcFn,
macho: MachO.SrcFn,
c: void,
wasm: ?Wasm.FnData,
};
tag: Tag,
@ -69,7 +71,7 @@ pub const File = struct {
.coff => return error.TODOImplementCoff,
.elf => return Elf.openPath(allocator, dir, sub_path, options),
.macho => return MachO.openPath(allocator, dir, sub_path, options),
.wasm => return error.TODOImplementWasm,
.wasm => return Wasm.openPath(allocator, dir, sub_path, options),
.c => return C.openPath(allocator, dir, sub_path, options),
.hex => return error.TODOImplementHex,
.raw => return error.TODOImplementRaw,
@ -93,7 +95,7 @@ pub const File = struct {
.mode = determineMode(base.options),
});
},
.c => {},
.c, .wasm => {},
}
}
@ -102,6 +104,7 @@ pub const File = struct {
if (base.file) |f| {
f.close();
base.file = null;
}
}
@ -110,6 +113,7 @@ pub const File = struct {
.elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl),
.macho => return @fieldParentPtr(MachO, "base", base).updateDecl(module, decl),
.c => return @fieldParentPtr(C, "base", base).updateDecl(module, decl),
.wasm => return @fieldParentPtr(Wasm, "base", base).updateDecl(module, decl),
}
}
@ -117,7 +121,7 @@ pub const File = struct {
switch (base.tag) {
.elf => return @fieldParentPtr(Elf, "base", base).updateDeclLineNumber(module, decl),
.macho => return @fieldParentPtr(MachO, "base", base).updateDeclLineNumber(module, decl),
.c => {},
.c, .wasm => {},
}
}
@ -125,7 +129,7 @@ pub const File = struct {
switch (base.tag) {
.elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl),
.macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl),
.c => {},
.c, .wasm => {},
}
}
@ -135,6 +139,7 @@ pub const File = struct {
.elf => @fieldParentPtr(Elf, "base", base).deinit(),
.macho => @fieldParentPtr(MachO, "base", base).deinit(),
.c => @fieldParentPtr(C, "base", base).deinit(),
.wasm => @fieldParentPtr(Wasm, "base", base).deinit(),
}
}
@ -155,6 +160,11 @@ pub const File = struct {
parent.deinit();
base.allocator.destroy(parent);
},
.wasm => {
const parent = @fieldParentPtr(Wasm, "base", base);
parent.deinit();
base.allocator.destroy(parent);
},
}
}
@ -167,6 +177,7 @@ pub const File = struct {
.elf => @fieldParentPtr(Elf, "base", base).flush(),
.macho => @fieldParentPtr(MachO, "base", base).flush(),
.c => @fieldParentPtr(C, "base", base).flush(),
.wasm => @fieldParentPtr(Wasm, "base", base).flush(),
};
}
@ -175,6 +186,7 @@ pub const File = struct {
.elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl),
.macho => @fieldParentPtr(MachO, "base", base).freeDecl(decl),
.c => unreachable,
.wasm => @fieldParentPtr(Wasm, "base", base).freeDecl(decl),
}
}
@ -183,6 +195,7 @@ pub const File = struct {
.elf => @fieldParentPtr(Elf, "base", base).error_flags,
.macho => @fieldParentPtr(MachO, "base", base).error_flags,
.c => return .{ .no_entry_point_found = false },
.wasm => return ErrorFlags{},
};
}
@ -197,6 +210,7 @@ pub const File = struct {
.elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports),
.macho => return @fieldParentPtr(MachO, "base", base).updateDeclExports(module, decl, exports),
.c => return {},
.wasm => return @fieldParentPtr(Wasm, "base", base).updateDeclExports(module, decl, exports),
}
}
@ -204,6 +218,7 @@ pub const File = struct {
elf,
macho,
c,
wasm,
};
pub const ErrorFlags = struct {
@ -2832,6 +2847,7 @@ pub const File = struct {
};
pub const MachO = @import("link/MachO.zig");
const Wasm = @import("link/Wasm.zig");
};
/// Saturating multiplication

View File

@ -0,0 +1,445 @@
const Wasm = @This();
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fs = std.fs;
const leb = std.debug.leb;
const Module = @import("../Module.zig");
const codegen = @import("../codegen/wasm.zig");
const link = @import("../link.zig");
/// Various magic numbers defined by the wasm spec
const spec = struct {
// Preamble written at the very start of the output file: the magic bytes
// directly followed by the version bytes.
const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1
// Section ids. The id is the first byte of a section's header; custom_id is
// additionally used for the padding section placed after each real section.
const custom_id = 0;
const types_id = 1;
const imports_id = 2;
const funcs_id = 3;
const tables_id = 4;
const memories_id = 5;
const globals_id = 6;
const exports_id = 7;
const start_id = 8;
const elements_id = 9;
const code_id = 10;
const data_id = 11;
};
/// The `link.File.Tag` value identifying this linker backend.
pub const base_tag = link.File.Tag.wasm;
/// Per-function link data stored in `Decl.fn_link.wasm` once a function has
/// been written to the binary.
pub const FnData = struct {
/// Index of the function, as returned by `Funcs.new`.
funcidx: u32,
/// Index of the function's type, as returned by `Types.new`.
typeidx: u32,
};
/// Linker state common to all backends; `openPath` fills in the tag,
/// options, output file and allocator.
base: link.File,
/// Bookkeeping for the type section.
types: Types,
/// Bookkeeping for the function section and the code section.
funcs: Funcs,
/// Bookkeeping for the export section.
exports: Exports,
/// Array over the section structs used in the various sections above to
/// allow iteration when shifting sections to make space.
/// TODO: this should eventually be size 11 when we use all the sections.
sections: [4]*Section,
/// Create the output file at `sub_path` inside `dir` (truncating whatever was
/// there), write the wasm preamble, and lay out the type, function, export
/// and code sections as fixed-size chunks, in that order.
/// Returns a pointer to the `base` field of a `Wasm` allocated with
/// `allocator`. On error the file is closed and the allocation released.
pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File {
    assert(options.object_format == .wasm);

    // TODO: read the file and keep valid parts instead of truncating
    const file = try dir.createFile(sub_path, .{ .truncate = true, .read = true });
    errdefer file.close();

    const wasm = try allocator.create(Wasm);
    errdefer allocator.destroy(wasm);

    const preamble = spec.magic ++ spec.version;
    try file.writeAll(&preamble);

    wasm.base = .{
        .tag = .wasm,
        .options = options,
        .file = file,
        .allocator = allocator,
    };

    // TODO: this should vary depending on the section and be less arbitrary
    const chunk_size = 1024;
    // The first section starts right after the preamble.
    const first_offset = @sizeOf(@TypeOf(preamble));

    const types_offset = first_offset;
    const funcs_offset = first_offset + chunk_size;
    const exports_offset = first_offset + 2 * chunk_size;
    const code_offset = first_offset + 3 * chunk_size;

    wasm.types = try Types.init(file, types_offset, chunk_size);
    wasm.funcs = try Funcs.init(file, funcs_offset, chunk_size, code_offset, chunk_size);
    wasm.exports = try Exports.init(file, exports_offset, chunk_size);
    try file.setEndPos(first_offset + 4 * chunk_size);

    // Listed in the order the sections appear in the file.
    wasm.sections = [_]*Section{
        &wasm.types.typesec.section,
        &wasm.funcs.funcsec,
        &wasm.exports.exportsec,
        &wasm.funcs.codesec.section,
    };

    return &wasm.base;
}
/// Close the output file if it is still open and release the memory held by
/// the type and function/code section bookkeeping.
pub fn deinit(self: *Wasm) void {
    if (self.base.file) |file| {
        file.close();
    }
    self.types.deinit();
    self.funcs.deinit();
}
/// Generate the function type and code for `decl`, write both into the
/// binary, record the resulting indexes in `decl.fn_link.wasm`, and re-emit
/// the export section.
/// Only function Decls are supported; anything else returns
/// `error.TODOImplementNonFnDeclsForWasm`.
/// If an error is returned after the previous entries of `decl` have been
/// released, `decl.fn_link.wasm` is left as null.
pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
    if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
        return error.TODOImplementNonFnDeclsForWasm;

    if (decl.fn_link.wasm) |fn_data| {
        self.types.free(fn_data.typeidx);
        self.funcs.free(fn_data.funcidx);
        // Forget the released indexes immediately. If anything below fails,
        // a retry or a later freeDecl() must not free them a second time,
        // which would put duplicates into the dead lists.
        decl.fn_link.wasm = null;
    }

    var buf = std.ArrayList(u8).init(self.base.allocator);
    defer buf.deinit();

    try codegen.genFunctype(&buf, decl);
    const typeidx = try self.types.new(buf.items);

    // Reuse the same buffer for the code; genCode() expects it to be empty.
    buf.items.len = 0;
    try codegen.genCode(&buf, decl);
    const funcidx = self.funcs.new(typeidx, buf.items) catch |err| {
        // Nothing references the new type entry, so hand it back for reuse.
        self.types.free(typeidx);
        return err;
    };

    decl.fn_link.wasm = .{ .typeidx = typeidx, .funcidx = funcidx };

    // Function indexes may have changed, so the exports must be rewritten.
    try self.exports.writeAll(module);
}
/// Currently does nothing and ignores all of its arguments: the export
/// section is rewritten at the end of `updateDecl` instead (see the TODO in
/// the body).
pub fn updateDeclExports(
self: *Wasm,
module: *Module,
decl: *const Module.Decl,
exports: []const *Module.Export,
) !void {
// TODO: updateDeclExports() may currently be called before updateDecl,
// presumably due to a bug. For now just rely on the following call
// being made in updateDecl().
//try self.exports.writeAll(module);
}
/// Release the type and function entries owned by `decl`, if it has any, and
/// clear `decl.fn_link.wasm` so that they are not released twice.
pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
    // TODO: remove this assert when non-function Decls are implemented
    assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);

    // Nothing was ever written for this Decl
    const fn_data = decl.fn_link.wasm orelse return;

    self.types.free(fn_data.typeidx);
    self.funcs.free(fn_data.funcidx);
    decl.fn_link.wasm = null;
}
/// Intentionally empty: `updateDecl` already writes its changes to the
/// output file directly, so nothing is done here.
pub fn flush(self: *Wasm) !void {}
/// Describes where a named section lives in the output file, together with
/// the custom section that directly follows it and acts as padding. This is
/// all the data needed to shift sections around once padding runs out.
const Section = struct {
    /// Bytes taken by a section header: a 1 byte section id followed by the
    /// contents size as a 5 byte fixed-width ULEB128.
    const header_size = 1 + 5;

    /// Position of the section id byte, counted from the start of the file.
    offset: u64,
    /// Total number of bytes owned by the section: its header, its contents,
    /// and the custom padding section directly behind it, if any.
    size: u64,

    /// Change the usable contents size of the section to `target` bytes by
    /// rewriting the header of the custom padding section that follows it.
    /// Shifting the following sections when the padding is used up is not
    /// implemented yet and yields `error.TODOImplementSectionShifting`.
    /// `current` is the present contents size; it is not needed yet.
    fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void {
        // The owned chunk of the file must hold: this section's header, the
        // target contents, the padding section's header, and the padding
        // section's name (1 byte) with otherwise empty contents.
        const fits = header_size + target + header_size + 1 + 0 <= self.size;
        if (!fits) return error.TODOImplementSectionShifting;

        // Everything behind the two headers and the contents is padding.
        const padding_contents_size = self.size - target - 2 * header_size;
        assert(padding_contents_size >= 1);

        // Padding section header, +1 for its name which is the empty string
        var padding_header: [header_size + 1]u8 = undefined;
        padding_header[0] = spec.custom_id;
        leb.writeUnsignedFixed(5, padding_header[1..header_size], @intCast(u32, padding_contents_size));
        padding_header[header_size] = 0;

        const padding_start = self.offset + header_size + target;
        try file.pwriteAll(&padding_header, padding_start);
    }
};
/// This can be used to manage the contents of any section which uses a vector
/// of contents. This interface maintains index stability while allowing for
/// reuse of "dead" indexes.
const VecSection = struct {
    /// Represents a single entry in the vector (e.g. a type in the type section)
    const Entry = struct {
        /// Offset from the start of the section contents in bytes
        offset: u32,
        /// Size in bytes of the entry
        size: u32,
    };

    section: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,
    /// List of all entries in the contents of the section.
    entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){},
    /// List of indexes of unreferenced entries which may be
    /// overwritten and reused.
    dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},

    /// Write the headers of an empty section with id `section_id` and of the
    /// custom padding section following it. Together they occupy exactly
    /// `initial_size` bytes of `file` starting at `offset`.
    fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection {
        // section id, section size, empty vector, custom section id,
        // custom section size, empty custom section name
        var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined;

        assert(initial_size >= initial_data.len);

        // The contents of the padding section begin right after its size
        // field, so the name byte counts as contents. Only the two headers
        // and the 5 byte vector length are excluded, which is the same value
        // Section.resize() computes for a contents size of 5. Subtracting
        // all of initial_data would end the padding one byte short of the
        // next section.
        const padding_contents_size = initial_size - 5 - 2 * Section.header_size;

        comptime var i = 0;
        initial_data[i] = section_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 5);
        i += 5;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 0);
        i += 5;
        initial_data[i] = spec.custom_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], @intCast(u32, padding_contents_size));
        i += 5;
        initial_data[i] = 0;

        try file.pwriteAll(&initial_data, offset);

        return VecSection{
            .section = .{
                .offset = offset,
                .size = initial_size,
            },
            .contents_size = 5,
        };
    }

    /// Free the entry list and the dead list.
    fn deinit(self: *VecSection, allocator: *Allocator) void {
        self.entries.deinit(allocator);
        self.dead_list.deinit(allocator);
    }

    /// Write a new entry into the file, returning the index used.
    /// A dead entry of exactly `data.len` bytes is overwritten if one
    /// exists, otherwise the entry is appended to the end of the section.
    fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 {
        // First look for a dead entry we can reuse
        for (self.dead_list.items) |dead_idx, i| {
            const dead_entry = &self.entries.items[dead_idx];
            if (dead_entry.size == data.len) {
                // Found a dead entry of the right length, overwrite it
                try file.pwriteAll(data, self.section.offset + Section.header_size + dead_entry.offset);
                _ = self.dead_list.swapRemove(i);
                return dead_idx;
            }
        }

        // TODO: We can be more efficient if we special-case one or
        // more consecutive dead entries at the end of the vector.

        // We failed to find a dead entry to reuse, so the new entry goes to
        // the end of the section. Reserve all memory before touching the
        // file so that running out of memory cannot leave the bookkeeping
        // half updated. Keeping the dead list large enough to store every
        // entry also makes it so that delEntry() cannot fail.
        // TODO: figure out a better way that doesn't waste as much memory
        const new_count = self.entries.items.len + 1;
        try self.entries.ensureCapacity(allocator, new_count);
        try self.dead_list.ensureCapacity(allocator, new_count);

        const data_len = @intCast(u32, data.len);
        try self.section.resize(file, self.contents_size, self.contents_size + data_len);
        try file.pwriteAll(data, self.section.offset + Section.header_size + self.contents_size);

        self.entries.appendAssumeCapacity(.{
            .offset = self.contents_size,
            .size = data_len,
        });
        self.contents_size += data_len;

        // Update the size in the section header and the item count of
        // the contents vector.
        var size_and_count: [10]u8 = undefined;
        leb.writeUnsignedFixed(5, size_and_count[0..5], self.contents_size);
        leb.writeUnsignedFixed(5, size_and_count[5..], @intCast(u32, self.entries.items.len));
        try file.pwriteAll(&size_and_count, self.section.offset + 1);

        return @intCast(u32, self.entries.items.len - 1);
    }

    /// Mark the entry referenced by the given index as dead so that
    /// addEntry() may overwrite it. The file is not modified. Cannot fail
    /// because addEntry() reserves dead list capacity for every entry.
    fn delEntry(self: *VecSection, index: u32) void {
        self.dead_list.appendAssumeCapacity(index);
    }
};
/// Bookkeeping for the type section. Must be stored in the `types` field of
/// a `Wasm`, as the methods use @fieldParentPtr to reach the file and
/// allocator.
const Types = struct {
    typesec: VecSection,

    /// Write an empty type section of `initial_size` bytes at `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Types {
        const typesec = try VecSection.init(spec.types_id, file, offset, initial_size);
        return Types{ .typesec = typesec };
    }

    fn deinit(self: *Types) void {
        const parent = @fieldParentPtr(Wasm, "types", self);
        self.typesec.deinit(parent.base.allocator);
    }

    /// Store the encoded type `data` in the section and return its index.
    fn new(self: *Types, data: []const u8) !u32 {
        const parent = @fieldParentPtr(Wasm, "types", self);
        const file = parent.base.file.?;
        return self.typesec.addEntry(file, parent.base.allocator, data);
    }

    /// Allow the type at `typeidx` to be overwritten by a later `new`.
    fn free(self: *Types, typeidx: u32) void {
        self.typesec.delEntry(typeidx);
    }
};
/// Bookkeeping for the function section and the code section. Must be stored
/// in the `funcs` field of a `Wasm`, as the methods use @fieldParentPtr to
/// reach the file and allocator.
const Funcs = struct {
    /// This section needs special handling to keep the indexes matching with
    /// the codesec, so we cant just use a VecSection.
    funcsec: Section,
    /// Number of functions listed in the funcsec. Must be kept in sync with
    /// codesec.entries.items.len.
    funcs_count: u32,
    codesec: VecSection,

    /// Write an empty function section and an empty code section at the
    /// given offsets, each followed by padding up to the given size.
    fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs {
        // Only the location is kept for the funcsec; its entries are fixed
        // width and tracked through funcs_count.
        const funcs_vec = try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size);
        const code_vec = try VecSection.init(spec.code_id, file, code_offset, code_size);
        return Funcs{
            .funcsec = funcs_vec.section,
            .funcs_count = 0,
            .codesec = code_vec,
        };
    }

    fn deinit(self: *Funcs) void {
        const parent = @fieldParentPtr(Wasm, "funcs", self);
        self.codesec.deinit(parent.base.allocator);
    }

    /// Add a new function to the binary, first finding space for and writing
    /// the code then writing the typeidx to the corresponding index in the
    /// funcsec. Returns the function index used.
    fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 {
        const parent = @fieldParentPtr(Wasm, "funcs", self);
        const file = parent.base.file.?;
        const allocator = parent.base.allocator;

        assert(self.funcs_count == self.codesec.entries.items.len);

        // TODO: consider nop-padding the code if there is a close but not perfect fit
        const funcidx = try self.codesec.addEntry(file, allocator, code);

        // The codesec only grows when no dead entry could be reused. In that
        // case the funcsec needs one more slot as well.
        const codesec_grew = self.funcs_count < self.codesec.entries.items.len;
        if (codesec_grew) {
            // u32 vector length + funcs_count u32s in the vector
            const old_contents_size = 5 + self.funcs_count * 5;
            const new_contents_size = old_contents_size + 5;
            try self.funcsec.resize(file, old_contents_size, new_contents_size);
            self.funcs_count += 1;

            // Rewrite the contents size in the section header and the item
            // count of the vector, which sit next to each other in the file.
            var header: [10]u8 = undefined;
            leb.writeUnsignedFixed(5, header[0..5], new_contents_size);
            leb.writeUnsignedFixed(5, header[5..], self.funcs_count);
            try file.pwriteAll(&header, self.funcsec.offset + 1);
        }

        assert(self.funcs_count == self.codesec.entries.items.len);

        // Every slot is a 5 byte fixed-width typeidx; slots follow the 5
        // byte vector length.
        var encoded_typeidx: [5]u8 = undefined;
        leb.writeUnsignedFixed(5, &encoded_typeidx, typeidx);
        const slot_offset = self.funcsec.offset + Section.header_size + 5 + funcidx * 5;
        try file.pwriteAll(&encoded_typeidx, slot_offset);

        return funcidx;
    }

    /// Allow the code of the function at `funcidx` to be overwritten by a
    /// later `new`. The funcsec is left untouched.
    fn free(self: *Funcs, funcidx: u32) void {
        self.codesec.delEntry(funcidx);
    }
};
/// Exports are tricky. We can't leave dead entries in the binary as they
/// would obviously be visible from the execution environment. The simplest
/// way to work around this is to re-emit the export section whenever
/// something changes. This also makes it easier to ensure exported function
/// and global indexes are updated as they change.
const Exports = struct {
    exportsec: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,

    /// Write an empty export section of `initial_size` bytes at `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Exports {
        return Exports{
            .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section,
            .contents_size = 5,
        };
    }

    /// Re-emit the whole export section from `module.decl_exports`.
    /// Must be called on the `exports` field of a `Wasm`, as the output
    /// file is reached through @fieldParentPtr.
    fn writeAll(self: *Exports, module: *Module) !void {
        const wasm = @fieldParentPtr(Wasm, "exports", self);
        const file = wasm.base.file.?;
        var buf: [5]u8 = undefined;

        // First ensure the section is the right size
        var export_count: u32 = 0;
        var new_contents_size: u32 = 5;
        for (module.decl_exports.entries.items) |entry| {
            for (entry.value) |e| {
                export_count += 1;
                new_contents_size += calcSize(e);
            }
        }
        if (new_contents_size != self.contents_size) {
            try self.exportsec.resize(file, self.contents_size, new_contents_size);
            leb.writeUnsignedFixed(5, &buf, new_contents_size);
            try file.pwriteAll(&buf, self.exportsec.offset + 1);
            // Remember what the header now says. Without this the cached
            // value stays at its initial 5, so a later call with no exports
            // would skip the resize and leave a stale, too large size in the
            // section header.
            self.contents_size = new_contents_size;
        }

        try file.seekTo(self.exportsec.offset + Section.header_size);
        const writer = file.writer();

        // Length of the exports vec
        leb.writeUnsignedFixed(5, &buf, export_count);
        try writer.writeAll(&buf);

        for (module.decl_exports.entries.items) |entry|
            for (entry.value) |e| try writeExport(writer, e);
    }

    /// Return the total number of bytes an export will take.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn calcSize(e: *Module.Export) u32 {
        // LEB128 name length + name bytes + export type + LEB128 index
        return 5 + @intCast(u32, e.options.name.len) + 1 + 5;
    }

    /// Write the data for a single export to the given writer.
    /// Only exported functions are supported; anything else returns
    /// `error.TODOImplementNonFnDeclsForWasm`. The exported Decl must
    /// already have its `fn_link.wasm` set.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn writeExport(writer: anytype, e: *Module.Export) !void {
        var buf: [5]u8 = undefined;

        // Export name length + name
        leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len));
        try writer.writeAll(&buf);
        try writer.writeAll(e.options.name);

        switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
            .Fn => {
                // Type of the export
                try writer.writeByte(0x00);
                // Exported function index
                leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx);
                try writer.writeAll(&buf);
            },
            else => return error.TODOImplementNonFnDeclsForWasm,
        }
    }
};

View File

@ -150,6 +150,7 @@ const usage_build_generic =
\\ -ofmt=[mode] Override target object format
\\ elf Executable and Linking Format
\\ c Compile to C source code
\\ wasm WebAssembly
\\ coff (planned) Common Object File Format (Windows)
\\ pe (planned) Portable Executable (Windows)
\\ macho (planned) macOS relocatables