Isaac Freund f9963909a1
stage2/wasm: only free types after func overwrite
Functions which are free'd are not immediately removed from the binary
as this would cause a shifting of function indexes. Instead, they hang
around until they can be overwritten by a new function. This means that
the types associated with these dead functions must also remain until
the function is overwritten to avoid a type mismatch.
2020-08-18 01:01:13 +02:00

454 lines
17 KiB
Zig

const Wasm = @This();
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fs = std.fs;
const leb = std.debug.leb;
const Module = @import("../Module.zig");
const codegen = @import("../codegen/wasm.zig");
const link = @import("../link.zig");
/// Various magic numbers defined by the wasm spec
const spec = struct {
/// The four bytes written at the very start of the output file.
const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
/// The four version bytes; `openPath` writes them directly after `magic`.
const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1
// Section ids. An id is the first byte of a section's header in the file.
// This file currently emits custom (as padding), types, funcs, exports
// and code sections; the remaining ids are not referenced here yet.
const custom_id = 0;
const types_id = 1;
const imports_id = 2;
const funcs_id = 3;
const tables_id = 4;
const memories_id = 5;
const globals_id = 6;
const exports_id = 7;
const start_id = 8;
const elements_id = 9;
const code_id = 10;
const data_id = 11;
};
/// The `link.File.Tag` value that identifies this linker implementation.
pub const base_tag = link.File.Tag.wasm;
/// Linker data kept for a function Decl; stored in `decl.fn_link.wasm`.
pub const FnData = struct {
/// Function index returned by `Funcs.new` for the Decl's current code.
/// It is what gets written into the export section for exported functions.
funcidx: u32,
};
/// Generic linker state (tag, options, output file, allocator). `openPath`
/// returns a pointer to this field.
base: link.File,
/// Bookkeeping for the type section.
types: Types,
/// Bookkeeping for the function section and the code section.
funcs: Funcs,
/// Bookkeeping for the export section.
exports: Exports,
/// Array over the section structs used in the various sections above to
/// allow iteration when shifting sections to make space.
/// TODO: this should eventually be size 11 when we use all the sections.
sections: [4]*Section,
/// Create the output file `sub_path` inside `dir` (truncating whatever was
/// there) and allocate a `Wasm` linker that manages it.
///
/// The file is laid out as the 8 byte magic + version preamble followed by
/// four equally sized chunks holding the type, function, export and code
/// sections, in that order. Returns a pointer to the `base` field of the new
/// linker. On error the file is closed and the linker allocation released.
pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File {
    assert(options.object_format == .wasm);

    // TODO: read the file and keep valid parts instead of truncating
    const file = try dir.createFile(sub_path, .{ .truncate = true, .read = true });
    errdefer file.close();

    const wasm = try allocator.create(Wasm);
    errdefer allocator.destroy(wasm);

    const preamble = spec.magic ++ spec.version;
    try file.writeAll(&preamble);

    // TODO: this should vary depending on the section and be less arbitrary
    const chunk_size = 1024;

    // Each section owns one chunk; the chunks directly follow the preamble.
    const types_offset = preamble.len;
    const funcs_offset = types_offset + chunk_size;
    const exports_offset = types_offset + 2 * chunk_size;
    const code_offset = types_offset + 3 * chunk_size;
    const end_offset = types_offset + 4 * chunk_size;

    // Writing the initial section headers happens in the same order as the
    // fields are listed in the struct literal below.
    const types_state = try Types.init(file, types_offset, chunk_size);
    const funcs_state = try Funcs.init(file, funcs_offset, chunk_size, code_offset, chunk_size);
    const exports_state = try Exports.init(file, exports_offset, chunk_size);

    wasm.* = .{
        .base = .{
            .tag = .wasm,
            .options = options,
            .file = file,
            .allocator = allocator,
        },
        .types = types_state,
        .funcs = funcs_state,
        .exports = exports_state,
        // These must be ordered as they will appear in the output file
        .sections = [_]*Section{
            &wasm.types.typesec.section,
            &wasm.funcs.funcsec,
            &wasm.exports.exportsec,
            &wasm.funcs.codesec.section,
        },
    };

    try file.setEndPos(end_offset);

    return &wasm.base;
}
/// Release the memory owned by the section bookkeeping. Only the type and
/// function/code state is deinitialized here.
pub fn deinit(self: *Wasm) void {
    const types = &self.types;
    const funcs = &self.funcs;
    types.deinit();
    funcs.deinit();
}
/// Generate the function type and code for `decl` and write them to the
/// binary, replacing whatever was previously emitted for it.
///
/// Only function Decls are supported; anything else returns
/// `error.TODOImplementNonFnDeclsForWasm`. On success `decl.fn_link.wasm`
/// holds the function index now used by the Decl and the export section is
/// marked dirty so that `flush` rewrites it.
pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
    if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
        return error.TODOImplementNonFnDeclsForWasm;

    const allocator = self.base.allocator;

    // Run all of codegen before touching any linker state. If the previous
    // function were freed first and codegen then failed, `decl.fn_link.wasm`
    // would keep referring to an index that is already on the dead list and
    // the next update or free of this Decl would add it a second time.
    var functype = std.ArrayList(u8).init(allocator);
    defer functype.deinit();
    try codegen.genFunctype(&functype, decl);

    var code = std.ArrayList(u8).init(allocator);
    defer code.deinit();
    try codegen.genCode(&code, decl);

    // Free the old function right before adding the new one so that its
    // slot can be reused if the new code happens to have the same size.
    if (decl.fn_link.wasm) |fn_data| {
        self.funcs.free(fn_data.funcidx);
    }

    const typeidx = try self.types.new(functype.items);
    const funcidx = try self.funcs.new(typeidx, code.items);

    decl.fn_link.wasm = .{ .funcidx = funcidx };

    // TODO: we should be more smart and set this only when needed
    self.exports.dirty = true;
}
/// Note that the exports of a Decl changed. Nothing is written here; the
/// export section is only flagged so that `flush` re-emits it. The `module`,
/// `decl` and `exports` arguments are not inspected.
pub fn updateDeclExports(
    self: *Wasm,
    module: *Module,
    decl: *const Module.Decl,
    exports: []const *Module.Export,
) !void {
    const export_state = &self.exports;
    export_state.dirty = true;
}
/// Mark the function emitted for `decl`, if any, as dead and forget its
/// index. Does nothing for a Decl that has no function in the binary.
pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
    // TODO: remove this assert when non-function Decls are implemented
    assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);

    const fn_data = decl.fn_link.wasm orelse return;
    self.funcs.free(fn_data.funcidx);
    decl.fn_link.wasm = null;
}
/// Write out pending changes. Currently this only means re-emitting the
/// export section when it has been marked dirty.
pub fn flush(self: *Wasm, module: *Module) !void {
    if (!self.exports.dirty) return;
    try self.exports.writeAll(module);
}
/// Location of one named section in the output file together with the
/// custom section that directly follows it and acts as padding. This is all
/// the data needed to shift sections around once the padding runs out.
const Section = struct {
    /// The size of a section header: 1 byte section id + 5 bytes
    /// for the fixed-width ULEB128 encoded contents size.
    const header_size = 1 + 5;

    /// Offset of the section id byte from the start of the file.
    offset: u64,
    /// Size of the section, including the header and directly
    /// following custom section used for padding if any.
    size: u64,

    /// Make room for `target` bytes of contents by rewriting the header of
    /// the padding section so that it starts right after those contents and
    /// fills the rest of the chunk owned by this section.
    ///
    /// `current` is the present contents size; it is accepted but not used.
    /// The header of the section itself is not modified, callers update the
    /// contents size on their own. Shifting the following sections is not
    /// implemented: `error.TODOImplementSectionShifting` is returned when
    /// the chunk is too small.
    fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void {
        // The chunk has to hold this section's header, the contents, the
        // header of the padding section and the padding section's name
        // (one byte) followed by zero or more bytes of actual padding.
        const min_chunk_size = header_size + target + header_size + 1 + 0;
        if (min_chunk_size > self.size) return error.TODOImplementSectionShifting;

        const padding_offset = self.offset + header_size + target;
        const padding_contents_size = self.size - target - 2 * header_size;
        assert(padding_contents_size >= 1);

        // Custom section id, contents size and the length byte of the
        // (empty) custom section name.
        var padding_header: [header_size + 1]u8 = undefined;
        padding_header[0] = spec.custom_id;
        leb.writeUnsignedFixed(5, padding_header[1..header_size], @intCast(u32, padding_contents_size));
        padding_header[header_size] = 0;

        try file.pwriteAll(&padding_header, padding_offset);
    }
};
/// This can be used to manage the contents of any section which uses a vector
/// of contents. This interface maintains index stability while allowing for
/// reuse of "dead" indexes.
const VecSection = struct {
    /// Represents a single entry in the vector (e.g. a type in the type section)
    const Entry = struct {
        /// Offset from the start of the section contents in bytes
        offset: u32,
        /// Size in bytes of the entry
        size: u32,
    };

    section: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,
    /// List of all entries in the contents of the section.
    entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){},
    /// List of indexes of unreferenced entries which may be
    /// overwritten and reused.
    dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},

    /// Write the headers of the section and custom padding section.
    ///
    /// The section starts out with an empty vector (5 bytes of contents for
    /// the fixed-width item count) and the padding section fills the rest of
    /// the `initial_size` bytes starting at `offset`.
    fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection {
        // section id, section size, empty vector, custom section id,
        // custom section size, empty custom section name
        var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined;

        assert(initial_size >= initial_data.len);

        // The contents of the padding section are everything after its size
        // field up to the end of the chunk. That includes the byte holding
        // the length of the empty name, so only the bytes in front of that
        // byte are subtracted. This matches what Section.resize() computes.
        const padding_contents_size = initial_size - (initial_data.len - 1);

        initial_data[0] = section_id;
        // Contents size: just the item count of the vector
        leb.writeUnsignedFixed(5, initial_data[1..6], 5);
        // Item count of the vector
        leb.writeUnsignedFixed(5, initial_data[6..11], 0);
        initial_data[11] = spec.custom_id;
        leb.writeUnsignedFixed(5, initial_data[12..17], @intCast(u32, padding_contents_size));
        // Length of the custom section name
        initial_data[17] = 0;

        try file.pwriteAll(&initial_data, offset);

        return VecSection{
            .section = .{
                .offset = offset,
                .size = initial_size,
            },
            .contents_size = 5,
        };
    }

    /// Free the entry list and the dead list.
    fn deinit(self: *VecSection, allocator: *Allocator) void {
        self.entries.deinit(allocator);
        self.dead_list.deinit(allocator);
    }

    /// Write a new entry into the file, returning the index used.
    ///
    /// A dead entry of exactly the same size is overwritten if there is one,
    /// otherwise the entry is appended to the end of the section. When an
    /// error is returned the in-memory bookkeeping is left as it was.
    fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 {
        const contents_start = self.section.offset + Section.header_size;

        // First look for a dead entry we can reuse
        for (self.dead_list.items) |dead_idx, i| {
            const dead_entry = self.entries.items[dead_idx];
            if (dead_entry.size == data.len) {
                // Found a dead entry of the right length, overwrite it
                try file.pwriteAll(data, contents_start + dead_entry.offset);
                _ = self.dead_list.swapRemove(i);
                return dead_idx;
            }
        }

        // TODO: We can be more efficient if we special-case one or
        // more consecutive dead entries at the end of the vector.

        // We failed to find a dead entry to reuse, so write the new
        // entry to the end of the section.
        const new_index = @intCast(u32, self.entries.items.len);
        const data_len = @intCast(u32, data.len);
        const new_contents_size = self.contents_size + data_len;

        // Reserve all memory before anything is written or changed so that
        // running out of memory cannot leave the entry list out of sync with
        // the file. The dead list always gets enough space to store all
        // free'd entries, which makes it so that delEntry() cannot fail.
        // TODO: figure out a better way that doesn't waste as much memory
        try self.entries.ensureCapacity(allocator, self.entries.items.len + 1);
        try self.dead_list.ensureCapacity(allocator, self.entries.items.len + 1);

        try self.section.resize(file, self.contents_size, new_contents_size);
        try file.pwriteAll(data, contents_start + self.contents_size);

        // Update the size in the section header and the item count of
        // the contents vector.
        var size_and_count: [10]u8 = undefined;
        leb.writeUnsignedFixed(5, size_and_count[0..5], new_contents_size);
        leb.writeUnsignedFixed(5, size_and_count[5..], new_index + 1);
        try file.pwriteAll(&size_and_count, self.section.offset + 1);

        // Everything that can fail is done, commit the new entry.
        self.entries.appendAssumeCapacity(.{
            .offset = self.contents_size,
            .size = data_len,
        });
        self.contents_size = new_contents_size;

        return new_index;
    }

    /// Mark the entry referenced by the given index as dead so that a later
    /// addEntry() may overwrite it. The bytes in the file are not touched.
    /// An index must not be marked dead again before it has been reused.
    fn delEntry(self: *VecSection, index: u32) void {
        self.dead_list.appendAssumeCapacity(index);
    }
};
/// Bookkeeping for the type section. Must be the `types` field of a `Wasm`
/// as the parent struct is looked up to get at the file and allocator.
const Types = struct {
    typesec: VecSection,

    /// Write an empty type section into the `initial_size` bytes at `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Types {
        const typesec = try VecSection.init(spec.types_id, file, offset, initial_size);
        return Types{ .typesec = typesec };
    }

    /// Free the memory used to keep track of the types.
    fn deinit(self: *Types) void {
        const parent = @fieldParentPtr(Wasm, "types", self);
        const allocator = parent.base.allocator;
        self.typesec.deinit(allocator);
    }

    /// Write the encoded function type `data` to the type section and
    /// return the type index it can be referenced by.
    fn new(self: *Types, data: []const u8) !u32 {
        const parent = @fieldParentPtr(Wasm, "types", self);
        const file = parent.base.file.?;
        const allocator = parent.base.allocator;
        return self.typesec.addEntry(file, allocator, data);
    }

    /// Mark the type with the given index as dead, allowing it to be
    /// overwritten by a later call to new().
    fn free(self: *Types, typeidx: u32) void {
        const typesec = &self.typesec;
        typesec.delEntry(typeidx);
    }
};
/// Bookkeeping for the function section and the code section, which have to
/// be kept in lockstep: the entry at a given index of the function section
/// is the type index of the code entry at the same index. Must be the
/// `funcs` field of a `Wasm` as the parent struct is looked up to get at the
/// file, the allocator and the type section.
const Funcs = struct {
    /// This section needs special handling to keep the indexes matching with
    /// the codesec, so we cant just use a VecSection.
    funcsec: Section,
    /// The typeidx stored for each function, indexed by funcidx.
    func_types: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},
    codesec: VecSection,

    /// Write an empty function section and an empty code section into their
    /// respective chunks of the file.
    fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs {
        return Funcs{
            .funcsec = (try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size)).section,
            .codesec = try VecSection.init(spec.code_id, file, code_offset, code_size),
        };
    }

    /// Free the memory used to keep track of the functions.
    fn deinit(self: *Funcs) void {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        self.func_types.deinit(wasm.base.allocator);
        self.codesec.deinit(wasm.base.allocator);
    }

    /// Add a new function to the binary, first finding space for and writing
    /// the code then writing the typeidx to the corresponding index in the
    /// funcsec. Returns the function index used.
    ///
    /// If the code ends up in the slot of a dead function, the type of that
    /// dead function is free'd and the slot is recorded as using `typeidx`.
    fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        const file = wasm.base.file.?;
        const allocator = wasm.base.allocator;

        assert(self.func_types.items.len == self.codesec.entries.items.len);

        // TODO: consider nop-padding the code if there is a close but not perfect fit
        const funcidx = try self.codesec.addEntry(file, allocator, code);

        if (self.func_types.items.len < self.codesec.entries.items.len) {
            // The code was appended, so the funcsec has to grow by one
            // fixed-width typeidx as well.
            // u32 vector length + funcs_count u32s in the vector
            const current = 5 + @intCast(u32, self.func_types.items.len) * 5;
            try self.funcsec.resize(file, current, current + 5);
            try self.func_types.append(allocator, typeidx);

            // Update the size in the section header and the item count of
            // the contents vector.
            const count = @intCast(u32, self.func_types.items.len);
            var size_and_count: [10]u8 = undefined;
            leb.writeUnsignedFixed(5, size_and_count[0..5], 5 + count * 5);
            leb.writeUnsignedFixed(5, size_and_count[5..], count);
            try file.pwriteAll(&size_and_count, self.funcsec.offset + 1);
        } else {
            // We are overwriting a dead function and may now free the type
            wasm.types.free(self.func_types.items[funcidx]);
            // Remember the type now used by this slot. Without this the next
            // overwrite of the slot would free the stale type a second time
            // and the type stored here would never be free'd.
            self.func_types.items[funcidx] = typeidx;
        }

        assert(self.func_types.items.len == self.codesec.entries.items.len);

        // Write the typeidx into the slot of the funcsec vector, which sits
        // after the section header and the 5 byte item count.
        var typeidx_leb: [5]u8 = undefined;
        leb.writeUnsignedFixed(5, &typeidx_leb, typeidx);
        try file.pwriteAll(&typeidx_leb, self.funcsec.offset + Section.header_size + 5 + funcidx * 5);

        return funcidx;
    }

    /// Mark the function with the given index as dead. Its code and type
    /// stay in the binary until new() reuses the slot, as removing them
    /// would shift the indexes of other functions.
    fn free(self: *Funcs, funcidx: u32) void {
        self.codesec.delEntry(funcidx);
    }
};
/// Exports are tricky. We can't leave dead entries in the binary as they
/// would obviously be visible from the execution environment. The simplest
/// way to work around this is to re-emit the export section whenever
/// something changes. This also makes it easier to ensure exported function
/// and global indexes are updated as they change.
const Exports = struct {
    exportsec: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,
    /// If this is true, then exports will be rewritten on flush()
    dirty: bool,

    /// Write an empty export section into the `initial_size` bytes at
    /// `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Exports {
        return Exports{
            .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section,
            .contents_size = 5,
            .dirty = false,
        };
    }

    /// Re-emit the whole export section from `module.decl_exports` and clear
    /// the dirty flag. Must be called on the `exports` field of a `Wasm` as
    /// the parent struct is looked up to get at the file.
    fn writeAll(self: *Exports, module: *Module) !void {
        const wasm = @fieldParentPtr(Wasm, "exports", self);
        const file = wasm.base.file.?;
        var buf: [5]u8 = undefined;

        // First ensure the section is the right size
        var export_count: u32 = 0;
        var new_contents_size: u32 = 5;
        for (module.decl_exports.entries.items) |entry| {
            for (entry.value) |e| {
                export_count += 1;
                new_contents_size += calcSize(e);
            }
        }
        if (new_contents_size != self.contents_size) {
            try self.exportsec.resize(file, self.contents_size, new_contents_size);
            leb.writeUnsignedFixed(5, &buf, new_contents_size);
            try file.pwriteAll(&buf, self.exportsec.offset + 1);
            // Remember what the file now says. Otherwise the next call would
            // compare against a stale size and could skip rewriting the
            // header and padding even though the size changed.
            self.contents_size = new_contents_size;
        }

        try file.seekTo(self.exportsec.offset + Section.header_size);
        const writer = file.writer();

        // Length of the exports vec
        leb.writeUnsignedFixed(5, &buf, export_count);
        try writer.writeAll(&buf);

        for (module.decl_exports.entries.items) |entry|
            for (entry.value) |e| try writeExport(writer, e);

        self.dirty = false;
    }

    /// Return the total number of bytes an export will take.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn calcSize(e: *Module.Export) u32 {
        // LEB128 name length + name bytes + export type + LEB128 index
        return 5 + @intCast(u32, e.options.name.len) + 1 + 5;
    }

    /// Write the data for a single export to the given writer. Only
    /// exported functions are supported, any other kind of Decl returns
    /// `error.TODOImplementNonFnDeclsForWasm`.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn writeExport(writer: anytype, e: *Module.Export) !void {
        var buf: [5]u8 = undefined;

        // Export name length + name
        leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len));
        try writer.writeAll(&buf);
        try writer.writeAll(e.options.name);

        switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
            .Fn => {
                // Type of the export
                try writer.writeByte(0x00);
                // Exported function index
                leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx);
                try writer.writeAll(&buf);
            },
            else => return error.TODOImplementNonFnDeclsForWasm,
        }
    }
};