mirror of
https://github.com/ziglang/zig.git
synced 2025-12-28 17:13:19 +00:00
Functions that are freed are not immediately removed from the binary, as this would shift the function indexes. Instead, they remain in place until they can be overwritten by a new function. This means that the types associated with these dead functions must also remain until the function is overwritten, to avoid a type mismatch.
454 lines
17 KiB
Zig
454 lines
17 KiB
Zig
const Wasm = @This();
|
|
|
|
const std = @import("std");
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const fs = std.fs;
|
|
const leb = std.debug.leb;
|
|
|
|
const Module = @import("../Module.zig");
|
|
const codegen = @import("../codegen/wasm.zig");
|
|
const link = @import("../link.zig");
|
|
|
|
/// Magic numbers defined by the wasm spec: the file preamble and the
/// one-byte id of every section kind.
const spec = struct {
    /// First four bytes of the file: "\0asm".
    const magic = [4]u8{ 0x00, 'a', 's', 'm' };
    /// Following four bytes of the file: version 1.
    const version = [4]u8{ 1, 0, 0, 0 };

    // Section ids, in ascending numeric order.
    const custom_id = 0;
    const types_id = 1;
    const imports_id = 2;
    const funcs_id = 3;
    const tables_id = 4;
    const memories_id = 5;
    const globals_id = 6;
    const exports_id = 7;
    const start_id = 8;
    const elements_id = 9;
    const code_id = 10;
    const data_id = 11;
};
|
|
|
|
/// The `link.File.Tag` identifying this linker backend.
pub const base_tag: link.File.Tag = .wasm;
|
|
|
|
/// Per-function linker data, stored in `decl.fn_link.wasm` by `updateDecl`.
pub const FnData = struct {
    /// Index of the function as returned by `Funcs.new`. This is the value
    /// written as the function index when the Decl is exported.
    funcidx: u32,
};
|
|
|
|
/// Generic linker state (tag, options, output file, allocator).
base: link.File,

/// Manages the type section.
types: Types,
/// Manages the function section together with the code section.
funcs: Funcs,
/// Manages the export section.
exports: Exports,

/// Pointers to the `Section` structs embedded in the fields above, in the
/// order the sections appear in the output file. This allows iterating over
/// them when shifting sections to make space.
/// TODO: this should eventually be size 11 when we use all the sections.
sections: [4]*Section,
|
|
|
|
/// Create the output file at `sub_path` inside `dir` (truncating whatever was
/// there), write the wasm preamble and lay out one fixed-size chunk for each
/// of the type, function, export and code sections, in that order.
/// Returns a pointer to the `base` field of a newly allocated `Wasm`.
/// On error the file is closed and the allocation is released.
pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File {
    assert(options.object_format == .wasm);

    // TODO: read the file and keep valid parts instead of truncating
    const file = try dir.createFile(sub_path, .{ .truncate = true, .read = true });
    errdefer file.close();

    const wasm = try allocator.create(Wasm);
    errdefer allocator.destroy(wasm);

    const preamble = spec.magic ++ spec.version;
    try file.writeAll(&preamble);

    // TODO: this should vary depending on the section and be less arbitrary
    const chunk_size = 1024;

    // The chunks directly follow the preamble, in file order.
    const types_offset = @sizeOf(@TypeOf(preamble));
    const funcs_offset = types_offset + chunk_size;
    const exports_offset = funcs_offset + chunk_size;
    const code_offset = exports_offset + chunk_size;
    const end_offset = code_offset + chunk_size;

    wasm.* = .{
        .base = .{
            .tag = .wasm,
            .options = options,
            .file = file,
            .allocator = allocator,
        },

        .types = try Types.init(file, types_offset, chunk_size),
        .funcs = try Funcs.init(file, funcs_offset, chunk_size, code_offset, chunk_size),
        .exports = try Exports.init(file, exports_offset, chunk_size),

        // These must be ordered as they will appear in the output file
        .sections = [_]*Section{
            &wasm.types.typesec.section,
            &wasm.funcs.funcsec,
            &wasm.exports.exportsec,
            &wasm.funcs.codesec.section,
        },
    };

    // Extend the file so that it covers the padding of the last chunk.
    try file.setEndPos(end_offset);

    return &wasm.base;
}
|
|
|
|
/// Release the memory held by the type and function section managers.
/// The export section manager owns no allocations.
pub fn deinit(self: *Wasm) void {
    self.funcs.deinit();
    self.types.deinit();
}
|
|
|
|
/// Generate the function type and code for `decl` and write them into the
/// output file, storing the resulting function index in `decl.fn_link.wasm`.
/// If the Decl was already emitted, its previous function index is marked as
/// dead first. Only function Decls are supported; anything else returns
/// `error.TODOImplementNonFnDeclsForWasm`.
pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
    if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
        return error.TODOImplementNonFnDeclsForWasm;

    if (decl.fn_link.wasm) |fn_data| {
        self.funcs.free(fn_data.funcidx);
        // Forget the index right away, exactly like freeDecl() does. If any
        // step below fails, the Decl must not keep referring to an index that
        // is already on the dead list, or a later update/free of this Decl
        // would mark the same index as dead a second time.
        decl.fn_link.wasm = null;
    }

    var buf = std.ArrayList(u8).init(self.base.allocator);
    defer buf.deinit();

    try codegen.genFunctype(&buf, decl);
    const typeidx = try self.types.new(buf.items);
    // Reuse the same buffer for the function body.
    buf.items.len = 0;

    try codegen.genCode(&buf, decl);
    const funcidx = try self.funcs.new(typeidx, buf.items);

    decl.fn_link.wasm = .{ .funcidx = funcidx };

    // TODO: we should be more smart and set this only when needed
    self.exports.dirty = true;
}
|
|
|
|
/// Note that the exports changed. Nothing is written here: the export
/// section is only flagged as dirty and gets rewritten by `flush`.
/// `module`, `decl` and `exports` are currently unused.
pub fn updateDeclExports(
    self: *Wasm,
    module: *Module,
    decl: *const Module.Decl,
    exports: []const *Module.Export,
) !void {
    self.exports.dirty = true;
}
|
|
|
|
/// Mark the function emitted for `decl`, if any, as dead and clear the
/// function index stored on the Decl.
pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
    // TODO: remove this assert when non-function Decls are implemented
    assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);

    // Nothing to do for a Decl that was never emitted.
    const fn_data = decl.fn_link.wasm orelse return;
    self.funcs.free(fn_data.funcidx);
    decl.fn_link.wasm = null;
}
|
|
|
|
/// Rewrite the export section if it was marked dirty since the last flush.
pub fn flush(self: *Wasm, module: *Module) !void {
    if (!self.exports.dirty) return;
    try self.exports.writeAll(module);
}
|
|
|
|
/// Location in the output file of one named section together with the custom
/// section that directly follows it and serves as padding. This is all the
/// data needed to shift sections around once the padding runs out.
const Section = struct {
    /// Size of a section header: 1 byte for the section id plus 5 bytes for
    /// the contents size, stored as fixed-width ULEB128.
    const header_size = 1 + 5;

    /// Position of the section id byte, counted from the start of the file.
    offset: u64,
    /// Number of bytes owned by the section: its header, its contents and
    /// the trailing custom padding section, if any.
    size: u64,

    /// Change the usable size of the section contents to `target` bytes by
    /// rewriting the header of the custom padding section that follows it.
    /// `current` is the present contents size and is not used yet.
    /// Shifting the following sections when the padding is exhausted is not
    /// implemented: `error.TODOImplementSectionShifting` is returned instead.
    fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void {
        // Smallest chunk that still fits everything: section header, the
        // target contents, the custom section header, the custom section
        // name and an otherwise empty custom section.
        const min_chunk_size = header_size + target + header_size + 1 + 0;
        if (min_chunk_size > self.size) return error.TODOImplementSectionShifting;

        // Whatever is left after both headers and the contents belongs to
        // the padding section.
        const padding_contents_size = self.size - target - 2 * header_size;
        assert(padding_contents_size >= 1);

        // One byte more than a header, for the name of the custom section,
        // which is the empty string.
        var padding_header: [header_size + 1]u8 = undefined;
        padding_header[0] = spec.custom_id;
        leb.writeUnsignedFixed(5, padding_header[1..header_size], @intCast(u32, padding_contents_size));
        padding_header[header_size] = 0;

        const padding_start = self.offset + header_size + target;
        try file.pwriteAll(&padding_header, padding_start);
    }
};
|
|
|
|
/// This can be used to manage the contents of any section which uses a vector
/// of contents. This interface maintains index stability while allowing for
/// reuse of "dead" indexes.
const VecSection = struct {
    /// Represents a single entry in the vector (e.g. a type in the type section)
    const Entry = struct {
        /// Offset from the start of the section contents in bytes
        offset: u32,
        /// Size in bytes of the entry
        size: u32,
    };

    /// Contents size of a section holding an empty vector: just the
    /// fixed-width ULEB128 element count.
    const empty_contents_size = 5;

    section: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,
    /// List of all entries in the contents of the section.
    entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){},
    /// List of indexes of unreferenced entries which may be
    /// overwritten and reused.
    dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},

    /// Write the headers of the section and of the custom padding section
    /// for a chunk of `initial_size` bytes starting at `offset`. The section
    /// starts out holding an empty vector.
    fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection {
        // section id, section size, empty vector, custom section id,
        // custom section size, empty custom section name
        var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined;

        assert(initial_size >= initial_data.len);

        // The padding section owns everything after the section header, the
        // empty vector and its own header. Its (empty) name is part of its
        // contents, so the name byte must be counted here. This is the same
        // formula Section.resize() uses.
        const padding_contents_size = initial_size - empty_contents_size - 2 * Section.header_size;

        comptime var i = 0;
        initial_data[i] = section_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], empty_contents_size);
        i += 5;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 0);
        i += 5;
        initial_data[i] = spec.custom_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], @intCast(u32, padding_contents_size));
        i += 5;
        initial_data[i] = 0;

        try file.pwriteAll(&initial_data, offset);

        return VecSection{
            .section = .{
                .offset = offset,
                .size = initial_size,
            },
            .contents_size = empty_contents_size,
        };
    }

    /// Free the entry list and the dead list.
    fn deinit(self: *VecSection, allocator: *Allocator) void {
        self.entries.deinit(allocator);
        self.dead_list.deinit(allocator);
    }

    /// Write a new entry into the file, returning the index used.
    /// A dead entry of exactly the same size is overwritten if there is one,
    /// otherwise the entry is appended to the end of the section.
    fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 {
        // First look for a dead entry we can reuse
        for (self.dead_list.items) |dead_idx, i| {
            const dead_entry = &self.entries.items[dead_idx];
            if (dead_entry.size == data.len) {
                // Found a dead entry of the right length, overwrite it
                try file.pwriteAll(data, self.section.offset + Section.header_size + dead_entry.offset);
                _ = self.dead_list.swapRemove(i);
                return dead_idx;
            }
        }

        // TODO: We can be more efficient if we special-case one or
        // more consecutive dead entries at the end of the vector.

        // We failed to find a dead entry to reuse, so the new entry goes to
        // the end of the section. Reserve all memory before touching the file
        // or any bookkeeping, so that running out of memory leaves both
        // untouched.
        const new_count = self.entries.items.len + 1;
        try self.entries.ensureCapacity(allocator, new_count);
        // Make sure the dead list always has enough space to store all free'd
        // entries. This makes it so that delEntry() cannot fail.
        // TODO: figure out a better way that doesn't waste as much memory
        try self.dead_list.ensureCapacity(allocator, new_count);

        const data_len = @intCast(u32, data.len);
        try self.section.resize(file, self.contents_size, self.contents_size + data_len);
        try file.pwriteAll(data, self.section.offset + Section.header_size + self.contents_size);
        self.entries.appendAssumeCapacity(.{
            .offset = self.contents_size,
            .size = data_len,
        });
        self.contents_size += data_len;

        // Update the size in the section header and the item count of
        // the contents vector.
        var size_and_count: [10]u8 = undefined;
        leb.writeUnsignedFixed(5, size_and_count[0..5], self.contents_size);
        leb.writeUnsignedFixed(5, size_and_count[5..], @intCast(u32, self.entries.items.len));
        try file.pwriteAll(&size_and_count, self.section.offset + 1);

        return @intCast(u32, self.entries.items.len - 1);
    }

    /// Mark the entry referenced by the given index as dead. This cannot
    /// fail because addEntry() keeps the dead list large enough to hold
    /// every entry.
    fn delEntry(self: *VecSection, index: u32) void {
        self.dead_list.appendAssumeCapacity(index);
    }
};
|
|
|
|
/// Manages the type section. Must only be used as the `types` field of a
/// `Wasm`, since the output file and the allocator are looked up through the
/// parent struct.
const Types = struct {
    typesec: VecSection,

    /// Write the headers of an empty type section occupying `initial_size`
    /// bytes of `file` starting at `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Types {
        const typesec = try VecSection.init(spec.types_id, file, offset, initial_size);
        return Types{ .typesec = typesec };
    }

    /// Free the bookkeeping lists of the section.
    fn deinit(self: *Types) void {
        const parent = @fieldParentPtr(Wasm, "types", self);
        self.typesec.deinit(parent.base.allocator);
    }

    /// Write the encoded function type `data` into the section and return
    /// the type index it was stored at.
    fn new(self: *Types, data: []const u8) !u32 {
        const parent = @fieldParentPtr(Wasm, "types", self);
        const file = parent.base.file.?;
        const allocator = parent.base.allocator;
        return self.typesec.addEntry(file, allocator, data);
    }

    /// Mark the type at `typeidx` as dead so that it may be overwritten.
    fn free(self: *Types, typeidx: u32) void {
        self.typesec.delEntry(typeidx);
    }
};
|
|
|
|
/// Manages the function section and the code section together. Must only be
/// used as the `funcs` field of a `Wasm`, since the output file, the
/// allocator and the type section are reached through the parent struct.
const Funcs = struct {
    /// This section needs special handling to keep the indexes matching with
    /// the codesec, so we cant just use a VecSection.
    funcsec: Section,
    /// The typeidx stored for each function, indexed by funcidx.
    func_types: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},
    codesec: VecSection,

    /// Write the headers of an empty function section and an empty code
    /// section at the given offsets, each owning the given number of bytes.
    fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs {
        return Funcs{
            // Only the location is kept: the function section has no
            // entry/dead lists of its own.
            .funcsec = (try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size)).section,
            .codesec = try VecSection.init(spec.code_id, file, code_offset, code_size),
        };
    }

    /// Free the type index list and the bookkeeping of the code section.
    fn deinit(self: *Funcs) void {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        self.func_types.deinit(wasm.base.allocator);
        self.codesec.deinit(wasm.base.allocator);
    }

    /// Add a new function to the binary, first finding space for and writing
    /// the code then writing the typeidx to the corresponding index in the
    /// funcsec. Returns the function index used.
    /// When the slot of a dead function is reused, the type that function
    /// referred to is marked as dead and replaced by `typeidx`.
    fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        const file = wasm.base.file.?;
        const allocator = wasm.base.allocator;

        assert(self.func_types.items.len == self.codesec.entries.items.len);

        // TODO: consider nop-padding the code if there is a close but not perfect fit
        const funcidx = try self.codesec.addEntry(file, allocator, code);

        if (self.func_types.items.len < self.codesec.entries.items.len) {
            // The code was appended, so the function section grows by one
            // typeidx as well.
            // u32 vector length + funcs_count u32s in the vector
            const current = 5 + @intCast(u32, self.func_types.items.len) * 5;
            try self.funcsec.resize(file, current, current + 5);
            try self.func_types.append(allocator, typeidx);

            // Update the size in the section header and the item count of
            // the contents vector.
            const count = @intCast(u32, self.func_types.items.len);
            var size_and_count: [10]u8 = undefined;
            leb.writeUnsignedFixed(5, size_and_count[0..5], 5 + count * 5);
            leb.writeUnsignedFixed(5, size_and_count[5..], count);
            try file.pwriteAll(&size_and_count, self.funcsec.offset + 1);
        } else {
            // We are overwriting a dead function and may now free the type
            wasm.types.free(self.func_types.items[funcidx]);
            // Remember the type now used by this slot. Without this the next
            // reuse of the slot would free the stale type a second time and
            // never free the current one.
            self.func_types.items[funcidx] = typeidx;
        }

        assert(self.func_types.items.len == self.codesec.entries.items.len);

        // Write the typeidx into the slot of the function section vector
        // that corresponds to funcidx.
        var typeidx_leb: [5]u8 = undefined;
        leb.writeUnsignedFixed(5, &typeidx_leb, typeidx);
        try file.pwriteAll(&typeidx_leb, self.funcsec.offset + Section.header_size + 5 + funcidx * 5);

        return funcidx;
    }

    /// Mark the function at `funcidx` as dead. Its code and type stay in the
    /// file until the slot is reused by new().
    fn free(self: *Funcs, funcidx: u32) void {
        self.codesec.delEntry(funcidx);
    }
};
|
|
|
|
/// Exports are tricky. We can't leave dead entries in the binary as they
/// would obviously be visible from the execution environment. The simplest
/// way to work around this is to re-emit the export section whenever
/// something changes. This also makes it easier to ensure exported function
/// and global indexes are updated as they change.
/// Must only be used as the `exports` field of a `Wasm`, since the output
/// file is reached through the parent struct.
const Exports = struct {
    exportsec: Section,
    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,
    /// If this is true, then exports will be rewritten on flush()
    dirty: bool,

    /// Write the headers of an empty export section occupying `initial_size`
    /// bytes of `file` starting at `offset`.
    fn init(file: fs.File, offset: u64, initial_size: u64) !Exports {
        return Exports{
            .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section,
            // An empty vector: only the fixed-width element count.
            .contents_size = 5,
            .dirty = false,
        };
    }

    /// Rewrite the whole export section from `module.decl_exports` and clear
    /// the dirty flag. The section is resized first if the total size of the
    /// exports changed.
    fn writeAll(self: *Exports, module: *Module) !void {
        const wasm = @fieldParentPtr(Wasm, "exports", self);
        const file = wasm.base.file.?;
        var buf: [5]u8 = undefined;

        // First ensure the section is the right size
        var export_count: u32 = 0;
        var new_contents_size: u32 = 5;
        for (module.decl_exports.entries.items) |entry| {
            for (entry.value) |e| {
                export_count += 1;
                new_contents_size += calcSize(e);
            }
        }
        if (new_contents_size != self.contents_size) {
            try self.exportsec.resize(file, self.contents_size, new_contents_size);
            leb.writeUnsignedFixed(5, &buf, new_contents_size);
            try file.pwriteAll(&buf, self.exportsec.offset + 1);
            // Remember the size that is now in the file. Otherwise a later
            // change back to the initial size would go unnoticed and leave
            // an outdated size in the section header.
            self.contents_size = new_contents_size;
        }

        try file.seekTo(self.exportsec.offset + Section.header_size);
        const writer = file.writer();

        // Length of the exports vec
        leb.writeUnsignedFixed(5, &buf, export_count);
        try writer.writeAll(&buf);

        for (module.decl_exports.entries.items) |entry|
            for (entry.value) |e| try writeExport(writer, e);

        self.dirty = false;
    }

    /// Return the total number of bytes an export will take.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn calcSize(e: *Module.Export) u32 {
        // LEB128 name length + name bytes + export type + LEB128 index
        return 5 + @intCast(u32, e.options.name.len) + 1 + 5;
    }

    /// Write the data for a single export to the given writer.
    /// Only exported functions are supported; any other kind of Decl returns
    /// `error.TODOImplementNonFnDeclsForWasm`.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn writeExport(writer: anytype, e: *Module.Export) !void {
        var buf: [5]u8 = undefined;

        // Export name length + name
        leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len));
        try writer.writeAll(&buf);
        try writer.writeAll(e.options.name);

        switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
            .Fn => {
                // Type of the export
                try writer.writeByte(0x00);
                // Exported function index
                leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx);
                try writer.writeAll(&buf);
            },
            else => return error.TODOImplementNonFnDeclsForWasm,
        }
    }
};
|