wasm-linker: Upstream zwld into stage2

- Converts previous `DeclBlock` into `Atom`'s to also make them compatible when
the rest of zlwd gets upstreamed and we can link with other object files.
- Resolves function signatures and removes any duplicates, saving us a lot of
potential bytes for larger projects.
- We now create symbols for each decl of the respective type
- We can now (but not implemented yet) perform proper relocations.
- Having symbols and segment_info allows us to create an object file
for wasm.
This commit is contained in:
Luuk de Gram 2021-11-24 19:09:37 +01:00
parent 17f057c556
commit f56ae69edd
No known key found for this signature in database
GPG Key ID: A8CFE58E4DC7D664
7 changed files with 1077 additions and 331 deletions

View File

@ -1,4 +1,8 @@
const testing = @import("std.zig").testing; ///! Contains all constants and types representing the wasm
///! binary format, as specified by:
///! https://webassembly.github.io/spec/core/
const std = @import("std.zig");
const testing = std.testing;
// TODO: Add support for multi-byte ops (e.g. table operations) // TODO: Add support for multi-byte ops (e.g. table operations)
@ -222,6 +226,18 @@ pub fn valtype(value: Valtype) u8 {
return @enumToInt(value); return @enumToInt(value);
} }
/// Reference types, where the funcref references to a function regardless of its type
/// and ref references an object from the embedder.
pub const RefType = enum(u8) {
funcref = 0x70,
externref = 0x6F,
};
/// Returns the integer value of a `Reftype`
pub fn reftype(value: RefType) u8 {
return @enumToInt(value);
}
test "Wasm - valtypes" { test "Wasm - valtypes" {
const _i32 = valtype(.i32); const _i32 = valtype(.i32);
const _i64 = valtype(.i64); const _i64 = valtype(.i64);
@ -234,6 +250,124 @@ test "Wasm - valtypes" {
try testing.expectEqual(@as(u8, 0x7C), _f64); try testing.expectEqual(@as(u8, 0x7C), _f64);
} }
/// Limits classify the size range of resizeable storage associated with memory types and table types.
pub const Limits = struct {
min: u32,
max: ?u32,
};
/// Initialization expressions are used to set the initial value on an object
/// when a wasm module is being loaded.
pub const InitExpression = union(enum) {
i32_const: i32,
i64_const: i64,
f32_const: f32,
f64_const: f64,
global_get: u32,
};
///
pub const Func = struct {
type_index: u32,
};
/// Tables are used to hold pointers to opaque objects.
/// This can either by any function, or an object from the host.
pub const Table = struct {
limits: Limits,
reftype: RefType,
};
/// Describes the layout of the memory where `min` represents
/// the minimal amount of pages, and the optional `max` represents
/// the max pages. When `null` will allow the host to determine the
/// amount of pages.
pub const Memory = struct {
limits: Limits,
};
/// Represents the type of a `Global` or an imported global.
pub const GlobalType = struct {
valtype: Valtype,
mutable: bool,
};
pub const Global = struct {
global_type: GlobalType,
init: InitExpression,
};
/// Notates an object to be exported from wasm
/// to the host.
pub const Export = struct {
name: []const u8,
kind: ExternalKind,
index: u32,
};
/// Element describes the layout of the table that can
/// be found at `table_index`
pub const Element = struct {
table_index: u32,
offset: InitExpression,
func_indexes: []const u32,
};
/// Imports are used to import objects from the host
pub const Import = struct {
module_name: []const u8,
name: []const u8,
kind: Kind,
pub const Kind = union(ExternalKind) {
function: u32,
table: Table,
memory: Limits,
global: GlobalType,
};
};
/// `Type` represents a function signature type containing both
/// a slice of parameters as well as a slice of return values.
pub const Type = struct {
params: []const Valtype,
returns: []const Valtype,
pub fn format(self: Type, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
_ = opt;
try writer.writeByte('(');
for (self.params) |param, i| {
try writer.print("{s}", .{@tagName(param)});
if (i + 1 != self.params.len) {
try writer.writeAll(", ");
}
}
try writer.writeAll(") -> ");
if (self.returns.len == 0) {
try writer.writeAll("nil");
} else {
for (self.returns) |return_ty, i| {
try writer.print("{s}", .{@tagName(return_ty)});
if (i + 1 != self.returns.len) {
try writer.writeAll(", ");
}
}
}
}
pub fn eql(self: Type, other: Type) bool {
return std.mem.eql(Valtype, self.params, other.params) and
std.mem.eql(Valtype, self.returns, other.returns);
}
pub fn deinit(self: *Type, gpa: *std.mem.Allocator) void {
gpa.free(self.params);
gpa.free(self.returns);
self.* = undefined;
}
};
/// Wasm module sections as per spec: /// Wasm module sections as per spec:
/// https://webassembly.github.io/spec/core/binary/modules.html /// https://webassembly.github.io/spec/core/binary/modules.html
pub const Section = enum(u8) { pub const Section = enum(u8) {
@ -249,6 +383,8 @@ pub const Section = enum(u8) {
element, element,
code, code,
data, data,
data_count,
_,
}; };
/// Returns the integer value of a given `Section` /// Returns the integer value of a given `Section`
@ -270,7 +406,7 @@ pub fn externalKind(val: ExternalKind) u8 {
return @enumToInt(val); return @enumToInt(val);
} }
// types // type constants
pub const element_type: u8 = 0x70; pub const element_type: u8 = 0x70;
pub const function_type: u8 = 0x60; pub const function_type: u8 = 0x60;
pub const result_type: u8 = 0x40; pub const result_type: u8 = 0x40;
@ -280,7 +416,7 @@ pub const block_empty: u8 = 0x40;
// binary constants // binary constants
pub const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm pub const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
pub const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1 pub const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1 (MVP)
// Each wasm page size is 64kB // Each wasm page size is 64kB
pub const page_size = 64 * 1024; pub const page_size = 64 * 1024;

View File

@ -518,9 +518,6 @@ blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
}) = .{}, }) = .{},
/// `bytes` contains the wasm bytecode belonging to the 'code' section. /// `bytes` contains the wasm bytecode belonging to the 'code' section.
code: ArrayList(u8), code: ArrayList(u8),
/// Contains the generated function type bytecode for the current function
/// found in `decl`
func_type_data: ArrayList(u8),
/// The index the next local generated will have /// The index the next local generated will have
/// NOTE: arguments share the index with locals therefore the first variable /// NOTE: arguments share the index with locals therefore the first variable
/// will have the index that comes after the last argument's index /// will have the index that comes after the last argument's index
@ -539,7 +536,7 @@ locals: std.ArrayListUnmanaged(u8),
/// The Target we're emitting (used to call intInfo) /// The Target we're emitting (used to call intInfo)
target: std.Target, target: std.Target,
/// Represents the wasm binary file that is being linked. /// Represents the wasm binary file that is being linked.
bin_file: *link.File, bin_file: *link.File.Wasm,
/// Table with the global error set. Consists of every error found in /// Table with the global error set. Consists of every error found in
/// the compiled code. Each error name maps to a `Module.ErrorInt` which is emitted /// the compiled code. Each error name maps to a `Module.ErrorInt` which is emitted
/// during codegen to determine the error value. /// during codegen to determine the error value.
@ -577,6 +574,7 @@ pub fn deinit(self: *Self) void {
self.locals.deinit(self.gpa); self.locals.deinit(self.gpa);
self.mir_instructions.deinit(self.gpa); self.mir_instructions.deinit(self.gpa);
self.mir_extra.deinit(self.gpa); self.mir_extra.deinit(self.gpa);
self.code.deinit();
self.* = undefined; self.* = undefined;
} }
@ -734,43 +732,44 @@ fn allocLocal(self: *Self, ty: Type) InnerError!WValue {
return WValue{ .local = initial_index }; return WValue{ .local = initial_index };
} }
fn genFunctype(self: *Self) InnerError!void { /// Generates a `wasm.Type` from a given function type.
assert(self.decl.has_tv); /// Memory is owned by the caller.
const ty = self.decl.ty; fn genFunctype(self: *Self, fn_ty: Type) !wasm.Type {
const writer = self.func_type_data.writer(); var params = std.ArrayList(wasm.Valtype).init(self.gpa);
defer params.deinit();
try writer.writeByte(wasm.function_type); var returns = std.ArrayList(wasm.Valtype).init(self.gpa);
defer returns.deinit();
// param types // param types
try leb.writeULEB128(writer, @intCast(u32, ty.fnParamLen())); if (fn_ty.fnParamLen() != 0) {
if (ty.fnParamLen() != 0) { const fn_params = try self.gpa.alloc(Type, fn_ty.fnParamLen());
const params = try self.gpa.alloc(Type, ty.fnParamLen()); defer self.gpa.free(fn_params);
defer self.gpa.free(params); fn_ty.fnParamTypes(fn_params);
ty.fnParamTypes(params); for (fn_params) |param_type| {
for (params) |param_type| { if (!param_type.hasCodeGenBits()) continue;
// Can we maybe get the source index of each param? try params.append(try self.typeToValtype(param_type));
const val_type = try self.genValtype(param_type);
try writer.writeByte(val_type);
} }
} }
// return type // return type
const return_type = ty.fnReturnType(); const return_type = fn_ty.fnReturnType();
switch (return_type.zigTypeTag()) { switch (return_type.zigTypeTag()) {
.Void, .NoReturn => try leb.writeULEB128(writer, @as(u32, 0)), .Void, .NoReturn => {},
.Struct => return self.fail("TODO: Implement struct as return type for wasm", .{}), .Struct => return self.fail("TODO: Implement struct as return type for wasm", .{}),
.Optional => return self.fail("TODO: Implement optionals as return type for wasm", .{}), .Optional => return self.fail("TODO: Implement optionals as return type for wasm", .{}),
else => { else => try returns.append(try self.typeToValtype(return_type)),
try leb.writeULEB128(writer, @as(u32, 1));
const val_type = try self.genValtype(return_type);
try writer.writeByte(val_type);
},
} }
return wasm.Type{
.params = params.toOwnedSlice(),
.returns = returns.toOwnedSlice(),
};
} }
pub fn genFunc(self: *Self) InnerError!Result { pub fn genFunc(self: *Self) InnerError!Result {
try self.genFunctype(); var func_type = try self.genFunctype(self.decl.ty);
// TODO: check for and handle death of instructions defer func_type.deinit(self.gpa);
self.decl.fn_link.wasm.type_index = try self.bin_file.putOrGetFuncType(func_type);
var cc_result = try self.resolveCallingConventionValues(self.decl.ty); var cc_result = try self.resolveCallingConventionValues(self.decl.ty);
defer cc_result.deinit(self.gpa); defer cc_result.deinit(self.gpa);
@ -791,7 +790,7 @@ pub fn genFunc(self: *Self) InnerError!Result {
var emit: Emit = .{ var emit: Emit = .{
.mir = mir, .mir = mir,
.bin_file = self.bin_file, .bin_file = &self.bin_file.base,
.code = &self.code, .code = &self.code,
.locals = self.locals.items, .locals = self.locals.items,
.decl = self.decl, .decl = self.decl,
@ -813,8 +812,10 @@ pub fn genFunc(self: *Self) InnerError!Result {
pub fn gen(self: *Self, ty: Type, val: Value) InnerError!Result { pub fn gen(self: *Self, ty: Type, val: Value) InnerError!Result {
switch (ty.zigTypeTag()) { switch (ty.zigTypeTag()) {
.Fn => { .Fn => {
try self.genFunctype();
if (val.tag() == .extern_fn) { if (val.tag() == .extern_fn) {
var func_type = try self.genFunctype(self.decl.ty);
defer func_type.deinit(self.gpa);
self.decl.fn_link.wasm.type_index = try self.bin_file.putOrGetFuncType(func_type);
return Result.appended; // don't need code body for extern functions return Result.appended; // don't need code body for extern functions
} }
return self.fail("TODO implement wasm codegen for function pointers", .{}); return self.fail("TODO implement wasm codegen for function pointers", .{});
@ -1079,7 +1080,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
try self.emitWValue(arg_val); try self.emitWValue(arg_val);
} }
try self.addLabel(.call, target.link.wasm.symbol_index); try self.addLabel(.call, target.link.wasm.sym_index);
const ret_ty = target.ty.fnReturnType(); const ret_ty = target.ty.fnReturnType();
switch (ret_ty.zigTypeTag()) { switch (ret_ty.zigTypeTag()) {
@ -1364,13 +1365,14 @@ fn emitConstant(self: *Self, val: Value, ty: Type) InnerError!void {
decl.alive = true; decl.alive = true;
// offset into the offset table within the 'data' section // offset into the offset table within the 'data' section
const ptr_width = self.target.cpu.arch.ptrBitWidth() / 8; // const ptr_width = self.target.cpu.arch.ptrBitWidth() / 8;
try self.addImm32(@bitCast(i32, decl.link.wasm.offset_index * ptr_width)); // try self.addImm32(@bitCast(i32, decl.link.wasm.offset_index * ptr_width));
// memory instruction followed by their memarg immediate // memory instruction followed by their memarg immediate
// memarg ::== x:u32, y:u32 => {align x, offset y} // memarg ::== x:u32, y:u32 => {align x, offset y}
const extra_index = try self.addExtra(Mir.MemArg{ .offset = 0, .alignment = 4 }); const extra_index = try self.addExtra(Mir.MemArg{ .offset = 0, .alignment = 4 });
try self.addInst(.{ .tag = .i32_load, .data = .{ .payload = extra_index } }); try self.addInst(.{ .tag = .i32_load, .data = .{ .payload = extra_index } });
@panic("REDO!\n");
} else return self.fail("Wasm TODO: emitConstant for other const pointer tag {s}", .{val.tag()}); } else return self.fail("Wasm TODO: emitConstant for other const pointer tag {s}", .{val.tag()});
}, },
.Void => {}, .Void => {},

View File

@ -29,8 +29,6 @@ const InnerError = error{
pub fn emitMir(emit: *Emit) InnerError!void { pub fn emitMir(emit: *Emit) InnerError!void {
const mir_tags = emit.mir.instructions.items(.tag); const mir_tags = emit.mir.instructions.items(.tag);
// Reserve space to write the size after generating the code.
try emit.code.resize(5);
// write the locals in the prologue of the function body // write the locals in the prologue of the function body
// before we emit the function body when lowering MIR // before we emit the function body when lowering MIR
try emit.emitLocals(); try emit.emitLocals();
@ -157,11 +155,6 @@ pub fn emitMir(emit: *Emit) InnerError!void {
.i64_extend32_s => try emit.emitTag(tag), .i64_extend32_s => try emit.emitTag(tag),
} }
} }
// Fill in the size of the generated code to the reserved space at the
// beginning of the buffer.
const size = emit.code.items.len - 5;
leb128.writeUnsignedFixed(5, emit.code.items[0..5], @intCast(u32, size));
} }
fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError { fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError {
@ -269,8 +262,9 @@ fn emitCall(emit: *Emit, inst: Mir.Inst.Index) !void {
// The function index immediate argument will be filled in using this data // The function index immediate argument will be filled in using this data
// in link.Wasm.flush(). // in link.Wasm.flush().
// TODO: Replace this with proper relocations saved in the Atom. // TODO: Replace this with proper relocations saved in the Atom.
try emit.decl.fn_link.wasm.idx_refs.append(emit.bin_file.allocator, .{ try emit.decl.link.wasm.relocs.append(emit.bin_file.allocator, .{
.offset = offset, .offset = offset,
.decl = label, .index = label,
.relocation_type = .R_WASM_FUNCTION_INDEX_LEB,
}); });
} }

View File

@ -10,6 +10,7 @@ const leb = std.leb;
const log = std.log.scoped(.link); const log = std.log.scoped(.link);
const wasm = std.wasm; const wasm = std.wasm;
const Atom = @import("Wasm/Atom.zig");
const Module = @import("../Module.zig"); const Module = @import("../Module.zig");
const Compilation = @import("../Compilation.zig"); const Compilation = @import("../Compilation.zig");
const CodeGen = @import("../arch/wasm/CodeGen.zig"); const CodeGen = @import("../arch/wasm/CodeGen.zig");
@ -22,99 +23,78 @@ const TypedValue = @import("../TypedValue.zig");
const LlvmObject = @import("../codegen/llvm.zig").Object; const LlvmObject = @import("../codegen/llvm.zig").Object;
const Air = @import("../Air.zig"); const Air = @import("../Air.zig");
const Liveness = @import("../Liveness.zig"); const Liveness = @import("../Liveness.zig");
const Symbol = @import("Wasm/Symbol.zig");
const types = @import("Wasm/types.zig");
pub const base_tag = link.File.Tag.wasm; pub const base_tag = link.File.Tag.wasm;
/// deprecated: Use `@import("Wasm/Atom.zig");`
pub const DeclBlock = Atom;
base: link.File, base: link.File,
/// If this is not null, an object file is created by LLVM and linked with LLD afterwards. /// If this is not null, an object file is created by LLVM and linked with LLD afterwards.
llvm_object: ?*LlvmObject = null, llvm_object: ?*LlvmObject = null,
/// List of all function Decls to be written to the output file. The index of
/// each Decl in this list at the time of writing the binary is used as the
/// function index. In the event where ext_funcs' size is not 0, the index of
/// each function is added on top of the ext_funcs' length.
/// TODO: can/should we access some data structure in Module directly?
funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
/// List of all extern function Decls to be written to the `import` section of the
/// wasm binary. The position in the list defines the function index
ext_funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
/// When importing objects from the host environment, a name must be supplied. /// When importing objects from the host environment, a name must be supplied.
/// LLVM uses "env" by default when none is given. This would be a good default for Zig /// LLVM uses "env" by default when none is given. This would be a good default for Zig
/// to support existing code. /// to support existing code.
/// TODO: Allow setting this through a flag? /// TODO: Allow setting this through a flag?
host_name: []const u8 = "env", host_name: []const u8 = "env",
/// The last `DeclBlock` that was initialized will be saved here. /// The last `DeclBlock` that was initialized will be saved here.
last_block: ?*DeclBlock = null, last_atom: ?*Atom = null,
/// Table with offsets, each element represents an offset with the value being
/// the offset into the 'data' section where the data lives
offset_table: std.ArrayListUnmanaged(u32) = .{},
/// List of offset indexes which are free to be used for new decl's.
/// Each element's value points to an index into the offset_table.
offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
/// List of all `Decl` that are currently alive. /// List of all `Decl` that are currently alive.
/// This is ment for bookkeeping so we can safely cleanup all codegen memory /// This is ment for bookkeeping so we can safely cleanup all codegen memory
/// when calling `deinit` /// when calling `deinit`
symbols: std.ArrayListUnmanaged(*Module.Decl) = .{}, decls: std.AutoHashMapUnmanaged(*Module.Decl, void) = .{},
/// List of all symbols.
symbols: std.ArrayListUnmanaged(Symbol) = .{},
/// List of symbol indexes which are free to be used. /// List of symbol indexes which are free to be used.
symbols_free_list: std.ArrayListUnmanaged(u32) = .{}, symbols_free_list: std.ArrayListUnmanaged(u32) = .{},
/// Maps atoms to their segment index
atoms: std.AutoHashMapUnmanaged(u32, *Atom) = .{},
/// Represents the index into `segments` where the 'code' section
/// lives.
code_section_index: ?u32 = null,
/// The count of imported functions. This number will be appended
/// to the function indexes as their index starts at the lowest non-extern function.
imported_functions_count: u32 = 0,
/// List of all 'extern' declarations
imports: std.ArrayListUnmanaged(wasm.Import) = .{},
/// List of indexes of symbols representing extern declarations.
import_symbols: std.ArrayListUnmanaged(u32) = .{},
/// Represents non-synthetic section entries.
/// Used for code, data and custom sections.
segments: std.ArrayListUnmanaged(Segment) = .{},
/// Maps a data segment key (such as .rodata) to the index into `segments`.
data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
/// A list of `types.Segment` which provide meta data
/// about a data symbol such as its name
segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
pub const FnData = struct { // Output sections
/// Generated code for the type of the function /// Output type section
functype: std.ArrayListUnmanaged(u8), func_types: std.ArrayListUnmanaged(wasm.Type) = .{},
/// Generated code for the body of the function /// Output function section
code: std.ArrayListUnmanaged(u8), functions: std.ArrayListUnmanaged(wasm.Func) = .{},
/// Locations in the generated code where function indexes must be filled in. /// Output global section
/// This must be kept ordered by offset. globals: std.ArrayListUnmanaged(wasm.Global) = .{},
/// `decl` is the symbol_index of the target.
idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: u32 }),
pub const empty: FnData = .{ /// Indirect function table, used to call function pointers
.functype = .{}, /// When this is non-zero, we must emit a table entry,
.code = .{}, /// as well as an 'elements' section.
.idx_refs = .{}, function_table: std.ArrayListUnmanaged(Symbol) = .{},
};
pub const Segment = struct {
alignment: u32,
size: u32,
offset: u32,
}; };
pub const DeclBlock = struct { pub const FnData = struct {
/// Determines whether the `DeclBlock` has been initialized for codegen. type_index: u32,
init: bool,
/// Index into the `symbols` list.
symbol_index: u32,
/// Index into the offset table
offset_index: u32,
/// The size of the block and how large part of the data section it occupies.
/// Will be 0 when the Decl will not live inside the data section and `data` will be undefined.
size: u32,
/// Points to the previous and next blocks.
/// Can be used to find the total size, and used to calculate the `offset` based on the previous block.
prev: ?*DeclBlock,
next: ?*DeclBlock,
/// Pointer to data that will be written to the 'data' section.
/// This data either lives in `FnData.code` or is externally managed.
/// For data that does not live inside the 'data' section, this field will be undefined. (size == 0).
data: [*]const u8,
pub const empty: DeclBlock = .{ pub const empty: FnData = .{
.init = false, .type_index = undefined,
.symbol_index = 0,
.offset_index = 0,
.size = 0,
.prev = null,
.next = null,
.data = undefined,
}; };
/// Unplugs the `DeclBlock` from the chain
fn unplug(self: *DeclBlock) void {
if (self.prev) |prev| {
prev.next = self.next;
}
if (self.next) |next| {
next.prev = self.prev;
}
self.next = null;
self.prev = null;
}
}; };
pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm { pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
@ -139,6 +119,22 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
try file.writeAll(&(wasm.magic ++ wasm.version)); try file.writeAll(&(wasm.magic ++ wasm.version));
// As sym_index '0' is reserved, we use it for our stack pointer symbol
const global = try wasm_bin.globals.addOne(allocator);
global.* = .{
.global_type = .{
.valtype = .i32,
.mutable = true,
},
.init = .{ .i32_const = 0 },
};
const symbol = try wasm_bin.symbols.addOne(allocator);
symbol.* = .{
.name = "__stack_pointer",
.tag = .global,
.flags = 0,
.index = 0,
};
return wasm_bin; return wasm_bin;
} }
@ -160,63 +156,58 @@ pub fn deinit(self: *Wasm) void {
if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator);
} }
for (self.symbols.items) |decl, symbol_index| { var decl_it = self.decls.keyIterator();
// Check if we already freed all memory for the symbol while (decl_it.next()) |decl_ptr| {
// TODO: Audit this when we refactor the linker. const decl = decl_ptr.*;
var already_freed = false; decl.link.wasm.deinit(self.base.allocator);
for (self.symbols_free_list.items) |index| {
if (symbol_index == index) {
already_freed = true;
break;
}
}
if (already_freed) continue;
decl.fn_link.wasm.functype.deinit(self.base.allocator);
decl.fn_link.wasm.code.deinit(self.base.allocator);
decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
} }
self.funcs.deinit(self.base.allocator); for (self.func_types.items) |func_type| {
self.ext_funcs.deinit(self.base.allocator); self.base.allocator.free(func_type.params);
self.offset_table.deinit(self.base.allocator); self.base.allocator.free(func_type.returns);
self.offset_table_free_list.deinit(self.base.allocator); }
for (self.segment_info.items) |segment_info| {
self.base.allocator.free(segment_info.name);
}
self.decls.deinit(self.base.allocator);
self.symbols.deinit(self.base.allocator); self.symbols.deinit(self.base.allocator);
self.symbols_free_list.deinit(self.base.allocator); self.symbols_free_list.deinit(self.base.allocator);
self.atoms.deinit(self.base.allocator);
self.segments.deinit(self.base.allocator);
self.data_segments.deinit(self.base.allocator);
self.segment_info.deinit(self.base.allocator);
// free output sections
self.imports.deinit(self.base.allocator);
self.import_symbols.deinit(self.base.allocator);
self.func_types.deinit(self.base.allocator);
self.functions.deinit(self.base.allocator);
self.globals.deinit(self.base.allocator);
self.function_table.deinit(self.base.allocator);
} }
pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void { pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
if (decl.link.wasm.init) return; if (decl.link.wasm.sym_index != 0) return;
try self.offset_table.ensureUnusedCapacity(self.base.allocator, 1);
try self.symbols.ensureUnusedCapacity(self.base.allocator, 1); try self.symbols.ensureUnusedCapacity(self.base.allocator, 1);
try self.decls.putNoClobber(self.base.allocator, decl, {});
const block = &decl.link.wasm; const atom = &decl.link.wasm;
block.init = true;
if (self.offset_table_free_list.popOrNull()) |index| { var symbol: Symbol = .{
block.offset_index = index; .name = undefined, // will be set after updateDecl
} else { .flags = 0,
block.offset_index = @intCast(u32, self.offset_table.items.len); .tag = undefined, // will be set after updateDecl
_ = self.offset_table.addOneAssumeCapacity(); .index = undefined, // will be set after updateDecl
} };
if (self.symbols_free_list.popOrNull()) |index| { if (self.symbols_free_list.popOrNull()) |index| {
block.symbol_index = index; atom.sym_index = index;
self.symbols.items[block.symbol_index] = decl; self.symbols.items[index] = symbol;
} else { } else {
block.symbol_index = @intCast(u32, self.symbols.items.len); atom.sym_index = @intCast(u32, self.symbols.items.len);
self.symbols.appendAssumeCapacity(decl); self.symbols.appendAssumeCapacity(symbol);
}
self.offset_table.items[block.offset_index] = 0;
if (decl.ty.zigTypeTag() == .Fn) {
switch (decl.val.tag()) {
// dependent on function type, appends it to the correct list
.function => try self.funcs.append(self.base.allocator, decl),
.extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
else => unreachable,
}
} }
} }
@ -228,25 +219,19 @@ pub fn updateFunc(self: *Wasm, module: *Module, func: *Module.Fn, air: Air, live
if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(module, func, air, liveness); if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(module, func, air, liveness);
} }
const decl = func.owner_decl; const decl = func.owner_decl;
assert(decl.link.wasm.init); // Must call allocateDeclIndexes() assert(decl.link.wasm.sym_index != 0); // Must call allocateDeclIndexes()
const fn_data = &decl.fn_link.wasm;
fn_data.functype.items.len = 0;
fn_data.code.items.len = 0;
fn_data.idx_refs.items.len = 0;
var codegen: CodeGen = .{ var codegen: CodeGen = .{
.gpa = self.base.allocator, .gpa = self.base.allocator,
.air = air, .air = air,
.liveness = liveness, .liveness = liveness,
.values = .{}, .values = .{},
.code = fn_data.code.toManaged(self.base.allocator), .code = std.ArrayList(u8).init(self.base.allocator),
.func_type_data = fn_data.functype.toManaged(self.base.allocator),
.decl = decl, .decl = decl,
.err_msg = undefined, .err_msg = undefined,
.locals = .{}, .locals = .{},
.target = self.base.options.target, .target = self.base.options.target,
.bin_file = &self.base, .bin_file = self,
.global_error_set = self.base.options.module.?.global_error_set, .global_error_set = self.base.options.module.?.global_error_set,
}; };
defer codegen.deinit(); defer codegen.deinit();
@ -272,26 +257,19 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
if (build_options.have_llvm) { if (build_options.have_llvm) {
if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(module, decl); if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(module, decl);
} }
assert(decl.link.wasm.init); // Must call allocateDeclIndexes() assert(decl.link.wasm.sym_index != 0); // Must call allocateDeclIndexes()
// TODO don't use this for non-functions
const fn_data = &decl.fn_link.wasm;
fn_data.functype.items.len = 0;
fn_data.code.items.len = 0;
fn_data.idx_refs.items.len = 0;
var codegen: CodeGen = .{ var codegen: CodeGen = .{
.gpa = self.base.allocator, .gpa = self.base.allocator,
.air = undefined, .air = undefined,
.liveness = undefined, .liveness = undefined,
.values = .{}, .values = .{},
.code = fn_data.code.toManaged(self.base.allocator), .code = std.ArrayList(u8).init(self.base.allocator),
.func_type_data = fn_data.functype.toManaged(self.base.allocator),
.decl = decl, .decl = decl,
.err_msg = undefined, .err_msg = undefined,
.locals = .{}, .locals = .{},
.target = self.base.options.target, .target = self.base.options.target,
.bin_file = &self.base, .bin_file = self,
.global_error_set = self.base.options.module.?.global_error_set, .global_error_set = self.base.options.module.?.global_error_set,
}; };
defer codegen.deinit(); defer codegen.deinit();
@ -310,33 +288,87 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
} }
fn finishUpdateDecl(self: *Wasm, decl: *Module.Decl, result: CodeGen.Result, codegen: *CodeGen) !void { fn finishUpdateDecl(self: *Wasm, decl: *Module.Decl, result: CodeGen.Result, codegen: *CodeGen) !void {
const fn_data: *FnData = &decl.fn_link.wasm;
fn_data.code = codegen.code.toUnmanaged();
fn_data.functype = codegen.func_type_data.toUnmanaged();
const code: []const u8 = switch (result) { const code: []const u8 = switch (result) {
.appended => @as([]const u8, fn_data.code.items), .appended => @as([]const u8, codegen.code.items),
.externally_managed => |payload| payload, .externally_managed => |payload| payload,
}; };
const block = &decl.link.wasm; const atom: *Atom = &decl.link.wasm;
if (decl.ty.zigTypeTag() != .Fn) { atom.size = @intCast(u32, code.len);
block.size = @intCast(u32, code.len); try atom.code.appendSlice(self.base.allocator, code);
block.data = code.ptr;
}
// If we're updating an existing decl, unplug it first // If we're updating an existing decl, unplug it first
// to avoid infinite loops due to earlier links // to avoid infinite loops due to earlier links
block.unplug(); atom.unplug();
if (self.last_block) |last| { const symbol: *Symbol = &self.symbols.items[atom.sym_index];
if (last != block) { if (decl.isExtern()) {
last.next = block; symbol.setUndefined(true);
block.prev = last; }
} symbol.name = decl.name;
const final_index = switch (decl.ty.zigTypeTag()) {
.Fn => result: {
const type_index = decl.fn_link.wasm.type_index;
const index = @intCast(u32, self.functions.items.len);
try self.functions.append(self.base.allocator, .{ .type_index = type_index });
symbol.tag = .function;
symbol.index = index;
atom.alignment = 1;
if (self.code_section_index == null) {
self.code_section_index = @intCast(u32, self.segments.items.len);
try self.segments.append(self.base.allocator, .{
.alignment = atom.alignment,
.size = atom.size,
.offset = atom.offset,
});
} else {
self.segments.items[self.code_section_index.?].size += atom.size;
}
break :result self.code_section_index.?;
},
else => result: {
const gop = try self.data_segments.getOrPut(self.base.allocator, ".rodata");
const atom_index = if (gop.found_existing) blk: {
self.segments.items[gop.value_ptr.*].size += atom.size;
break :blk gop.value_ptr.*;
} else blk: {
const index = @intCast(u32, self.segments.items.len) - @boolToInt(self.code_section_index != null);
try self.segments.append(self.base.allocator, .{
.alignment = atom.alignment,
.size = atom.size,
.offset = atom.offset,
});
gop.value_ptr.* = index;
break :blk index;
};
const info_index = @intCast(u32, self.segment_info.items.len);
const segment_name = try std.mem.concat(self.base.allocator, u8, &.{
".rodata.",
std.mem.span(symbol.name),
});
errdefer self.base.allocator.free(segment_name);
try self.segment_info.append(self.base.allocator, .{
.name = segment_name,
.alignment = atom.alignment,
.flags = 0,
});
symbol.tag = .data;
symbol.index = info_index;
atom.alignment = decl.ty.abiAlignment(self.base.options.target);
break :result atom_index;
},
};
if (self.atoms.getPtr(final_index)) |last| {
last.*.next = atom;
atom.prev = last.*;
atom.offset = last.*.offset + last.*.size;
last.* = atom;
} else {
try self.atoms.putNoClobber(self.base.allocator, final_index, atom);
} }
self.last_block = block;
} }
pub fn updateDeclExports( pub fn updateDeclExports(
@ -358,29 +390,28 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl); if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl);
} }
if (self.getFuncidx(decl)) |func_idx| { const atom = &decl.link.wasm;
switch (decl.val.tag()) {
.function => _ = self.funcs.swapRemove(func_idx),
.extern_fn => _ = self.ext_funcs.swapRemove(func_idx),
else => unreachable,
}
}
const block = &decl.link.wasm;
if (self.last_block == block) { if (self.last_atom == atom) {
self.last_block = block.prev; self.last_atom = atom.prev;
} }
block.unplug(); atom.unplug();
self.symbols_free_list.append(self.base.allocator, atom.sym_index) catch {};
atom.deinit(self.base.allocator);
_ = self.decls.remove(decl);
}
self.offset_table_free_list.append(self.base.allocator, decl.link.wasm.offset_index) catch {}; fn createUndefinedSymbol(self: *Wasm, decl: *Module.Decl, symbol_index: u32) !void {
self.symbols_free_list.append(self.base.allocator, block.symbol_index) catch {}; var symbol: *Symbol = &self.symbols.items[symbol_index];
symbol.setUndefined(true);
block.init = false; switch (decl.ty.zigTypeTag()) {
.Fn => {
decl.fn_link.wasm.functype.deinit(self.base.allocator); symbol.setIndex(self.imported_functions_count);
decl.fn_link.wasm.code.deinit(self.base.allocator); self.imported_functions_count += 1;
decl.fn_link.wasm.idx_refs.deinit(self.base.allocator); },
else => @panic("TODO: Wasm implement extern non-function types"),
}
} }
pub fn flush(self: *Wasm, comp: *Compilation) !void { pub fn flush(self: *Wasm, comp: *Compilation) !void {
@ -398,27 +429,20 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const file = self.base.file.?; const file = self.base.file.?;
const header_size = 5 + 1; const header_size = 5 + 1;
// ptr_width in bytes
const ptr_width = self.base.options.target.cpu.arch.ptrBitWidth() / 8;
// The size of the offset table in bytes
// The table contains all decl's with its corresponding offset into
// the 'data' section
const offset_table_size = @intCast(u32, self.offset_table.items.len * ptr_width);
// The size of the emulated stack // The size of the emulated stack
const stack_size = @intCast(u32, self.base.options.stack_size_override orelse std.wasm.page_size); const stack_size = @intCast(u32, self.base.options.stack_size_override orelse std.wasm.page_size);
// The size of the data, this together with `offset_table_size` amounts to the var data_size: u32 = 0;
// total size of the 'data' section for (self.segments.items) |segment, index| {
var first_decl: ?*DeclBlock = null; // skip 'code' segments as they do not count towards data section size
const data_size: u32 = if (self.last_block) |last| blk: { if (self.code_section_index) |code_index| {
var size = last.size; if (index == code_index) continue;
var cur = last;
while (cur.prev) |prev| : (cur = prev) {
size += prev.size;
} }
first_decl = cur; data_size += segment.size;
break :blk size; }
} else 0;
// set the stack size on the global
self.globals.items[0].init = .{ .i32_const = @bitCast(i32, data_size + stack_size) };
// No need to rewrite the magic/version header // No need to rewrite the magic/version header
try file.setEndPos(@sizeOf(@TypeOf(wasm.magic ++ wasm.version))); try file.setEndPos(@sizeOf(@TypeOf(wasm.magic ++ wasm.version)));
@ -427,46 +451,62 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
// Type section // Type section
{ {
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
// extern functions are defined in the wasm binary first through the `import` for (self.func_types.items) |func_type| {
// section, so define their func types first try leb.writeULEB128(writer, wasm.function_type);
for (self.ext_funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.functype.items); try leb.writeULEB128(writer, @intCast(u32, func_type.params.len));
for (self.funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.functype.items); for (func_type.params) |param_ty| try leb.writeULEB128(writer, wasm.valtype(param_ty));
try leb.writeULEB128(writer, @intCast(u32, func_type.returns.len));
for (func_type.returns) |ret_ty| try leb.writeULEB128(writer, wasm.valtype(ret_ty));
}
try writeVecSectionHeader( try writeVecSectionHeader(
file, file,
header_offset, header_offset,
.type, .type,
@intCast(u32, (try file.getPos()) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.ext_funcs.items.len + self.funcs.items.len), @intCast(u32, self.func_types.items.len),
); );
} }
// Import section // Import section
{ if (self.import_symbols.items.len > 0) {
// TODO: implement non-functions imports
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
for (self.ext_funcs.items) |decl, typeidx| { for (self.import_symbols.items) |symbol_index| {
const import_symbol = self.symbols.items[symbol_index];
std.debug.assert(import_symbol.isUndefined());
try leb.writeULEB128(writer, @intCast(u32, self.host_name.len)); try leb.writeULEB128(writer, @intCast(u32, self.host_name.len));
try writer.writeAll(self.host_name); try writer.writeAll(self.host_name);
// wasm requires the length of the import name with no null-termination const name = std.mem.span(import_symbol.name);
const decl_len = mem.len(decl.name); try leb.writeULEB128(writer, @intCast(u32, name.len));
try leb.writeULEB128(writer, @intCast(u32, decl_len)); try writer.writeAll(name);
try writer.writeAll(decl.name[0..decl_len]);
// emit kind and the function type try writer.writeByte(wasm.externalKind(import_symbol.tag.externalType()));
try writer.writeByte(wasm.externalKind(.function)); const import = self.findImport(import_symbol.index, import_symbol.tag.externalType()).?;
try leb.writeULEB128(writer, @intCast(u32, typeidx)); switch (import.kind) {
.function => |type_index| try leb.writeULEB128(writer, type_index),
.global => |global_type| {
try leb.writeULEB128(writer, wasm.valtype(global_type.valtype));
try writer.writeByte(@boolToInt(global_type.mutable));
},
.table => |table| {
try leb.writeULEB128(writer, wasm.reftype(table.reftype));
try emitLimits(writer, table.limits);
},
.memory => |limits| {
try emitLimits(writer, limits);
},
}
} }
try writeVecSectionHeader( try writeVecSectionHeader(
file, file,
header_offset, header_offset,
.import, .import,
@intCast(u32, (try file.getPos()) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.ext_funcs.items.len), @intCast(u32, self.imports.items.len),
); );
} }
@ -474,9 +514,11 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
{ {
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
for (self.funcs.items) |_, typeidx| { for (self.functions.items) |function| {
const func_idx = @intCast(u32, self.getFuncIdxOffset() + typeidx); try leb.writeULEB128(
try leb.writeULEB128(writer, func_idx); writer,
@intCast(u32, function.type_index),
);
} }
try writeVecSectionHeader( try writeVecSectionHeader(
@ -484,7 +526,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
header_offset, header_offset,
.function, .function,
@intCast(u32, (try file.getPos()) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.funcs.items.len), @intCast(u32, self.functions.items.len),
); );
} }
@ -500,7 +542,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
writer, writer,
try std.math.divCeil( try std.math.divCeil(
u32, u32,
offset_table_size + data_size + stack_size, data_size + stack_size,
std.wasm.page_size, std.wasm.page_size,
), ),
); );
@ -515,29 +557,21 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
// Global section (used to emit stack pointer) // Global section (used to emit stack pointer)
{ {
// We emit the emulated stack at the end of the data section,
// 'growing' downwards towards the program memory.
// TODO: Have linker resolve the offset table, so we can emit the stack
// at the start so we can't overwrite program memory with the stack.
const sp_value = offset_table_size + data_size + std.wasm.page_size;
const mutable = true; // stack pointer MUST be mutable
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
try writer.writeByte(wasm.valtype(.i32)); for (self.globals.items) |global| {
try writer.writeByte(@boolToInt(mutable)); try writer.writeByte(wasm.valtype(global.global_type.valtype));
try writer.writeByte(@boolToInt(global.global_type.mutable));
// set the initial value of the stack pointer to the data size + stack size try emitInit(writer, global.init);
try writer.writeByte(wasm.opcode(.i32_const)); }
try leb.writeILEB128(writer, @bitCast(i32, sp_value));
try writer.writeByte(wasm.opcode(.end));
try writeVecSectionHeader( try writeVecSectionHeader(
file, file,
header_offset, header_offset,
.global, .global,
@intCast(u32, (try file.getPos()) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@as(u32, 1), @intCast(u32, self.globals.items.len),
); );
} }
@ -546,6 +580,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
var count: u32 = 0; var count: u32 = 0;
var func_index: u32 = 0;
for (module.decl_exports.values()) |exports| { for (module.decl_exports.values()) |exports| {
for (exports) |exprt| { for (exports) |exprt| {
// Export name length + name // Export name length + name
@ -557,7 +592,8 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
// Type of the export // Type of the export
try writer.writeByte(wasm.externalKind(.function)); try writer.writeByte(wasm.externalKind(.function));
// Exported function index // Exported function index
try leb.writeULEB128(writer, self.getFuncidx(exprt.exported_decl).?); try leb.writeULEB128(writer, func_index);
func_index += 1;
}, },
else => return error.TODOImplementNonFnDeclsForWasm, else => return error.TODOImplementNonFnDeclsForWasm,
} }
@ -585,75 +621,108 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
} }
// Code section // Code section
{ if (self.code_section_index) |code_index| {
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
for (self.funcs.items) |decl| { var atom = self.atoms.get(code_index).?.getFirst();
const fn_data = &decl.fn_link.wasm; while (true) {
try leb.writeULEB128(writer, atom.size);
try writer.writeAll(atom.code.items);
// Write the already generated code to the file, inserting atom = atom.next orelse break;
// function indexes where required.
for (fn_data.idx_refs.items) |idx_ref| {
const relocatable_decl = self.symbols.items[idx_ref.decl];
const index = self.getFuncidx(relocatable_decl).?;
leb.writeUnsignedFixed(5, fn_data.code.items[idx_ref.offset..][0..5], index);
}
try writer.writeAll(fn_data.code.items);
} }
try writeVecSectionHeader( try writeVecSectionHeader(
file, file,
header_offset, header_offset,
.code, .code,
@intCast(u32, (try file.getPos()) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.funcs.items.len), @intCast(u32, self.functions.items.len),
); );
} }
// Data section // Data section
if (data_size != 0) { if (self.data_segments.count() != 0) {
const header_offset = try reserveVecSectionHeader(file); const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer(); const writer = file.writer();
// index to memory section (currently, there can only be 1 memory section in wasm)
try leb.writeULEB128(writer, @as(u32, 0));
// offset into data section var it = self.data_segments.iterator();
try writer.writeByte(wasm.opcode(.i32_const)); while (it.next()) |entry| {
try leb.writeILEB128(writer, @as(i32, 0)); // do not output 'bss' section
try writer.writeByte(wasm.opcode(.end)); if (std.mem.eql(u8, entry.key_ptr.*, ".bss")) continue;
const atom_index = entry.value_ptr.*;
var atom = self.atoms.getPtr(atom_index).?.*.getFirst();
var segment = self.segments.items[atom_index];
const total_size = offset_table_size + data_size; // flag and index to memory section (currently, there can only be 1 memory section in wasm)
try leb.writeULEB128(writer, @as(u32, 0));
// offset table + data size // offset into data section
try leb.writeULEB128(writer, total_size); try writer.writeByte(wasm.opcode(.i32_const));
try leb.writeILEB128(writer, @as(i32, 0));
try writer.writeByte(wasm.opcode(.end));
// fill in the offset table and the data segments // offset table + data size
const file_offset = try file.getPos(); try leb.writeULEB128(writer, segment.size);
var cur = first_decl;
var data_offset = offset_table_size;
while (cur) |cur_block| : (cur = cur_block.next) {
if (cur_block.size == 0) continue;
assert(cur_block.init);
const offset = (cur_block.offset_index) * ptr_width; // fill in the offset table and the data segments
var buf: [4]u8 = undefined; var current_offset: u32 = 0;
std.mem.writeIntLittle(u32, &buf, data_offset); while (true) {
std.debug.assert(current_offset == atom.offset);
std.debug.assert(atom.code.items.len == atom.size);
try file.pwriteAll(&buf, file_offset + offset); try writer.writeAll(atom.code.items);
try file.pwriteAll(cur_block.data[0..cur_block.size], file_offset + data_offset);
data_offset += cur_block.size; current_offset += atom.size;
if (atom.next) |next| {
atom = next;
} else break;
}
} }
try file.seekTo(file_offset + data_offset);
try writeVecSectionHeader( try writeVecSectionHeader(
file, file,
header_offset, header_offset,
.data, .data,
@intCast(u32, (file_offset + data_offset) - header_offset - header_size), @intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, 1), // only 1 data section @intCast(u32, 1), // only 1 data section
); );
} }
} }
fn emitLimits(writer: anytype, limits: wasm.Limits) !void {
try leb.writeULEB128(writer, @boolToInt(limits.max != null));
try leb.writeULEB128(writer, limits.min);
if (limits.max) |max| {
try leb.writeULEB128(writer, max);
}
}
fn emitInit(writer: anytype, init_expr: wasm.InitExpression) !void {
switch (init_expr) {
.i32_const => |val| {
try writer.writeByte(wasm.opcode(.i32_const));
try leb.writeILEB128(writer, val);
},
.i64_const => |val| {
try writer.writeByte(wasm.opcode(.i64_const));
try leb.writeILEB128(writer, val);
},
.f32_const => |val| {
try writer.writeByte(wasm.opcode(.f32_const));
try writer.writeIntLittle(u32, @bitCast(u32, val));
},
.f64_const => |val| {
try writer.writeByte(wasm.opcode(.f64_const));
try writer.writeIntLittle(u64, @bitCast(u64, val));
},
.global_get => |val| {
try writer.writeByte(wasm.opcode(.global_get));
try leb.writeULEB128(writer, val);
},
}
try writer.writeByte(wasm.opcode(.end));
}
fn linkWithLLD(self: *Wasm, comp: *Compilation) !void { fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
const tracy = trace(@src()); const tracy = trace(@src());
defer tracy.end(); defer tracy.end();
@ -970,32 +1039,6 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
} }
} }
/// Get the current index of a given Decl in the function list
/// This will correctly provide the index, regardless whether the function is extern or not
/// TODO: we could maintain a hash map to potentially make this simpler
fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {
var offset: u32 = 0;
const slice = switch (decl.val.tag()) {
.function => blk: {
// when the target is a regular function, we have to calculate
// the offset of where the index starts
offset += self.getFuncIdxOffset();
break :blk self.funcs.items;
},
.extern_fn => self.ext_funcs.items,
else => return null,
};
return for (slice) |func, idx| {
if (func == decl) break @intCast(u32, offset + idx);
} else null;
}
/// Based on the size of `ext_funcs` returns the
/// offset of the function indices
fn getFuncIdxOffset(self: Wasm) u32 {
return @intCast(u32, self.ext_funcs.items.len);
}
fn reserveVecSectionHeader(file: fs.File) !u64 { fn reserveVecSectionHeader(file: fs.File) !u64 {
// section id + fixed leb contents size + fixed leb vector length // section id + fixed leb contents size + fixed leb vector length
const header_size = 1 + 5 + 5; const header_size = 1 + 5 + 5;
@ -1012,3 +1055,36 @@ fn writeVecSectionHeader(file: fs.File, offset: u64, section: wasm.Section, size
leb.writeUnsignedFixed(5, buf[6..], items); leb.writeUnsignedFixed(5, buf[6..], items);
try file.pwriteAll(&buf, offset); try file.pwriteAll(&buf, offset);
} }
/// Searches for an a matching function signature, when not found
/// a new entry will be made. The index of the existing/new signature will be returned.
pub fn putOrGetFuncType(self: *Wasm, func_type: wasm.Type) !u32 {
var index: u32 = 0;
while (index < self.func_types.items.len) : (index += 1) {
if (self.func_types.items[index].eql(func_type)) return index;
}
// functype does not exist.
const params = try self.base.allocator.dupe(wasm.Valtype, func_type.params);
errdefer self.base.allocator.free(params);
const returns = try self.base.allocator.dupe(wasm.Valtype, func_type.returns);
errdefer self.base.allocator.free(returns);
try self.func_types.append(self.base.allocator, .{
.params = params,
.returns = returns,
});
return index;
}
/// From a given index and an `ExternalKind`, finds the corresponding Import.
/// This is due to indexes for imports being unique per type, rather than across all imports.
fn findImport(self: Wasm, index: u32, external_type: wasm.ExternalKind) ?*wasm.Import {
var current_index: u32 = 0;
for (self.imports.items) |*import| {
if (import.kind == external_type) {
if (current_index == index) return import;
current_index += 1;
}
}
return null;
}

182
src/link/Wasm/Atom.zig Normal file
View File

@ -0,0 +1,182 @@
const Atom = @This();
const std = @import("std");
const types = @import("types.zig");
const Wasm = @import("../Wasm.zig");
const Symbol = @import("Symbol.zig");
const leb = std.leb;
const log = std.log.scoped(.zld);
const mem = std.mem;
const Allocator = mem.Allocator;
/// symbol index of the symbol representing this atom
sym_index: u32,
/// Size of the atom, used to calculate section sizes in the final binary
size: u32,
/// List of relocations belonging to this atom
relocs: std.ArrayListUnmanaged(types.Relocation) = .{},
/// Contains the binary data of an atom, which can be non-relocated
code: std.ArrayListUnmanaged(u8) = .{},
/// For code this is 1, for data this is set to the highest value of all segments
alignment: u32,
/// Offset into the section where the atom lives, this already accounts
/// for alignment.
offset: u32,
/// Next atom in relation to this atom.
/// When null, this atom is the last atom
next: ?*Atom,
/// Previous atom in relation to this atom.
/// is null when this atom is the first in its order
prev: ?*Atom,
/// Represents a default empty wasm `Atom`
pub const empty: Atom = .{
.alignment = 0,
.next = null,
.offset = 0,
.prev = null,
.size = 0,
.sym_index = 0,
};
/// Frees all resources owned by this `Atom`.
/// Also destroys itself, making any usage of this atom illegal.
pub fn deinit(self: *Atom, gpa: *Allocator) void {
self.relocs.deinit(gpa);
self.code.deinit(gpa);
}
pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
_ = options;
writer.print("Atom{{ .sym_index = {d}, .alignment = {d}, .size = {d}, .offset = 0x{x:0>8} }}", .{
self.sym_index,
self.alignment,
self.size,
self.offset,
});
}
/// Returns the first `Atom` from a given atom
pub fn getFirst(self: *Atom) *Atom {
var tmp = self;
while (tmp.prev) |prev| tmp = prev;
return tmp;
}
/// Returns the last `Atom` from a given atom
pub fn getLast(self: *Atom) *Atom {
var tmp = self;
while (tmp.next) |next| tmp = next;
return tmp;
}
/// Unplugs the `Atom` from the chain
pub fn unplug(self: *Atom) void {
if (self.prev) |prev| {
prev.next = self.next;
}
if (self.next) |next| {
next.prev = self.prev;
}
self.next = null;
self.prev = null;
}
/// Resolves the relocations within the atom, writing the new value
/// at the calculated offset.
pub fn resolveRelocs(self: *Atom, wasm_bin: *const Wasm) !void {
const object = wasm_bin.objects.items[self.file];
const symbol: Symbol = object.symtable[self.sym_index];
log.debug("Resolving relocs in atom '{s}' count({d})", .{
symbol.name,
self.relocs.items.len,
});
for (self.relocs.items) |reloc| {
const value = self.relocationValue(reloc, wasm_bin);
log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{
object.symtable[reloc.index].name,
symbol.name,
reloc.offset,
value,
});
switch (reloc.relocation_type) {
.R_WASM_TABLE_INDEX_I32,
.R_WASM_FUNCTION_OFFSET_I32,
.R_WASM_GLOBAL_INDEX_I32,
.R_WASM_MEMORY_ADDR_I32,
.R_WASM_SECTION_OFFSET_I32,
=> std.mem.writeIntLittle(u32, self.code.items[reloc.offset..][0..4], @intCast(u32, value)),
.R_WASM_TABLE_INDEX_I64,
.R_WASM_MEMORY_ADDR_I64,
=> std.mem.writeIntLittle(u64, self.code.items[reloc.offset..][0..8], value),
.R_WASM_GLOBAL_INDEX_LEB,
.R_WASM_EVENT_INDEX_LEB,
.R_WASM_FUNCTION_INDEX_LEB,
.R_WASM_MEMORY_ADDR_LEB,
.R_WASM_MEMORY_ADDR_SLEB,
.R_WASM_TABLE_INDEX_SLEB,
.R_WASM_TABLE_NUMBER_LEB,
.R_WASM_TYPE_INDEX_LEB,
=> leb.writeUnsignedFixed(5, self.code.items[reloc.offset..][0..5], @intCast(u32, value)),
.R_WASM_MEMORY_ADDR_LEB64,
.R_WASM_MEMORY_ADDR_SLEB64,
.R_WASM_TABLE_INDEX_SLEB64,
=> leb.writeUnsignedFixed(10, self.code.items[reloc.offset..][0..10], value),
}
}
}
/// From a given `relocation` will return the new value to be written.
/// All values will be represented as a `u64` as all values can fit within it.
/// The final value must be casted to the correct size.
fn relocationValue(self: *Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 {
const object = wasm_bin.objects.items[self.file];
const symbol: Symbol = object.symtable[relocation.index];
return switch (relocation.relocation_type) {
.R_WASM_FUNCTION_INDEX_LEB => symbol.kind.function.functionIndex(),
.R_WASM_TABLE_NUMBER_LEB => symbol.kind.table.table.table_idx,
.R_WASM_TABLE_INDEX_I32,
.R_WASM_TABLE_INDEX_I64,
.R_WASM_TABLE_INDEX_SLEB,
.R_WASM_TABLE_INDEX_SLEB64,
=> symbol.getTableIndex() orelse 0,
.R_WASM_TYPE_INDEX_LEB => symbol.kind.function.func.type_idx,
.R_WASM_GLOBAL_INDEX_I32,
.R_WASM_GLOBAL_INDEX_LEB,
=> symbol.kind.global.global.global_idx,
.R_WASM_MEMORY_ADDR_I32,
.R_WASM_MEMORY_ADDR_I64,
.R_WASM_MEMORY_ADDR_LEB,
.R_WASM_MEMORY_ADDR_LEB64,
.R_WASM_MEMORY_ADDR_SLEB,
.R_WASM_MEMORY_ADDR_SLEB64,
=> blk: {
if (symbol.isUndefined() and (symbol.kind == .data or symbol.isWeak())) {
return 0;
}
const segment_name = object.segment_info[symbol.index().?].outputName();
const atom_index = wasm_bin.data_segments.get(segment_name).?;
var target_atom = wasm_bin.atoms.getPtr(atom_index).?.*.getFirst();
while (true) {
if (target_atom.sym_index == relocation.index) break;
if (target_atom.next) |next| {
target_atom = next;
} else break;
}
const segment = wasm_bin.segments.items[atom_index];
const base = wasm_bin.options.global_base orelse 1024;
const offset = target_atom.offset + segment.offset;
break :blk offset + base + (relocation.addend orelse 0);
},
.R_WASM_EVENT_INDEX_LEB => symbol.kind.event.index,
.R_WASM_SECTION_OFFSET_I32,
.R_WASM_FUNCTION_OFFSET_I32,
=> relocation.offset,
};
}
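// Illustrative sketch (not part of this commit): for the R_WASM_MEMORY_ADDR_*
// relocations above, the resolved value is a virtual address in linear memory:
// the atom's offset within its merged output segment, plus the segment's own
// offset, plus the global base (1024 unless overridden), plus the relocation
// addend. All numbers below are made up for demonstration.
test "Atom - memory address relocation value (sketch)" {
    const global_base: u32 = 1024;
    const segment_offset: u32 = 16; // offset of the merged ".data" output segment
    const atom_offset: u32 = 8; // offset of the target atom within that segment
    const addend: u32 = 4; // offset into the symbol itself
    const value = atom_offset + segment_offset + global_base + addend;
    try std.testing.expectEqual(@as(u32, 1052), value);
}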

157
src/link/Wasm/Symbol.zig Normal file

@@ -0,0 +1,157 @@
//! Represents a wasm symbol, describing its kind,
//! name and properties.
const Symbol = @This();
const std = @import("std");
const types = @import("types.zig");
/// Bitfield containing the flags for a symbol.
/// Can contain any of the flags defined in `Flag`
flags: u32,
/// Symbol name; when the symbol is undefined, the name is taken from the import.
name: [*:0]const u8,
/// The tag representing the kind of this symbol.
tag: Tag,
/// Index into the object's list that corresponds to the set `tag`.
/// NOTE: This will be set to `undefined` when `tag` is `data`
/// and the symbol is undefined.
index: u32,
pub const Tag = enum {
function,
data,
global,
section,
event,
table,
/// For a given symbol tag, returns the matching `std.wasm.ExternalKind`.
/// Asserts the given tag can be represented as an external kind.
pub fn externalType(self: Tag) std.wasm.ExternalKind {
return switch (self) {
.function => .function,
.global => .global,
.data => .memory,
.section => unreachable, // Not an external type
.event => unreachable, // Not an external type
.table => .table,
};
}
};
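// Illustrative sketch (not part of this commit): only symbols that can appear
// in the import/export sections map onto `std.wasm.ExternalKind`; data symbols
// live in linear memory and therefore map onto `.memory`.
test "Symbol - tag to external kind (sketch)" {
    try std.testing.expectEqual(std.wasm.ExternalKind.function, Tag.externalType(.function));
    try std.testing.expectEqual(std.wasm.ExternalKind.memory, Tag.externalType(.data));
    try std.testing.expectEqual(std.wasm.ExternalKind.table, Tag.externalType(.table));
}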
pub const Flag = enum(u32) {
/// Indicates a weak symbol.
/// When linking multiple modules defining the same symbol, all weak definitions are discarded
/// in favour of the strong definition. When no strong definition exists, all but one weak definition are discarded.
/// If multiple definitions remain, we get an error: symbol collision.
WASM_SYM_BINDING_WEAK = 0x1,
/// Indicates a local, non-exported, non-module-linked symbol.
/// The names of local symbols are not required to be unique, unlike non-local symbols.
WASM_SYM_BINDING_LOCAL = 0x2,
/// Represents the binding of a symbol, indicating if it's local or not, and weak or not.
WASM_SYM_BINDING_MASK = 0x3,
/// Indicates a hidden symbol. Hidden symbols will not be exported to the link result, but may
/// link to other modules.
WASM_SYM_VISIBILITY_HIDDEN = 0x4,
/// Indicates an undefined symbol. For non-data symbols, this must match whether the symbol is
/// an import or is defined. For data symbols, however, it determines whether a segment is specified.
WASM_SYM_UNDEFINED = 0x10,
/// Indicates a symbol that is intended to be exported from the wasm module to the host environment.
/// This differs from the visibility flag as this flag affects the static linker.
WASM_SYM_EXPORTED = 0x20,
/// Indicates the symbol uses an explicit symbol name, rather than reusing the name from a wasm import.
/// Allows remapping imports from foreign WASM modules into local symbols with a different name.
WASM_SYM_EXPLICIT_NAME = 0x40,
/// Indicates the symbol is to be included in the linker output, regardless of whether it is used or has any references to it.
WASM_SYM_NO_STRIP = 0x80,
/// Indicates a symbol is TLS
WASM_SYM_TLS = 0x100,
};
/// Returns whether the given symbol must be imported from the
/// host environment.
pub fn requiresImport(self: Symbol) bool {
if (!self.isUndefined()) return false;
if (self.isWeak()) return false;
if (self.tag == .data) return false;
// if (self.isDefined() and self.isWeak()) return true; //TODO: Only when building shared lib
return true;
}
pub fn hasFlag(self: Symbol, flag: Flag) bool {
return self.flags & @enumToInt(flag) != 0;
}
pub fn setFlag(self: *Symbol, flag: Flag) void {
self.flags |= @enumToInt(flag);
}
pub fn isUndefined(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_UNDEFINED) != 0;
}
pub fn setUndefined(self: *Symbol, is_undefined: bool) void {
if (is_undefined) {
self.setFlag(.WASM_SYM_UNDEFINED);
} else {
self.flags &= ~@enumToInt(Flag.WASM_SYM_UNDEFINED);
}
}
pub fn isDefined(self: Symbol) bool {
return !self.isUndefined();
}
pub fn isVisible(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_VISIBILITY_HIDDEN) == 0;
}
pub fn isLocal(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_BINDING_LOCAL) != 0;
}
pub fn isGlobal(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_BINDING_LOCAL) == 0;
}
pub fn isHidden(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_VISIBILITY_HIDDEN) != 0;
}
pub fn isNoStrip(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_NO_STRIP) != 0;
}
pub fn isExported(self: Symbol) bool {
if (self.isUndefined() or self.isLocal()) return false;
if (self.isHidden()) return false;
return true;
}
pub fn isWeak(self: Symbol) bool {
return self.flags & @enumToInt(Flag.WASM_SYM_BINDING_WEAK) != 0;
}
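// Illustrative sketch (not part of this commit): the helpers above are thin
// wrappers over the `flags` bitfield. The symbol below is hypothetical; its
// fields carry no meaning beyond demonstrating the flag helpers.
test "Symbol - flag helpers (sketch)" {
    var sym: Symbol = .{ .flags = 0, .name = "foo", .tag = .function, .index = 0 };
    try std.testing.expect(sym.isDefined());
    try std.testing.expect(sym.isGlobal()); // no WASM_SYM_BINDING_LOCAL bit set
    sym.setUndefined(true);
    try std.testing.expect(sym.isUndefined());
    sym.setFlag(.WASM_SYM_BINDING_WEAK);
    try std.testing.expect(sym.isWeak());
    try std.testing.expect(!sym.requiresImport()); // weak undefined symbols are not imported
}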
/// Formats the symbol into human-readable text
pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
_ = options;
const kind_fmt: u8 = switch (self.tag) {
.function => 'F',
.data => 'D',
.global => 'G',
.section => 'S',
.event => 'E',
.table => 'T',
};
const visible: []const u8 = if (self.isVisible()) "yes" else "no";
const binding: []const u8 = if (self.isLocal()) "local" else "global";
try writer.print(
"{c} binding={s} visible={s} id={d} name={s}",
.{ kind_fmt, binding, visible, self.index, self.name },
);
}

199
src/link/Wasm/types.zig Normal file

@@ -0,0 +1,199 @@
//! This file contains all constants and types related to wasm's object format.
const std = @import("std");
pub const Relocation = struct {
/// Represents the type of the `Relocation`
relocation_type: RelocationType,
/// Offset of the value to rewrite relative to the relevant section's contents.
/// When `offset` is zero, its position is immediately after the id and size of the section.
offset: u32,
/// The index of the symbol used.
/// When the type is `R_WASM_TYPE_INDEX_LEB`, it represents the index of the type.
index: u32,
/// Addend to add to the address.
/// This field is only non-null for `R_WASM_MEMORY_ADDR_*`, `R_WASM_FUNCTION_OFFSET_I32` and `R_WASM_SECTION_OFFSET_I32`.
addend: ?u32 = null,
/// All possible relocation types currently existing.
/// This enum is exhaustive; as the spec is a work in progress, new types
/// may be added that we do not yet support. Rather than emit an invalid binary
/// in such cases, we show an error instead.
pub const RelocationType = enum(u8) {
R_WASM_FUNCTION_INDEX_LEB = 0,
R_WASM_TABLE_INDEX_SLEB = 1,
R_WASM_TABLE_INDEX_I32 = 2,
R_WASM_MEMORY_ADDR_LEB = 3,
R_WASM_MEMORY_ADDR_SLEB = 4,
R_WASM_MEMORY_ADDR_I32 = 5,
R_WASM_TYPE_INDEX_LEB = 6,
R_WASM_GLOBAL_INDEX_LEB = 7,
R_WASM_FUNCTION_OFFSET_I32 = 8,
R_WASM_SECTION_OFFSET_I32 = 9,
R_WASM_EVENT_INDEX_LEB = 10,
R_WASM_GLOBAL_INDEX_I32 = 13,
R_WASM_MEMORY_ADDR_LEB64 = 14,
R_WASM_MEMORY_ADDR_SLEB64 = 15,
R_WASM_MEMORY_ADDR_I64 = 16,
R_WASM_TABLE_INDEX_SLEB64 = 18,
R_WASM_TABLE_INDEX_I64 = 19,
R_WASM_TABLE_NUMBER_LEB = 20,
/// Returns true for relocation types where the `addend` field is present.
pub fn addendIsPresent(self: RelocationType) bool {
return switch (self) {
.R_WASM_MEMORY_ADDR_LEB,
.R_WASM_MEMORY_ADDR_SLEB,
.R_WASM_MEMORY_ADDR_I32,
.R_WASM_MEMORY_ADDR_LEB64,
.R_WASM_MEMORY_ADDR_SLEB64,
.R_WASM_MEMORY_ADDR_I64,
.R_WASM_FUNCTION_OFFSET_I32,
.R_WASM_SECTION_OFFSET_I32,
=> true,
else => false,
};
}
};
/// Checks the relocation type of the given `Relocation` and returns
/// true when the relocation references a function call or the address of a function.
pub fn isFunction(self: Relocation) bool {
return switch (self.relocation_type) {
.R_WASM_FUNCTION_INDEX_LEB,
.R_WASM_TABLE_INDEX_SLEB,
=> true,
else => false,
};
}
pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
_ = options;
try writer.print("{s} offset=0x{x:0>6} symbol={d}", .{
@tagName(self.relocation_type),
self.offset,
self.index,
});
}
};
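// Illustrative sketch (not part of this commit): memory-address and offset
// relocations carry an addend, while index relocations do not. The field
// values below are made up for demonstration.
test "types - relocation addend presence (sketch)" {
    const reloc: Relocation = .{
        .relocation_type = .R_WASM_MEMORY_ADDR_LEB,
        .offset = 0x10,
        .index = 2,
        .addend = 8,
    };
    try std.testing.expect(reloc.relocation_type.addendIsPresent());
    try std.testing.expect(!Relocation.RelocationType.addendIsPresent(.R_WASM_FUNCTION_INDEX_LEB));
}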
pub const SubsectionType = enum(u8) {
WASM_SEGMENT_INFO = 5,
WASM_INIT_FUNCS = 6,
WASM_COMDAT_INFO = 7,
WASM_SYMBOL_TABLE = 8,
};
pub const Segment = struct {
/// Segment's name, encoded as UTF-8 bytes.
name: []const u8,
/// The required alignment of the segment, encoded as a power of 2
alignment: u32,
/// Bitfield containing flags for a segment
flags: u32,
pub fn outputName(self: Segment) []const u8 {
if (std.mem.startsWith(u8, self.name, ".rodata.")) {
return ".rodata";
} else if (std.mem.startsWith(u8, self.name, ".text.")) {
return ".text";
} else if (std.mem.startsWith(u8, self.name, ".data.")) {
return ".data";
} else if (std.mem.startsWith(u8, self.name, ".bss.")) {
return ".bss";
}
return self.name;
}
};
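// Illustrative sketch (not part of this commit): per-symbol input segments such
// as ".text.myFunction" are merged into one output segment by stripping the
// symbol-specific suffix; unknown names pass through unchanged. The segment
// values below are made up for demonstration.
test "types - segment output name (sketch)" {
    const text: Segment = .{ .name = ".text.myFunction", .alignment = 1, .flags = 0 };
    try std.testing.expectEqualStrings(".text", text.outputName());
    const custom: Segment = .{ .name = ".custom_section", .alignment = 1, .flags = 0 };
    try std.testing.expectEqualStrings(".custom_section", custom.outputName());
}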
pub const InitFunc = struct {
/// Priority of the init function
priority: u32,
/// The symbol index of init function (not the function index).
symbol_index: u32,
};
pub const Comdat = struct {
name: []const u8,
/// Must be zero, no flags are currently defined by the tool-convention.
flags: u32,
symbols: []const ComdatSym,
};
pub const ComdatSym = struct {
kind: Type,
/// Index of the data segment/function/global/event/table within a WASM module.
/// The object must not be an import.
index: u32,
pub const Type = enum(u8) {
WASM_COMDAT_DATA = 0,
WASM_COMDAT_FUNCTION = 1,
WASM_COMDAT_GLOBAL = 2,
WASM_COMDAT_EVENT = 3,
WASM_COMDAT_TABLE = 4,
WASM_COMDAT_SECTION = 5,
};
};
pub const Feature = struct {
/// Provides information about the usage of the feature.
/// - '0x2b' (+): Object uses this feature, and the link fails if feature is not in the allowed set.
/// - '0x2d' (-): Object does not use this feature, and the link fails if this feature is in the allowed set.
/// - '0x3d' (=): Object uses this feature, and the link fails if this feature is not in the allowed set,
/// or if any object does not use this feature.
prefix: Prefix,
/// Type of the feature, must be unique in the sequence of features.
tag: Tag,
pub const Tag = enum {
atomics,
bulk_memory,
exception_handling,
multivalue,
mutable_globals,
nontrapping_fptoint,
sign_ext,
simd128,
tail_call,
};
pub const Prefix = enum(u8) {
used = '+',
disallowed = '-',
required = '=',
};
pub fn toString(self: Feature) []const u8 {
return switch (self.tag) {
.bulk_memory => "bulk-memory",
.exception_handling => "exception-handling",
.mutable_globals => "mutable-globals",
.nontrapping_fptoint => "nontrapping-fptoint",
.sign_ext => "sign-ext",
.tail_call => "tail-call",
else => @tagName(self.tag),
};
}
pub fn format(self: Feature, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void {
_ = opt;
_ = fmt;
try writer.print("{c} {s}", .{ self.prefix, self.toString() });
}
};
pub const known_features = std.ComptimeStringMap(Feature.Tag, .{
.{ "atomics", .atomics },
.{ "bulk-memory", .bulk_memory },
.{ "exception-handling", .exception_handling },
.{ "multivalue", .multivalue },
.{ "mutable-globals", .mutable_globals },
.{ "nontrapping-fptoint", .nontrapping_fptoint },
.{ "sign-ext", .sign_ext },
.{ "simd128", .simd128 },
.{ "tail-call", .tail_call },
});
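// Illustrative sketch (not part of this commit): feature names are stored as
// strings in an object's "target_features" custom section; `known_features`
// maps a name back to its tag, and `toString` goes the other way.
test "types - feature name round-trip (sketch)" {
    const tag = known_features.get("bulk-memory").?;
    try std.testing.expectEqual(Feature.Tag.bulk_memory, tag);
    const feature: Feature = .{ .prefix = .used, .tag = tag };
    try std.testing.expectEqualStrings("bulk-memory", feature.toString());
}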