zig/src/codegen/wasm.zig
2021-04-05 14:37:04 +08:00

584 lines
22 KiB
Zig

const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const assert = std.debug.assert;
const leb = std.leb;
const mem = std.mem;
const wasm = std.wasm;
const Module = @import("../Module.zig");
const Decl = Module.Decl;
const ir = @import("../ir.zig");
const Inst = ir.Inst;
const Type = @import("../type.zig").Type;
const Value = @import("../value.zig").Value;
const Compilation = @import("../Compilation.zig");
const AnyMCValue = @import("../codegen.zig").AnyMCValue;
const LazySrcLoc = Module.LazySrcLoc;
/// Wasm Value, created when generating an instruction
const WValue = union(enum) {
none: void,
/// Index of the local variable
local: u32,
/// Instruction holding a constant `Value`
constant: *Inst,
/// Offset position in the list of bytecode instructions
code_offset: usize,
/// The label of the block, used by breaks to find its relative distance
block_idx: u32,
};
/// Hashmap to store generated `WValue` for each `Inst`
pub const ValueTable = std.AutoHashMapUnmanaged(*Inst, WValue);
/// Code represents the `Code` section of wasm that
/// belongs to a function
pub const Context = struct {
/// Reference to the function declaration the code
/// section belongs to
decl: *Decl,
gpa: *mem.Allocator,
/// Table to save `WValue`'s generated by an `Inst`
values: ValueTable,
/// `bytes` contains the wasm bytecode belonging to the 'code' section.
code: ArrayList(u8),
/// Contains the generated function type bytecode for the current function
/// found in `decl`
func_type_data: ArrayList(u8),
/// The index the next local generated will have
/// NOTE: arguments share the index with locals therefore the first variable
/// will have the index that comes after the last argument's index
local_index: u32 = 0,
/// If codegen fails, an error messages will be allocated and saved in `err_msg`
err_msg: *Module.ErrorMsg,
/// Current block depth. Used to calculate the relative difference between a break
/// and block
block_depth: u32 = 0,
/// List of all locals' types generated throughout this declaration
/// used to emit locals count at start of 'code' section.
locals: std.ArrayListUnmanaged(u8),
const InnerError = error{
OutOfMemory,
CodegenFail,
};
pub fn deinit(self: *Context) void {
self.values.deinit(self.gpa);
self.locals.deinit(self.gpa);
self.* = undefined;
}
/// Sets `err_msg` on `Context` and returns `error.CodegemFail` which is caught in link/Wasm.zig
fn fail(self: *Context, src: LazySrcLoc, comptime fmt: []const u8, args: anytype) InnerError {
const src_loc = src.toSrcLocWithDecl(self.decl);
self.err_msg = try Module.ErrorMsg.create(self.gpa, src_loc, fmt, args);
return error.CodegenFail;
}
/// Resolves the `WValue` for the given instruction `inst`
/// When the given instruction has a `Value`, it returns a constant instead
fn resolveInst(self: Context, inst: *Inst) WValue {
if (!inst.ty.hasCodeGenBits()) return .none;
if (inst.value()) |_| {
return WValue{ .constant = inst };
}
return self.values.get(inst).?; // Instruction does not dominate all uses!
}
/// Using a given `Type`, returns the corresponding wasm value type
fn genValtype(self: *Context, src: LazySrcLoc, ty: Type) InnerError!u8 {
return switch (ty.tag()) {
.f32 => wasm.valtype(.f32),
.f64 => wasm.valtype(.f64),
.u32, .i32, .bool => wasm.valtype(.i32),
.u64, .i64 => wasm.valtype(.i64),
else => self.fail(src, "TODO - Wasm genValtype for type '{s}'", .{ty.tag()}),
};
}
/// Using a given `Type`, returns the corresponding wasm value type
/// Differently from `genValtype` this also allows `void` to create a block
/// with no return type
fn genBlockType(self: *Context, src: LazySrcLoc, ty: Type) InnerError!u8 {
return switch (ty.tag()) {
.void, .noreturn => wasm.block_empty,
else => self.genValtype(src, ty),
};
}
/// Writes the bytecode depending on the given `WValue` in `val`
fn emitWValue(self: *Context, val: WValue) InnerError!void {
const writer = self.code.writer();
switch (val) {
.block_idx => unreachable,
.none, .code_offset => {},
.local => |idx| {
try writer.writeByte(wasm.opcode(.local_get));
try leb.writeULEB128(writer, idx);
},
.constant => |inst| try self.emitConstant(inst.castTag(.constant).?), // creates a new constant onto the stack
}
}
fn genFunctype(self: *Context) InnerError!void {
const ty = self.decl.typed_value.most_recent.typed_value.ty;
const writer = self.func_type_data.writer();
try writer.writeByte(wasm.function_type);
// param types
try leb.writeULEB128(writer, @intCast(u32, ty.fnParamLen()));
if (ty.fnParamLen() != 0) {
const params = try self.gpa.alloc(Type, ty.fnParamLen());
defer self.gpa.free(params);
ty.fnParamTypes(params);
for (params) |param_type| {
// Can we maybe get the source index of each param?
const val_type = try self.genValtype(.{ .node_offset = 0 }, param_type);
try writer.writeByte(val_type);
}
}
// return type
const return_type = ty.fnReturnType();
switch (return_type.tag()) {
.void, .noreturn => try leb.writeULEB128(writer, @as(u32, 0)),
else => |ret_type| {
try leb.writeULEB128(writer, @as(u32, 1));
// Can we maybe get the source index of the return type?
const val_type = try self.genValtype(.{ .node_offset = 0 }, return_type);
try writer.writeByte(val_type);
},
}
}
/// Generates the wasm bytecode for the function declaration belonging to `Context`
pub fn gen(self: *Context) InnerError!void {
assert(self.code.items.len == 0);
try self.genFunctype();
// Write instructions
// TODO: check for and handle death of instructions
const tv = self.decl.typed_value.most_recent.typed_value;
const mod_fn = blk: {
if (tv.val.castTag(.function)) |func| break :blk func.data;
if (tv.val.castTag(.extern_fn)) |ext_fn| return; // don't need codegen for extern functions
return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{tv.ty.tag()});
};
// Reserve space to write the size after generating the code as well as space for locals count
try self.code.resize(10);
try self.genBody(mod_fn.body);
// finally, write our local types at the 'offset' position
{
leb.writeUnsignedFixed(5, self.code.items[5..10], @intCast(u32, self.locals.items.len));
// offset into 'code' section where we will put our locals types
var local_offset: usize = 10;
// emit the actual locals amount
for (self.locals.items) |local| {
var buf: [6]u8 = undefined;
leb.writeUnsignedFixed(5, buf[0..5], @as(u32, 1));
buf[5] = local;
try self.code.insertSlice(local_offset, &buf);
local_offset += 6;
}
}
const writer = self.code.writer();
try writer.writeByte(wasm.opcode(.end));
// Fill in the size of the generated code to the reserved space at the
// beginning of the buffer.
const size = self.code.items.len - 5 + self.decl.fn_link.wasm.?.idx_refs.items.len * 5;
leb.writeUnsignedFixed(5, self.code.items[0..5], @intCast(u32, size));
}
fn genInst(self: *Context, inst: *Inst) InnerError!WValue {
return switch (inst.tag) {
.add => self.genAdd(inst.castTag(.add).?),
.alloc => self.genAlloc(inst.castTag(.alloc).?),
.arg => self.genArg(inst.castTag(.arg).?),
.block => self.genBlock(inst.castTag(.block).?),
.breakpoint => self.genBreakpoint(inst.castTag(.breakpoint).?),
.br => self.genBr(inst.castTag(.br).?),
.call => self.genCall(inst.castTag(.call).?),
.cmp_eq => self.genCmp(inst.castTag(.cmp_eq).?, .eq),
.cmp_gte => self.genCmp(inst.castTag(.cmp_gte).?, .gte),
.cmp_gt => self.genCmp(inst.castTag(.cmp_gt).?, .gt),
.cmp_lte => self.genCmp(inst.castTag(.cmp_lte).?, .lte),
.cmp_lt => self.genCmp(inst.castTag(.cmp_lt).?, .lt),
.cmp_neq => self.genCmp(inst.castTag(.cmp_neq).?, .neq),
.condbr => self.genCondBr(inst.castTag(.condbr).?),
.constant => unreachable,
.dbg_stmt => WValue.none,
.load => self.genLoad(inst.castTag(.load).?),
.loop => self.genLoop(inst.castTag(.loop).?),
.not => self.genNot(inst.castTag(.not).?),
.ret => self.genRet(inst.castTag(.ret).?),
.retvoid => WValue.none,
.store => self.genStore(inst.castTag(.store).?),
.sub => self.genSub(inst.castTag(.sub).?),
.unreach => self.genUnreachable(inst.castTag(.unreach).?),
else => self.fail(inst.src, "TODO: Implement wasm inst: {s}", .{inst.tag}),
};
}
fn genBody(self: *Context, body: ir.Body) InnerError!void {
for (body.instructions) |inst| {
const result = try self.genInst(inst);
try self.values.putNoClobber(self.gpa, inst, result);
}
}
fn genRet(self: *Context, inst: *Inst.UnOp) InnerError!WValue {
// TODO: Implement tail calls
const operand = self.resolveInst(inst.operand);
try self.emitWValue(operand);
return .none;
}
fn genCall(self: *Context, inst: *Inst.Call) InnerError!WValue {
const func_inst = inst.func.castTag(.constant).?;
const func_val = inst.func.value().?;
const target = blk: {
if (func_val.castTag(.function)) |func| {
break :blk func.data.owner_decl;
} else if (func_val.castTag(.extern_fn)) |ext_fn| {
break :blk ext_fn.data;
}
return self.fail(inst.base.src, "Expected a function, but instead found type '{s}'", .{func_val.tag()});
};
for (inst.args) |arg| {
const arg_val = self.resolveInst(arg);
try self.emitWValue(arg_val);
}
try self.code.append(wasm.opcode(.call));
// The function index immediate argument will be filled in using this data
// in link.Wasm.flush().
try self.decl.fn_link.wasm.?.idx_refs.append(self.gpa, .{
.offset = @intCast(u32, self.code.items.len),
.decl = target,
});
return .none;
}
fn genAlloc(self: *Context, inst: *Inst.NoOp) InnerError!WValue {
const elem_type = inst.base.ty.elemType();
const valtype = try self.genValtype(inst.base.src, elem_type);
try self.locals.append(self.gpa, valtype);
defer self.local_index += 1;
return WValue{ .local = self.local_index };
}
fn genStore(self: *Context, inst: *Inst.BinOp) InnerError!WValue {
const writer = self.code.writer();
const lhs = self.resolveInst(inst.lhs);
const rhs = self.resolveInst(inst.rhs);
try self.emitWValue(rhs);
try writer.writeByte(wasm.opcode(.local_set));
try leb.writeULEB128(writer, lhs.local);
return .none;
}
fn genLoad(self: *Context, inst: *Inst.UnOp) InnerError!WValue {
return self.resolveInst(inst.operand);
}
fn genArg(self: *Context, inst: *Inst.Arg) InnerError!WValue {
// arguments share the index with locals
defer self.local_index += 1;
return WValue{ .local = self.local_index };
}
fn genAdd(self: *Context, inst: *Inst.BinOp) InnerError!WValue {
const lhs = self.resolveInst(inst.lhs);
const rhs = self.resolveInst(inst.rhs);
try self.emitWValue(lhs);
try self.emitWValue(rhs);
const opcode: wasm.Opcode = switch (inst.base.ty.tag()) {
.u32, .i32 => .i32_add,
.u64, .i64 => .i64_add,
.f32 => .f32_add,
.f64 => .f64_add,
else => return self.fail(inst.base.src, "TODO - Implement wasm genAdd for type '{s}'", .{inst.base.ty.tag()}),
};
try self.code.append(wasm.opcode(opcode));
return .none;
}
fn genSub(self: *Context, inst: *Inst.BinOp) InnerError!WValue {
const lhs = self.resolveInst(inst.lhs);
const rhs = self.resolveInst(inst.rhs);
try self.emitWValue(lhs);
try self.emitWValue(rhs);
const opcode: wasm.Opcode = switch (inst.base.ty.tag()) {
.u32, .i32 => .i32_sub,
.u64, .i64 => .i64_sub,
.f32 => .f32_sub,
.f64 => .f64_sub,
else => return self.fail(inst.base.src, "TODO - Implement wasm genSub for type '{s}'", .{inst.base.ty.tag()}),
};
try self.code.append(wasm.opcode(opcode));
return .none;
}
fn emitConstant(self: *Context, inst: *Inst.Constant) InnerError!void {
const writer = self.code.writer();
switch (inst.base.ty.tag()) {
.u32 => {
try writer.writeByte(wasm.opcode(.i32_const));
try leb.writeILEB128(writer, inst.val.toUnsignedInt());
},
.i32, .bool => {
try writer.writeByte(wasm.opcode(.i32_const));
try leb.writeILEB128(writer, inst.val.toSignedInt());
},
.u64 => {
try writer.writeByte(wasm.opcode(.i64_const));
try leb.writeILEB128(writer, inst.val.toUnsignedInt());
},
.i64 => {
try writer.writeByte(wasm.opcode(.i64_const));
try leb.writeILEB128(writer, inst.val.toSignedInt());
},
.f32 => {
try writer.writeByte(wasm.opcode(.f32_const));
// TODO: enforce LE byte order
try writer.writeAll(mem.asBytes(&inst.val.toFloat(f32)));
},
.f64 => {
try writer.writeByte(wasm.opcode(.f64_const));
// TODO: enforce LE byte order
try writer.writeAll(mem.asBytes(&inst.val.toFloat(f64)));
},
.void => {},
else => |ty| return self.fail(inst.base.src, "Wasm TODO: emitConstant for type {s}", .{ty}),
}
}
fn genBlock(self: *Context, block: *Inst.Block) InnerError!WValue {
const block_ty = try self.genBlockType(block.base.src, block.base.ty);
try self.startBlock(.block, block_ty, null);
block.codegen = .{
// we don't use relocs, so using `relocs` is illegal behaviour.
.relocs = undefined,
// Here we set the current block idx, so breaks know the depth to jump
// to when breaking out.
.mcv = @bitCast(AnyMCValue, WValue{ .block_idx = self.block_depth }),
};
try self.genBody(block.body);
try self.endBlock();
return .none;
}
/// appends a new wasm block to the code section and increases the `block_depth` by 1
fn startBlock(self: *Context, block_type: wasm.Opcode, valtype: u8, with_offset: ?usize) !void {
self.block_depth += 1;
if (with_offset) |offset| {
try self.code.insert(offset, wasm.opcode(block_type));
try self.code.insert(offset + 1, valtype);
} else {
try self.code.append(wasm.opcode(block_type));
try self.code.append(valtype);
}
}
/// Ends the current wasm block and decreases the `block_depth` by 1
fn endBlock(self: *Context) !void {
try self.code.append(wasm.opcode(.end));
self.block_depth -= 1;
}
fn genLoop(self: *Context, loop: *Inst.Loop) InnerError!WValue {
const loop_ty = try self.genBlockType(loop.base.src, loop.base.ty);
try self.startBlock(.loop, loop_ty, null);
try self.genBody(loop.body);
// breaking to the index of a loop block will continue the loop instead
try self.code.append(wasm.opcode(.br));
try leb.writeULEB128(self.code.writer(), @as(u32, 0));
try self.endBlock();
return .none;
}
fn genCondBr(self: *Context, condbr: *Inst.CondBr) InnerError!WValue {
const condition = self.resolveInst(condbr.condition);
const writer = self.code.writer();
// TODO: Handle death instructions for then and else body
// insert blocks at the position of `offset` so
// the condition can jump to it
const offset = switch (condition) {
.code_offset => |offset| offset,
else => blk: {
const offset = self.code.items.len;
try self.emitWValue(condition);
break :blk offset;
},
};
const block_ty = try self.genBlockType(condbr.base.src, condbr.base.ty);
try self.startBlock(.block, block_ty, offset);
// we inserted the block in front of the condition
// so now check if condition matches. If not, break outside this block
// and continue with the then codepath
try writer.writeByte(wasm.opcode(.br_if));
try leb.writeULEB128(writer, @as(u32, 0));
try self.genBody(condbr.else_body);
try self.endBlock();
// Outer block that matches the condition
try self.genBody(condbr.then_body);
return .none;
}
fn genCmp(self: *Context, inst: *Inst.BinOp, op: std.math.CompareOperator) InnerError!WValue {
const ty = inst.lhs.ty.tag();
// save offset, so potential conditions can insert blocks in front of
// the comparison that we can later jump back to
const offset = self.code.items.len;
const lhs = self.resolveInst(inst.lhs);
const rhs = self.resolveInst(inst.rhs);
try self.emitWValue(lhs);
try self.emitWValue(rhs);
const opcode_maybe: ?wasm.Opcode = switch (op) {
.lt => @as(?wasm.Opcode, switch (ty) {
.i32 => .i32_lt_s,
.u32 => .i32_lt_u,
.i64 => .i64_lt_s,
.u64 => .i64_lt_u,
.f32 => .f32_lt,
.f64 => .f64_lt,
else => null,
}),
.lte => @as(?wasm.Opcode, switch (ty) {
.i32 => .i32_le_s,
.u32 => .i32_le_u,
.i64 => .i64_le_s,
.u64 => .i64_le_u,
.f32 => .f32_le,
.f64 => .f64_le,
else => null,
}),
.eq => @as(?wasm.Opcode, switch (ty) {
.i32, .u32 => .i32_eq,
.i64, .u64 => .i64_eq,
.f32 => .f32_eq,
.f64 => .f64_eq,
else => null,
}),
.gte => @as(?wasm.Opcode, switch (ty) {
.i32 => .i32_ge_s,
.u32 => .i32_ge_u,
.i64 => .i64_ge_s,
.u64 => .i64_ge_u,
.f32 => .f32_ge,
.f64 => .f64_ge,
else => null,
}),
.gt => @as(?wasm.Opcode, switch (ty) {
.i32 => .i32_gt_s,
.u32 => .i32_gt_u,
.i64 => .i64_gt_s,
.u64 => .i64_gt_u,
.f32 => .f32_gt,
.f64 => .f64_gt,
else => null,
}),
.neq => @as(?wasm.Opcode, switch (ty) {
.i32, .u32 => .i32_ne,
.i64, .u64 => .i64_ne,
.f32 => .f32_ne,
.f64 => .f64_ne,
else => null,
}),
};
const opcode = opcode_maybe orelse
return self.fail(inst.base.src, "TODO - Wasm genCmp for type '{s}' and operator '{s}'", .{ ty, @tagName(op) });
try self.code.append(wasm.opcode(opcode));
return WValue{ .code_offset = offset };
}
fn genBr(self: *Context, br: *Inst.Br) InnerError!WValue {
// if operand has codegen bits we should break with a value
if (br.operand.ty.hasCodeGenBits()) {
const operand = self.resolveInst(br.operand);
try self.emitWValue(operand);
}
// every block contains a `WValue` with its block index.
// We then determine how far we have to jump to it by substracting it from current block depth
const wvalue = @bitCast(WValue, br.block.codegen.mcv);
const idx: u32 = self.block_depth - wvalue.block_idx;
const writer = self.code.writer();
try writer.writeByte(wasm.opcode(.br));
try leb.writeULEB128(writer, idx);
return .none;
}
fn genNot(self: *Context, not: *Inst.UnOp) InnerError!WValue {
const offset = self.code.items.len;
const operand = self.resolveInst(not.operand);
try self.emitWValue(operand);
// wasm does not have booleans nor the `not` instruction, therefore compare with 0
// to create the same logic
const writer = self.code.writer();
try writer.writeByte(wasm.opcode(.i32_const));
try leb.writeILEB128(writer, @as(i32, 0));
try writer.writeByte(wasm.opcode(.i32_eq));
return WValue{ .code_offset = offset };
}
fn genBreakpoint(self: *Context, breakpoint: *Inst.NoOp) InnerError!WValue {
// unsupported by wasm itself. Can be implemented once we support DWARF
// for wasm
return .none;
}
fn genUnreachable(self: *Context, unreach: *Inst.NoOp) InnerError!WValue {
try self.code.append(wasm.opcode(.@"unreachable"));
return .none;
}
};