From 8abdebecdca3a905099fa17f2497e8bf5f918e8a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 13 Jan 2025 15:19:24 -0800 Subject: [PATCH] wasm linker: implement `@tagName` for sparse enums --- lib/std/wasm.zig | 13 ++++- src/Value.zig | 8 +-- src/arch/wasm/CodeGen.zig | 55 ++++++++++----------- src/arch/wasm/Emit.zig | 2 +- src/arch/wasm/Mir.zig | 2 +- src/link/Wasm.zig | 2 +- src/link/Wasm/Flush.zig | 101 ++++++++++++++++++++++++++++++++++---- 7 files changed, 136 insertions(+), 47 deletions(-) diff --git a/lib/std/wasm.zig b/lib/std/wasm.zig index 7764d40aff..403cc2f3b9 100644 --- a/lib/std/wasm.zig +++ b/lib/std/wasm.zig @@ -655,7 +655,18 @@ pub const function_type: u8 = 0x60; pub const result_type: u8 = 0x40; /// Represents a block which will not return a value -pub const block_empty: u8 = 0x40; +pub const BlockType = enum(u8) { + empty = 0x40, + i32 = 0x7F, + i64 = 0x7E, + f32 = 0x7D, + f64 = 0x7C, + v128 = 0x7B, + + pub fn fromValtype(valtype: Valtype) BlockType { + return @enumFromInt(@intFromEnum(valtype)); + } +}; // binary constants pub const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm diff --git a/src/Value.zig b/src/Value.zig index 3048691a55..3081a28b9b 100644 --- a/src/Value.zig +++ b/src/Value.zig @@ -241,12 +241,12 @@ pub fn getVariable(val: Value, mod: *Zcu) ?InternPool.Key.Variable { /// If the value fits in a u64, return it, otherwise null. /// Asserts not undefined. -pub fn getUnsignedInt(val: Value, zcu: *Zcu) ?u64 { +pub fn getUnsignedInt(val: Value, zcu: *const Zcu) ?u64 { return getUnsignedIntInner(val, .normal, zcu, {}) catch unreachable; } /// Asserts the value is an integer and it fits in a u64 -pub fn toUnsignedInt(val: Value, zcu: *Zcu) u64 { +pub fn toUnsignedInt(val: Value, zcu: *const Zcu) u64 { return getUnsignedInt(val, zcu).?; } @@ -259,7 +259,7 @@ pub fn getUnsignedIntSema(val: Value, pt: Zcu.PerThread) !?u64 { pub fn getUnsignedIntInner( val: Value, comptime strat: ResolveStrat, - zcu: *Zcu, + zcu: strat.ZcuPtr(), tid: strat.Tid(), ) !?u64 { return switch (val.toIntern()) { @@ -304,7 +304,7 @@ pub fn toUnsignedIntSema(val: Value, pt: Zcu.PerThread) !u64 { } /// Asserts the value is an integer and it fits in a i64 -pub fn toSignedInt(val: Value, zcu: *Zcu) i64 { +pub fn toSignedInt(val: Value, zcu: *const Zcu) i64 { return switch (val.toIntern()) { .bool_false => 0, .bool_true => 1, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 1ba0f371a4..e44380d571 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -80,7 +80,7 @@ start_mir_extra_off: u32, start_locals_off: u32, /// List of all locals' types generated throughout this declaration /// used to emit locals count at start of 'code' section. -locals: *std.ArrayListUnmanaged(u8), +locals: *std.ArrayListUnmanaged(std.wasm.Valtype), /// When a function is executing, we store the the current stack pointer's value within this local. /// This value is then used to restore the stack pointer to the original value at the return of the function. initial_stack_value: WValue = .none, @@ -210,7 +210,7 @@ const WValue = union(enum) { if (local_value < reserved + 2) return; // reserved locals may never be re-used. Also accounts for 2 stack locals. const index = local_value - reserved; - const valtype: std.wasm.Valtype = @enumFromInt(gen.locals.items[gen.start_locals_off + index]); + const valtype = gen.locals.items[gen.start_locals_off + index]; switch (valtype) { .i32 => gen.free_locals_i32.append(gen.gpa, local_value) catch return, // It's ok to fail any of those, a new local can be allocated instead .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return, @@ -995,18 +995,13 @@ pub fn typeToValtype(ty: Type, zcu: *const Zcu, target: *const std.Target) std.w }; } -/// Using a given `Type`, returns the byte representation of its wasm value type -fn genValtype(ty: Type, zcu: *const Zcu, target: *const std.Target) u8 { - return @intFromEnum(typeToValtype(ty, zcu, target)); -} - /// Using a given `Type`, returns the corresponding wasm value type -/// Differently from `genValtype` this also allows `void` to create a block +/// Differently from `typeToValtype` this also allows `void` to create a block /// with no return type -fn genBlockType(ty: Type, zcu: *const Zcu, target: *const std.Target) u8 { +fn genBlockType(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.BlockType { return switch (ty.ip_index) { - .void_type, .noreturn_type => std.wasm.block_empty, - else => genValtype(ty, zcu, target), + .void_type, .noreturn_type => .empty, + else => .fromValtype(typeToValtype(ty, zcu, target)), }; } @@ -1145,7 +1140,7 @@ fn allocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { /// to use a zero-initialized local. fn ensureAllocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { const zcu = cg.pt.zcu; - try cg.locals.append(cg.gpa, genValtype(ty, zcu, cg.target)); + try cg.locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target)); const initial_index = cg.local_index; cg.local_index += 1; return .{ .local = .{ .value = initial_index, .references = 1 } }; @@ -1197,7 +1192,7 @@ pub const Function = extern struct { std.leb.writeUleb128(code.fixedWriter(), @as(u32, @intCast(locals.len))) catch unreachable; for (locals) |local| { std.leb.writeUleb128(code.fixedWriter(), @as(u32, 1)) catch unreachable; - code.appendAssumeCapacity(local); + code.appendAssumeCapacity(@intFromEnum(local)); } // Stack management section of function prologue. @@ -1651,8 +1646,8 @@ fn memcpy(cg: *CodeGen, dst: WValue, src: WValue, len: WValue) !void { try cg.addLocal(.local_set, offset.local.value); // outer block to jump to when loop is done - try cg.startBlock(.block, std.wasm.block_empty); - try cg.startBlock(.loop, std.wasm.block_empty); + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); // loop condition (offset == length -> break) { @@ -3387,12 +3382,12 @@ fn lowerBlock(cg: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const const wasm_block_ty = genBlockType(block_ty, zcu, cg.target); // if wasm_block_ty is non-empty, we create a register to store the temporary value - const block_result: WValue = if (wasm_block_ty != std.wasm.block_empty) blk: { + const block_result: WValue = if (wasm_block_ty != .empty) blk: { const ty: Type = if (isByRef(block_ty, zcu, cg.target)) Type.u32 else block_ty; break :blk try cg.ensureAllocLocal(ty); // make sure it's a clean local as it may never get overwritten } else .none; - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // Here we set the current block idx, so breaks know the depth to jump // to when breaking out. try cg.blocks.putNoClobber(cg.gpa, inst, .{ @@ -3410,11 +3405,11 @@ fn lowerBlock(cg: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const } /// appends a new wasm block to the code section and increases the `block_depth` by 1 -fn startBlock(cg: *CodeGen, block_tag: std.wasm.Opcode, valtype: u8) !void { +fn startBlock(cg: *CodeGen, block_tag: std.wasm.Opcode, block_type: std.wasm.BlockType) !void { cg.block_depth += 1; try cg.addInst(.{ .tag = Mir.Inst.Tag.fromOpcode(block_tag), - .data = .{ .block_type = valtype }, + .data = .{ .block_type = block_type }, }); } @@ -3431,7 +3426,7 @@ fn airLoop(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { // result type of loop is always 'noreturn', meaning we can always // emit the wasm type 'block_empty'. - try cg.startBlock(.loop, std.wasm.block_empty); + try cg.startBlock(.loop, .empty); try cg.loops.putNoClobber(cg.gpa, inst, cg.block_depth); defer assert(cg.loops.remove(inst)); @@ -3451,7 +3446,7 @@ fn airCondBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const liveness_condbr = cg.liveness.getCondBr(inst); // result type is always noreturn, so use `block_empty` as type. - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // emit the conditional value try cg.emitWValue(condition); @@ -3940,7 +3935,7 @@ fn airSwitchBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const pt = cg.pt; const zcu = pt.zcu; // result type is always 'noreturn' - const blocktype = std.wasm.block_empty; + const blocktype: std.wasm.BlockType = .empty; const switch_br = cg.air.unwrapSwitch(inst); const target = try cg.resolveInst(switch_br.operand); const target_ty = cg.typeOf(switch_br.operand); @@ -4832,8 +4827,8 @@ fn memset(cg: *CodeGen, elem_ty: Type, ptr: WValue, len: WValue, value: WValue) try cg.addLocal(.local_set, end_ptr.local.value); // outer block to jump to when loop is done - try cg.startBlock(.block, std.wasm.block_empty); - try cg.startBlock(.loop, std.wasm.block_empty); + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); // check for condition for loop end try cg.emitWValue(new_ptr); @@ -5410,7 +5405,7 @@ fn cmpOptionals(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: st var result = try cg.ensureAllocLocal(Type.i32); defer result.free(cg); - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); _ = try cg.isNull(lhs, operand_ty, .i32_eq); _ = try cg.isNull(rhs, operand_ty, .i32_eq); try cg.addTag(.i32_ne); // inverse so we can exit early @@ -6420,7 +6415,7 @@ fn lowerTry( if (!err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) { // Block we can jump out of when error is not set - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // check if the error tag is set for the error union. try cg.emitWValue(err_union); @@ -7105,11 +7100,11 @@ fn airErrorSetHasValue(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { } // start block for 'true' branch - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // start block for 'false' branch - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // block for the jump table itself - try cg.startBlock(.block, std.wasm.block_empty); + try cg.startBlock(.block, .empty); // lower operand to determine jump table target try cg.emitWValue(operand); @@ -7274,7 +7269,7 @@ fn airAtomicRmw(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const value = try tmp.toLocal(cg, ty); // create a loop to cmpxchg the new value - try cg.startBlock(.loop, std.wasm.block_empty); + try cg.startBlock(.loop, .empty); try cg.emitWValue(ptr); try cg.emitWValue(value); diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig index 008c6fefed..0a50a67f03 100644 --- a/src/arch/wasm/Emit.zig +++ b/src/arch/wasm/Emit.zig @@ -43,7 +43,7 @@ pub fn lowerToCode(emit: *Emit) Error!void { const block_type = datas[inst].block_type; try code.ensureUnusedCapacity(gpa, 2); code.appendAssumeCapacity(@intFromEnum(tags[inst])); - code.appendAssumeCapacity(block_type); + code.appendAssumeCapacity(@intFromEnum(block_type)); inst += 1; continue :loop tags[inst]; diff --git a/src/arch/wasm/Mir.zig b/src/arch/wasm/Mir.zig index 3ff6b8072b..5c8c558926 100644 --- a/src/arch/wasm/Mir.zig +++ b/src/arch/wasm/Mir.zig @@ -588,7 +588,7 @@ pub const Inst = struct { /// Uses no additional data tag: void, /// Contains the result type of a block - block_type: u8, + block_type: std.wasm.BlockType, /// Label: Each structured control instruction introduces an implicit label. /// Labels are targets for branch instructions that reference them with /// label indices. Unlike with other index spaces, indexing of labels diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 6b6843692a..5bc88a63d0 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -283,7 +283,7 @@ mir_instructions: std.MultiArrayList(Mir.Inst) = .{}, /// Corresponds to `mir_instructions`. mir_extra: std.ArrayListUnmanaged(u32) = .empty, /// All local types for all Zcu functions. -all_zcu_locals: std.ArrayListUnmanaged(u8) = .empty, +all_zcu_locals: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty, params_scratch: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty, returns_scratch: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty, diff --git a/src/link/Wasm/Flush.zig b/src/link/Wasm/Flush.zig index 41f1af26eb..83c2df6053 100644 --- a/src/link/Wasm/Flush.zig +++ b/src/link/Wasm/Flush.zig @@ -780,9 +780,10 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { const zcu = comp.zcu.?; const ip = &zcu.intern_pool; - switch (ip.indexToKey(i.key(wasm).*)) { + const ip_index = i.key(wasm).*; + switch (ip.indexToKey(ip_index)) { .enum_type => { - try emitTagNameFunction(gpa, binary_bytes, f.data_segments.get(.__zig_tag_name_table).?, i.value(wasm).tag_name.table_index); + try emitTagNameFunction(wasm, binary_bytes, f.data_segments.get(.__zig_tag_name_table).?, i.value(wasm).tag_name.table_index, ip_index); }, else => try i.value(wasm).function.lower(wasm, binary_bytes), } @@ -1772,13 +1773,13 @@ fn emitInitMemoryFunction( // destination blocks // based on values we jump to corresponding label binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.block)); // $drop - binary_bytes.appendAssumeCapacity(std.wasm.block_empty); // block type + binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.BlockType.empty)); binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.block)); // $wait - binary_bytes.appendAssumeCapacity(std.wasm.block_empty); // block type + binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.BlockType.empty)); binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.block)); // $init - binary_bytes.appendAssumeCapacity(std.wasm.block_empty); // block type + binary_bytes.appendAssumeCapacity(@intFromEnum(std.wasm.BlockType.empty)); // atomically check appendReservedI32Const(binary_bytes, flag_address); @@ -1898,18 +1899,25 @@ fn emitInitMemoryFunction( } fn emitTagNameFunction( - gpa: Allocator, + wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), table_base_addr: u32, table_index: u32, -) Allocator.Error!void { + enum_type_ip: InternPool.Index, +) !void { + const comp = wasm.base.comp; + const gpa = comp.gpa; + const diags = &comp.link_diags; + const zcu = comp.zcu.?; + const ip = &zcu.intern_pool; + const enum_type = ip.loadEnumType(enum_type_ip); + try code.ensureUnusedCapacity(gpa, 7 * 5 + 6 + 1 * 6); appendReservedUleb32(code, 0); // no locals const slice_abi_size = 8; const encoded_alignment = @ctz(@as(u32, 4)); - const all_tag_values_autoassigned = true; - if (all_tag_values_autoassigned) { + if (enum_type.tag_mode == .auto) { // Then it's a direct table lookup. code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_get)); appendReservedUleb32(code, 0); @@ -1924,6 +1932,75 @@ fn emitTagNameFunction( appendReservedUleb32(code, encoded_alignment); appendReservedUleb32(code, table_base_addr + table_index * 8); + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_store)); + appendReservedUleb32(code, encoded_alignment); + appendReservedUleb32(code, 0); + } else { + const int_info = Zcu.Type.intInfo(.fromInterned(enum_type.tag_ty), zcu); + const outer_block_type: std.wasm.BlockType = switch (int_info.bits) { + 0...32 => .i32, + 33...64 => .i64, + else => return diags.fail("wasm linker does not yet implement @tagName for sparse enums with more than 64 bit integer tag types", .{}), + }; + + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_get)); + appendReservedUleb32(code, 0); + + // Outer block that computes table offset. + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.block)); + code.appendAssumeCapacity(@intFromEnum(outer_block_type)); + + for (enum_type.values.get(ip), 0..) |tag_value, tag_index| { + // block for this if case + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.block)); + code.appendAssumeCapacity(@intFromEnum(std.wasm.BlockType.empty)); + + // Tag value whose name should be returned. + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_get)); + appendReservedUleb32(code, 1); + + const val: Zcu.Value = .fromInterned(tag_value); + switch (outer_block_type) { + .i32 => { + const x: u32 = switch (int_info.signedness) { + .signed => @bitCast(@as(i32, @intCast(val.toSignedInt(zcu)))), + .unsigned => @intCast(val.toUnsignedInt(zcu)), + }; + appendReservedI32Const(code, x); + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_ne)); + }, + .i64 => { + const x: u64 = switch (int_info.signedness) { + .signed => @bitCast(val.toSignedInt(zcu)), + .unsigned => val.toUnsignedInt(zcu), + }; + appendReservedI64Const(code, x); + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_ne)); + }, + else => unreachable, + } + + // if they're not equal, break out of current branch + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.br_if)); + appendReservedUleb32(code, 0); + + // Put the table offset of the result on the stack. + appendReservedI32Const(code, @intCast(tag_index * slice_abi_size)); + + // break outside blocks + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.br)); + appendReservedUleb32(code, 1); + + // end the block for this case + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.end)); + } + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.@"unreachable")); + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.end)); + + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_load)); + appendReservedUleb32(code, encoded_alignment); + appendReservedUleb32(code, table_base_addr + table_index * 8); + code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_store)); appendReservedUleb32(code, encoded_alignment); appendReservedUleb32(code, 0); @@ -1939,6 +2016,12 @@ fn appendReservedI32Const(bytes: *std.ArrayListUnmanaged(u8), val: u32) void { leb.writeIleb128(bytes.fixedWriter(), @as(i32, @bitCast(val))) catch unreachable; } +/// Writes an unsigned 64-bit integer as a LEB128-encoded 'i64.const' value. +fn appendReservedI64Const(bytes: *std.ArrayListUnmanaged(u8), val: u64) void { + bytes.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_const)); + leb.writeIleb128(bytes.fixedWriter(), @as(i64, @bitCast(val))) catch unreachable; +} + fn appendReservedUleb32(bytes: *std.ArrayListUnmanaged(u8), val: u32) void { leb.writeUleb128(bytes.fixedWriter(), val) catch unreachable; }