From c6d654f73bbe80ed3653be6a31ddcaa4772a4fe2 Mon Sep 17 00:00:00 2001
From: Luuk de Gram
Date: Thu, 8 Dec 2022 21:18:11 +0100
Subject: [PATCH] wasm: implement the 'splat' instruction part 1

This implements `airSplat` for the native WebAssembly backend when the
'simd128' or 'relaxed-simd' features are enabled. The commit supports
splat when the value lives in the linear memory segment as well as when
it lives on the stack. Splatting directly from linear memory avoids
first loading the scalar value onto the stack, which saves a lot of
instructions. When the element type is not 8, 16, 32 or 64 bits wide,
the backend instead falls back to the same strategy as when these
features are disabled.
---
 lib/std/wasm.zig          |  4 +--
 src/arch/wasm/CodeGen.zig | 51 +++++++++++++++++++++++++++++++++++++--
 src/arch/wasm/Emit.zig    | 15 +++++++++++-
 3 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/lib/std/wasm.zig b/lib/std/wasm.zig
index 6601e92973..0e61b3fb30 100644
--- a/lib/std/wasm.zig
+++ b/lib/std/wasm.zig
@@ -256,13 +256,13 @@ pub const SimdOpcode = enum(u32) {
     v128_const = 0x0C,
     i8x16_shuffle = 0x0D,
     i8x16_swizzle = 0x0E,
-    @"8x16_splat" = 0x0F,
+    i8x16_splat = 0x0F,
     i16x8_splat = 0x10,
     i32x4_splat = 0x11,
     i64x2_splat = 0x12,
     f32x4_splat = 0x13,
     f64x2_splat = 0x14,
-    @"8x16_extract_lane_s" = 0x15,
+    i8x16_extract_lane_s = 0x15,
     i8x16_extract_lane_u = 0x16,
     i8x16_replace_lane = 0x17,
     i16x8_extract_lane_s = 0x18,
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 0164b17c0f..5e69860fbc 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -4430,9 +4430,56 @@ fn airIntToFloat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const ty_op = func.air.instructions.items(.data)[inst].ty_op;
     const operand = try func.resolveInst(ty_op.operand);
+    const ty = func.air.typeOfIndex(inst);
+    const elem_ty = ty.childType();
 
-    _ = operand;
-    return func.fail("TODO: Implement wasm airSplat", .{});
+    const result = try func.allocLocal(ty);
+    if (determineSimdStoreStrategy(ty, func.target) == .direct) blk: {
+        switch (operand) {
+            // When the operand lives in the linear memory section, we can directly
+            // load and splat the value at once, meaning we do not first have to load
+            // the scalar value onto the stack.
+            .stack_offset, .memory, .memory_offset => {
+                const opcode = switch (elem_ty.bitSize(func.target)) {
+                    8 => std.wasm.simdOpcode(.v128_load8_splat),
+                    16 => std.wasm.simdOpcode(.v128_load16_splat),
+                    32 => std.wasm.simdOpcode(.v128_load32_splat),
+                    64 => std.wasm.simdOpcode(.v128_load64_splat),
+                    else => break :blk, // Cannot make use of simd-instructions
+                };
+                try func.emitWValue(operand);
+                // TODO: Add helper functions for simd opcodes
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                // stores as := opcode, offset, alignment (opcode::memarg)
+                try func.mir_extra.appendSlice(func.gpa, &[_]u32{
+                    opcode,
+                    operand.offset(),
+                    elem_ty.abiAlignment(func.target),
+                });
+                try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+                try func.addLabel(.local_set, result.local.value);
+                return func.finishAir(inst, result, &.{ty_op.operand});
+            },
+            .local => {
+                const opcode = switch (elem_ty.bitSize(func.target)) {
+                    8 => std.wasm.simdOpcode(.i8x16_splat),
+                    16 => std.wasm.simdOpcode(.i16x8_splat),
+                    32 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i32x4_splat) else std.wasm.simdOpcode(.f32x4_splat),
+                    64 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i64x2_splat) else std.wasm.simdOpcode(.f64x2_splat),
+                    else => break :blk, // Cannot make use of simd-instructions
+                };
+                try func.emitWValue(operand);
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                try func.mir_extra.append(func.gpa, opcode);
+                try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+                try func.addLabel(.local_set, result.local.value);
+                return func.finishAir(inst, result, &.{ty_op.operand});
+            },
+            else => unreachable,
+        }
+    }
+
+    return func.fail("TODO: Implement wasm airSplat unrolled", .{});
 }
 
 fn airSelect(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig
index ae371458ff..45e02e0986 100644
--- a/src/arch/wasm/Emit.zig
+++ b/src/arch/wasm/Emit.zig
@@ -437,7 +437,13 @@ fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
     try emit.code.append(0xFD);
     try leb128.writeULEB128(writer, opcode);
     switch (@intToEnum(std.wasm.SimdOpcode, opcode)) {
-        .v128_store, .v128_load => {
+        .v128_store,
+        .v128_load,
+        .v128_load8_splat,
+        .v128_load16_splat,
+        .v128_load32_splat,
+        .v128_load64_splat,
+        => {
             const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index + 1).data;
             try encodeMemArg(mem_arg, writer);
         },
@@ -445,6 +451,13 @@ fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
             const simd_value = emit.mir.extra[extra_index + 1 ..][0..4];
             try writer.writeAll(std.mem.asBytes(simd_value));
         },
+        .i8x16_splat,
+        .i16x8_splat,
+        .i32x4_splat,
+        .i64x2_splat,
+        .f32x4_splat,
+        .f64x2_splat,
+        => {}, // opcode already written
         else => |tag| return emit.fail("TODO: Implement simd instruction: {s}\n", .{@tagName(tag)}),
     }
 }
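For context, a minimal Zig sketch (not part of the patch) of the kind of code this
lowering targets. The function name and test are hypothetical; with the 'simd128'
feature enabled, a splat of a 32-bit element is expected to lower to `i32x4.splat`
when the operand sits in a local, or to `v128.load32_splat` when it can be loaded
straight from linear memory, as handled by `airSplat` above.

const std = @import("std");

// Hypothetical example: splat a scalar into all four lanes of a vector.
// On a wasm32 target with the simd128 feature, airSplat selects either
// i32x4.splat (operand in a local) or v128.load32_splat (operand in memory).
fn splatAll(x: u32) @Vector(4, u32) {
    // Two-argument @splat form, as used by Zig at the time of this patch.
    return @splat(4, x);
}

test "splat fills every lane" {
    const v = splatAll(7);
    try std.testing.expectEqual(@as(u32, 7), v[0]);
    try std.testing.expectEqual(@as(u32, 7), v[3]);
}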