From d5511b35a9a2db91643405ec255ced6dce208387 Mon Sep 17 00:00:00 2001
From: Auguste Rame <19855629+SuperAuguste@users.noreply.github.com>
Date: Fri, 7 Apr 2023 19:07:48 -0400
Subject: [PATCH 1/5] Make airShuffle work for unrolled

---
 src/arch/wasm/CodeGen.zig | 44 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index d4206bb294..83b38e08e8 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -4670,11 +4670,40 @@ fn airSelect(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 }
 
 fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
-    const ty_op = func.air.instructions.items(.data)[inst].ty_op;
-    const operand = try func.resolveInst(ty_op.operand);
+    const inst_ty = func.air.typeOfIndex(inst);
+    const ty_pl = func.air.instructions.items(.data)[inst].ty_pl;
+    const extra = func.air.extraData(Air.Shuffle, ty_pl.payload).data;
 
-    _ = operand;
-    return func.fail("TODO: Implement wasm airShuffle", .{});
+    const a = try func.resolveInst(extra.a);
+    const b = try func.resolveInst(extra.b);
+    const mask = func.air.values[extra.mask];
+    const mask_len = extra.mask_len;
+
+    const child_ty = inst_ty.childType();
+    const elem_size = child_ty.abiSize(func.target);
+
+    if (func.liveness.isUnused(inst)) {
+        return func.finishAir(inst, .none, &.{ extra.a, extra.b });
+    }
+
+    const module = func.bin_file.base.options.module.?;
+    const result = try func.allocStack(inst_ty);
+
+    for (0..mask_len) |index| {
+        var buf: Value.ElemValueBuffer = undefined;
+        const value = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target);
+
+        try func.emitWValue(result);
+
+        const loaded = if (value >= 0)
+            try func.load(a, child_ty, @intCast(u32, @intCast(i64, elem_size) * value))
+        else
+            try func.load(b, child_ty, @intCast(u32, @intCast(i64, elem_size) * ~value));
+
+        try func.store(.stack, loaded, child_ty, result.stack_offset.value + @intCast(u32, elem_size) * @intCast(u32, index));
+    }
+
+    return func.finishAir(inst, result, &.{ extra.a, extra.b });
 }
 
 fn airReduce(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
@@ -5125,7 +5154,12 @@ fn airMemcpy(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 }
 
 fn airRetAddr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
-    func.finishAir(inst, .{ .imm32 = 0 }, &.{});
+    // TODO: Implement this properly once stack serialization is solved
+    func.finishAir(inst, switch (func.arch()) {
+        .wasm32 => .{ .imm32 = 0 },
+        .wasm64 => .{ .imm64 = 0 },
+        else => unreachable,
+    }, &.{});
 }
 
 fn airPopcount(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {

From 1e310d335078c441c8b37bc220715aab938538e0 Mon Sep 17 00:00:00 2001
From: Auguste Rame <19855629+SuperAuguste@users.noreply.github.com>
Date: Fri, 7 Apr 2023 20:35:15 -0400
Subject: [PATCH 2/5] Finish shuffle, fix arrayElemVal for vectors

---
 src/arch/wasm/CodeGen.zig | 80 ++++++++++++++++++++++++++++++---------
 src/arch/wasm/Emit.zig    |  4 +-
 2 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 83b38e08e8..c5376792a7 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -4518,11 +4518,27 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const elem_ty = array_ty.childType();
     const elem_size = elem_ty.abiSize(func.target);
 
-    try func.lowerToStack(array);
-    try func.emitWValue(index);
-    try func.addImm32(@bitCast(i32, @intCast(u32, elem_size)));
-    try func.addTag(.i32_mul);
-    try func.addTag(.i32_add);
+    if (isByRef(array_ty, func.target)) {
+        try func.lowerToStack(array);
+        try func.emitWValue(index);
+        try func.addImm32(@bitCast(i32, @intCast(u32, elem_size)));
+        try func.addTag(.i32_mul);
+        try func.addTag(.i32_add);
+    } else {
+        std.debug.assert(array_ty.zigTypeTag() == .Vector);
+
+        // TODO: Check if index is constant; if so, use a lane extract
+
+        var stack_vec = try func.allocStack(array_ty);
+        try func.store(stack_vec, array, array_ty, 0);
+
+        // Is a non-unrolled vector (v128)
+        try func.lowerToStack(stack_vec);
+        try func.emitWValue(index);
+        try func.addImm32(@bitCast(i32, @intCast(u32, elem_size)));
+        try func.addTag(.i32_mul);
+        try func.addTag(.i32_add);
+    }
 
     const elem_result = val: {
         var result = try func.allocLocal(Type.usize);
@@ -4687,23 +4703,53 @@ fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     }
 
     const module = func.bin_file.base.options.module.?;
-    const result = try func.allocStack(inst_ty);
+    // TODO: One of them could be by ref; handle in loop
+    if (isByRef(func.air.typeOf(extra.a), func.target) or isByRef(inst_ty, func.target)) {
+        const result = try func.allocStack(inst_ty);
 
-    for (0..mask_len) |index| {
-        var buf: Value.ElemValueBuffer = undefined;
-        const value = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target);
+        for (0..mask_len) |index| {
+            var buf: Value.ElemValueBuffer = undefined;
+            const value = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target);
 
-        try func.emitWValue(result);
+            try func.emitWValue(result);
 
-        const loaded = if (value >= 0)
-            try func.load(a, child_ty, @intCast(u32, @intCast(i64, elem_size) * value))
-        else
-            try func.load(b, child_ty, @intCast(u32, @intCast(i64, elem_size) * ~value));
+            const loaded = if (value >= 0)
+                try func.load(a, child_ty, @intCast(u32, @intCast(i64, elem_size) * value))
+            else
+                try func.load(b, child_ty, @intCast(u32, @intCast(i64, elem_size) * ~value));
 
-        try func.store(.stack, loaded, child_ty, result.stack_offset.value + @intCast(u32, elem_size) * @intCast(u32, index));
+            try func.store(.stack, loaded, child_ty, result.stack_offset.value + @intCast(u32, elem_size) * @intCast(u32, index));
+        }
+
+        return func.finishAir(inst, result, &.{ extra.a, extra.b });
+    } else {
+        var operands = [_]u32{
+            std.wasm.simdOpcode(.i8x16_shuffle),
+        } ++ [1]u32{undefined} ** 4;
+
+        var lanes = std.mem.asBytes(operands[1..]);
+        for (0..mask_len) |index| {
+            var buf: Value.ElemValueBuffer = undefined;
+            const mask_elem = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target);
+            const base_index = if (mask_elem >= 0)
+                @intCast(u8, @intCast(i64, elem_size) * mask_elem)
+            else
+                16 + @intCast(u8, @intCast(i64, elem_size) * ~mask_elem);
+
+            for (0..elem_size) |byte_offset| {
+                lanes[index * elem_size + byte_offset] = base_index + @intCast(u8, byte_offset);
+            }
+        }
+
+        try func.emitWValue(a);
+        try func.emitWValue(b);
+
+        const extra_index = @intCast(u32, func.mir_extra.items.len);
+        try func.mir_extra.appendSlice(func.gpa, &operands);
+        try func.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
+
+        return func.finishAir(inst, try WValue.toLocal(.stack, func, inst_ty), &.{ extra.a, extra.b });
     }
-
-    return func.finishAir(inst, result, &.{ extra.a, extra.b });
 }
 
 fn airReduce(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig
index 5982d3b48c..3d05ff7987 100644
--- a/src/arch/wasm/Emit.zig
+++ b/src/arch/wasm/Emit.zig
@@ -486,7 +486,9 @@ fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
             const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index + 1).data;
             try encodeMemArg(mem_arg, writer);
         },
-        .v128_const => {
+        .v128_const,
+        .i8x16_shuffle,
+        => {
             const simd_value = emit.mir.extra[extra_index + 1 ..][0..4];
             try writer.writeAll(std.mem.asBytes(simd_value));
         },

From 8ba3ab948a2675fb1be88a2654555eb3dbc3df09 Mon Sep 17 00:00:00 2001
From: Auguste Rame <19855629+SuperAuguste@users.noreply.github.com>
Date: Fri, 7 Apr 2023 20:52:04 -0400
Subject: [PATCH 3/5] Handle compile time case for vector element access using lane access

---
 src/arch/wasm/CodeGen.zig | 39 ++++++++++++++++++++++++++++++---------
 src/arch/wasm/Emit.zig    | 17 +++++++++++++++++
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index c5376792a7..70cba5eb9f 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -4527,17 +4527,38 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     } else {
         std.debug.assert(array_ty.zigTypeTag() == .Vector);
 
-        // TODO: Check if index is constant; if so, use a lane extract
+        switch (index) {
+            inline .imm32, .imm64 => |lane| {
+                const opcode: wasm.SimdOpcode = switch (elem_ty.bitSize(func.target)) {
+                    8 => if (elem_ty.isSignedInt()) .i8x16_extract_lane_s else .i8x16_extract_lane_u,
+                    16 => if (elem_ty.isSignedInt()) .i16x8_extract_lane_s else .i16x8_extract_lane_u,
+                    32 => if (elem_ty.isInt()) .i32x4_extract_lane else .f32x4_extract_lane,
+                    64 => if (elem_ty.isInt()) .i64x2_extract_lane else .f64x2_extract_lane,
+                    else => unreachable,
+                };
 
-        var stack_vec = try func.allocStack(array_ty);
-        try func.store(stack_vec, array, array_ty, 0);
+                var operands = [_]u32{ std.wasm.simdOpcode(opcode), @intCast(u8, lane) };
 
-        // Is a non-unrolled vector (v128)
-        try func.lowerToStack(stack_vec);
-        try func.emitWValue(index);
-        try func.addImm32(@bitCast(i32, @intCast(u32, elem_size)));
-        try func.addTag(.i32_mul);
-        try func.addTag(.i32_add);
+                try func.emitWValue(array);
+
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                try func.mir_extra.appendSlice(func.gpa, &operands);
+                try func.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
+
+                return func.finishAir(inst, try WValue.toLocal(.stack, func, elem_ty), &.{ bin_op.lhs, bin_op.rhs });
+            },
+            else => {
+                var stack_vec = try func.allocStack(array_ty);
+                try func.store(stack_vec, array, array_ty, 0);
+
+                // Is a non-unrolled vector (v128)
+                try func.lowerToStack(stack_vec);
+                try func.emitWValue(index);
+                try func.addImm32(@bitCast(i32, @intCast(u32, elem_size)));
+                try func.addTag(.i32_mul);
+                try func.addTag(.i32_add);
+            },
+        }
     }
 
     const elem_result = val: {
diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig
index 3d05ff7987..1d039f7495 100644
--- a/src/arch/wasm/Emit.zig
+++ b/src/arch/wasm/Emit.zig
@@ -492,6 +492,23 @@ fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
             const simd_value = emit.mir.extra[extra_index + 1 ..][0..4];
             try writer.writeAll(std.mem.asBytes(simd_value));
         },
+        .i8x16_extract_lane_s,
+        .i8x16_extract_lane_u,
+        .i8x16_replace_lane,
+        .i16x8_extract_lane_s,
+        .i16x8_extract_lane_u,
+        .i16x8_replace_lane,
+        .i32x4_extract_lane,
+        .i32x4_replace_lane,
+        .i64x2_extract_lane,
+        .i64x2_replace_lane,
+        .f32x4_extract_lane,
+        .f32x4_replace_lane,
+        .f64x2_extract_lane,
+        .f64x2_replace_lane,
+        => {
+            try writer.writeByte(@intCast(u8, emit.mir.extra[extra_index + 1]));
+        },
         .i8x16_splat,
         .i16x8_splat,
         .i32x4_splat,

From 7225a15abe5a35af586985446a868340875fce6d Mon Sep 17 00:00:00 2001
From: Auguste Rame <19855629+SuperAuguste@users.noreply.github.com>
Date: Fri, 7 Apr 2023 20:55:04 -0400
Subject: [PATCH 4/5] Enable new tests

---
 test/behavior/shuffle.zig | 1 -
 test/behavior/vector.zig  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/test/behavior/shuffle.zig b/test/behavior/shuffle.zig
index 97223cc263..5ef48b5160 100644
--- a/test/behavior/shuffle.zig
+++ b/test/behavior/shuffle.zig
@@ -4,7 +4,6 @@ const mem = std.mem;
 const expect = std.testing.expect;
 
 test "@shuffle int" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 3b716692ef..70faceffee 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -807,7 +807,6 @@ test "vector @reduce comptime" {
 }
 
 test "mask parameter of @shuffle is comptime scope" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

From 09fda086186ca4d19ed516efe52de0f47fd0a095 Mon Sep 17 00:00:00 2001
From: Auguste Rame <19855629+SuperAuguste@users.noreply.github.com>
Date: Fri, 7 Apr 2023 23:04:24 -0400
Subject: [PATCH 5/5] Fix 32-bit compile errors

---
 src/arch/wasm/CodeGen.zig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 70cba5eb9f..8eb767fbb5 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -4749,7 +4749,7 @@ fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         } ++ [1]u32{undefined} ** 4;
 
         var lanes = std.mem.asBytes(operands[1..]);
-        for (0..mask_len) |index| {
+        for (0..@intCast(usize, mask_len)) |index| {
             var buf: Value.ElemValueBuffer = undefined;
             const mask_elem = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target);
             const base_index = if (mask_elem >= 0)
@@ -4757,8 +4757,8 @@ fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                 @intCast(u8, @intCast(i64, elem_size) * mask_elem)
             else
                 16 + @intCast(u8, @intCast(i64, elem_size) * ~mask_elem);
 
-            for (0..elem_size) |byte_offset| {
-                lanes[index * elem_size + byte_offset] = base_index + @intCast(u8, byte_offset);
+            for (0..@intCast(usize, elem_size)) |byte_offset| {
+                lanes[index * @intCast(usize, elem_size) + byte_offset] = base_index + @intCast(u8, byte_offset);
             }
         }
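
Note appended for review (not part of the patch series): the core of the SIMD path added in patch 2 is turning a Zig @shuffle mask into the 16 lane bytes of wasm's i8x16.shuffle immediate. A mask element m >= 0 selects element m of operand a; a negative element selects element ~m of operand b, whose bytes start at lane 16. The standalone sketch below mirrors that loop outside the compiler; the name shuffleLanes is illustrative only, and it uses the same two-argument @intCast style as the patches (current Zig would spell this @intCast(x) with an inferred result type).

const std = @import("std");

// Illustrative sketch: compute the 16 lane bytes that the i8x16.shuffle path
// in airShuffle emits. elem_size is the element size in bytes; a mask element
// m >= 0 picks element m of `a`, a negative element picks element ~m of `b`,
// whose bytes begin at lane 16, matching the base_index computation above.
fn shuffleLanes(elem_size: u8, mask: []const i32) [16]u8 {
    var lanes = [_]u8{0} ** 16;
    for (mask, 0..) |mask_elem, index| {
        const base_index = if (mask_elem >= 0)
            elem_size * @intCast(u8, mask_elem)
        else
            16 + elem_size * @intCast(u8, ~mask_elem);

        var byte_offset: u8 = 0;
        while (byte_offset < elem_size) : (byte_offset += 1) {
            lanes[index * elem_size + byte_offset] = base_index + byte_offset;
        }
    }
    return lanes;
}

test "lane bytes for a @Vector(4, u32) shuffle" {
    // Mask { 0, ~1, 2, ~3 } selects a[0], b[1], a[2], b[3]; with 4-byte
    // elements, the lanes of b start at byte 16.
    const lanes = shuffleLanes(4, &.{ 0, ~@as(i32, 1), 2, ~@as(i32, 3) });
    try std.testing.expectEqual([16]u8{
        0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31,
    }, lanes);
}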