stage2: implement @reduce

Notably, Value.eql and Value.hash are improved to treat NaN as equal to itself, so that Type/Value can be hash map keys. Likewise float hashing normalizes the float value before computing the hash.
2026-02-14 13:30:45 +00:00 · 2022-03-17 17:24:35 -07:00 · 2022-03-17 17:24:35 -07:00 · 7233a3324a
commit 7233a3324a
parent 76e103057e
14 changed files with 357 additions and 103 deletions
--- a/src/Air.zig
+++ b/src/Air.zig
@ -530,6 +530,14 @@ pub const Inst = struct {
        /// Given an integer operand, return the float with the closest mathematical meaning.
        /// Uses the `ty_op` field.
        int_to_float,
+
+        /// Transforms a vector into a scalar value by performing a sequential
+        /// horizontal reduction of its elements using the specified operator.
+        /// The vector element type (and hence result type) will be:
+        ///  * and, or, xor       => integer or boolean
+        ///  * min, max, add, mul => integer or float
+        /// Uses the `reduce` field.
+        reduce,
        /// Given an integer, bool, float, or pointer operand, return a vector with all elements
        /// equal to the scalar value.
        /// Uses the `ty_op` field.
@ -695,6 +703,10 @@ pub const Inst = struct {
            locality: u2,
            cache: std.builtin.PrefetchOptions.Cache,
        },
+        reduce: struct {
+            operand: Ref,
+            operation: std.builtin.ReduceOp,
+        },

        // Make sure we don't accidentally add a field to make this union
        // bigger than expected. Note that in Debug builds, Zig is allowed
@ -1027,6 +1039,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
            return ptr_ty.elemType();
        },

+        .reduce => return air.typeOf(datas[inst].reduce.operand).childType(),
+
        .mul_add => return air.typeOf(datas[inst].pl_op.operand),

        .add_with_overflow,
--- a/src/Liveness.zig
+++ b/src/Liveness.zig
@ -435,6 +435,10 @@ fn analyzeInst(
            const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data;
            return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none });
        },
+        .reduce => {
+            const reduce = inst_datas[inst].reduce;
+            return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none });
+        },
        .aggregate_init => {
            const ty_pl = inst_datas[inst].ty_pl;
            const aggregate_ty = a.air.getRefType(ty_pl.ty);
--- a/src/Sema.zig
+++ b/src/Sema.zig
@ -13973,17 +13973,27 @@ fn resolveExportOptions(
    };
 }

+/// Resolves `zir_ref` to a constant of the builtin enum type `std.builtin.<name>`.
+/// The operand is coerced to the type returned by `getBuiltinType` for `name`,
+/// resolved with `resolveConstValue`, and converted to the matching Zig enum
+/// via `toEnum`. Any compile error from those steps is propagated to the caller.
+fn resolveBuiltinEnum(
+    sema: *Sema,
+    block: *Block,
+    src: LazySrcLoc,
+    zir_ref: Zir.Inst.Ref,
+    comptime name: []const u8,
+) CompileError!@field(std.builtin, name) {
+    const ty = try sema.getBuiltinType(block, src, name);
+    const air_ref = sema.resolveInst(zir_ref);
+    const coerced = try sema.coerce(block, ty, air_ref, src);
+    const val = try sema.resolveConstValue(block, src, coerced);
+    return val.toEnum(@field(std.builtin, name));
+}
+
 fn resolveAtomicOrder(
    sema: *Sema,
    block: *Block,
    src: LazySrcLoc,
    zir_ref: Zir.Inst.Ref,
 ) CompileError!std.builtin.AtomicOrder {
-    const atomic_order_ty = try sema.getBuiltinType(block, src, "AtomicOrder");
-    const air_ref = sema.resolveInst(zir_ref);
-    const coerced = try sema.coerce(block, atomic_order_ty, air_ref, src);
-    const val = try sema.resolveConstValue(block, src, coerced);
-    return val.toEnum(std.builtin.AtomicOrder);
+    // Same coerce-and-resolve steps as the removed lines, now shared via resolveBuiltinEnum.
+    return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicOrder");
 }

 fn resolveAtomicRmwOp(
@ -13992,11 +14002,7 @@ fn resolveAtomicRmwOp(
    src: LazySrcLoc,
    zir_ref: Zir.Inst.Ref,
 ) CompileError!std.builtin.AtomicRmwOp {
-    const atomic_rmw_op_ty = try sema.getBuiltinType(block, src, "AtomicRmwOp");
-    const air_ref = sema.resolveInst(zir_ref);
-    const coerced = try sema.coerce(block, atomic_rmw_op_ty, air_ref, src);
-    const val = try sema.resolveConstValue(block, src, coerced);
-    return val.toEnum(std.builtin.AtomicRmwOp);
+    return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicRmwOp");
 }

 fn zirCmpxchg(
@ -14118,8 +14124,72 @@ fn zirSplat(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I

 fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
    const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
-    const src = inst_data.src();
-    return sema.fail(block, src, "TODO: Sema.zirReduce", .{});
+    // Semantic analysis for `@reduce(op, vector)`.
+    // The payload is a `Zir.Inst.Bin`: `lhs` is the reduce operation (builtin call
+    // argument 0) and `rhs` is the operand (builtin call argument 1).
+    const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data;
+    const op_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+    const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const operation = try sema.resolveBuiltinEnum(block, op_src, extra.lhs, "ReduceOp");
+    const operand = sema.resolveInst(extra.rhs);
+    const operand_ty = sema.typeOf(operand);

+    if (operand_ty.zigTypeTag() != .Vector) {
+        return sema.fail(block, operand_src, "expected vector, found {}", .{operand_ty});
+    }

+    // The result type of the reduction is the vector's element type.
+    const scalar_ty = operand_ty.childType();
+
+    // Type-check depending on operation.
+    switch (operation) {
+        .And, .Or, .Xor => switch (scalar_ty.zigTypeTag()) {
+            .Int, .Bool => {},
+            else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or boolean operand; found {}", .{
+                @tagName(operation), operand_ty,
+            }),
+        },
+        .Min, .Max, .Add, .Mul => switch (scalar_ty.zigTypeTag()) {
+            .Int, .Float => {},
+            else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or float operand; found {}", .{
+                @tagName(operation), operand_ty,
+            }),
+        },
+    }
+
+    const vec_len = operand_ty.vectorLen();
+    if (vec_len == 0) {
+        // TODO re-evaluate if we should introduce a "neutral value" for some operations,
+        // e.g. zero for add and one for mul.
+        return sema.fail(block, operand_src, "@reduce operation requires a vector with nonzero length", .{});
+    }
+
+    // Operand value is known during analysis: fold the elements left to right,
+    // starting from element 0, and return the result as a constant.
+    if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |operand_val| {
+        // An entirely undefined vector reduces to an undefined scalar.
+        if (operand_val.isUndef()) return sema.addConstUndef(scalar_ty);
+
+        const target = sema.mod.getTarget();
+        var accum: Value = try operand_val.elemValue(sema.arena, 0);
+        var elem_buf: Value.ElemValueBuffer = undefined;
+        var i: u32 = 1;
+        while (i < vec_len) : (i += 1) {
+            // NOTE(review): `elem_buf` is reused on every iteration, and
+            // `numberMin`/`numberMax` return one of their two arguments rather
+            // than a fresh value. If `elemValueBuffer` can return a value that
+            // refers into `elem_buf`, `accum` could alias the buffer after a
+            // Min/Max step — confirm against `Value.elemValueBuffer`.
+            const elem_val = operand_val.elemValueBuffer(i, &elem_buf);
+            switch (operation) {
+                .And => accum = try accum.bitwiseAnd(elem_val, sema.arena),
+                .Or => accum = try accum.bitwiseOr(elem_val, sema.arena),
+                .Xor => accum = try accum.bitwiseXor(elem_val, sema.arena),
+                .Min => accum = accum.numberMin(elem_val),
+                .Max => accum = accum.numberMax(elem_val),
+                .Add => accum = try accum.numberAddWrap(elem_val, scalar_ty, sema.arena, target),
+                .Mul => accum = try accum.numberMulWrap(elem_val, scalar_ty, sema.arena, target),
+            }
+        }
+        return sema.addConstant(scalar_ty, accum);
+    }
+
+    // Operand value is not known during analysis: emit an AIR `reduce`
+    // instruction carrying the operand and the operation.
+    try sema.requireRuntimeBlock(block, operand_src);
+    return block.addInst(.{
+        .tag = .reduce,
+        .data = .{ .reduce = .{
+            .operand = operand,
+            .operation = operation,
+        } },
+    });
 }

 fn zirShuffle(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@ -14425,8 +14495,8 @@ fn zirAtomicRmw(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
                .Nand => try stored_val.bitwiseNand  (operand_val, operand_ty, sema.arena, target),
                .Or   => try stored_val.bitwiseOr    (operand_val,             sema.arena),
                .Xor  => try stored_val.bitwiseXor   (operand_val,             sema.arena),
-                .Max  => try stored_val.numberMax    (operand_val),
-                .Min  => try stored_val.numberMin    (operand_val),
+                .Max  =>     stored_val.numberMax    (operand_val),
+                .Min  =>     stored_val.numberMin    (operand_val),
                // zig fmt: on
            };
            try sema.storePtrVal(block, src, ptr_val, new_val, operand_ty);
@ -14760,7 +14830,7 @@ fn analyzeMinMax(
            else => unreachable,
        };
        const vec_len = simd_op.len orelse {
-            const result_val = try opFunc(lhs_val, rhs_val);
+            const result_val = opFunc(lhs_val, rhs_val);
            return sema.addConstant(simd_op.result_ty, result_val);
        };
        var lhs_buf: Value.ElemValueBuffer = undefined;
@ -14769,7 +14839,7 @@ fn analyzeMinMax(
        for (elems) |*elem, i| {
            const lhs_elem_val = lhs_val.elemValueBuffer(i, &lhs_buf);
            const rhs_elem_val = rhs_val.elemValueBuffer(i, &rhs_buf);
-            elem.* = try opFunc(lhs_elem_val, rhs_elem_val);
+            elem.* = opFunc(lhs_elem_val, rhs_elem_val);
        }
        return sema.addConstant(
            simd_op.result_ty,
@ -19246,9 +19316,9 @@ fn cmpNumeric(
    const rhs_ty_tag = rhs_ty.zigTypeTag();

    if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) {
-        if (lhs_ty.arrayLen() != rhs_ty.arrayLen()) {
+        if (lhs_ty.vectorLen() != rhs_ty.vectorLen()) {
            return sema.fail(block, src, "vector length mismatch: {d} and {d}", .{
-                lhs_ty.arrayLen(), rhs_ty.arrayLen(),
+                lhs_ty.vectorLen(), rhs_ty.vectorLen(),
            });
        }
        return sema.fail(block, src, "TODO implement support for vectors in cmpNumeric", .{});
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@ -640,6 +640,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            .error_name      => try self.airErrorName(inst),
            .splat           => try self.airSplat(inst),
            .shuffle         => try self.airShuffle(inst),
+            .reduce          => try self.airReduce(inst),
            .aggregate_init  => try self.airAggregateInit(inst),
            .union_init      => try self.airUnionInit(inst),
            .prefetch        => try self.airPrefetch(inst),
@ -3727,6 +3728,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }

+/// Lowers the AIR `reduce` instruction. Not implemented for aarch64 yet:
+/// an unused result is marked `.dead`; otherwise codegen fails with a TODO error.
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+    const reduce = self.air.instructions.items(.data)[inst].reduce;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for aarch64", .{});
+    return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
    const vector_ty = self.air.typeOfIndex(inst);
    const len = vector_ty.vectorLen();
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@ -637,6 +637,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            .error_name      => try self.airErrorName(inst),
            .splat           => try self.airSplat(inst),
            .shuffle         => try self.airShuffle(inst),
+            .reduce          => try self.airReduce(inst),
            .aggregate_init  => try self.airAggregateInit(inst),
            .union_init      => try self.airUnionInit(inst),
            .prefetch        => try self.airPrefetch(inst),
@ -4204,6 +4205,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }

+/// Lowers the AIR `reduce` instruction. Not implemented for arm yet:
+/// an unused result is marked `.dead`; otherwise codegen fails with a TODO error.
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+    const reduce = self.air.instructions.items(.data)[inst].reduce;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for arm", .{});
+    return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
    const vector_ty = self.air.typeOfIndex(inst);
    const len = vector_ty.vectorLen();
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@ -604,6 +604,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            .error_name      => try self.airErrorName(inst),
            .splat           => try self.airSplat(inst),
            .shuffle         => try self.airShuffle(inst),
+            .reduce          => try self.airReduce(inst),
            .aggregate_init  => try self.airAggregateInit(inst),
            .union_init      => try self.airUnionInit(inst),
            .prefetch        => try self.airPrefetch(inst),
@ -2213,6 +2214,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }

+/// Lowers the AIR `reduce` instruction. Not implemented for riscv64 yet:
+/// an unused result is marked `.dead`; otherwise codegen fails with a TODO error.
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+    const reduce = self.air.instructions.items(.data)[inst].reduce;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for riscv64", .{});
+    return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
    const vector_ty = self.air.typeOfIndex(inst);
    const len = vector_ty.vectorLen();
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@ -1263,6 +1263,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
        .ret_load => self.airRetLoad(inst),
        .splat => self.airSplat(inst),
        .shuffle => self.airShuffle(inst),
+        .reduce => self.airReduce(inst),
        .aggregate_init => self.airAggregateInit(inst),
        .union_init => self.airUnionInit(inst),
        .prefetch => self.airPrefetch(inst),
@ -2988,7 +2989,6 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const operand = try self.resolveInst(ty_op.operand);

-    _ = ty_op;
    _ = operand;
    return self.fail("TODO: Implement wasm airSplat", .{});
 }
@ -2999,11 +2999,20 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const operand = try self.resolveInst(ty_op.operand);

-    _ = ty_op;
    _ = operand;
    return self.fail("TODO: Implement wasm airShuffle", .{});
 }

+/// Lowers the AIR `reduce` instruction. Not implemented for wasm yet:
+/// returns `.none` when the result is unused; otherwise resolves the operand
+/// and fails with a TODO error.
+fn airReduce(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
+    if (self.liveness.isUnused(inst)) return WValue{ .none = {} };

+    const reduce = self.air.instructions.items(.data)[inst].reduce;
+    const operand = try self.resolveInst(reduce.operand);
+
+    // `operand` is resolved but not consumed until the lowering is implemented.
+    _ = operand;
+    return self.fail("TODO: Implement wasm airReduce", .{});
+}
+
 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
    if (self.liveness.isUnused(inst)) return WValue{ .none = {} };

--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@ -721,6 +721,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            .error_name      => try self.airErrorName(inst),
            .splat           => try self.airSplat(inst),
            .shuffle         => try self.airShuffle(inst),
+            .reduce          => try self.airReduce(inst),
            .aggregate_init  => try self.airAggregateInit(inst),
            .union_init      => try self.airUnionInit(inst),
            .prefetch        => try self.airPrefetch(inst),
@ -5567,6 +5568,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }

+/// Lowers the AIR `reduce` instruction. Not implemented for x86_64 yet:
+/// an unused result is marked `.dead`; otherwise codegen fails with a TODO error.
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+    const reduce = self.air.instructions.items(.data)[inst].reduce;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for x86_64", .{});
+    return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
    const vector_ty = self.air.typeOfIndex(inst);
    const len = vector_ty.vectorLen();
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@ -1731,6 +1731,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
            .error_name       => try airErrorName(f, inst),
            .splat            => try airSplat(f, inst),
            .shuffle          => try airShuffle(f, inst),
+            .reduce           => try airReduce(f, inst),
            .aggregate_init   => try airAggregateInit(f, inst),
            .union_init       => try airUnionInit(f, inst),
            .prefetch         => try airPrefetch(f, inst),
@ -3625,6 +3626,21 @@ fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
    return f.fail("TODO: C backend: implement airShuffle", .{});
 }

+/// Lowers the AIR `reduce` instruction for the C backend. Not implemented yet:
+/// returns `CValue.none` when the result is unused; otherwise fails with a
+/// TODO error.
+fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue {
+    if (f.liveness.isUnused(inst)) return CValue.none;

+    const inst_ty = f.air.typeOfIndex(inst);
+    const reduce = f.air.instructions.items(.data)[inst].reduce;
+    const operand = try f.resolveInst(reduce.operand);
+    const writer = f.object.writer();
+    // NOTE(review): a local is allocated and " = " is written to the output
+    // before the TODO failure below; presumably scaffolding for the eventual
+    // implementation — confirm the partial output is harmless on failure.
+    const local = try f.allocLocal(inst_ty, .Const);
+    try writer.writeAll(" = ");

+    _ = operand;
+    _ = local;
+    return f.fail("TODO: C backend: implement airReduce", .{});
+}
+
 fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
    if (f.liveness.isUnused(inst)) return CValue.none;

--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@ -3426,6 +3426,7 @@ pub const FuncGen = struct {
                .error_name     => try self.airErrorName(inst),
                .splat          => try self.airSplat(inst),
                .shuffle        => try self.airShuffle(inst),
+                .reduce         => try self.airReduce(inst),
                .aggregate_init => try self.airAggregateInit(inst),
                .union_init     => try self.airUnionInit(inst),
                .prefetch       => try self.airPrefetch(inst),
@ -6281,6 +6282,50 @@ pub const FuncGen = struct {
        return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
    }

+    /// Lowers the AIR `reduce` instruction by calling the matching
+    /// `build*Reduce` builder wrapper for the operation and the scalar
+    /// (result) type. Returns null when the result is unused.
+    fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;

+        const reduce = self.air.instructions.items(.data)[inst].reduce;
+        const operand = try self.resolveInst(reduce.operand);
+        // The instruction's own type is the vector's element type.
+        const scalar_ty = self.air.typeOfIndex(inst);
+
+        // TODO handle the fast math setting
+
+        switch (reduce.operation) {
+            .And => return self.builder.buildAndReduce(operand),
+            .Or => return self.builder.buildOrReduce(operand),
+            .Xor => return self.builder.buildXorReduce(operand),
+            // Integer min/max need the signedness; floats use the FP variants.
+            .Min => switch (scalar_ty.zigTypeTag()) {
+                .Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()),
+                .Float => return self.builder.buildFPMinReduce(operand),
+                else => unreachable,
+            },
+            .Max => switch (scalar_ty.zigTypeTag()) {
+                .Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()),
+                .Float => return self.builder.buildFPMaxReduce(operand),
+                else => unreachable,
+            },
+            .Add => switch (scalar_ty.zigTypeTag()) {
+                .Int => return self.builder.buildAddReduce(operand),
+                .Float => {
+                    // The FP add reduction takes an accumulator argument; -0.0 is
+                    // used so that it does not change the sum (x + -0.0 == x).
+                    const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
+                    const neutral_value = scalar_llvm_ty.constReal(-0.0);
+                    return self.builder.buildFPAddReduce(neutral_value, operand);
+                },
+                else => unreachable,
+            },
+            .Mul => switch (scalar_ty.zigTypeTag()) {
+                .Int => return self.builder.buildMulReduce(operand),
+                .Float => {
+                    // The FP mul reduction takes an accumulator argument; 1.0 is
+                    // used so that it does not change the product.
+                    const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
+                    const neutral_value = scalar_llvm_ty.constReal(1.0);
+                    return self.builder.buildFPMulReduce(neutral_value, operand);
+                },
+                else => unreachable,
+            },
+        }
+    }
+
    fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
        if (self.liveness.isUnused(inst)) return null;

--- a/src/codegen/llvm/bindings.zig
+++ b/src/codegen/llvm/bindings.zig
@ -853,6 +853,39 @@ pub const Builder = opaque {

    pub const buildShuffleVector = LLVMBuildShuffleVector;
    extern fn LLVMBuildShuffleVector(*const Builder, V1: *const Value, V2: *const Value, Mask: *const Value, Name: [*:0]const u8) *const Value;
+
+    pub const buildAndReduce = ZigLLVMBuildAndReduce;
+    extern fn ZigLLVMBuildAndReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildOrReduce = ZigLLVMBuildOrReduce;
+    extern fn ZigLLVMBuildOrReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildXorReduce = ZigLLVMBuildXorReduce;
+    extern fn ZigLLVMBuildXorReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildIntMaxReduce = ZigLLVMBuildIntMaxReduce;
+    extern fn ZigLLVMBuildIntMaxReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value;
+
+    pub const buildIntMinReduce = ZigLLVMBuildIntMinReduce;
+    extern fn ZigLLVMBuildIntMinReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value;
+
+    pub const buildFPMaxReduce = ZigLLVMBuildFPMaxReduce;
+    extern fn ZigLLVMBuildFPMaxReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildFPMinReduce = ZigLLVMBuildFPMinReduce;
+    extern fn ZigLLVMBuildFPMinReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildAddReduce = ZigLLVMBuildAddReduce;
+    extern fn ZigLLVMBuildAddReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildMulReduce = ZigLLVMBuildMulReduce;
+    extern fn ZigLLVMBuildMulReduce(B: *const Builder, Val: *const Value) *const Value;
+
+    pub const buildFPAddReduce = ZigLLVMBuildFPAddReduce;
+    extern fn ZigLLVMBuildFPAddReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
+
+    pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce;
+    extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
 };

 pub const MDString = opaque {
--- a/src/print_air.zig
+++ b/src/print_air.zig
@ -265,6 +265,7 @@ const Writer = struct {
            .wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst),
            .mul_add => try w.writeMulAdd(s, inst),
            .shuffle => try w.writeShuffle(s, inst),
+            .reduce => try w.writeReduce(s, inst),

            .add_with_overflow,
            .sub_with_overflow,
@ -392,6 +393,13 @@ const Writer = struct {
        try s.print(", mask {d}, len {d}", .{ extra.mask, extra.mask_len });
    }

+    /// Prints the operand of a `reduce` instruction, followed by ", " and the
+    /// tag name of its reduce operation.
+    fn writeReduce(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+        const reduce = w.air.instructions.items(.data)[inst].reduce;

+        try w.writeOperand(s, inst, 0, reduce.operand);
+        try s.print(", {s}", .{@tagName(reduce.operation)});
+    }
+
    fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
        const atomic_order = w.air.instructions.items(.data)[inst].fence;

--- a/src/value.zig
+++ b/src/value.zig
@ -1841,6 +1841,8 @@ pub const Value = extern union {
        return orderAgainstZero(lhs).compare(op);
    }

+    /// This function is used by hash maps and so treats floating-point NaNs as equal
+    /// to each other, and not equal to other floating-point values.
    pub fn eql(a: Value, b: Value, ty: Type) bool {
        const a_tag = a.tag();
        const b_tag = b.tag();
@ -2006,10 +2008,20 @@ pub const Value = extern union {
                // end up here and the values are equal if the type has zero fields.
                return ty.structFieldCount() != 0;
            },
+            .Float => {
+                const a_nan = a.isNan();
+                const b_nan = b.isNan();
+                if (a_nan or b_nan) {
+                    return a_nan and b_nan;
+                }
+                return order(a, b).compare(.eq);
+            },
            else => return order(a, b).compare(.eq),
        }
    }

+    /// This function is used by hash maps and so treats floating-point NaNs as equal
+    /// to each other, and not equal to other floating-point values.
    pub fn hash(val: Value, ty: Type, hasher: *std.hash.Wyhash) void {
        const zig_ty_tag = ty.zigTypeTag();
        std.hash.autoHash(hasher, zig_ty_tag);
@ -2030,10 +2042,18 @@ pub const Value = extern union {
                return val.toType(&buf).hashWithHasher(hasher);
            },
            .Float, .ComptimeFloat => {
-                // TODO double check the lang spec. should we to bitwise hashing here,
-                // or a hash that normalizes the float value?
+                // Normalize the float here because this hash must match eql semantics.
+                // These functions are used for hash maps so we want NaN to equal itself,
+                // and -0.0 to equal +0.0.
                const float = val.toFloat(f128);
-                std.hash.autoHash(hasher, @bitCast(u128, float));
+                if (std.math.isNan(float)) {
+                    std.hash.autoHash(hasher, std.math.nan_u128);
+                } else if (float == 0.0) {
+                    var normalized_zero: f128 = 0.0;
+                    std.hash.autoHash(hasher, @bitCast(u128, normalized_zero));
+                } else {
+                    std.hash.autoHash(hasher, @bitCast(u128, float));
+                }
            },
            .Bool, .Int, .ComptimeInt, .Pointer => switch (val.tag()) {
                .slice => {
@ -2948,7 +2968,7 @@ pub const Value = extern union {
    }

    /// Supports both floats and ints; handles undefined.
-    pub fn numberMax(lhs: Value, rhs: Value) !Value {
+    pub fn numberMax(lhs: Value, rhs: Value) Value {
        if (lhs.isUndef() or rhs.isUndef()) return undef;
        if (lhs.isNan()) return rhs;
        if (rhs.isNan()) return lhs;
@ -2960,7 +2980,7 @@ pub const Value = extern union {
    }

    /// Supports both floats and ints; handles undefined.
-    pub fn numberMin(lhs: Value, rhs: Value) !Value {
+    pub fn numberMin(lhs: Value, rhs: Value) Value {
        if (lhs.isUndef() or rhs.isUndef()) return undef;
        if (lhs.isNan()) return rhs;
        if (rhs.isNan()) return lhs;
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@ -520,15 +520,20 @@ test "vector shift operators" {
 }

 test "vector reduce operation" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+
    const S = struct {
-        fn doTheTestReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void {
+        fn testReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void {
            const N = @typeInfo(@TypeOf(x)).Array.len;
            const TX = @typeInfo(@TypeOf(x)).Array.child;

-            var r = @reduce(op, @as(Vector(N, TX), x));
+            var r = @reduce(op, @as(@Vector(N, TX), x));
            switch (@typeInfo(TX)) {
-                .Int, .Bool => try expectEqual(expected, r),
+                .Int, .Bool => try expect(expected == r),
                .Float => {
                    const expected_nan = math.isNan(expected);
                    const got_nan = math.isNan(r);
@ -537,117 +542,119 @@ test "vector reduce operation" {
                        // Do this check explicitly as two NaN values are never
                        // equal.
                    } else {
-                        try expectApproxEqRel(expected, r, math.sqrt(math.epsilon(TX)));
+                        const F = @TypeOf(expected);
+                        const tolerance = @sqrt(math.epsilon(TX));
+                        try expect(std.math.approxEqRel(F, expected, r, tolerance));
                    }
                },
                else => unreachable,
            }
        }
        fn doTheTest() !void {
-            try doTheTestReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106));
-            try doTheTestReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106));
-            try doTheTestReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106));
-            try doTheTestReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106));
-            try doTheTestReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106));
-            try doTheTestReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106));
-            try doTheTestReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106));
-            try doTheTestReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106));
-            try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9));
-            try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9));
-            try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9));
+            try testReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106));
+            try testReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106));
+            try testReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106));
+            try testReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106));
+            try testReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106));
+            try testReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106));
+            try testReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106));
+            try testReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106));
+            try testReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9));
+            try testReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9));
+            try testReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9));

-            try doTheTestReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false));
-            try doTheTestReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0));
-            try doTheTestReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10));
-            try doTheTestReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010));
-            try doTheTestReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010));
+            try testReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false));
+            try testReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0));
+            try testReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10));
+            try testReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010));
+            try testReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010));

-            try doTheTestReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1));
-            try doTheTestReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1));
-            try doTheTestReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386));
-            try doTheTestReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9));
+            try testReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1));
+            try testReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1));
+            try testReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386));
+            try testReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9));

            // LLVM 11 ERROR: Cannot select type
            // https://github.com/ziglang/zig/issues/7138
            if (builtin.target.cpu.arch != .aarch64) {
-                try doTheTestReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386));
-                try doTheTestReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9));
+                try testReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386));
+                try testReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9));
            }

-            try doTheTestReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386));
-            try doTheTestReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9));
-            try doTheTestReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0));
-            try doTheTestReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0));
-            try doTheTestReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0));
+            try testReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386));
+            try testReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9));
+            try testReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0));
+            try testReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0));
+            try testReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0));

-            try doTheTestReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4));
-            try doTheTestReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4));
-            try doTheTestReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567));
-            try doTheTestReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999));
+            try testReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4));
+            try testReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4));
+            try testReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567));
+            try testReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999));

            // LLVM 11 ERROR: Cannot select type
            // https://github.com/ziglang/zig/issues/7138
            if (builtin.target.cpu.arch != .aarch64) {
-                try doTheTestReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567));
-                try doTheTestReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999));
+                try testReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567));
+                try testReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999));
            }

-            try doTheTestReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567));
-            try doTheTestReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999));
-            try doTheTestReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9));
-            try doTheTestReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9));
-            try doTheTestReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9));
+            try testReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567));
+            try testReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999));
+            try testReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9));
+            try testReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9));
+            try testReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9));

-            try doTheTestReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24));
-            try doTheTestReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24));
-            try doTheTestReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891));
-            try doTheTestReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24));
-            try doTheTestReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891));
-            try doTheTestReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891));
-            try doTheTestReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891));
-            try doTheTestReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891));
-            try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7));
-            try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7));
-            try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7));
+            try testReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24));
+            try testReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24));
+            try testReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891));
+            try testReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24));
+            try testReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891));
+            try testReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891));
+            try testReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891));
+            try testReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891));
+            try testReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7));
+            try testReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7));
+            try testReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7));

-            try doTheTestReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true));
-            try doTheTestReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1));
-            try doTheTestReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0));
-            try doTheTestReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0));
-            try doTheTestReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff));
-            try doTheTestReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff));
+            try testReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true));
+            try testReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1));
+            try testReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0));
+            try testReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0));
+            try testReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff));
+            try testReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff));

-            try doTheTestReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true));
-            try doTheTestReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1));
-            try doTheTestReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0));
-            try doTheTestReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0));
-            try doTheTestReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff));
-            try doTheTestReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff));
+            try testReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true));
+            try testReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1));
+            try testReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0));
+            try testReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0));
+            try testReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff));
+            try testReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff));

            // Test the reduction on vectors containing NaNs.
            const f16_nan = math.nan(f16);
            const f32_nan = math.nan(f32);
            const f64_nan = math.nan(f64);

-            try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
-            try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
-            try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+            try testReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+            try testReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+            try testReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);

            // LLVM 11 ERROR: Cannot select type
            // https://github.com/ziglang/zig/issues/7138
            if (false) {
-                try doTheTestReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
-                try doTheTestReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
-                try doTheTestReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+                try testReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+                try testReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+                try testReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);

-                try doTheTestReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
-                try doTheTestReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
-                try doTheTestReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+                try testReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+                try testReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+                try testReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
            }

-            try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
-            try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
-            try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+            try testReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+            try testReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+            try testReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
        }
    };