diff --git a/src/Air.zig b/src/Air.zig index 45cae07f6a..83a3bf1792 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -530,6 +530,14 @@ pub const Inst = struct { /// Given an integer operand, return the float with the closest mathematical meaning. /// Uses the `ty_op` field. int_to_float, + + /// Transforms a vector into a scalar value by performing a sequential + /// horizontal reduction of its elements using the specified operator. + /// The vector element type (and hence result type) will be: + /// * and, or, xor => integer or boolean + /// * min, max, add, mul => integer or float + /// Uses the `reduce` field. + reduce, /// Given an integer, bool, float, or pointer operand, return a vector with all elements /// equal to the scalar value. /// Uses the `ty_op` field. @@ -695,6 +703,10 @@ pub const Inst = struct { locality: u2, cache: std.builtin.PrefetchOptions.Cache, }, + reduce: struct { + operand: Ref, + operation: std.builtin.ReduceOp, + }, // Make sure we don't accidentally add a field to make this union // bigger than expected. Note that in Debug builds, Zig is allowed @@ -1027,6 +1039,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { return ptr_ty.elemType(); }, + .reduce => return air.typeOf(datas[inst].reduce.operand).childType(), + .mul_add => return air.typeOf(datas[inst].pl_op.operand), .add_with_overflow, diff --git a/src/Liveness.zig b/src/Liveness.zig index 45558c6811..5fd4bc2595 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -435,6 +435,10 @@ fn analyzeInst( const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none }); }, + .reduce => { + const reduce = inst_datas[inst].reduce; + return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none }); + }, .aggregate_init => { const ty_pl = inst_datas[inst].ty_pl; const aggregate_ty = a.air.getRefType(ty_pl.ty); diff --git a/src/Sema.zig b/src/Sema.zig index c6de8cf5dc..7805d7f095 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -13973,17 +13973,27 @@ fn resolveExportOptions( }; } +fn resolveBuiltinEnum( + sema: *Sema, + block: *Block, + src: LazySrcLoc, + zir_ref: Zir.Inst.Ref, + comptime name: []const u8, +) CompileError!@field(std.builtin, name) { + const ty = try sema.getBuiltinType(block, src, name); + const air_ref = sema.resolveInst(zir_ref); + const coerced = try sema.coerce(block, ty, air_ref, src); + const val = try sema.resolveConstValue(block, src, coerced); + return val.toEnum(@field(std.builtin, name)); +} + fn resolveAtomicOrder( sema: *Sema, block: *Block, src: LazySrcLoc, zir_ref: Zir.Inst.Ref, ) CompileError!std.builtin.AtomicOrder { - const atomic_order_ty = try sema.getBuiltinType(block, src, "AtomicOrder"); - const air_ref = sema.resolveInst(zir_ref); - const coerced = try sema.coerce(block, atomic_order_ty, air_ref, src); - const val = try sema.resolveConstValue(block, src, coerced); - return val.toEnum(std.builtin.AtomicOrder); + return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicOrder"); } fn resolveAtomicRmwOp( @@ -13992,11 +14002,7 @@ fn resolveAtomicRmwOp( src: LazySrcLoc, zir_ref: Zir.Inst.Ref, ) CompileError!std.builtin.AtomicRmwOp { - const atomic_rmw_op_ty = try sema.getBuiltinType(block, src, "AtomicRmwOp"); - const air_ref = sema.resolveInst(zir_ref); - const coerced = try sema.coerce(block, atomic_rmw_op_ty, air_ref, src); - const val = try sema.resolveConstValue(block, src, coerced); - return val.toEnum(std.builtin.AtomicRmwOp); + return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicRmwOp"); } fn zirCmpxchg( @@ -14118,8 +14124,72 @@ fn zirSplat(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { const inst_data = sema.code.instructions.items(.data)[inst].pl_node; - const src = inst_data.src(); - return sema.fail(block, src, "TODO: Sema.zirReduce", .{}); + const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data; + const op_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; + const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; + const operation = try sema.resolveBuiltinEnum(block, op_src, extra.lhs, "ReduceOp"); + const operand = sema.resolveInst(extra.rhs); + const operand_ty = sema.typeOf(operand); + + if (operand_ty.zigTypeTag() != .Vector) { + return sema.fail(block, operand_src, "expected vector, found {}", .{operand_ty}); + } + + const scalar_ty = operand_ty.childType(); + + // Type-check depending on operation. + switch (operation) { + .And, .Or, .Xor => switch (scalar_ty.zigTypeTag()) { + .Int, .Bool => {}, + else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or boolean operand; found {}", .{ + @tagName(operation), operand_ty, + }), + }, + .Min, .Max, .Add, .Mul => switch (scalar_ty.zigTypeTag()) { + .Int, .Float => {}, + else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or float operand; found {}", .{ + @tagName(operation), operand_ty, + }), + }, + } + + const vec_len = operand_ty.vectorLen(); + if (vec_len == 0) { + // TODO re-evaluate if we should introduce a "neutral value" for some operations, + // e.g. zero for add and one for mul. + return sema.fail(block, operand_src, "@reduce operation requires a vector with nonzero length", .{}); + } + + if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |operand_val| { + if (operand_val.isUndef()) return sema.addConstUndef(scalar_ty); + + const target = sema.mod.getTarget(); + var accum: Value = try operand_val.elemValue(sema.arena, 0); + var elem_buf: Value.ElemValueBuffer = undefined; + var i: u32 = 1; + while (i < vec_len) : (i += 1) { + const elem_val = operand_val.elemValueBuffer(i, &elem_buf); + switch (operation) { + .And => accum = try accum.bitwiseAnd(elem_val, sema.arena), + .Or => accum = try accum.bitwiseOr(elem_val, sema.arena), + .Xor => accum = try accum.bitwiseXor(elem_val, sema.arena), + .Min => accum = accum.numberMin(elem_val), + .Max => accum = accum.numberMax(elem_val), + .Add => accum = try accum.numberAddWrap(elem_val, scalar_ty, sema.arena, target), + .Mul => accum = try accum.numberMulWrap(elem_val, scalar_ty, sema.arena, target), + } + } + return sema.addConstant(scalar_ty, accum); + } + + try sema.requireRuntimeBlock(block, operand_src); + return block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = operand, + .operation = operation, + } }, + }); } fn zirShuffle(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -14425,8 +14495,8 @@ fn zirAtomicRmw(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A .Nand => try stored_val.bitwiseNand (operand_val, operand_ty, sema.arena, target), .Or => try stored_val.bitwiseOr (operand_val, sema.arena), .Xor => try stored_val.bitwiseXor (operand_val, sema.arena), - .Max => try stored_val.numberMax (operand_val), - .Min => try stored_val.numberMin (operand_val), + .Max => stored_val.numberMax (operand_val), + .Min => stored_val.numberMin (operand_val), // zig fmt: on }; try sema.storePtrVal(block, src, ptr_val, new_val, operand_ty); @@ -14760,7 +14830,7 @@ fn analyzeMinMax( else => unreachable, }; const vec_len = simd_op.len orelse { - const result_val = try opFunc(lhs_val, rhs_val); + const result_val = opFunc(lhs_val, rhs_val); return sema.addConstant(simd_op.result_ty, result_val); }; var lhs_buf: Value.ElemValueBuffer = undefined; @@ -14769,7 +14839,7 @@ fn analyzeMinMax( for (elems) |*elem, i| { const lhs_elem_val = lhs_val.elemValueBuffer(i, &lhs_buf); const rhs_elem_val = rhs_val.elemValueBuffer(i, &rhs_buf); - elem.* = try opFunc(lhs_elem_val, rhs_elem_val); + elem.* = opFunc(lhs_elem_val, rhs_elem_val); } return sema.addConstant( simd_op.result_ty, @@ -19246,9 +19316,9 @@ fn cmpNumeric( const rhs_ty_tag = rhs_ty.zigTypeTag(); if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { - if (lhs_ty.arrayLen() != rhs_ty.arrayLen()) { + if (lhs_ty.vectorLen() != rhs_ty.vectorLen()) { return sema.fail(block, src, "vector length mismatch: {d} and {d}", .{ - lhs_ty.arrayLen(), rhs_ty.arrayLen(), + lhs_ty.vectorLen(), rhs_ty.vectorLen(), }); } return sema.fail(block, src, "TODO implement support for vectors in cmpNumeric", .{}); diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index dddd4a27a1..739aaf2d45 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -640,6 +640,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -3727,6 +3728,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn airReduce(self: *Self, inst: Air.Inst.Index) !void { + const reduce = self.air.instructions.items(.data)[inst].reduce; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for aarch64", .{}); + return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); +} + fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const vector_ty = self.air.typeOfIndex(inst); const len = vector_ty.vectorLen(); diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 0a3413569a..f2283bd3df 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -637,6 +637,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -4204,6 +4205,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn airReduce(self: *Self, inst: Air.Inst.Index) !void { + const reduce = self.air.instructions.items(.data)[inst].reduce; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for arm", .{}); + return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); +} + fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const vector_ty = self.air.typeOfIndex(inst); const len = vector_ty.vectorLen(); diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 0b8cbc2d62..80bdcd216e 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -604,6 +604,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -2213,6 +2214,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn airReduce(self: *Self, inst: Air.Inst.Index) !void { + const reduce = self.air.instructions.items(.data)[inst].reduce; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for riscv64", .{}); + return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); +} + fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const vector_ty = self.air.typeOfIndex(inst); const len = vector_ty.vectorLen(); diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 33fb35163a..caddbbeaca 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1263,6 +1263,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .ret_load => self.airRetLoad(inst), .splat => self.airSplat(inst), .shuffle => self.airShuffle(inst), + .reduce => self.airReduce(inst), .aggregate_init => self.airAggregateInit(inst), .union_init => self.airUnionInit(inst), .prefetch => self.airPrefetch(inst), @@ -2988,7 +2989,6 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) InnerError!WValue { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); - _ = ty_op; _ = operand; return self.fail("TODO: Implement wasm airSplat", .{}); } @@ -2999,11 +2999,20 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) InnerError!WValue { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); - _ = ty_op; _ = operand; return self.fail("TODO: Implement wasm airShuffle", .{}); } +fn airReduce(self: *Self, inst: Air.Inst.Index) InnerError!WValue { + if (self.liveness.isUnused(inst)) return WValue{ .none = {} }; + + const reduce = self.air.instructions.items(.data)[inst].reduce; + const operand = try self.resolveInst(reduce.operand); + + _ = operand; + return self.fail("TODO: Implement wasm airReduce", .{}); +} + fn airAggregateInit(self: *Self, inst: Air.Inst.Index) InnerError!WValue { if (self.liveness.isUnused(inst)) return WValue{ .none = {} }; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 63c85a5a9e..df79ec990e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -721,6 +721,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -5567,6 +5568,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn airReduce(self: *Self, inst: Air.Inst.Index) !void { + const reduce = self.air.instructions.items(.data)[inst].reduce; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for x86_64", .{}); + return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); +} + fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const vector_ty = self.air.typeOfIndex(inst); const len = vector_ty.vectorLen(); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index c0c2031116..c306e0a6b0 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1731,6 +1731,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .error_name => try airErrorName(f, inst), .splat => try airSplat(f, inst), .shuffle => try airShuffle(f, inst), + .reduce => try airReduce(f, inst), .aggregate_init => try airAggregateInit(f, inst), .union_init => try airUnionInit(f, inst), .prefetch => try airPrefetch(f, inst), @@ -3625,6 +3626,21 @@ fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue { return f.fail("TODO: C backend: implement airShuffle", .{}); } +fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue { + if (f.liveness.isUnused(inst)) return CValue.none; + + const inst_ty = f.air.typeOfIndex(inst); + const reduce = f.air.instructions.items(.data)[inst].reduce; + const operand = try f.resolveInst(reduce.operand); + const writer = f.object.writer(); + const local = try f.allocLocal(inst_ty, .Const); + try writer.writeAll(" = "); + + _ = operand; + _ = local; + return f.fail("TODO: C backend: implement airReduce", .{}); +} + fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) return CValue.none; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 8fb2a171ef..481730452c 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3426,6 +3426,7 @@ pub const FuncGen = struct { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -6281,6 +6282,50 @@ pub const FuncGen = struct { return self.builder.buildShuffleVector(a, b, llvm_mask_value, ""); } + fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const reduce = self.air.instructions.items(.data)[inst].reduce; + const operand = try self.resolveInst(reduce.operand); + const scalar_ty = self.air.typeOfIndex(inst); + + // TODO handle the fast math setting + + switch (reduce.operation) { + .And => return self.builder.buildAndReduce(operand), + .Or => return self.builder.buildOrReduce(operand), + .Xor => return self.builder.buildXorReduce(operand), + .Min => switch (scalar_ty.zigTypeTag()) { + .Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()), + .Float => return self.builder.buildFPMinReduce(operand), + else => unreachable, + }, + .Max => switch (scalar_ty.zigTypeTag()) { + .Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()), + .Float => return self.builder.buildFPMaxReduce(operand), + else => unreachable, + }, + .Add => switch (scalar_ty.zigTypeTag()) { + .Int => return self.builder.buildAddReduce(operand), + .Float => { + const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); + const neutral_value = scalar_llvm_ty.constReal(-0.0); + return self.builder.buildFPAddReduce(neutral_value, operand); + }, + else => unreachable, + }, + .Mul => switch (scalar_ty.zigTypeTag()) { + .Int => return self.builder.buildMulReduce(operand), + .Float => { + const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); + const neutral_value = scalar_llvm_ty.constReal(1.0); + return self.builder.buildFPMulReduce(neutral_value, operand); + }, + else => unreachable, + }, + } + } + fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index a6c53fddce..b7a3ff7230 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -853,6 +853,39 @@ pub const Builder = opaque { pub const buildShuffleVector = LLVMBuildShuffleVector; extern fn LLVMBuildShuffleVector(*const Builder, V1: *const Value, V2: *const Value, Mask: *const Value, Name: [*:0]const u8) *const Value; + + pub const buildAndReduce = ZigLLVMBuildAndReduce; + extern fn ZigLLVMBuildAndReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildOrReduce = ZigLLVMBuildOrReduce; + extern fn ZigLLVMBuildOrReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildXorReduce = ZigLLVMBuildXorReduce; + extern fn ZigLLVMBuildXorReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildIntMaxReduce = ZigLLVMBuildIntMaxReduce; + extern fn ZigLLVMBuildIntMaxReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value; + + pub const buildIntMinReduce = ZigLLVMBuildIntMinReduce; + extern fn ZigLLVMBuildIntMinReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value; + + pub const buildFPMaxReduce = ZigLLVMBuildFPMaxReduce; + extern fn ZigLLVMBuildFPMaxReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildFPMinReduce = ZigLLVMBuildFPMinReduce; + extern fn ZigLLVMBuildFPMinReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildAddReduce = ZigLLVMBuildAddReduce; + extern fn ZigLLVMBuildAddReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildMulReduce = ZigLLVMBuildMulReduce; + extern fn ZigLLVMBuildMulReduce(B: *const Builder, Val: *const Value) *const Value; + + pub const buildFPAddReduce = ZigLLVMBuildFPAddReduce; + extern fn ZigLLVMBuildFPAddReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value; + + pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce; + extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value; }; pub const MDString = opaque { diff --git a/src/print_air.zig b/src/print_air.zig index 7442d276c3..0e7a02b498 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -265,6 +265,7 @@ const Writer = struct { .wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst), .mul_add => try w.writeMulAdd(s, inst), .shuffle => try w.writeShuffle(s, inst), + .reduce => try w.writeReduce(s, inst), .add_with_overflow, .sub_with_overflow, @@ -392,6 +393,13 @@ const Writer = struct { try s.print(", mask {d}, len {d}", .{ extra.mask, extra.mask_len }); } + fn writeReduce(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const reduce = w.air.instructions.items(.data)[inst].reduce; + + try w.writeOperand(s, inst, 0, reduce.operand); + try s.print(", {s}", .{@tagName(reduce.operation)}); + } + fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { const atomic_order = w.air.instructions.items(.data)[inst].fence; diff --git a/src/value.zig b/src/value.zig index 005f1f1ffd..b45c106c26 100644 --- a/src/value.zig +++ b/src/value.zig @@ -1841,6 +1841,8 @@ pub const Value = extern union { return orderAgainstZero(lhs).compare(op); } + /// This function is used by hash maps and so treats floating-point NaNs as equal + /// to each other, and not equal to other floating-point values. pub fn eql(a: Value, b: Value, ty: Type) bool { const a_tag = a.tag(); const b_tag = b.tag(); @@ -2006,10 +2008,20 @@ pub const Value = extern union { // end up here and the values are equal if the type has zero fields. return ty.structFieldCount() != 0; }, + .Float => { + const a_nan = a.isNan(); + const b_nan = b.isNan(); + if (a_nan or b_nan) { + return a_nan and b_nan; + } + return order(a, b).compare(.eq); + }, else => return order(a, b).compare(.eq), } } + /// This function is used by hash maps and so treats floating-point NaNs as equal + /// to each other, and not equal to other floating-point values. pub fn hash(val: Value, ty: Type, hasher: *std.hash.Wyhash) void { const zig_ty_tag = ty.zigTypeTag(); std.hash.autoHash(hasher, zig_ty_tag); @@ -2030,10 +2042,18 @@ pub const Value = extern union { return val.toType(&buf).hashWithHasher(hasher); }, .Float, .ComptimeFloat => { - // TODO double check the lang spec. should we to bitwise hashing here, - // or a hash that normalizes the float value? + // Normalize the float here because this hash must match eql semantics. + // These functions are used for hash maps so we want NaN to equal itself, + // and -0.0 to equal +0.0. const float = val.toFloat(f128); - std.hash.autoHash(hasher, @bitCast(u128, float)); + if (std.math.isNan(float)) { + std.hash.autoHash(hasher, std.math.nan_u128); + } else if (float == 0.0) { + var normalized_zero: f128 = 0.0; + std.hash.autoHash(hasher, @bitCast(u128, normalized_zero)); + } else { + std.hash.autoHash(hasher, @bitCast(u128, float)); + } }, .Bool, .Int, .ComptimeInt, .Pointer => switch (val.tag()) { .slice => { @@ -2948,7 +2968,7 @@ pub const Value = extern union { } /// Supports both floats and ints; handles undefined. - pub fn numberMax(lhs: Value, rhs: Value) !Value { + pub fn numberMax(lhs: Value, rhs: Value) Value { if (lhs.isUndef() or rhs.isUndef()) return undef; if (lhs.isNan()) return rhs; if (rhs.isNan()) return lhs; @@ -2960,7 +2980,7 @@ pub const Value = extern union { } /// Supports both floats and ints; handles undefined. - pub fn numberMin(lhs: Value, rhs: Value) !Value { + pub fn numberMin(lhs: Value, rhs: Value) Value { if (lhs.isUndef() or rhs.isUndef()) return undef; if (lhs.isNan()) return rhs; if (rhs.isNan()) return lhs; diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 788ef3b62a..7efd018a65 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -520,15 +520,20 @@ test "vector shift operators" { } test "vector reduce operation" { - if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + const S = struct { - fn doTheTestReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void { + fn testReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void { const N = @typeInfo(@TypeOf(x)).Array.len; const TX = @typeInfo(@TypeOf(x)).Array.child; - var r = @reduce(op, @as(Vector(N, TX), x)); + var r = @reduce(op, @as(@Vector(N, TX), x)); switch (@typeInfo(TX)) { - .Int, .Bool => try expectEqual(expected, r), + .Int, .Bool => try expect(expected == r), .Float => { const expected_nan = math.isNan(expected); const got_nan = math.isNan(r); @@ -537,117 +542,119 @@ test "vector reduce operation" { // Do this check explicitly as two NaN values are never // equal. } else { - try expectApproxEqRel(expected, r, math.sqrt(math.epsilon(TX))); + const F = @TypeOf(expected); + const tolerance = @sqrt(math.epsilon(TX)); + try expect(std.math.approxEqRel(F, expected, r, tolerance)); } }, else => unreachable, } } fn doTheTest() !void { - try doTheTestReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106)); - try doTheTestReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106)); - try doTheTestReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106)); - try doTheTestReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106)); - try doTheTestReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106)); - try doTheTestReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106)); - try doTheTestReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106)); - try doTheTestReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106)); - try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9)); - try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9)); - try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9)); + try testReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106)); + try testReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106)); + try testReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106)); + try testReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106)); + try testReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106)); + try testReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106)); + try testReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106)); + try testReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106)); + try testReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9)); + try testReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9)); + try testReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9)); - try doTheTestReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false)); - try doTheTestReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0)); - try doTheTestReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10)); - try doTheTestReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010)); - try doTheTestReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010)); + try testReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false)); + try testReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0)); + try testReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10)); + try testReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010)); + try testReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010)); - try doTheTestReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1)); - try doTheTestReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1)); - try doTheTestReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386)); - try doTheTestReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9)); + try testReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1)); + try testReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1)); + try testReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386)); + try testReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9)); // LLVM 11 ERROR: Cannot select type // https://github.com/ziglang/zig/issues/7138 if (builtin.target.cpu.arch != .aarch64) { - try doTheTestReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386)); - try doTheTestReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9)); + try testReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386)); + try testReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9)); } - try doTheTestReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386)); - try doTheTestReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9)); - try doTheTestReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0)); - try doTheTestReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0)); - try doTheTestReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0)); + try testReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386)); + try testReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9)); + try testReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0)); + try testReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0)); + try testReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0)); - try doTheTestReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4)); - try doTheTestReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4)); - try doTheTestReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567)); - try doTheTestReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999)); + try testReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4)); + try testReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4)); + try testReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567)); + try testReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999)); // LLVM 11 ERROR: Cannot select type // https://github.com/ziglang/zig/issues/7138 if (builtin.target.cpu.arch != .aarch64) { - try doTheTestReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567)); - try doTheTestReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999)); + try testReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567)); + try testReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999)); } - try doTheTestReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567)); - try doTheTestReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999)); - try doTheTestReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9)); - try doTheTestReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9)); - try doTheTestReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9)); + try testReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567)); + try testReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999)); + try testReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9)); + try testReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9)); + try testReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9)); - try doTheTestReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24)); - try doTheTestReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24)); - try doTheTestReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891)); - try doTheTestReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24)); - try doTheTestReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891)); - try doTheTestReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891)); - try doTheTestReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891)); - try doTheTestReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891)); - try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7)); - try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7)); - try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7)); + try testReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24)); + try testReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24)); + try testReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891)); + try testReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24)); + try testReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891)); + try testReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891)); + try testReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891)); + try testReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891)); + try testReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7)); + try testReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7)); + try testReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7)); - try doTheTestReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true)); - try doTheTestReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1)); - try doTheTestReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0)); - try doTheTestReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0)); - try doTheTestReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff)); - try doTheTestReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff)); + try testReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true)); + try testReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1)); + try testReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0)); + try testReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0)); + try testReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff)); + try testReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff)); - try doTheTestReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true)); - try doTheTestReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1)); - try doTheTestReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0)); - try doTheTestReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0)); - try doTheTestReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff)); - try doTheTestReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff)); + try testReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true)); + try testReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1)); + try testReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0)); + try testReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0)); + try testReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff)); + try testReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff)); // Test the reduction on vectors containing NaNs. const f16_nan = math.nan(f16); const f32_nan = math.nan(f32); const f64_nan = math.nan(f64); - try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); - try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); - try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + try testReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + try testReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + try testReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); // LLVM 11 ERROR: Cannot select type // https://github.com/ziglang/zig/issues/7138 if (false) { - try doTheTestReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); - try doTheTestReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); - try doTheTestReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + try testReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + try testReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + try testReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); - try doTheTestReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); - try doTheTestReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); - try doTheTestReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + try testReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + try testReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + try testReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); } - try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); - try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); - try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + try testReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + try testReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + try testReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); } };