From 2957433b25373dccc336492f6817a1cefadb945c Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 1 Nov 2020 18:25:43 +0100 Subject: [PATCH 1/5] stage1: Fix comptime comparison of NaNs --- src/stage1/ir.cpp | 4 ++-- src/stage1/softfloat.hpp | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp index 98422ef578..c85c26f2c0 100644 --- a/src/stage1/ir.cpp +++ b/src/stage1/ir.cpp @@ -10953,13 +10953,13 @@ static bool float_is_nan(ZigValue *op) { } else if (op->type->id == ZigTypeIdFloat) { switch (op->type->data.floating.bit_count) { case 16: - return f16_isSignalingNaN(op->data.x_f16); + return zig_f16_isNaN(op->data.x_f16); case 32: return op->data.x_f32 != op->data.x_f32; case 64: return op->data.x_f64 != op->data.x_f64; case 128: - return f128M_isSignalingNaN(&op->data.x_f128); + return zig_f128_isNaN(&op->data.x_f128); default: zig_unreachable(); } diff --git a/src/stage1/softfloat.hpp b/src/stage1/softfloat.hpp index a1173690b5..57e60a5fc0 100644 --- a/src/stage1/softfloat.hpp +++ b/src/stage1/softfloat.hpp @@ -29,4 +29,17 @@ static inline double zig_f16_to_double(float16_t x) { return z; } +static inline bool zig_f16_isNaN(float16_t a) { + union { uint16_t ui; float16_t f; } uA; + uA.f = a; + return 0x7C00 < (uA.ui & 0x7FFF); +} + +static inline bool zig_f128_isNaN(float128_t *aPtr) { + uint64_t absA64 = aPtr->v[1] & UINT64_C(0x7FFFFFFFFFFFFFFF); + return + (UINT64_C(0x7FFF000000000000) < absA64) + || ((absA64 == UINT64_C(0x7FFF000000000000)) && aPtr->v[0]); +} + #endif From 6f3d6c1f45edea883bc206b8e60dae3b6b34cbbb Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 1 Nov 2020 18:35:19 +0100 Subject: [PATCH 2/5] std: Fix expectWithinEpsilon with negative values --- lib/std/testing.zig | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/lib/std/testing.zig b/lib/std/testing.zig index 5f2cb112bb..8ab4e802ab 100644 --- a/lib/std/testing.zig +++ 
b/lib/std/testing.zig @@ -4,6 +4,7 @@ // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. const std = @import("std.zig"); +const math = std.math; const print = std.debug.print; pub const FailingAllocator = @import("testing/failing_allocator.zig").FailingAllocator; @@ -198,11 +199,16 @@ pub fn expectWithinMargin(expected: anytype, actual: @TypeOf(expected), margin: } } -test "expectWithinMargin.f32" { - const x: f32 = 12.0; - const y: f32 = 12.06; +test "expectWithinMargin" { + inline for ([_]type{ f16, f32, f64, f128 }) |T| { + const pos_x: T = 12.0; + const pos_y: T = 12.06; + const neg_x: T = -12.0; + const neg_y: T = -12.06; - expectWithinMargin(x, y, 0.1); + expectWithinMargin(pos_x, pos_y, 0.1); + expectWithinMargin(neg_x, neg_y, 0.1); + } } /// This function is intended to be used only in tests. When the actual value is not @@ -212,7 +218,8 @@ test "expectWithinMargin.f32" { pub fn expectWithinEpsilon(expected: anytype, actual: @TypeOf(expected), epsilon: @TypeOf(expected)) void { std.debug.assert(epsilon >= 0.0 and epsilon <= 1.0); - const margin = epsilon * expected; + // Relative epsilon test. + const margin = math.max(math.fabs(expected), math.fabs(actual)) * epsilon; switch (@typeInfo(@TypeOf(actual))) { .Float, .ComptimeFloat, @@ -225,11 +232,16 @@ pub fn expectWithinEpsilon(expected: anytype, actual: @TypeOf(expected), epsilon } } -test "expectWithinEpsilon.f32" { - const x: f32 = 12.0; - const y: f32 = 13.2; +test "expectWithinEpsilon" { + inline for ([_]type{ f16, f32, f64, f128 }) |T| { + const pos_x: T = 12.0; + const pos_y: T = 13.2; + const neg_x: T = -12.0; + const neg_y: T = -13.2; - expectWithinEpsilon(x, y, 0.1); + expectWithinEpsilon(pos_x, pos_y, 0.1); + expectWithinEpsilon(neg_x, neg_y, 0.1); + } } /// This function is intended to be used only in tests. 
When the two slices are not From 0d6a7088dc82cfe686beb5ebfe540ba2b7935cd6 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 1 Nov 2020 19:51:42 +0100 Subject: [PATCH 3/5] stage1: Implement Add/Mul reduction operators --- lib/std/builtin.zig | 2 ++ src/stage1/all_types.hpp | 2 ++ src/stage1/codegen.cpp | 20 ++++++++++++++ src/stage1/ir.cpp | 40 ++++++++++++++++++++++++++- src/stage1/ir_print.cpp | 2 ++ src/zig_llvm.cpp | 16 +++++++++++ src/zig_llvm.h | 4 +++ test/stage1/behavior/vector.zig | 48 ++++++++++++++++++++++++++++++++- 8 files changed, 132 insertions(+), 2 deletions(-) diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 8543461f33..97555dc9a5 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -106,6 +106,8 @@ pub const ReduceOp = enum { Xor, Min, Max, + Add, + Mul, }; /// This data structure is used by the Zig language code generation and diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp index 8162d2a537..89c2002e33 100644 --- a/src/stage1/all_types.hpp +++ b/src/stage1/all_types.hpp @@ -2447,6 +2447,8 @@ enum ReduceOp { ReduceOp_xor, ReduceOp_min, ReduceOp_max, + ReduceOp_add, + ReduceOp_mul, }; // synchronized with the code in define_builtin_compile_vars diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index 8f74536665..c034a79cea 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -5460,6 +5460,8 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, IrExecutableGen *executable, Ir assert(value_type->id == ZigTypeIdVector); ZigType *scalar_type = value_type->data.vector.elem_type; + ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, &instruction->base)); + LLVMValueRef result_val; switch (instruction->op) { case ReduceOp_and: @@ -5490,6 +5492,24 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, IrExecutableGen *executable, Ir result_val = ZigLLVMBuildFPMaxReduce(g->builder, value); } else zig_unreachable(); } break; + case ReduceOp_add: { + if (scalar_type->id == ZigTypeIdInt) { + result_val 
= ZigLLVMBuildAddReduce(g->builder, value); + } else if (scalar_type->id == ZigTypeIdFloat) { + LLVMValueRef neutral_value = LLVMConstReal( + get_llvm_type(g, scalar_type), -0.0); + result_val = ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value); + } else zig_unreachable(); + } break; + case ReduceOp_mul: { + if (scalar_type->id == ZigTypeIdInt) { + result_val = ZigLLVMBuildMulReduce(g->builder, value); + } else if (scalar_type->id == ZigTypeIdFloat) { + LLVMValueRef neutral_value = LLVMConstReal( + get_llvm_type(g, scalar_type), 1.0); + result_val = ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value); + } else zig_unreachable(); + } break; default: zig_unreachable(); } diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp index c85c26f2c0..3fea4ed7f0 100644 --- a/src/stage1/ir.cpp +++ b/src/stage1/ir.cpp @@ -27046,7 +27046,8 @@ static ErrorMsg *ir_eval_reduce(IrAnalyze *ira, IrInst *source_instr, ReduceOp o return nullptr; } - if (op != ReduceOp_min && op != ReduceOp_max) { + // Evaluate and/or/xor. + if (op == ReduceOp_and || op == ReduceOp_or || op == ReduceOp_xor) { ZigValue *first_elem_val = &value->data.x_array.data.s_none.elements[0]; copy_const_val(ira->codegen, out_value, first_elem_val); @@ -27071,6 +27072,43 @@ static ErrorMsg *ir_eval_reduce(IrAnalyze *ira, IrInst *source_instr, ReduceOp o return nullptr; } + // Evaluate add/mul. + // Perform the reduction sequentially, starting from the neutral value.
+ if (op == ReduceOp_add || op == ReduceOp_mul) { + if (scalar_type->id == ZigTypeIdInt) { + if (op == ReduceOp_add) { + bigint_init_unsigned(&out_value->data.x_bigint, 0); + } else { + bigint_init_unsigned(&out_value->data.x_bigint, 1); + } + } else { + if (op == ReduceOp_add) { + float_init_f64(out_value, -0.0); + } else { + float_init_f64(out_value, 1.0); + } + } + + for (size_t i = 0; i < len; i++) { + ZigValue *elem_val = &value->data.x_array.data.s_none.elements[i]; + + IrBinOp bin_op; + switch (op) { + case ReduceOp_add: bin_op = IrBinOpAdd; break; + case ReduceOp_mul: bin_op = IrBinOpMult; break; + default: zig_unreachable(); + } + + ErrorMsg *msg = ir_eval_math_op_scalar(ira, source_instr, scalar_type, + out_value, bin_op, elem_val, out_value); + if (msg != nullptr) + return msg; + } + + return nullptr; + } + + // Evaluate min/max. ZigValue *candidate_elem_val = &value->data.x_array.data.s_none.elements[0]; ZigValue *dummy_cmp_value = ira->codegen->pass1_arena->create(); diff --git a/src/stage1/ir_print.cpp b/src/stage1/ir_print.cpp index b357efe3b8..6a90f5fe5c 100644 --- a/src/stage1/ir_print.cpp +++ b/src/stage1/ir_print.cpp @@ -1611,6 +1611,8 @@ static const char *reduce_op_str(ReduceOp op) { case ReduceOp_xor: return "Xor"; case ReduceOp_min: return "Min"; case ReduceOp_max: return "Max"; + case ReduceOp_add: return "Add"; + case ReduceOp_mul: return "Mul"; } zig_unreachable(); } diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index ac45932e0a..1a81cf33ef 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -1156,6 +1156,22 @@ LLVMValueRef ZigLLVMBuildFPMinReduce(LLVMBuilderRef B, LLVMValueRef Val) { return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Val))); } +LLVMValueRef ZigLLVMBuildAddReduce(LLVMBuilderRef B, LLVMValueRef Val) { + return wrap(unwrap(B)->CreateAddReduce(unwrap(Val))); +} + +LLVMValueRef ZigLLVMBuildMulReduce(LLVMBuilderRef B, LLVMValueRef Val) { + return wrap(unwrap(B)->CreateMulReduce(unwrap(Val))); +} + +LLVMValueRef 
ZigLLVMBuildFPAddReduce(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Val) { + return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc), unwrap(Val))); +} + +LLVMValueRef ZigLLVMBuildFPMulReduce(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Val) { + return wrap(unwrap(B)->CreateFMulReduce(unwrap(Acc), unwrap(Val))); +} + static_assert((Triple::ArchType)ZigLLVM_UnknownArch == Triple::UnknownArch, ""); static_assert((Triple::ArchType)ZigLLVM_arm == Triple::arm, ""); static_assert((Triple::ArchType)ZigLLVM_armeb == Triple::armeb, ""); diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 966f142e03..2500c162eb 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ -462,6 +462,10 @@ LLVMValueRef ZigLLVMBuildIntMaxReduce(LLVMBuilderRef B, LLVMValueRef Val, bool i LLVMValueRef ZigLLVMBuildIntMinReduce(LLVMBuilderRef B, LLVMValueRef Val, bool is_signed); LLVMValueRef ZigLLVMBuildFPMaxReduce(LLVMBuilderRef B, LLVMValueRef Val); LLVMValueRef ZigLLVMBuildFPMinReduce(LLVMBuilderRef B, LLVMValueRef Val); +LLVMValueRef ZigLLVMBuildAddReduce(LLVMBuilderRef B, LLVMValueRef Val); +LLVMValueRef ZigLLVMBuildMulReduce(LLVMBuilderRef B, LLVMValueRef Val); +LLVMValueRef ZigLLVMBuildFPAddReduce(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Val); +LLVMValueRef ZigLLVMBuildFPMulReduce(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Val); #define ZigLLVM_DIFlags_Zero 0U #define ZigLLVM_DIFlags_Private 1U diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig index aeb98f28fd..1c0f1b04c9 100644 --- a/test/stage1/behavior/vector.zig +++ b/test/stage1/behavior/vector.zig @@ -4,6 +4,7 @@ const mem = std.mem; const math = std.math; const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; +const expectWithinEpsilon = std.testing.expectWithinEpsilon; const Vector = std.meta.Vector; test "implicit cast vector to array - bool" { @@ -492,7 +493,17 @@ test "vector reduce operation" { const TX = @typeInfo(@TypeOf(x)).Array.child; var r = @reduce(op, 
@as(Vector(N, TX), x)); - expectEqual(expected, r); + switch (@typeInfo(TX)) { + .Int, .Bool => expectEqual(expected, r), + .Float => { + if (math.isNan(expected) != math.isNan(r)) { + std.debug.panic("unexpected NaN value!", .{}); + } else { + expectWithinEpsilon(expected, r, 0.0001); + } + }, + else => unreachable, + } } fn doTheTest() void { doTheTestReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false)); @@ -510,14 +521,49 @@ test "vector reduce operation" { doTheTestReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386)); doTheTestReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567)); + doTheTestReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106)); + doTheTestReduce(.Add, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 11106)); + doTheTestReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9)); doTheTestReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999)); + doTheTestReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891)); + doTheTestReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891)); + doTheTestReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0)); doTheTestReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9)); doTheTestReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0)); doTheTestReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9)); + + doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9)); + doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9)); + + doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7)); + doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7)); + + // Test the reduction on vectors containing NaNs. 
+ const f16_nan = math.nan(f16); + const f32_nan = math.nan(f32); + const f64_nan = math.nan(f64); + + doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + + doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + + doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + + doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan); + + doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan); + + doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); + doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan); } }; From 577b99450764f5271b232eda3589eae94c9eb147 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 1 Nov 2020 19:51:59 +0100 Subject: [PATCH 4/5] docs: Add @reduce documentation --- doc/langref.html.in | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/doc/langref.html.in b/doc/langref.html.in index 5e8ec10534..1a37ae5bcf 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8209,6 +8209,49 @@ test "vector @splat" {

{#see_also|Vectors|@shuffle#} {#header_close#} + + {#header_open|@reduce#} +
{#syntax#}@reduce(comptime op: std.builtin.ReduceOp, value: anytype) std.meta.Child(@TypeOf(value)){#endsyntax#}
+

+ Transforms a {#link|vector|Vectors#} into a scalar value by performing a + sequential horizontal reduction of its elements using the + specified operator {#syntax#}op{#endsyntax#}. +

+

+ Not every operator is available for every vector element type: +

    +
  • {#syntax#}.And{#endsyntax#}, {#syntax#}.Or{#endsyntax#}, + {#syntax#}.Xor{#endsyntax#} are available for + {#syntax#}bool{#endsyntax#} vectors,
  • +
  • {#syntax#}.Min{#endsyntax#}, {#syntax#}.Max{#endsyntax#}, + {#syntax#}.Add{#endsyntax#}, {#syntax#}.Mul{#endsyntax#} are + available for {#link|floating point|Floats#} vectors,
  • +
  • Every operator is available for {#link|integer|Integers#} vectors. +
+

+

+ Note that {#syntax#}.Add{#endsyntax#} and {#syntax#}.Mul{#endsyntax#} + reductions on integral types are wrapping; when applied on floating point + types the operation associativity is preserved, unless the float mode is + set to {#syntax#}Optimized{#endsyntax#}. +

+ {#code_begin|test#} +const std = @import("std"); +const expect = std.testing.expect; + +test "vector @reduce" { + const value: std.meta.Vector(4, i32) = [_]i32{ 1, -1, 1, -1 }; + const result = value > @splat(4, @as(i32, 0)); + // result is { true, false, true, false }; + comptime expect(@TypeOf(result) == std.meta.Vector(4, bool)); + const is_all_true = @reduce(.And, result); + comptime expect(@TypeOf(is_all_true) == bool); + expect(is_all_true == false); +} + {#code_end#} + {#see_also|Vectors|@setFloatMode#} + {#header_close#} + {#header_open|@src#}
{#syntax#}@src() std.builtin.SourceLocation{#endsyntax#}

From f4ed5d7d480db29d5b3142bacb9d0b98eee0fa2b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 1 Nov 2020 14:39:45 -0700 Subject: [PATCH 5/5] langref tidy html fixup --- doc/langref.html.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 1a37ae5bcf..1d79293a9c 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8219,6 +8219,7 @@ test "vector @splat" {

Not every operator is available for every vector element type: +

  • {#syntax#}.And{#endsyntax#}, {#syntax#}.Or{#endsyntax#}, {#syntax#}.Xor{#endsyntax#} are available for @@ -8228,7 +8229,6 @@ test "vector @splat" { available for {#link|floating point|Floats#} vectors,
  • Every operator is available for {#link|integer|Integers#} vectors.
-

Note that {#syntax#}.Add{#endsyntax#} and {#syntax#}.Mul{#endsyntax#} reductions on integral types are wrapping; when applied on floating point