Merge pull request #1176 from bnoordhuis/f16-std

improve std.math f16 support
2026-02-21 16:54:52 +00:00 · 2018-07-02 16:03:25 -04:00 · 2018-07-02 16:03:25 -04:00 · bd282d6cca
commit bd282d6cca
parent 22b7312460 30cfc0ab2c
14 changed files with 191 additions and 2 deletions
--- a/std/math/copysign.zig
+++ b/std/math/copysign.zig
@ -4,12 +4,22 @@ const assert = std.debug.assert;

 pub fn copysign(comptime T: type, x: T, y: T) T {
    return switch (T) {
+        f16 => copysign16(x, y), // f16 is routed to the u16 bit-level variant
        f32 => copysign32(x, y),
        f64 => copysign64(x, y),
        else => @compileError("copysign not implemented for " ++ @typeName(T)),
    };
 }

+fn copysign16(x: f16, y: f16) f16 { // f16 variant: all bits of x except bit 15, combined with bit 15 of y
+    const ux = @bitCast(u16, x);
+    const uy = @bitCast(u16, y);
+
+    const h1 = ux & (@maxValue(u16) / 2); // 0x7FFF mask: keep the low 15 bits of x, drop its sign bit
+    const h2 = uy & (u16(1) << 15); // keep only bit 15 (the sign bit) of y
+    return @bitCast(f16, h1 | h2);
+}
+
 fn copysign32(x: f32, y: f32) f32 {
    const ux = @bitCast(u32, x);
    const uy = @bitCast(u32, y);
@ -29,10 +39,18 @@ fn copysign64(x: f64, y: f64) f64 {
 }

 test "math.copysign" {
+    assert(copysign(f16, 1.0, 1.0) == copysign16(1.0, 1.0)); // generic entry point must agree with the f16 implementation
    assert(copysign(f32, 1.0, 1.0) == copysign32(1.0, 1.0));
    assert(copysign(f64, 1.0, 1.0) == copysign64(1.0, 1.0));
 }

+test "math.copysign16" { // covers all four sign combinations of (x, y)
+    assert(copysign16(5.0, 1.0) == 5.0);
+    assert(copysign16(5.0, -1.0) == -5.0); // magnitude from x, sign from y
+    assert(copysign16(-5.0, -1.0) == -5.0);
+    assert(copysign16(-5.0, 1.0) == 5.0);
+}
+
 test "math.copysign32" {
    assert(copysign32(5.0, 1.0) == 5.0);
    assert(copysign32(5.0, -1.0) == -5.0);
--- a/std/math/fabs.zig
+++ b/std/math/fabs.zig
@ -10,12 +10,19 @@ const assert = std.debug.assert;
 pub fn fabs(x: var) @typeOf(x) {
    const T = @typeOf(x);
    return switch (T) {
+        f16 => fabs16(x), // f16 is routed to the u16 bit-level variant
        f32 => fabs32(x),
        f64 => fabs64(x),
        else => @compileError("fabs not implemented for " ++ @typeName(T)),
    };
 }

+fn fabs16(x: f16) f16 { // f16 variant: returns x with bit 15 of its representation cleared
+    var u = @bitCast(u16, x);
+    u &= 0x7FFF; // clear the top (sign) bit, keep the remaining 15 bits untouched
+    return @bitCast(f16, u);
+}
+
 fn fabs32(x: f32) f32 {
    var u = @bitCast(u32, x);
    u &= 0x7FFFFFFF;
@ -29,10 +36,16 @@ fn fabs64(x: f64) f64 {
 }

 test "math.fabs" {
+    assert(fabs(f16(1.0)) == fabs16(1.0)); // generic entry point must agree with the f16 implementation
    assert(fabs(f32(1.0)) == fabs32(1.0));
    assert(fabs(f64(1.0)) == fabs64(1.0));
 }

+test "math.fabs16" { // a positive input is unchanged, a negative input loses its sign
+    assert(fabs16(1.0) == 1.0);
+    assert(fabs16(-1.0) == 1.0);
+}
+
 test "math.fabs32" {
    assert(fabs32(1.0) == 1.0);
    assert(fabs32(-1.0) == 1.0);
@ -43,6 +56,12 @@ test "math.fabs64" {
    assert(fabs64(-1.0) == 1.0);
 }

+test "math.fabs16.special" { // non-finite inputs, exercised through the generic fabs() entry point
+    assert(math.isPositiveInf(fabs(math.inf(f16))));
+    assert(math.isPositiveInf(fabs(-math.inf(f16)))); // -inf becomes +inf
+    assert(math.isNan(fabs(math.nan(f16)))); // NaN stays NaN
+}
+
 test "math.fabs32.special" {
    assert(math.isPositiveInf(fabs(math.inf(f32))));
    assert(math.isPositiveInf(fabs(-math.inf(f32))));
--- a/std/math/floor.zig
+++ b/std/math/floor.zig
@ -12,12 +12,47 @@ const math = std.math;
 pub fn floor(x: var) @typeOf(x) {
    const T = @typeOf(x);
    return switch (T) {
+        f16 => floor16(x), // f16 is routed to the u16 bit-level variant
        f32 => floor32(x),
        f64 => floor64(x),
        else => @compileError("floor not implemented for " ++ @typeName(T)),
    };
 }

+fn floor16(x: f16) f16 { // f16 variant of floor, computed on the raw u16 bit pattern
+    var u = @bitCast(u16, x);
+    const e = @intCast(i16, (u >> 10) & 31) - 15; // 5-bit exponent field (bits 10..14) minus 15
+    var m: u16 = undefined; // only assigned and read inside the e >= 0 branch below
+
+    // TODO: Shouldn't need this explicit check.
+    if (x == 0.0) { // also true for -0.0, which is returned unchanged
+        return x;
+    }
+
+    if (e >= 10) { // no mantissa bit is fractional any more; exponent field 31 (inf/NaN) also lands here
+        return x;
+    }
+
+    if (e >= 0) {
+        m = u16(1023) >> @intCast(u4, e); // mask of the mantissa bits that still hold a fractional part
+        if (u & m == 0) { // no fractional bits set: x is already integral
+            return x;
+        }
+        math.forceEval(x + 0x1.0p120); // NOTE(review): result is discarded, presumably evaluated only for a floating-point side effect; 2**120 is far above math.f16_max — confirm
+        if (u >> 15 != 0) { // sign bit set: grow the magnitude so the truncation below rounds toward -inf
+            u += m;
+        }
+        return @bitCast(f16, u & ~m); // clear the fractional mantissa bits
+    } else {
+        math.forceEval(x + 0x1.0p120); // NOTE(review): same discarded addition as in the e >= 0 branch — confirm intent
+        if (u >> 15 == 0) { // e < 0 and x != 0: sign bit clear yields 0.0, sign bit set yields -1.0
+            return 0.0;
+        } else {
+            return -1.0;
+        }
+    }
+}
+
 fn floor32(x: f32) f32 {
    var u = @bitCast(u32, x);
    const e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F;
@ -84,10 +119,17 @@ fn floor64(x: f64) f64 {
 }

 test "math.floor" {
+    assert(floor(f16(1.3)) == floor16(1.3)); // generic entry point must agree with the f16 implementation
    assert(floor(f32(1.3)) == floor32(1.3));
    assert(floor(f64(1.3)) == floor64(1.3));
 }

+test "math.floor16" { // one positive, one negative and one sub-unit fractional input
+    assert(floor16(1.3) == 1.0);
+    assert(floor16(-1.3) == -2.0); // rounds toward negative infinity, not toward zero
+    assert(floor16(0.2) == 0.0);
+}
+
 test "math.floor32" {
    assert(floor32(1.3) == 1.0);
    assert(floor32(-1.3) == -2.0);
@ -100,6 +142,14 @@ test "math.floor64" {
    assert(floor64(0.2) == 0.0);
 }

+test "math.floor16.special" { // zeros, infinities and NaN pass through floor16
+    assert(floor16(0.0) == 0.0);
+    assert(floor16(-0.0) == -0.0); // NOTE(review): == treats -0.0 and 0.0 as equal, so the sign of the result is not actually checked here
+    assert(math.isPositiveInf(floor16(math.inf(f16))));
+    assert(math.isNegativeInf(floor16(-math.inf(f16))));
+    assert(math.isNan(floor16(math.nan(f16))));
+}
+
 test "math.floor32.special" {
    assert(floor32(0.0) == 0.0);
    assert(floor32(-0.0) == -0.0);
--- a/std/math/index.zig
+++ b/std/math/index.zig
@ -19,6 +19,18 @@ pub const f32_max = 3.40282346638528859812e+38;
 pub const f32_epsilon = 1.1920928955078125e-07;
 pub const f32_toint = 1.0 / f32_epsilon;

+pub const f16_true_min = 0.000000059604644775390625; // 2**-24
+pub const f16_min = 0.00006103515625; // 2**-14
+pub const f16_max = 65504;
+pub const f16_epsilon = 0.0009765625; // 2**-10
+pub const f16_toint = 1.0 / f16_epsilon;
+
+pub const nan_u16 = u16(0x7C01);
+pub const nan_f16 = @bitCast(f16, nan_u16);
+
+pub const inf_u16 = u16(0x7C00);
+pub const inf_f16 = @bitCast(f16, inf_u16);
+
 pub const nan_u32 = u32(0x7F800001);
 pub const nan_f32 = @bitCast(f32, nan_u32);

@ -44,6 +56,11 @@ pub fn approxEq(comptime T: type, x: T, y: T, epsilon: T) bool {
 pub fn forceEval(value: var) void {
    const T = @typeOf(value);
    switch (T) {
+        f16 => {
+            var x: f16 = undefined;
+            const p = @ptrCast(*volatile f16, &x);
+            p.* = x;
+        },
        f32 => {
            var x: f32 = undefined;
            const p = @ptrCast(*volatile f32, &x);
--- a/std/math/inf.zig
+++ b/std/math/inf.zig
@ -1,9 +1,9 @@
 const std = @import("../index.zig");
 const math = std.math;
-const assert = std.debug.assert;

 pub fn inf(comptime T: type) T {
    return switch (T) {
+        f16 => @bitCast(f16, math.inf_u16),
        f32 => @bitCast(f32, math.inf_u32),
        f64 => @bitCast(f64, math.inf_u64),
        else => @compileError("inf not implemented for " ++ @typeName(T)),
--- a/std/math/isfinite.zig
+++ b/std/math/isfinite.zig
@ -5,6 +5,10 @@ const assert = std.debug.assert;
 pub fn isFinite(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            const bits = @bitCast(u16, x);
+            return bits & 0x7FFF < 0x7C00;
+        },
        f32 => {
            const bits = @bitCast(u32, x);
            return bits & 0x7FFFFFFF < 0x7F800000;
@ -20,10 +24,14 @@ pub fn isFinite(x: var) bool {
 }

 test "math.isFinite" {
+    assert(isFinite(f16(0.0)));
+    assert(isFinite(f16(-0.0)));
    assert(isFinite(f32(0.0)));
    assert(isFinite(f32(-0.0)));
    assert(isFinite(f64(0.0)));
    assert(isFinite(f64(-0.0)));
+    assert(!isFinite(math.inf(f16)));
+    assert(!isFinite(-math.inf(f16)));
    assert(!isFinite(math.inf(f32)));
    assert(!isFinite(-math.inf(f32)));
    assert(!isFinite(math.inf(f64)));
--- a/std/math/isinf.zig
+++ b/std/math/isinf.zig
@ -5,6 +5,10 @@ const assert = std.debug.assert;
 pub fn isInf(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            const bits = @bitCast(u16, x);
+            return bits & 0x7FFF == 0x7C00;
+        },
        f32 => {
            const bits = @bitCast(u32, x);
            return bits & 0x7FFFFFFF == 0x7F800000;
@ -22,6 +26,9 @@ pub fn isInf(x: var) bool {
 pub fn isPositiveInf(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            return @bitCast(u16, x) == 0x7C00;
+        },
        f32 => {
            return @bitCast(u32, x) == 0x7F800000;
        },
@ -37,6 +44,9 @@ pub fn isPositiveInf(x: var) bool {
 pub fn isNegativeInf(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            return @bitCast(u16, x) == 0xFC00;
+        },
        f32 => {
            return @bitCast(u32, x) == 0xFF800000;
        },
@ -50,10 +60,14 @@ pub fn isNegativeInf(x: var) bool {
 }

 test "math.isInf" {
+    assert(!isInf(f16(0.0)));
+    assert(!isInf(f16(-0.0)));
    assert(!isInf(f32(0.0)));
    assert(!isInf(f32(-0.0)));
    assert(!isInf(f64(0.0)));
    assert(!isInf(f64(-0.0)));
+    assert(isInf(math.inf(f16)));
+    assert(isInf(-math.inf(f16)));
    assert(isInf(math.inf(f32)));
    assert(isInf(-math.inf(f32)));
    assert(isInf(math.inf(f64)));
@ -61,10 +75,14 @@ test "math.isInf" {
 }

 test "math.isPositiveInf" {
+    assert(!isPositiveInf(f16(0.0)));
+    assert(!isPositiveInf(f16(-0.0)));
    assert(!isPositiveInf(f32(0.0)));
    assert(!isPositiveInf(f32(-0.0)));
    assert(!isPositiveInf(f64(0.0)));
    assert(!isPositiveInf(f64(-0.0)));
+    assert(isPositiveInf(math.inf(f16)));
+    assert(!isPositiveInf(-math.inf(f16)));
    assert(isPositiveInf(math.inf(f32)));
    assert(!isPositiveInf(-math.inf(f32)));
    assert(isPositiveInf(math.inf(f64)));
@ -72,10 +90,14 @@ test "math.isPositiveInf" {
 }

 test "math.isNegativeInf" {
+    assert(!isNegativeInf(f16(0.0)));
+    assert(!isNegativeInf(f16(-0.0)));
    assert(!isNegativeInf(f32(0.0)));
    assert(!isNegativeInf(f32(-0.0)));
    assert(!isNegativeInf(f64(0.0)));
    assert(!isNegativeInf(f64(-0.0)));
+    assert(!isNegativeInf(math.inf(f16)));
+    assert(isNegativeInf(-math.inf(f16)));
    assert(!isNegativeInf(math.inf(f32)));
    assert(isNegativeInf(-math.inf(f32)));
    assert(!isNegativeInf(math.inf(f64)));
--- a/std/math/isnan.zig
+++ b/std/math/isnan.zig
@ -5,6 +5,10 @@ const assert = std.debug.assert;
 pub fn isNan(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            const bits = @bitCast(u16, x);
+            return (bits & 0x7fff) > 0x7c00;
+        },
        f32 => {
            const bits = @bitCast(u32, x);
            return bits & 0x7FFFFFFF > 0x7F800000;
@ -26,8 +30,10 @@ pub fn isSignalNan(x: var) bool {
 }

 test "math.isNan" {
+    assert(isNan(math.nan(f16))); // math.nan(f16) bit-casts nan_u16 (0x7C01), which is above the 0x7c00 threshold isNan compares against
    assert(isNan(math.nan(f32)));
    assert(isNan(math.nan(f64)));
+    assert(!isNan(f16(1.0))); // an ordinary finite value must not be reported as NaN
    assert(!isNan(f32(1.0)));
    assert(!isNan(f64(1.0)));
 }
--- a/std/math/isnormal.zig
+++ b/std/math/isnormal.zig
@ -5,6 +5,10 @@ const assert = std.debug.assert;
 pub fn isNormal(x: var) bool {
    const T = @typeOf(x);
    switch (T) {
+        f16 => {
+            const bits = @bitCast(u16, x);
+            // Adding 1024 (one step of the exponent field) moves exponent 0
+            // (zero/subnormal) to 1 and carries exponent 31 (inf/NaN) out of the
+            // low 15 bits, so after masking off bit 15 only normal values are >= 2048.
+            // The add must wrap: for inputs with bits >= 0xFC00 (-inf, negative NaN)
+            // a plain `+` overflows u16 and traps in safety-checked builds.
+            return (bits +% 1024) & 0x7FFF >= 2048;
+        },
        f32 => {
            const bits = @bitCast(u32, x);
            return (bits + 0x00800000) & 0x7FFFFFFF >= 0x01000000;
@ -20,8 +24,13 @@ pub fn isNormal(x: var) bool {
 }

 test "math.isNormal" {
+    assert(!isNormal(math.nan(f16))); // only the positive NaN pattern is checked; no negative or infinite input is exercised in this test
    assert(!isNormal(math.nan(f32)));
    assert(!isNormal(math.nan(f64)));
+    assert(!isNormal(f16(0))); // zero is not a normal number
+    assert(!isNormal(f32(0)));
+    assert(!isNormal(f64(0)));
+    assert(isNormal(f16(1.0)));
    assert(isNormal(f32(1.0)));
    assert(isNormal(f64(1.0)));
 }
--- a/std/math/nan.zig
+++ b/std/math/nan.zig
@ -2,6 +2,7 @@ const math = @import("index.zig");

 pub fn nan(comptime T: type) T {
    return switch (T) {
+        f16 => @bitCast(f16, math.nan_u16),
        f32 => @bitCast(f32, math.nan_u32),
        f64 => @bitCast(f64, math.nan_u64),
        else => @compileError("nan not implemented for " ++ @typeName(T)),
@ -12,6 +13,7 @@ pub fn nan(comptime T: type) T {
 // representation in the future when required.
 pub fn snan(comptime T: type) T {
    return switch (T) {
+        f16 => @bitCast(f16, math.nan_u16),
        f32 => @bitCast(f32, math.nan_u32),
        f64 => @bitCast(f64, math.nan_u64),
        else => @compileError("snan not implemented for " ++ @typeName(T)),
--- a/std/math/signbit.zig
+++ b/std/math/signbit.zig
@ -5,12 +5,18 @@ const assert = std.debug.assert;
 pub fn signbit(x: var) bool {
    const T = @typeOf(x);
    return switch (T) {
+        f16 => signbit16(x), // f16 is routed to the u16 bit-level variant
        f32 => signbit32(x),
        f64 => signbit64(x),
        else => @compileError("signbit not implemented for " ++ @typeName(T)),
    };
 }

+fn signbit16(x: f16) bool { // true when bit 15 of x's u16 representation is set
+    const bits = @bitCast(u16, x);
+    return bits >> 15 != 0; // after the shift only the top bit remains
+}
+
 fn signbit32(x: f32) bool {
    const bits = @bitCast(u32, x);
    return bits >> 31 != 0;
@ -22,10 +28,16 @@ fn signbit64(x: f64) bool {
 }

 test "math.signbit" {
+    assert(signbit(f16(4.0)) == signbit16(4.0)); // generic entry point must agree with the f16 implementation
    assert(signbit(f32(4.0)) == signbit32(4.0));
    assert(signbit(f64(4.0)) == signbit64(4.0));
 }

+test "math.signbit16" { // one positive and one negative input
+    assert(!signbit16(4.0));
+    assert(signbit16(-3.0));
+}
+
 test "math.signbit32" {
    assert(!signbit32(4.0));
    assert(signbit32(-3.0));
--- a/std/math/sqrt.zig
+++ b/std/math/sqrt.zig
@ -31,10 +31,25 @@ pub fn sqrt(x: var) (if (@typeId(@typeOf(x)) == TypeId.Int) @IntType(false, @typ
 }

 test "math.sqrt" {
+    assert(sqrt(f16(0.0)) == @sqrt(f16, 0.0)); // the sqrt() wrapper must agree with the @sqrt builtin for f16
    assert(sqrt(f32(0.0)) == @sqrt(f32, 0.0));
    assert(sqrt(f64(0.0)) == @sqrt(f64, 0.0));
 }

+test "math.sqrt16" { // exercises the @sqrt builtin directly with f16 operands
+    const epsilon = 0.000001; // NOTE(review): far smaller than math.f16_epsilon (2**-10); presumably the approxEq checks only pass when both sides round to the same f16 value — confirm
+
+    assert(@sqrt(f16, 0.0) == 0.0);
+    assert(math.approxEq(f16, @sqrt(f16, 2.0), 1.414214, epsilon));
+    assert(math.approxEq(f16, @sqrt(f16, 3.6), 1.897367, epsilon));
+    assert(@sqrt(f16, 4.0) == 2.0); // perfect squares are compared exactly
+    assert(math.approxEq(f16, @sqrt(f16, 7.539840), 2.745877, epsilon));
+    assert(math.approxEq(f16, @sqrt(f16, 19.230934), 4.385309, epsilon));
+    assert(@sqrt(f16, 64.0) == 8.0);
+    assert(math.approxEq(f16, @sqrt(f16, 64.1), 8.006248, epsilon));
+    assert(math.approxEq(f16, @sqrt(f16, 8942.230469), 94.563370, epsilon));
+}
+
 test "math.sqrt32" {
    const epsilon = 0.000001;

@ -63,6 +78,14 @@ test "math.sqrt64" {
    assert(math.approxEq(f64, @sqrt(f64, 8942.230469), 94.563367, epsilon));
 }

+test "math.sqrt16.special" { // infinity, zeros, a negative input and NaN through the @sqrt builtin
+    assert(math.isPositiveInf(@sqrt(f16, math.inf(f16))));
+    assert(@sqrt(f16, 0.0) == 0.0);
+    assert(@sqrt(f16, -0.0) == -0.0); // NOTE(review): == treats -0.0 and 0.0 as equal, so the sign of the result is not actually checked here
+    assert(math.isNan(@sqrt(f16, -1.0))); // square root of a negative number is NaN
+    assert(math.isNan(@sqrt(f16, math.nan(f16))));
+}
+
 test "math.sqrt32.special" {
    assert(math.isPositiveInf(@sqrt(f32, math.inf(f32))));
    assert(@sqrt(f32, 0.0) == 0.0);
--- a/std/special/builtin.zig
+++ b/std/special/builtin.zig
@ -210,7 +210,9 @@ fn generic_fmod(comptime T: type, x: T, y: T) T {
 }

 fn isNan(comptime T: type, bits: T) bool {
-    if (T == u32) {
+    if (T == u16) {
+        return (bits & 0x7fff) > 0x7c00;
+    } else if (T == u32) {
        return (bits & 0x7fffffff) > 0x7f800000;
    } else if (T == u64) {
        return (bits & (@maxValue(u64) >> 1)) > (u64(0x7ff) << 52);
--- a/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/std/special/compiler_rt/extendXfYf2_test.zig
@ -88,6 +88,7 @@ test "extenddftf2" {
 test "extendhfsf2" {
    test__extendhfsf2(0x7e00, 0x7fc00000);  // qNaN
    test__extendhfsf2(0x7f00, 0x7fe00000);  // sNaN
+    test__extendhfsf2(0x7c01, 0x7f802000);  // sNaN

    test__extendhfsf2(0, 0);  // 0
    test__extendhfsf2(0x8000, 0x80000000);  // -0