From 612ad779bb2df8e7e15ab20dd3df3a2900ff82f0 Mon Sep 17 00:00:00 2001
From: viri <viridianmeow@gmail.com>
Date: Fri, 14 May 2021 14:15:53 -0600
Subject: [PATCH] std: rework math.scalbn (#8733)

* std: fix overflow in math.scalbn32

* std: rewrite math.scalbn to be generic

* std: support f128 in math.isNormal

* std: enable f128 tests in math.scalbn
---
 lib/std/math/isnormal.zig |  11 ++-
 lib/std/math/scalbn.zig   | 138 +++++++++++++++++++-------------------
 2 files changed, 78 insertions(+), 71 deletions(-)

diff --git a/lib/std/math/isnormal.zig b/lib/std/math/isnormal.zig
index b10a6e2286..f3942d121c 100644
--- a/lib/std/math/isnormal.zig
+++ b/lib/std/math/isnormal.zig
@@ -14,16 +14,20 @@ pub fn isNormal(x: anytype) bool {
     switch (T) {
         f16 => {
             const bits = @bitCast(u16, x);
-            return (bits + 1024) & 0x7FFF >= 2048;
+            return (bits + (1 << 10)) & (maxInt(u16) >> 1) >= (1 << 11);
         },
         f32 => {
             const bits = @bitCast(u32, x);
-            return (bits + 0x00800000) & 0x7FFFFFFF >= 0x01000000;
+            return (bits + (1 << 23)) & (maxInt(u32) >> 1) >= (1 << 24);
         },
         f64 => {
             const bits = @bitCast(u64, x);
             return (bits + (1 << 52)) & (maxInt(u64) >> 1) >= (1 << 53);
         },
+        f128 => {
+            const bits = @bitCast(u128, x);
+            return (bits + (1 << 112)) & (maxInt(u128) >> 1) >= (1 << 113);
+        },
         else => {
             @compileError("isNormal not implemented for " ++ @typeName(T));
         },
@@ -34,10 +38,13 @@ test "math.isNormal" {
     try expect(!isNormal(math.nan(f16)));
     try expect(!isNormal(math.nan(f32)));
     try expect(!isNormal(math.nan(f64)));
+    try expect(!isNormal(math.nan(f128)));
     try expect(!isNormal(@as(f16, 0)));
     try expect(!isNormal(@as(f32, 0)));
     try expect(!isNormal(@as(f64, 0)));
+    try expect(!isNormal(@as(f128, 0)));
     try expect(isNormal(@as(f16, 1.0)));
     try expect(isNormal(@as(f32, 1.0)));
     try expect(isNormal(@as(f64, 1.0)));
+    try expect(isNormal(@as(f128, 1.0)));
 }
diff --git a/lib/std/math/scalbn.zig b/lib/std/math/scalbn.zig
index 49aea08931..bdf382a303 100644
--- a/lib/std/math/scalbn.zig
+++ b/lib/std/math/scalbn.zig
@@ -9,89 +9,89 @@
 // https://git.musl-libc.org/cgit/musl/tree/src/math/scalbnf.c
 // https://git.musl-libc.org/cgit/musl/tree/src/math/scalbn.c
 
-const std = @import("../std.zig");
+const std = @import("std");
 const math = std.math;
+const assert = std.debug.assert;
 const expect = std.testing.expect;
 
 /// Returns x * 2^n.
 pub fn scalbn(x: anytype, n: i32) @TypeOf(x) {
-    const T = @TypeOf(x);
-    return switch (T) {
-        f32 => scalbn32(x, n),
-        f64 => scalbn64(x, n),
-        else => @compileError("scalbn not implemented for " ++ @typeName(T)),
-    };
-}
+    var base = x;
+    var shift = n;
 
-fn scalbn32(x: f32, n_: i32) f32 {
-    var y = x;
-    var n = n_;
+    const T = @TypeOf(base);
+    const IntT = std.meta.Int(.unsigned, @bitSizeOf(T));
+    if (@typeInfo(T) != .Float) {
+        @compileError("scalbn not implemented for " ++ @typeName(T));
+    }
 
-    if (n > 127) {
-        y *= 0x1.0p127;
-        n -= 127;
-        if (n > 1023) {
-            y *= 0x1.0p127;
-            n -= 127;
-            if (n > 127) {
-                n = 127;
-            }
+    const mantissa_bits = math.floatMantissaBits(T);
+    const exponent_bits = math.floatExponentBits(T);
+    const exponent_bias = (1 << (exponent_bits - 1)) - 1;
+    const exponent_min = 1 - exponent_bias;
+    const exponent_max = exponent_bias;
+
+    // fix double rounding errors in subnormal ranges
+    // https://git.musl-libc.org/cgit/musl/commit/src/math/scalbn.c?id=8c44a060243f04283ca68dad199aab90336141db
+    const scale_min_expo = exponent_min + mantissa_bits + 1;
+    const scale_min = @bitCast(T, @as(IntT, scale_min_expo + exponent_bias) << mantissa_bits);
+    const scale_max = @bitCast(T, @intCast(IntT, exponent_max + exponent_bias) << mantissa_bits);
+
+    // scale `shift` within floating point limits, if possible
+    // second pass is possible due to subnormal range
+    // third pass always results in +/-0.0 or +/-inf
+    if (shift > exponent_max) {
+        base *= scale_max;
+        shift -= exponent_max;
+        if (shift > exponent_max) {
+            base *= scale_max;
+            shift -= exponent_max;
+            if (shift > exponent_max) shift = exponent_max;
         }
-    } else if (n < -126) {
-        y *= 0x1.0p-126 * 0x1.0p24;
-        n += 126 - 24;
-        if (n < -126) {
-            y *= 0x1.0p-126 * 0x1.0p24;
-            n += 126 - 24;
-            if (n < -126) {
-                n = -126;
-            }
+    } else if (shift < exponent_min) {
+        base *= scale_min;
+        shift -= scale_min_expo;
+        if (shift < exponent_min) {
+            base *= scale_min;
+            shift -= scale_min_expo;
+            if (shift < exponent_min) shift = exponent_min;
         }
     }
 
-    const u = @intCast(u32, n +% 0x7F) << 23;
-    return y * @bitCast(f32, u);
-}
-
-fn scalbn64(x: f64, n_: i32) f64 {
-    var y = x;
-    var n = n_;
-
-    if (n > 1023) {
-        y *= 0x1.0p1023;
-        n -= 1023;
-        if (n > 1023) {
-            y *= 0x1.0p1023;
-            n -= 1023;
-            if (n > 1023) {
-                n = 1023;
-            }
-        }
-    } else if (n < -1022) {
-        y *= 0x1.0p-1022 * 0x1.0p53;
-        n += 1022 - 53;
-        if (n < -1022) {
-            y *= 0x1.0p-1022 * 0x1.0p53;
-            n += 1022 - 53;
-            if (n < -1022) {
-                n = -1022;
-            }
-        }
-    }
-
-    const u = @intCast(u64, n +% 0x3FF) << 52;
-    return y * @bitCast(f64, u);
+    return base * @bitCast(T, @intCast(IntT, shift + exponent_bias) << mantissa_bits);
 }
 
 test "math.scalbn" {
-    try expect(scalbn(@as(f32, 1.5), 4) == scalbn32(1.5, 4));
-    try expect(scalbn(@as(f64, 1.5), 4) == scalbn64(1.5, 4));
-}
+    // basic usage
+    try expect(scalbn(@as(f16, 1.5), 4) == 24.0);
+    try expect(scalbn(@as(f32, 1.5), 4) == 24.0);
+    try expect(scalbn(@as(f64, 1.5), 4) == 24.0);
+    try expect(scalbn(@as(f128, 1.5), 4) == 24.0);
 
-test "math.scalbn32" {
-    try expect(scalbn32(1.5, 4) == 24.0);
-}
+    // subnormals
+    try expect(math.isNormal(scalbn(@as(f16, 1.0), -14)));
+    try expect(!math.isNormal(scalbn(@as(f16, 1.0), -15)));
+    try expect(math.isNormal(scalbn(@as(f32, 1.0), -126)));
+    try expect(!math.isNormal(scalbn(@as(f32, 1.0), -127)));
+    try expect(math.isNormal(scalbn(@as(f64, 1.0), -1022)));
+    try expect(!math.isNormal(scalbn(@as(f64, 1.0), -1023)));
+    try expect(math.isNormal(scalbn(@as(f128, 1.0), -16382)));
+    try expect(!math.isNormal(scalbn(@as(f128, 1.0), -16383)));
+    // unreliable due to lack of native f16 support, see talk on PR #8733
+    // try expect(scalbn(@as(f16, 0x1.1FFp-1), -14 - 9) == math.f16_true_min);
+    try expect(scalbn(@as(f32, 0x1.3FFFFFp-1), -126 - 22) == math.f32_true_min);
+    try expect(scalbn(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.f64_true_min);
+    try expect(scalbn(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.f128_true_min);
 
-test "math.scalbn64" {
-    try expect(scalbn64(1.5, 4) == 24.0);
+    // float limits
+    try expect(scalbn(@as(f32, math.f32_max), -128 - 149) > 0.0);
+    try expect(scalbn(@as(f32, math.f32_max), -128 - 149 - 1) == 0.0);
+    try expect(!math.isPositiveInf(scalbn(@as(f16, math.f16_true_min), 15 + 24)));
+    try expect(math.isPositiveInf(scalbn(@as(f16, math.f16_true_min), 15 + 24 + 1)));
+    try expect(!math.isPositiveInf(scalbn(@as(f32, math.f32_true_min), 127 + 149)));
+    try expect(math.isPositiveInf(scalbn(@as(f32, math.f32_true_min), 127 + 149 + 1)));
+    try expect(!math.isPositiveInf(scalbn(@as(f64, math.f64_true_min), 1023 + 1074)));
+    try expect(math.isPositiveInf(scalbn(@as(f64, math.f64_true_min), 1023 + 1074 + 1)));
+    try expect(!math.isPositiveInf(scalbn(@as(f128, math.f128_true_min), 16383 + 16494)));
+    try expect(math.isPositiveInf(scalbn(@as(f128, math.f128_true_min), 16383 + 16494 + 1)));
 }