Add floatFractionalBits to replace floatMantissaDigits

2026-02-21 16:54:52 +00:00 · 2022-04-12 12:23:18 -07:00 · 2022-04-12 12:23:18 -07:00 · 319555a669
commit 319555a669
parent 319b5cbce5
5 changed files with 24 additions and 24 deletions
--- a/lib/std/math.zig
+++ b/lib/std/math.zig
@@ -38,7 +38,7 @@ pub const sqrt1_2 = 0.707106781186547524400844362104849039;

 pub const floatExponentBits = @import("math/float.zig").floatExponentBits;
 pub const floatMantissaBits = @import("math/float.zig").floatMantissaBits;
-pub const floatMantissaDigits = @import("math/float.zig").floatMantissaDigits;
+pub const floatFractionalBits = @import("math/float.zig").floatFractionalBits;
 pub const floatExponentMin = @import("math/float.zig").floatExponentMin;
 pub const floatExponentMax = @import("math/float.zig").floatExponentMax;
 pub const floatTrueMin = @import("math/float.zig").floatTrueMin;
--- a/lib/std/math/float.zig
+++ b/lib/std/math/float.zig
@@ -4,7 +4,7 @@ const expect = std.testing.expect;

 /// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
 fn mantissaOne(comptime T: type) comptime_int {
-    return if (floatMantissaDigits(T) == 64) 1 << 63 else 0;
+    return if (T == f80) 1 << floatFractionalBits(T) else 0;
 }

 /// Creates floating point type T from an unbiased exponent and raw mantissa.
@@ -42,19 +42,19 @@ pub fn floatMantissaBits(comptime T: type) comptime_int {
    };
 }

-/// Returns the number of binary digits in the mantissa of floating point type T.
-pub fn floatMantissaDigits(comptime T: type) comptime_int {
+/// Returns the number of fractional bits in the mantissa of floating point type T.
+pub fn floatFractionalBits(comptime T: type) comptime_int {
    assert(@typeInfo(T) == .Float);

    // standard IEEE floats have an implicit 0.m or 1.m integer part
    // f80 is special and has an explicitly stored bit in the MSB
-    // this function corresponds to `MANT_DIG' constants from C
+    // this function corresponds to `MANT_DIG - 1' from C
    return switch (@typeInfo(T).Float.bits) {
-        16 => 11,
-        32 => 24,
-        64 => 53,
-        80 => 64,
-        128 => 113,
+        16 => 10,
+        32 => 23,
+        64 => 52,
+        80 => 63,
+        128 => 112,
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
 }
@@ -89,7 +89,7 @@ pub fn floatMax(comptime T: type) T {

 /// Returns the machine epsilon of floating point type T.
 pub fn floatEps(comptime T: type) T {
-    return reconstructFloat(T, -(floatMantissaDigits(T) - 1), mantissaOne(T));
+    return reconstructFloat(T, -floatFractionalBits(T), mantissaOne(T));
 }

 /// Returns the value inf for floating point type T.
@@ -104,7 +104,7 @@ test "std.math.float" {
        try expect(@bitSizeOf(T) == size);

        // for machine epsilon, assert expmin <= -prec <= expmax
-        try expect(floatExponentMin(T) <= -(floatMantissaDigits(T) - 1));
-        try expect(-(floatMantissaDigits(T) - 1) <= floatExponentMax(T));
+        try expect(floatExponentMin(T) <= -floatFractionalBits(T));
+        try expect(-floatFractionalBits(T) <= floatExponentMax(T));
    }
 }
--- a/lib/std/math/isnormal.zig
+++ b/lib/std/math/isnormal.zig
@@ -41,7 +41,7 @@ test "math.isNormal" {
        try expect(!isNormal(@as(T, math.floatTrueMin(T))));

        // largest subnormal
-        try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatMantissaDigits(T) - 1))));
+        try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatFractionalBits(T)))));

        // non-finite numbers
        try expect(!isNormal(-math.inf(T)));
--- a/lib/std/special/compiler_rt/fixXfYi.zig
+++ b/lib/std/special/compiler_rt/fixXfYi.zig
@@ -12,7 +12,7 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
    const rep_t = std.meta.Int(.unsigned, float_bits);
    const sig_bits = math.floatMantissaBits(F);
    const exp_bits = math.floatExponentBits(F);
-    const fractional_sig_bits = math.floatMantissaDigits(F) - 1;
+    const fractional_bits = math.floatFractionalBits(F);

    const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
    const max_exp = (1 << (exp_bits - 1));
@@ -42,10 +42,10 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
    // If 0 <= exponent < sig_bits, right shift to get the result.
    // Otherwise, shift left.
    var result: I = undefined;
-    if (exponent < fractional_sig_bits) {
-        result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_sig_bits - exponent));
+    if (exponent < fractional_bits) {
+        result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_bits - exponent));
    } else {
-        result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_sig_bits);
+        result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_bits);
    }

    if ((@typeInfo(I).Int.signedness == .signed) and negative)
--- a/lib/std/special/compiler_rt/floatXiYf.zig
+++ b/lib/std/special/compiler_rt/floatXiYf.zig
@@ -17,9 +17,9 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
    const float_bits = @bitSizeOf(T);
    const int_bits = @bitSizeOf(@TypeOf(x));
    const exp_bits = math.floatExponentBits(T);
-    const sig_bits = math.floatMantissaDigits(T) - 1; // Only counts the fractional bits
+    const fractional_bits = math.floatFractionalBits(T);
    const exp_bias = math.maxInt(std.meta.Int(.unsigned, exp_bits - 1));
-    const implicit_bit = if (T != f80) @as(uT, 1) << sig_bits else 0;
+    const implicit_bit = if (T != f80) @as(uT, 1) << fractional_bits else 0;
    const max_exp = exp_bias;

    // Sign
@@ -29,14 +29,14 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {

    // Compute significand
    var exp = int_bits - @clz(Z, abs_val) - 1;
-    if (int_bits <= sig_bits or exp <= sig_bits) {
-        const shift_amt = sig_bits - @intCast(math.Log2Int(uT), exp);
+    if (int_bits <= fractional_bits or exp <= fractional_bits) {
+        const shift_amt = fractional_bits - @intCast(math.Log2Int(uT), exp);

        // Shift up result to line up with the significand - no rounding required
        result = (@intCast(uT, abs_val) << shift_amt);
        result ^= implicit_bit; // Remove implicit integer bit
    } else {
-        var shift_amt = @intCast(math.Log2Int(Z), exp - sig_bits);
+        var shift_amt = @intCast(math.Log2Int(Z), exp - fractional_bits);
        const exact_tie: bool = @ctz(Z, abs_val) == shift_amt - 1;

        // Shift down result and remove implicit integer bit
@@ -53,7 +53,7 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
    result += (@as(uT, exp) + exp_bias) << math.floatMantissaBits(T);

    // If the result included a carry, we need to restore the explicit integer bit
-    if (T == f80) result |= 1 << sig_bits;
+    if (T == f80) result |= 1 << fractional_bits;

    return @bitCast(T, sign_bit | result);
 }