Add floatFractionalBits to replace floatMantissaDigits

This commit is contained in:
Cody Tapscott 2022-04-12 12:23:18 -07:00
parent 319b5cbce5
commit 319555a669
5 changed files with 24 additions and 24 deletions

View File

@ -38,7 +38,7 @@ pub const sqrt1_2 = 0.707106781186547524400844362104849039;
pub const floatExponentBits = @import("math/float.zig").floatExponentBits;
pub const floatMantissaBits = @import("math/float.zig").floatMantissaBits;
pub const floatMantissaDigits = @import("math/float.zig").floatMantissaDigits;
pub const floatFractionalBits = @import("math/float.zig").floatFractionalBits;
pub const floatExponentMin = @import("math/float.zig").floatExponentMin;
pub const floatExponentMax = @import("math/float.zig").floatExponentMax;
pub const floatTrueMin = @import("math/float.zig").floatTrueMin;

View File

@ -4,7 +4,7 @@ const expect = std.testing.expect;
/// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
/// Only f80 yields a nonzero value (the bit at position floatFractionalBits(T));
/// every other type yields 0.
fn mantissaOne(comptime T: type) comptime_int {
// NOTE(review): this text is a rendered diff. The next line is the pre-commit
// form, which detected f80 indirectly through a 64-digit mantissa.
return if (floatMantissaDigits(T) == 64) 1 << 63 else 0;
// Post-commit form: tests for f80 directly and derives the bit position from
// floatFractionalBits(T) instead of hard-coding 63.
return if (T == f80) 1 << floatFractionalBits(T) else 0;
}
/// Creates floating point type T from an unbiased exponent and raw mantissa.
@ -42,19 +42,19 @@ pub fn floatMantissaBits(comptime T: type) comptime_int {
};
}
// NOTE(review): this text is a rendered diff. The doc comment and signature
// immediately below are the pre-commit `floatMantissaDigits`; the pair after
// them is the replacement introduced by this commit.
/// Returns the number of binary digits in the mantissa of floating point type T.
pub fn floatMantissaDigits(comptime T: type) comptime_int {
/// Returns the number of fractional bits in the mantissa of floating point type T.
pub fn floatFractionalBits(comptime T: type) comptime_int {
// Only float types are accepted.
assert(@typeInfo(T) == .Float);
// standard IEEE floats have an implicit 0.m or 1.m integer part
// f80 is special and has an explicitly stored bit in the MSB
// this function corresponds to `MANT_DIG' constants from C
// this function corresponds to `MANT_DIG - 1' from C
return switch (@typeInfo(T).Float.bits) {
// Pre-commit table (floatMantissaDigits): digit counts including the integer bit.
16 => 11,
32 => 24,
64 => 53,
80 => 64,
128 => 113,
// Post-commit table (floatFractionalBits): each entry is one less than the
// corresponding entry above, i.e. the integer bit is no longer counted.
16 => 10,
32 => 23,
64 => 52,
80 => 63,
128 => 112,
// Any other bit width is rejected at compile time.
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
}
@ -89,7 +89,7 @@ pub fn floatMax(comptime T: type) T {
/// Returns the machine epsilon of floating point type T.
/// The value is assembled by reconstructFloat from a negative exponent equal to
/// the fractional bit count and the mantissaOne(T) mantissa.
pub fn floatEps(comptime T: type) T {
// NOTE(review): this text is a rendered diff. The next line is the pre-commit
// form, which subtracted 1 from the total mantissa digit count.
return reconstructFloat(T, -(floatMantissaDigits(T) - 1), mantissaOne(T));
// Post-commit form: same exponent, now read directly from floatFractionalBits(T).
return reconstructFloat(T, -floatFractionalBits(T), mantissaOne(T));
}
/// Returns the value inf for floating point type T.
@ -104,7 +104,7 @@ test "std.math.float" {
try expect(@bitSizeOf(T) == size);
// for machine epsilon, assert expmin <= -prec <= expmax
try expect(floatExponentMin(T) <= -(floatMantissaDigits(T) - 1));
try expect(-(floatMantissaDigits(T) - 1) <= floatExponentMax(T));
try expect(floatExponentMin(T) <= -floatFractionalBits(T));
try expect(-floatFractionalBits(T) <= floatExponentMax(T));
}
}

View File

@ -41,7 +41,7 @@ test "math.isNormal" {
try expect(!isNormal(@as(T, math.floatTrueMin(T))));
// largest subnormal
try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatMantissaDigits(T) - 1))));
try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatFractionalBits(T)))));
// non-finite numbers
try expect(!isNormal(-math.inf(T)));

View File

@ -12,7 +12,7 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
const rep_t = std.meta.Int(.unsigned, float_bits);
const sig_bits = math.floatMantissaBits(F);
const exp_bits = math.floatExponentBits(F);
const fractional_sig_bits = math.floatMantissaDigits(F) - 1;
const fractional_bits = math.floatFractionalBits(F);
const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
const max_exp = (1 << (exp_bits - 1));
@ -42,10 +42,10 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
// If 0 <= exponent < the number of fractional bits, right shift to get the result.
// Otherwise, shift left.
var result: I = undefined;
if (exponent < fractional_sig_bits) {
result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_sig_bits - exponent));
if (exponent < fractional_bits) {
result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_bits - exponent));
} else {
result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_sig_bits);
result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_bits);
}
if ((@typeInfo(I).Int.signedness == .signed) and negative)

View File

@ -17,9 +17,9 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
const float_bits = @bitSizeOf(T);
const int_bits = @bitSizeOf(@TypeOf(x));
const exp_bits = math.floatExponentBits(T);
const sig_bits = math.floatMantissaDigits(T) - 1; // Only counts the fractional bits
const fractional_bits = math.floatFractionalBits(T);
const exp_bias = math.maxInt(std.meta.Int(.unsigned, exp_bits - 1));
const implicit_bit = if (T != f80) @as(uT, 1) << sig_bits else 0;
const implicit_bit = if (T != f80) @as(uT, 1) << fractional_bits else 0;
const max_exp = exp_bias;
// Sign
@ -29,14 +29,14 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
// Compute significand
var exp = int_bits - @clz(Z, abs_val) - 1;
if (int_bits <= sig_bits or exp <= sig_bits) {
const shift_amt = sig_bits - @intCast(math.Log2Int(uT), exp);
if (int_bits <= fractional_bits or exp <= fractional_bits) {
const shift_amt = fractional_bits - @intCast(math.Log2Int(uT), exp);
// Shift up result to line up with the significand - no rounding required
result = (@intCast(uT, abs_val) << shift_amt);
result ^= implicit_bit; // Remove implicit integer bit
} else {
var shift_amt = @intCast(math.Log2Int(Z), exp - sig_bits);
var shift_amt = @intCast(math.Log2Int(Z), exp - fractional_bits);
const exact_tie: bool = @ctz(Z, abs_val) == shift_amt - 1;
// Shift down result and remove implicit integer bit
@ -53,7 +53,7 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
result += (@as(uT, exp) + exp_bias) << math.floatMantissaBits(T);
// If the result included a carry, we need to restore the explicit integer bit
if (T == f80) result |= 1 << sig_bits;
if (T == f80) result |= 1 << fractional_bits;
return @bitCast(T, sign_bit | result);
}