mirror of
https://github.com/ziglang/zig.git
synced 2026-02-21 16:54:52 +00:00
Add floatFractionalBits to replace floatMantissaDigits
This commit is contained in:
parent
319b5cbce5
commit
319555a669
@ -38,7 +38,7 @@ pub const sqrt1_2 = 0.707106781186547524400844362104849039;
|
||||
|
||||
pub const floatExponentBits = @import("math/float.zig").floatExponentBits;
|
||||
pub const floatMantissaBits = @import("math/float.zig").floatMantissaBits;
|
||||
pub const floatMantissaDigits = @import("math/float.zig").floatMantissaDigits;
|
||||
pub const floatFractionalBits = @import("math/float.zig").floatFractionalBits;
|
||||
pub const floatExponentMin = @import("math/float.zig").floatExponentMin;
|
||||
pub const floatExponentMax = @import("math/float.zig").floatExponentMax;
|
||||
pub const floatTrueMin = @import("math/float.zig").floatTrueMin;
|
||||
|
||||
@ -4,7 +4,7 @@ const expect = std.testing.expect;
|
||||
|
||||
/// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
|
||||
fn mantissaOne(comptime T: type) comptime_int {
|
||||
return if (floatMantissaDigits(T) == 64) 1 << 63 else 0;
|
||||
return if (T == f80) 1 << floatFractionalBits(T) else 0;
|
||||
}
|
||||
|
||||
/// Creates floating point type T from an unbiased exponent and raw mantissa.
|
||||
@ -42,19 +42,19 @@ pub fn floatMantissaBits(comptime T: type) comptime_int {
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns the number of binary digits in the mantissa of floating point type T.
|
||||
pub fn floatMantissaDigits(comptime T: type) comptime_int {
|
||||
/// Returns the number of fractional bits in the mantissa of floating point type T.
|
||||
pub fn floatFractionalBits(comptime T: type) comptime_int {
|
||||
assert(@typeInfo(T) == .Float);
|
||||
|
||||
// standard IEEE floats have an implicit 0.m or 1.m integer part
|
||||
// f80 is special and has an explicitly stored bit in the MSB
|
||||
// this function corresponds to `MANT_DIG' constants from C
|
||||
// this function corresponds to `MANT_DIG - 1' from C
|
||||
return switch (@typeInfo(T).Float.bits) {
|
||||
16 => 11,
|
||||
32 => 24,
|
||||
64 => 53,
|
||||
80 => 64,
|
||||
128 => 113,
|
||||
16 => 10,
|
||||
32 => 23,
|
||||
64 => 52,
|
||||
80 => 63,
|
||||
128 => 112,
|
||||
else => @compileError("unknown floating point type " ++ @typeName(T)),
|
||||
};
|
||||
}
|
||||
@ -89,7 +89,7 @@ pub fn floatMax(comptime T: type) T {
|
||||
|
||||
/// Returns the machine epsilon of floating point type T.
|
||||
pub fn floatEps(comptime T: type) T {
|
||||
return reconstructFloat(T, -(floatMantissaDigits(T) - 1), mantissaOne(T));
|
||||
return reconstructFloat(T, -floatFractionalBits(T), mantissaOne(T));
|
||||
}
|
||||
|
||||
/// Returns the value inf for floating point type T.
|
||||
@ -104,7 +104,7 @@ test "std.math.float" {
|
||||
try expect(@bitSizeOf(T) == size);
|
||||
|
||||
// for machine epsilon, assert expmin <= -prec <= expmax
|
||||
try expect(floatExponentMin(T) <= -(floatMantissaDigits(T) - 1));
|
||||
try expect(-(floatMantissaDigits(T) - 1) <= floatExponentMax(T));
|
||||
try expect(floatExponentMin(T) <= -floatFractionalBits(T));
|
||||
try expect(-floatFractionalBits(T) <= floatExponentMax(T));
|
||||
}
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ test "math.isNormal" {
|
||||
try expect(!isNormal(@as(T, math.floatTrueMin(T))));
|
||||
|
||||
// largest subnormal
|
||||
try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatMantissaDigits(T) - 1))));
|
||||
try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatFractionalBits(T)))));
|
||||
|
||||
// non-finite numbers
|
||||
try expect(!isNormal(-math.inf(T)));
|
||||
|
||||
@ -12,7 +12,7 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
|
||||
const rep_t = std.meta.Int(.unsigned, float_bits);
|
||||
const sig_bits = math.floatMantissaBits(F);
|
||||
const exp_bits = math.floatExponentBits(F);
|
||||
const fractional_sig_bits = math.floatMantissaDigits(F) - 1;
|
||||
const fractional_bits = math.floatFractionalBits(F);
|
||||
|
||||
const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
|
||||
const max_exp = (1 << (exp_bits - 1));
|
||||
@ -42,10 +42,10 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
|
||||
// If 0 <= exponent < (number of fractional significand bits), right shift to get the result.
|
||||
// Otherwise, shift left.
|
||||
var result: I = undefined;
|
||||
if (exponent < fractional_sig_bits) {
|
||||
result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_sig_bits - exponent));
|
||||
if (exponent < fractional_bits) {
|
||||
result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_bits - exponent));
|
||||
} else {
|
||||
result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_sig_bits);
|
||||
result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_bits);
|
||||
}
|
||||
|
||||
if ((@typeInfo(I).Int.signedness == .signed) and negative)
|
||||
|
||||
@ -17,9 +17,9 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
|
||||
const float_bits = @bitSizeOf(T);
|
||||
const int_bits = @bitSizeOf(@TypeOf(x));
|
||||
const exp_bits = math.floatExponentBits(T);
|
||||
const sig_bits = math.floatMantissaDigits(T) - 1; // Only counts the fractional bits
|
||||
const fractional_bits = math.floatFractionalBits(T);
|
||||
const exp_bias = math.maxInt(std.meta.Int(.unsigned, exp_bits - 1));
|
||||
const implicit_bit = if (T != f80) @as(uT, 1) << sig_bits else 0;
|
||||
const implicit_bit = if (T != f80) @as(uT, 1) << fractional_bits else 0;
|
||||
const max_exp = exp_bias;
|
||||
|
||||
// Sign
|
||||
@ -29,14 +29,14 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
|
||||
|
||||
// Compute significand
|
||||
var exp = int_bits - @clz(Z, abs_val) - 1;
|
||||
if (int_bits <= sig_bits or exp <= sig_bits) {
|
||||
const shift_amt = sig_bits - @intCast(math.Log2Int(uT), exp);
|
||||
if (int_bits <= fractional_bits or exp <= fractional_bits) {
|
||||
const shift_amt = fractional_bits - @intCast(math.Log2Int(uT), exp);
|
||||
|
||||
// Shift up result to line up with the significand - no rounding required
|
||||
result = (@intCast(uT, abs_val) << shift_amt);
|
||||
result ^= implicit_bit; // Remove implicit integer bit
|
||||
} else {
|
||||
var shift_amt = @intCast(math.Log2Int(Z), exp - sig_bits);
|
||||
var shift_amt = @intCast(math.Log2Int(Z), exp - fractional_bits);
|
||||
const exact_tie: bool = @ctz(Z, abs_val) == shift_amt - 1;
|
||||
|
||||
// Shift down result and remove implicit integer bit
|
||||
@ -53,7 +53,7 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
|
||||
result += (@as(uT, exp) + exp_bias) << math.floatMantissaBits(T);
|
||||
|
||||
// If the result included a carry, we need to restore the explicit integer bit
|
||||
if (T == f80) result |= 1 << sig_bits;
|
||||
if (T == f80) result |= 1 << fractional_bits;
|
||||
|
||||
return @bitCast(T, sign_bit | result);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user