mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 12:59:04 +00:00
compiler_rt: Re-implement ldexp/ilogb using bit-ops
This re-write was needed to fix deficiencies in the existing ldexp, which was failing to compute correct results for both f16 and f80. It would be nice to add a fast multiplication-based fallback in the future for targets that have a hardware FPU, but this implementation should be much faster than the existing for targets without one.
This commit is contained in:
parent
7f508480f4
commit
eac1e613be
@ -15,175 +15,157 @@ const minInt = std.math.minInt;
|
||||
///
|
||||
/// Special Cases:
|
||||
/// - ilogb(+-inf) = maxInt(i32)
|
||||
/// - ilogb(0) = maxInt(i32)
|
||||
/// - ilogb(nan) = maxInt(i32)
|
||||
/// - ilogb(+-0) = minInt(i32)
|
||||
/// - ilogb(nan) = minInt(i32)
|
||||
pub fn ilogb(x: anytype) i32 {
|
||||
const T = @TypeOf(x);
|
||||
return switch (T) {
|
||||
f32 => ilogb32(x),
|
||||
f64 => ilogb64(x),
|
||||
f128 => ilogb128(x),
|
||||
else => @compileError("ilogb not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
return ilogbX(T, x);
|
||||
}
|
||||
|
||||
// TODO: unify these implementations with generics
|
||||
pub const fp_ilogbnan = minInt(i32);
|
||||
pub const fp_ilogb0 = minInt(i32);
|
||||
|
||||
// NOTE: Should these be exposed publicly?
|
||||
const fp_ilogbnan = -1 - @as(i32, maxInt(u32) >> 1);
|
||||
const fp_ilogb0 = fp_ilogbnan;
|
||||
fn ilogbX(comptime T: type, x: T) i32 {
|
||||
const typeWidth = @typeInfo(T).Float.bits;
|
||||
const significandBits = math.floatMantissaBits(T);
|
||||
const exponentBits = math.floatExponentBits(T);
|
||||
|
||||
fn ilogb32(x: f32) i32 {
|
||||
var u = @bitCast(u32, x);
|
||||
var e = @intCast(i32, (u >> 23) & 0xFF);
|
||||
const Z = std.meta.Int(.unsigned, typeWidth);
|
||||
|
||||
// TODO: We should be able to merge this with the lower check.
|
||||
if (math.isNan(x)) {
|
||||
return maxInt(i32);
|
||||
}
|
||||
const signBit = (@as(Z, 1) << (significandBits + exponentBits));
|
||||
const maxExponent = ((1 << exponentBits) - 1);
|
||||
const exponentBias = (maxExponent >> 1);
|
||||
|
||||
const absMask = signBit - 1;
|
||||
|
||||
var u = @bitCast(Z, x) & absMask;
|
||||
var e = @intCast(i32, u >> significandBits);
|
||||
|
||||
if (e == 0) {
|
||||
u <<= 9;
|
||||
if (u == 0) {
|
||||
math.raiseInvalid();
|
||||
return fp_ilogb0;
|
||||
}
|
||||
|
||||
// subnormal
|
||||
e = -0x7F;
|
||||
while (u >> 31 == 0) : (u <<= 1) {
|
||||
e -= 1;
|
||||
}
|
||||
return e;
|
||||
// offset sign bit, exponent bits, and integer bit (if present) + bias
|
||||
const offset = 1 + exponentBits + @boolToInt(T == f80) - exponentBias;
|
||||
return offset - @intCast(i32, @clz(u));
|
||||
}
|
||||
|
||||
if (e == 0xFF) {
|
||||
if (e == maxExponent) {
|
||||
math.raiseInvalid();
|
||||
if (u << 9 != 0) {
|
||||
return fp_ilogbnan;
|
||||
} else {
|
||||
return maxInt(i32);
|
||||
}
|
||||
if (u > @bitCast(Z, math.inf(T))) {
|
||||
return fp_ilogbnan; // u is a NaN
|
||||
} else return maxInt(i32);
|
||||
}
|
||||
|
||||
return e - 0x7F;
|
||||
}
|
||||
|
||||
fn ilogb64(x: f64) i32 {
|
||||
var u = @bitCast(u64, x);
|
||||
var e = @intCast(i32, (u >> 52) & 0x7FF);
|
||||
|
||||
if (math.isNan(x)) {
|
||||
return maxInt(i32);
|
||||
}
|
||||
|
||||
if (e == 0) {
|
||||
u <<= 12;
|
||||
if (u == 0) {
|
||||
math.raiseInvalid();
|
||||
return fp_ilogb0;
|
||||
}
|
||||
|
||||
// subnormal
|
||||
e = -0x3FF;
|
||||
while (u >> 63 == 0) : (u <<= 1) {
|
||||
e -= 1;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
if (e == 0x7FF) {
|
||||
math.raiseInvalid();
|
||||
if (u << 12 != 0) {
|
||||
return fp_ilogbnan;
|
||||
} else {
|
||||
return maxInt(i32);
|
||||
}
|
||||
}
|
||||
|
||||
return e - 0x3FF;
|
||||
}
|
||||
|
||||
fn ilogb128(x: f128) i32 {
|
||||
var u = @bitCast(u128, x);
|
||||
var e = @intCast(i32, (u >> 112) & 0x7FFF);
|
||||
|
||||
if (math.isNan(x)) {
|
||||
return maxInt(i32);
|
||||
}
|
||||
|
||||
if (e == 0) {
|
||||
u <<= 16;
|
||||
if (u == 0) {
|
||||
math.raiseInvalid();
|
||||
return fp_ilogb0;
|
||||
}
|
||||
|
||||
// subnormal x
|
||||
return ilogb128(x * 0x1p120) - 120;
|
||||
}
|
||||
|
||||
if (e == 0x7FFF) {
|
||||
math.raiseInvalid();
|
||||
if (u << 16 != 0) {
|
||||
return fp_ilogbnan;
|
||||
} else {
|
||||
return maxInt(i32);
|
||||
}
|
||||
}
|
||||
|
||||
return e - 0x3FFF;
|
||||
return e - exponentBias;
|
||||
}
|
||||
|
||||
test "type dispatch" {
|
||||
try expect(ilogb(@as(f32, 0.2)) == ilogb32(0.2));
|
||||
try expect(ilogb(@as(f64, 0.2)) == ilogb64(0.2));
|
||||
try expect(ilogb(@as(f32, 0.2)) == ilogbX(f32, 0.2));
|
||||
try expect(ilogb(@as(f64, 0.2)) == ilogbX(f64, 0.2));
|
||||
}
|
||||
|
||||
test "16" {
|
||||
try expect(ilogbX(f16, 0.0) == fp_ilogb0);
|
||||
try expect(ilogbX(f16, 0.5) == -1);
|
||||
try expect(ilogbX(f16, 0.8923) == -1);
|
||||
try expect(ilogbX(f16, 10.0) == 3);
|
||||
try expect(ilogbX(f16, -65504) == 15);
|
||||
try expect(ilogbX(f16, 2398.23) == 11);
|
||||
|
||||
try expect(ilogbX(f16, 0x1p-1) == -1);
|
||||
try expect(ilogbX(f16, 0x1p-17) == -17);
|
||||
try expect(ilogbX(f16, 0x1p-24) == -24);
|
||||
}
|
||||
|
||||
test "32" {
|
||||
try expect(ilogb32(0.0) == fp_ilogb0);
|
||||
try expect(ilogb32(0.5) == -1);
|
||||
try expect(ilogb32(0.8923) == -1);
|
||||
try expect(ilogb32(10.0) == 3);
|
||||
try expect(ilogb32(-123984) == 16);
|
||||
try expect(ilogb32(2398.23) == 11);
|
||||
try expect(ilogbX(f32, 0.0) == fp_ilogb0);
|
||||
try expect(ilogbX(f32, 0.5) == -1);
|
||||
try expect(ilogbX(f32, 0.8923) == -1);
|
||||
try expect(ilogbX(f32, 10.0) == 3);
|
||||
try expect(ilogbX(f32, -123984) == 16);
|
||||
try expect(ilogbX(f32, 2398.23) == 11);
|
||||
|
||||
try expect(ilogbX(f32, 0x1p-1) == -1);
|
||||
try expect(ilogbX(f32, 0x1p-122) == -122);
|
||||
try expect(ilogbX(f32, 0x1p-127) == -127);
|
||||
}
|
||||
|
||||
test "64" {
|
||||
try expect(ilogb64(0.0) == fp_ilogb0);
|
||||
try expect(ilogb64(0.5) == -1);
|
||||
try expect(ilogb64(0.8923) == -1);
|
||||
try expect(ilogb64(10.0) == 3);
|
||||
try expect(ilogb64(-123984) == 16);
|
||||
try expect(ilogb64(2398.23) == 11);
|
||||
try expect(ilogbX(f64, 0.0) == fp_ilogb0);
|
||||
try expect(ilogbX(f64, 0.5) == -1);
|
||||
try expect(ilogbX(f64, 0.8923) == -1);
|
||||
try expect(ilogbX(f64, 10.0) == 3);
|
||||
try expect(ilogbX(f64, -123984) == 16);
|
||||
try expect(ilogbX(f64, 2398.23) == 11);
|
||||
|
||||
try expect(ilogbX(f64, 0x1p-1) == -1);
|
||||
try expect(ilogbX(f64, 0x1p-127) == -127);
|
||||
try expect(ilogbX(f64, 0x1p-1012) == -1012);
|
||||
try expect(ilogbX(f64, 0x1p-1023) == -1023);
|
||||
}
|
||||
|
||||
test "80" {
|
||||
try expect(ilogbX(f80, 0.0) == fp_ilogb0);
|
||||
try expect(ilogbX(f80, 0.5) == -1);
|
||||
try expect(ilogbX(f80, 0.8923) == -1);
|
||||
try expect(ilogbX(f80, 10.0) == 3);
|
||||
try expect(ilogbX(f80, -123984) == 16);
|
||||
try expect(ilogbX(f80, 2398.23) == 11);
|
||||
|
||||
try expect(ilogbX(f80, 0x1p-1) == -1);
|
||||
try expect(ilogbX(f80, 0x1p-127) == -127);
|
||||
try expect(ilogbX(f80, 0x1p-1023) == -1023);
|
||||
try expect(ilogbX(f80, 0x1p-16383) == -16383);
|
||||
}
|
||||
|
||||
test "128" {
|
||||
try expect(ilogb128(0.0) == fp_ilogb0);
|
||||
try expect(ilogb128(0.5) == -1);
|
||||
try expect(ilogb128(0.8923) == -1);
|
||||
try expect(ilogb128(10.0) == 3);
|
||||
try expect(ilogb128(-123984) == 16);
|
||||
try expect(ilogb128(2398.23) == 11);
|
||||
try expect(ilogbX(f128, 0.0) == fp_ilogb0);
|
||||
try expect(ilogbX(f128, 0.5) == -1);
|
||||
try expect(ilogbX(f128, 0.8923) == -1);
|
||||
try expect(ilogbX(f128, 10.0) == 3);
|
||||
try expect(ilogbX(f128, -123984) == 16);
|
||||
try expect(ilogbX(f128, 2398.23) == 11);
|
||||
|
||||
try expect(ilogbX(f128, 0x1p-1) == -1);
|
||||
try expect(ilogbX(f128, 0x1p-127) == -127);
|
||||
try expect(ilogbX(f128, 0x1p-1023) == -1023);
|
||||
try expect(ilogbX(f128, 0x1p-16383) == -16383);
|
||||
}
|
||||
|
||||
test "16 special" {
|
||||
try expect(ilogbX(f16, math.inf(f16)) == maxInt(i32));
|
||||
try expect(ilogbX(f16, -math.inf(f16)) == maxInt(i32));
|
||||
try expect(ilogbX(f16, 0.0) == minInt(i32));
|
||||
try expect(ilogbX(f16, math.nan(f16)) == fp_ilogbnan);
|
||||
}
|
||||
|
||||
test "32 special" {
|
||||
try expect(ilogb32(math.inf(f32)) == maxInt(i32));
|
||||
try expect(ilogb32(-math.inf(f32)) == maxInt(i32));
|
||||
try expect(ilogb32(0.0) == minInt(i32));
|
||||
try expect(ilogb32(math.nan(f32)) == maxInt(i32));
|
||||
try expect(ilogbX(f32, math.inf(f32)) == maxInt(i32));
|
||||
try expect(ilogbX(f32, -math.inf(f32)) == maxInt(i32));
|
||||
try expect(ilogbX(f32, 0.0) == minInt(i32));
|
||||
try expect(ilogbX(f32, math.nan(f32)) == fp_ilogbnan);
|
||||
}
|
||||
|
||||
test "64 special" {
|
||||
try expect(ilogb64(math.inf(f64)) == maxInt(i32));
|
||||
try expect(ilogb64(-math.inf(f64)) == maxInt(i32));
|
||||
try expect(ilogb64(0.0) == minInt(i32));
|
||||
try expect(ilogb64(math.nan(f64)) == maxInt(i32));
|
||||
try expect(ilogbX(f64, math.inf(f64)) == maxInt(i32));
|
||||
try expect(ilogbX(f64, -math.inf(f64)) == maxInt(i32));
|
||||
try expect(ilogbX(f64, 0.0) == minInt(i32));
|
||||
try expect(ilogbX(f64, math.nan(f64)) == fp_ilogbnan);
|
||||
}
|
||||
|
||||
test "80 special" {
|
||||
try expect(ilogbX(f80, math.inf(f80)) == maxInt(i32));
|
||||
try expect(ilogbX(f80, -math.inf(f80)) == maxInt(i32));
|
||||
try expect(ilogbX(f80, 0.0) == minInt(i32));
|
||||
try expect(ilogbX(f80, math.nan(f80)) == fp_ilogbnan);
|
||||
}
|
||||
|
||||
test "128 special" {
|
||||
try expect(ilogb128(math.inf(f128)) == maxInt(i32));
|
||||
try expect(ilogb128(-math.inf(f128)) == maxInt(i32));
|
||||
try expect(ilogb128(0.0) == minInt(i32));
|
||||
try expect(ilogb128(math.nan(f128)) == maxInt(i32));
|
||||
try expect(ilogbX(f128, math.inf(f128)) == maxInt(i32));
|
||||
try expect(ilogbX(f128, -math.inf(f128)) == maxInt(i32));
|
||||
try expect(ilogbX(f128, 0.0) == minInt(i32));
|
||||
try expect(ilogbX(f128, math.nan(f128)) == fp_ilogbnan);
|
||||
}
|
||||
|
||||
@ -1,91 +1,142 @@
|
||||
// Ported from musl, which is licensed under the MIT license:
|
||||
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
|
||||
//
|
||||
// https://git.musl-libc.org/cgit/musl/tree/src/math/ldexpf.c
|
||||
// https://git.musl-libc.org/cgit/musl/tree/src/math/ldexp.c
|
||||
|
||||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const Log2Int = std.math.Log2Int;
|
||||
const assert = std.debug.assert;
|
||||
const expect = std.testing.expect;
|
||||
|
||||
/// Returns x * 2^n.
|
||||
pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
|
||||
var base = x;
|
||||
var shift = n;
|
||||
|
||||
const T = @TypeOf(base);
|
||||
const T = @TypeOf(x);
|
||||
const TBits = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);
|
||||
|
||||
const exponent_bits = math.floatExponentBits(T);
|
||||
const mantissa_bits = math.floatMantissaBits(T);
|
||||
const exponent_min = math.floatExponentMin(T);
|
||||
const exponent_max = math.floatExponentMax(T);
|
||||
const fractional_bits = math.floatFractionalBits(T);
|
||||
|
||||
const exponent_bias = exponent_max;
|
||||
const max_biased_exponent = 2 * math.floatExponentMax(T);
|
||||
const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1);
|
||||
|
||||
// fix double rounding errors in subnormal ranges
|
||||
// https://git.musl-libc.org/cgit/musl/commit/src/math/ldexp.c?id=8c44a060243f04283ca68dad199aab90336141db
|
||||
const scale_min_expo = exponent_min + mantissa_bits + 1;
|
||||
const scale_min = @bitCast(T, @as(TBits, scale_min_expo + exponent_bias) << mantissa_bits);
|
||||
const scale_max = @bitCast(T, @intCast(TBits, exponent_max + exponent_bias) << mantissa_bits);
|
||||
const repr = @bitCast(TBits, x);
|
||||
const sign_bit = repr & (1 << (exponent_bits + mantissa_bits));
|
||||
|
||||
// scale `shift` within floating point limits, if possible
|
||||
// second pass is possible due to subnormal range
|
||||
// third pass always results in +/-0.0 or +/-inf
|
||||
if (shift > exponent_max) {
|
||||
base *= scale_max;
|
||||
shift -= exponent_max;
|
||||
if (shift > exponent_max) {
|
||||
base *= scale_max;
|
||||
shift -= exponent_max;
|
||||
if (shift > exponent_max) shift = exponent_max;
|
||||
if (math.isNan(x) or !math.isFinite(x))
|
||||
return x;
|
||||
|
||||
var exponent: i32 = @intCast(i32, (repr << 1) >> (mantissa_bits + 1));
|
||||
if (exponent == 0)
|
||||
exponent += (@as(i32, exponent_bits) + @boolToInt(T == f80)) - @clz(repr << 1);
|
||||
|
||||
if (n >= 0) {
|
||||
if (n > max_biased_exponent - exponent) {
|
||||
// Overflow. Return +/- inf
|
||||
return @bitCast(T, @bitCast(TBits, math.inf(T)) | sign_bit);
|
||||
} else if (exponent + n <= 0) {
|
||||
// Result is subnormal
|
||||
return @bitCast(T, (repr << @intCast(Log2Int(TBits), n)) | sign_bit);
|
||||
} else if (exponent <= 0) {
|
||||
// Result is normal, but needs shifting
|
||||
var result = @intCast(TBits, n + exponent) << mantissa_bits;
|
||||
result |= (repr << @intCast(Log2Int(TBits), 1 - exponent)) & mantissa_mask;
|
||||
return @bitCast(T, result | sign_bit);
|
||||
}
|
||||
} else if (shift < exponent_min) {
|
||||
base *= scale_min;
|
||||
shift -= scale_min_expo;
|
||||
if (shift < exponent_min) {
|
||||
base *= scale_min;
|
||||
shift -= scale_min_expo;
|
||||
if (shift < exponent_min) shift = exponent_min;
|
||||
|
||||
// Result needs no shifting
|
||||
return @bitCast(T, repr + (@intCast(TBits, n) << mantissa_bits));
|
||||
} else {
|
||||
if (n <= -exponent) {
|
||||
if (n < -(mantissa_bits + exponent))
|
||||
return @bitCast(T, sign_bit); // Severe underflow. Return +/- 0
|
||||
|
||||
// Result underflowed, we need to shift and round
|
||||
const shift = @intCast(Log2Int(TBits), math.min(-n, -(exponent + n) + 1));
|
||||
const exact_tie: bool = @ctz(repr) == shift - 1;
|
||||
var result = repr & mantissa_mask;
|
||||
|
||||
if (T != f80) // Include integer bit
|
||||
result |= @as(TBits, @boolToInt(exponent > 0)) << fractional_bits;
|
||||
result = @intCast(TBits, (result >> (shift - 1)));
|
||||
|
||||
// Round result, including round-to-even for exact ties
|
||||
result = ((result + 1) >> 1) & ~@as(TBits, @boolToInt(exact_tie));
|
||||
return @bitCast(T, result | sign_bit);
|
||||
}
|
||||
|
||||
// Result is exact, and needs no shifting
|
||||
return @bitCast(T, repr - (@intCast(TBits, -n) << mantissa_bits));
|
||||
}
|
||||
|
||||
return base * @bitCast(T, @intCast(TBits, shift + exponent_bias) << mantissa_bits);
|
||||
}
|
||||
|
||||
test "math.ldexp" {
|
||||
// TODO derive the various constants here with new maths API
|
||||
|
||||
// basic usage
|
||||
try expect(ldexp(@as(f16, 1.5), 4) == 24.0);
|
||||
try expect(ldexp(@as(f32, 1.5), 4) == 24.0);
|
||||
try expect(ldexp(@as(f64, 1.5), 4) == 24.0);
|
||||
try expect(ldexp(@as(f128, 1.5), 4) == 24.0);
|
||||
|
||||
// subnormals
|
||||
try expect(math.isNormal(ldexp(@as(f16, 1.0), -14)));
|
||||
try expect(!math.isNormal(ldexp(@as(f16, 1.0), -15)));
|
||||
try expect(math.isNormal(ldexp(@as(f32, 1.0), -126)));
|
||||
try expect(!math.isNormal(ldexp(@as(f32, 1.0), -127)));
|
||||
try expect(math.isNormal(ldexp(@as(f64, 1.0), -1022)));
|
||||
try expect(!math.isNormal(ldexp(@as(f64, 1.0), -1023)));
|
||||
try expect(math.isNormal(ldexp(@as(f128, 1.0), -16382)));
|
||||
try expect(!math.isNormal(ldexp(@as(f128, 1.0), -16383)));
|
||||
// unreliable due to lack of native f16 support, see talk on PR #8733
|
||||
// try expect(ldexp(@as(f16, 0x1.1FFp-1), -14 - 9) == math.floatTrueMin(f16));
|
||||
try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16));
|
||||
try expect(ldexp(@as(f32, 0x1.3FFFFFp-1), -126 - 22) == math.floatTrueMin(f32));
|
||||
try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64));
|
||||
try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80));
|
||||
try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
|
||||
|
||||
// float limits
|
||||
try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0);
|
||||
try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0);
|
||||
try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f16), 15 + 24)));
|
||||
try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f16), 15 + 24 + 1)));
|
||||
try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f32), 127 + 149)));
|
||||
try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f32), 127 + 149 + 1)));
|
||||
try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f64), 1023 + 1074)));
|
||||
try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f64), 1023 + 1074 + 1)));
|
||||
try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f128), 16383 + 16494)));
|
||||
try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f128), 16383 + 16494 + 1)));
|
||||
|
||||
@setEvalBranchQuota(10_000);
|
||||
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
|
||||
const fractional_bits = math.floatFractionalBits(T);
|
||||
|
||||
const min_exponent = math.floatExponentMin(T);
|
||||
const max_exponent = math.floatExponentMax(T);
|
||||
const exponent_bias = max_exponent;
|
||||
|
||||
// basic usage
|
||||
try expect(ldexp(@as(T, 1.5), 4) == 24.0);
|
||||
|
||||
// normals -> subnormals
|
||||
try expect(math.isNormal(ldexp(@as(T, 1.0), min_exponent)));
|
||||
try expect(!math.isNormal(ldexp(@as(T, 1.0), min_exponent - 1)));
|
||||
|
||||
// normals -> zero
|
||||
try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits) > 0.0);
|
||||
try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits - 1) == 0.0);
|
||||
|
||||
// subnormals -> zero
|
||||
try expect(ldexp(math.floatTrueMin(T), 0) > 0.0);
|
||||
try expect(ldexp(math.floatTrueMin(T), -1) == 0.0);
|
||||
|
||||
// subnormals -> subnormals
|
||||
try expect(ldexp(math.floatTrueMin(T), 3) == math.floatTrueMin(T) * 8);
|
||||
try expect(ldexp(math.floatTrueMin(T) * 8, -2) == math.floatTrueMin(T) * 2);
|
||||
try expect(ldexp(math.floatTrueMin(T) * 8, -3) == math.floatTrueMin(T));
|
||||
|
||||
// subnormals -> normals (+)
|
||||
try expect(ldexp(math.floatTrueMin(T), fractional_bits) == math.floatMin(T));
|
||||
try expect(ldexp(math.floatTrueMin(T), fractional_bits - 1) == math.floatMin(T) * 0.5);
|
||||
|
||||
// subnormals -> normals (-)
|
||||
try expect(ldexp(-math.floatTrueMin(T), fractional_bits) == -math.floatMin(T));
|
||||
try expect(ldexp(-math.floatTrueMin(T), fractional_bits - 1) == -math.floatMin(T) * 0.5);
|
||||
|
||||
// subnormals -> float limits (+inf)
|
||||
try expect(math.isFinite(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
|
||||
try expect(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == math.inf(T));
|
||||
|
||||
// subnormals -> float limits (-inf)
|
||||
try expect(math.isFinite(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
|
||||
try expect(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == -math.inf(T));
|
||||
|
||||
// infinity -> infinity
|
||||
try expect(ldexp(math.inf(T), math.maxInt(i32)) == math.inf(T));
|
||||
try expect(ldexp(math.inf(T), math.minInt(i32)) == math.inf(T));
|
||||
try expect(ldexp(math.inf(T), max_exponent) == math.inf(T));
|
||||
try expect(ldexp(math.inf(T), min_exponent) == math.inf(T));
|
||||
try expect(ldexp(-math.inf(T), math.maxInt(i32)) == -math.inf(T));
|
||||
try expect(ldexp(-math.inf(T), math.minInt(i32)) == -math.inf(T));
|
||||
|
||||
// extremely large n
|
||||
try expect(ldexp(math.floatMax(T), math.maxInt(i32)) == math.inf(T));
|
||||
try expect(ldexp(math.floatMax(T), -math.maxInt(i32)) == 0.0);
|
||||
try expect(ldexp(math.floatMax(T), math.minInt(i32)) == 0.0);
|
||||
try expect(ldexp(math.floatTrueMin(T), math.maxInt(i32)) == math.inf(T));
|
||||
try expect(ldexp(math.floatTrueMin(T), -math.maxInt(i32)) == 0.0);
|
||||
try expect(ldexp(math.floatTrueMin(T), math.minInt(i32)) == 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user