zig/lib/std/math/float.zig
mlugg f26dda2117 all: migrate code to new cast builtin syntax
Most of this migration was performed automatically with `zig fmt`. There
were a few exceptions which I had to manually fix:

* `@alignCast` and `@addrSpaceCast` cannot be automatically rewritten
* `@truncate`'s fixup is incorrect for vectors
* Test cases are not formatted, and their error locations change
2023-06-24 16:56:39 -07:00

111 lines
4.0 KiB
Zig

const std = @import("../std.zig");
const assert = std.debug.assert;
const expect = std.testing.expect;
/// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
inline fn mantissaOne(comptime T: type) comptime_int {
return if (@typeInfo(T).Float.bits == 80) 1 << floatFractionalBits(T) else 0;
}
/// Creates floating point type T from an unbiased exponent and raw mantissa.
inline fn reconstructFloat(comptime T: type, comptime exponent: comptime_int, comptime mantissa: comptime_int) T {
const TBits = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(T) } });
const biased_exponent = @as(TBits, exponent + floatExponentMax(T));
return @as(T, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(TBits, mantissa)));
}
/// Returns the number of bits in the exponent of floating point type T.
pub inline fn floatExponentBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .Float);
return switch (@typeInfo(T).Float.bits) {
16 => 5,
32 => 8,
64 => 11,
80 => 15,
128 => 15,
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
}
/// Returns the number of bits in the mantissa of floating point type T.
pub inline fn floatMantissaBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .Float);
return switch (@typeInfo(T).Float.bits) {
16 => 10,
32 => 23,
64 => 52,
80 => 64,
128 => 112,
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
}
/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .Float);
// standard IEEE floats have an implicit 0.m or 1.m integer part
// f80 is special and has an explicitly stored bit in the MSB
// this function corresponds to `MANT_DIG - 1' from C
return switch (@typeInfo(T).Float.bits) {
16 => 10,
32 => 23,
64 => 52,
80 => 63,
128 => 112,
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
}
/// Returns the minimum exponent that can represent
/// a normalised value in floating point type T.
pub inline fn floatExponentMin(comptime T: type) comptime_int {
return -floatExponentMax(T) + 1;
}
/// Returns the maximum exponent that can represent
/// a normalised value in floating point type T.
pub inline fn floatExponentMax(comptime T: type) comptime_int {
return (1 << (floatExponentBits(T) - 1)) - 1;
}
/// Returns the smallest subnormal number representable in floating point type T.
pub inline fn floatTrueMin(comptime T: type) T {
return reconstructFloat(T, floatExponentMin(T) - 1, 1);
}
/// Returns the smallest normal number representable in floating point type T.
pub inline fn floatMin(comptime T: type) T {
return reconstructFloat(T, floatExponentMin(T), mantissaOne(T));
}
/// Returns the largest normal number representable in floating point type T.
pub inline fn floatMax(comptime T: type) T {
const all1s_mantissa = (1 << floatMantissaBits(T)) - 1;
return reconstructFloat(T, floatExponentMax(T), all1s_mantissa);
}
/// Returns the machine epsilon of floating point type T.
pub inline fn floatEps(comptime T: type) T {
return reconstructFloat(T, -floatFractionalBits(T), mantissaOne(T));
}
/// Returns the value inf for floating point type T.
pub inline fn inf(comptime T: type) T {
return reconstructFloat(T, floatExponentMax(T) + 1, mantissaOne(T));
}
test "float bits" {
inline for ([_]type{ f16, f32, f64, f80, f128, c_longdouble }) |T| {
// (1 +) for the sign bit, since it is separate from the other bits
const size = 1 + floatExponentBits(T) + floatMantissaBits(T);
try expect(@bitSizeOf(T) == size);
// for machine epsilon, assert expmin <= -prec <= expmax
try expect(floatExponentMin(T) <= -floatFractionalBits(T));
try expect(-floatFractionalBits(T) <= floatExponentMax(T));
}
}