diff --git a/lib/std/math.zig b/lib/std/math.zig index 7111cd6bd5..353b3539b9 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -36,38 +36,44 @@ pub const sqrt2 = 1.414213562373095048801688724209698079; /// 1/sqrt(2) pub const sqrt1_2 = 0.707106781186547524400844362104849039; -pub const f128_true_min = @bitCast(f128, @as(u128, 0x00000000000000000000000000000001)); -pub const f128_min = @bitCast(f128, @as(u128, 0x00010000000000000000000000000000)); -pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF)); -pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000)); -pub const f128_toint = 1.0 / f128_epsilon; +pub const floatExponentBits = @import("math/float.zig").floatExponentBits; +pub const floatMantissaBits = @import("math/float.zig").floatMantissaBits; +pub const floatMantissaDigits = @import("math/float.zig").floatMantissaDigits; +pub const floatExponentMin = @import("math/float.zig").floatExponentMin; +pub const floatExponentMax = @import("math/float.zig").floatExponentMax; +pub const floatTrueMin = @import("math/float.zig").floatTrueMin; +pub const floatMin = @import("math/float.zig").floatMin; +pub const floatMax = @import("math/float.zig").floatMax; +pub const floatEps = @import("math/float.zig").floatEps; -// float.h details -pub const f80_true_min = make_f80(.{ .fraction = 1, .exp = 0 }); -pub const f80_min = make_f80(.{ .fraction = 0x8000000000000000, .exp = 1 }); -pub const f80_max = make_f80(.{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }); -pub const f80_epsilon = make_f80(.{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }); -pub const f80_toint = 1.0 / f80_epsilon; - -pub const f64_true_min = 4.94065645841246544177e-324; -pub const f64_min = 2.2250738585072014e-308; -pub const f64_max = 1.79769313486231570815e+308; -pub const f64_epsilon = 2.22044604925031308085e-16; -pub const f64_toint = 1.0 / f64_epsilon; - -pub const f32_true_min = 1.40129846432481707092e-45; -pub const f32_min = 
1.17549435082228750797e-38; -pub const f32_max = 3.40282346638528859812e+38; -pub const f32_epsilon = 1.1920928955078125e-07; -pub const f32_toint = 1.0 / f32_epsilon; - -pub const f16_true_min = 0.000000059604644775390625; // 2**-24 -pub const f16_min = 0.00006103515625; // 2**-14 -pub const f16_max = 65504; -pub const f16_epsilon = 0.0009765625; // 2**-10 -pub const f16_toint = 1.0 / f16_epsilon; - -pub const epsilon = @import("math/epsilon.zig").epsilon; +// TODO Replace each constant in this section with @compileError("deprecated for <replacement>"), e.g. floatMin(f32) for f32_min, after 0.10.0 is released. +pub const f16_true_min: comptime_float = floatTrueMin(f16); // prev: 0.000000059604644775390625 +pub const f32_true_min: comptime_float = floatTrueMin(f32); // prev: 1.40129846432481707092e-45 +pub const f64_true_min: comptime_float = floatTrueMin(f64); // prev: 4.94065645841246544177e-324 +pub const f80_true_min = floatTrueMin(f80); // prev: make_f80(.{ .fraction = 1, .exp = 0 }) +pub const f128_true_min = floatTrueMin(f128); // prev: @bitCast(f128, @as(u128, 0x00000000000000000000000000000001)) +pub const f16_min: comptime_float = floatMin(f16); // prev: 0.00006103515625 +pub const f32_min: comptime_float = floatMin(f32); // prev: 1.17549435082228750797e-38 +pub const f64_min: comptime_float = floatMin(f64); // prev: 2.2250738585072014e-308 +pub const f80_min = floatMin(f80); // prev: make_f80(.{ .fraction = 0x8000000000000000, .exp = 1 }) +pub const f128_min = floatMin(f128); // prev: @bitCast(f128, @as(u128, 0x00010000000000000000000000000000)) +pub const f16_max: comptime_float = floatMax(f16); // prev: 65504 +pub const f32_max: comptime_float = floatMax(f32); // prev: 3.40282346638528859812e+38 +pub const f64_max: comptime_float = floatMax(f64); // prev: 1.79769313486231570815e+308 +pub const f80_max = floatMax(f80); // prev: make_f80(.{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }) +pub const f128_max = floatMax(f128); // prev: @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF)) +pub const f16_epsilon: 
comptime_float = floatEps(f16); // prev: 0.0009765625 +pub const f32_epsilon: comptime_float = floatEps(f32); // prev: 1.1920928955078125e-07 +pub const f64_epsilon: comptime_float = floatEps(f64); // prev: 2.22044604925031308085e-16 +pub const f80_epsilon = floatEps(f80); // prev: make_f80(.{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }) +pub const f128_epsilon = floatEps(f128); // prev: @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000)) +pub const f16_toint: comptime_float = 1.0 / f16_epsilon; // same as before +pub const f32_toint: comptime_float = 1.0 / f32_epsilon; // same as before +pub const f64_toint: comptime_float = 1.0 / f64_epsilon; // same as before +pub const f80_toint = 1.0 / f80_epsilon; // same as before +pub const f128_toint = 1.0 / f128_epsilon; // same as before +pub const epsilon = floatEps; +// End of "soft deprecated" section pub const nan_u16 = @as(u16, 0x7C01); pub const nan_f16 = @bitCast(f16, nan_u16); @@ -294,36 +300,6 @@ test { std.testing.refAllDecls(@This()); } -/// Returns the number of bits in the mantissa of floating point type -/// T. -pub fn floatMantissaBits(comptime T: type) comptime_int { - assert(@typeInfo(T) == .Float); - - return switch (@typeInfo(T).Float.bits) { - 16 => 10, - 32 => 23, - 64 => 52, - 80 => 64, - 128 => 112, - else => @compileError("unknown floating point type " ++ @typeName(T)), - }; -} - -/// Returns the number of bits in the exponent of floating point type -/// T. -pub fn floatExponentBits(comptime T: type) comptime_int { - assert(@typeInfo(T) == .Float); - - return switch (@typeInfo(T).Float.bits) { - 16 => 5, - 32 => 8, - 64 => 11, - 80 => 15, - 128 => 15, - else => @compileError("unknown floating point type " ++ @typeName(T)), - }; -} - /// Given two types, returns the smallest one which is capable of holding the /// full range of the minimum value. 
pub fn Min(comptime A: type, comptime B: type) type { diff --git a/lib/std/math/epsilon.zig b/lib/std/math/epsilon.zig deleted file mode 100644 index 7f78be1aab..0000000000 --- a/lib/std/math/epsilon.zig +++ /dev/null @@ -1,15 +0,0 @@ -const math = @import("../math.zig"); - -/// Returns the machine epsilon for type T. -/// This is the smallest value of type T that satisfies the inequality 1.0 + -/// epsilon != 1.0. -pub fn epsilon(comptime T: type) T { - return switch (T) { - f16 => math.f16_epsilon, - f32 => math.f32_epsilon, - f64 => math.f64_epsilon, - f80 => math.f80_epsilon, - f128 => math.f128_epsilon, - else => @compileError("epsilon not implemented for " ++ @typeName(T)), - }; -} diff --git a/lib/std/math/float.zig b/lib/std/math/float.zig new file mode 100644 index 0000000000..9858aeb161 --- /dev/null +++ b/lib/std/math/float.zig @@ -0,0 +1,105 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const expect = std.testing.expect; + +/// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic. +fn mantissaOne(comptime T: type) comptime_int { + return if (floatMantissaDigits(T) == 64) 1 << 63 else 0; +} + +/// Creates floating point type T from an unbiased exponent and raw mantissa. +fn reconstructFloat(comptime T: type, exponent: comptime_int, mantissa: comptime_int) T { + const TBits = std.meta.Int(.unsigned, @bitSizeOf(T)); + const biased_exponent = @as(TBits, exponent + floatExponentMax(T)); + return @bitCast(T, (biased_exponent << floatMantissaBits(T)) | @as(TBits, mantissa)); +} + +/// Returns the number of bits in the exponent of floating point type T. +pub fn floatExponentBits(comptime T: type) comptime_int { + assert(@typeInfo(T) == .Float); + + return switch (@typeInfo(T).Float.bits) { + 16 => 5, + 32 => 8, + 64 => 11, + 80 => 15, + 128 => 15, + else => @compileError("unknown floating point type " ++ @typeName(T)), + }; +} + +/// Returns the number of bits in the mantissa of floating point type T. 
+pub fn floatMantissaBits(comptime T: type) comptime_int { + assert(@typeInfo(T) == .Float); + + return switch (@typeInfo(T).Float.bits) { + 16 => 10, + 32 => 23, + 64 => 52, + 80 => 64, + 128 => 112, + else => @compileError("unknown floating point type " ++ @typeName(T)), + }; +} + +/// Returns the number of binary digits in the mantissa of floating point type T. +pub fn floatMantissaDigits(comptime T: type) comptime_int { + assert(@typeInfo(T) == .Float); + + // standard IEEE floats have an implicit 0.m or 1.m integer part + // f80 is special and has an explicitly stored bit in the MSB + // this function corresponds to the `*_MANT_DIG` constants (e.g. `FLT_MANT_DIG`) from C's <float.h> + return switch (@typeInfo(T).Float.bits) { + 16 => 11, + 32 => 24, + 64 => 53, + 80 => 64, + 128 => 113, + else => @compileError("unknown floating point type " ++ @typeName(T)), + }; +} + +/// Returns the minimum exponent that can represent +/// a normalised value in floating point type T. +pub fn floatExponentMin(comptime T: type) comptime_int { + return -floatExponentMax(T) + 1; +} + +/// Returns the maximum exponent that can represent +/// a normalised value in floating point type T. +pub fn floatExponentMax(comptime T: type) comptime_int { + return (1 << (floatExponentBits(T) - 1)) - 1; +} + +/// Returns the smallest subnormal number representable in floating point type T. +pub fn floatTrueMin(comptime T: type) T { + return reconstructFloat(T, floatExponentMin(T) - 1, 1); +} + +/// Returns the smallest normal number representable in floating point type T. +pub fn floatMin(comptime T: type) T { + return reconstructFloat(T, floatExponentMin(T), mantissaOne(T)); +} + +/// Returns the largest normal number representable in floating point type T. +pub fn floatMax(comptime T: type) T { + const all1s_mantissa = (1 << floatMantissaBits(T)) - 1; + return reconstructFloat(T, floatExponentMax(T), all1s_mantissa); +} + +/// Returns the machine epsilon of floating point type T. 
+pub fn floatEps(comptime T: type) T { + return reconstructFloat(T, -(floatMantissaDigits(T) - 1), mantissaOne(T)); +} + +test "std.math.float" { + inline for ([_]type{ f16, f32, f64, f80, f128, c_longdouble }) |T| { + // (1 +) for the sign bit, since it is separate from the other bits + const size = 1 + floatExponentBits(T) + floatMantissaBits(T); + try expect(@bitSizeOf(T) == size); + + // for machine epsilon, assert expmin <= -prec <= expmax + try expect(floatExponentMin(T) <= -(floatMantissaDigits(T) - 1)); + try expect(-(floatMantissaDigits(T) - 1) <= floatExponentMax(T)); + } +}