diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index 0f5d6c4d23..6a44d859c9 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -253,6 +253,8 @@ comptime { @export(__divsf3, .{ .name = "__divsf3", .linkage = linkage }); const __divdf3 = @import("compiler_rt/divdf3.zig").__divdf3; @export(__divdf3, .{ .name = "__divdf3", .linkage = linkage }); + const __divxf3 = @import("compiler_rt/divxf3.zig").__divxf3; + @export(__divxf3, .{ .name = "__divxf3", .linkage = linkage }); const __divtf3 = @import("compiler_rt/divtf3.zig").__divtf3; @export(__divtf3, .{ .name = "__divtf3", .linkage = linkage }); @@ -725,17 +727,13 @@ comptime { } if (!is_test) { - @export(fmodl, .{ .name = "fmodl", .linkage = linkage }); if (long_double_is_f80) { - @export(fmodl, .{ .name = "fmodx", .linkage = linkage }); - } else { - @export(fmodx, .{ .name = "fmodx", .linkage = linkage }); - } - if (long_double_is_f128) { - @export(fmodl, .{ .name = "fmodq", .linkage = linkage }); - } else { - @export(fmodq, .{ .name = "fmodq", .linkage = linkage }); + @export(fmodx, .{ .name = "fmodl", .linkage = linkage }); + } else if (long_double_is_f128) { + @export(fmodq, .{ .name = "fmodl", .linkage = linkage }); } + @export(fmodx, .{ .name = "fmodx", .linkage = linkage }); + @export(fmodq, .{ .name = "fmodq", .linkage = linkage }); @export(floorf, .{ .name = "floorf", .linkage = linkage }); @export(floor, .{ .name = "floor", .linkage = linkage }); diff --git a/lib/std/special/compiler_rt/divxf3.zig b/lib/std/special/compiler_rt/divxf3.zig new file mode 100644 index 0000000000..5f5ed667ec --- /dev/null +++ b/lib/std/special/compiler_rt/divxf3.zig @@ -0,0 +1,202 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const normalize = @import("divdf3.zig").normalize; +const wideMultiply = @import("divdf3.zig").wideMultiply; + +pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 { + @setRuntimeSafety(builtin.is_test); + const T = f80; + const Z = std.meta.Int(.unsigned, @bitSizeOf(T)); + + const significandBits = std.math.floatMantissaBits(T); + const fractionalBits = std.math.floatFractionalBits(T); + const exponentBits = std.math.floatExponentBits(T); + + const signBit = (@as(Z, 1) << (significandBits + exponentBits)); + const maxExponent = ((1 << exponentBits) - 1); + const exponentBias = (maxExponent >> 1); + + const integerBit = (@as(Z, 1) << fractionalBits); + const quietBit = integerBit >> 1; + const significandMask = (@as(Z, 1) << significandBits) - 1; + + const absMask = signBit - 1; + const qnanRep = @bitCast(Z, std.math.nan(T)) | quietBit; + const infRep = @bitCast(Z, std.math.inf(T)); + + const aExponent = @truncate(u32, (@bitCast(Z, a) >> significandBits) & maxExponent); + const bExponent = @truncate(u32, (@bitCast(Z, b) >> significandBits) & maxExponent); + const quotientSign: Z = (@bitCast(Z, a) ^ @bitCast(Z, b)) & signBit; + + var aSignificand: Z = @bitCast(Z, a) & significandMask; + var bSignificand: Z = @bitCast(Z, b) & significandMask; + var scale: i32 = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent -% 1 >= maxExponent - 1 or bExponent -% 1 >= maxExponent - 1) { + const aAbs: Z = @bitCast(Z, a) & absMask; + const bAbs: Z = @bitCast(Z, b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return @bitCast(T, @bitCast(Z, a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return @bitCast(T, @bitCast(Z, b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) { + return @bitCast(T, qnanRep); + } + // infinity / anything else = +/- infinity + else { + return @bitCast(T, aAbs | quotientSign); + } + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return @bitCast(T, quotientSign); + + if (aAbs == 0) { + // zero / zero = NaN + if (bAbs == 0) { + return @bitCast(T, qnanRep); + } + // zero / anything else = +/- zero + else { + return @bitCast(T, quotientSign); + } + } + // anything else / zero = +/- infinity + if (bAbs == 0) return @bitCast(T, infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < integerBit) scale +%= normalize(T, &aSignificand); + if (bAbs < integerBit) scale -%= normalize(T, &bSignificand); + } + var quotientExponent: i32 = @bitCast(i32, aExponent -% bExponent) +% scale; + + // Align the significand of b as a Q63 fixed-point number in the range + // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + const q63b = @intCast(u64, bSignificand); + var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b; + // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration. + var correction64: u64 = undefined; + correction64 = @truncate(u64, ~(@as(u128, recip64) *% q63b >> 64) +% 1); + recip64 = @truncate(u64, @as(u128, recip64) *% correction64 >> 63); + correction64 = @truncate(u64, ~(@as(u128, recip64) *% q63b >> 64) +% 1); + recip64 = @truncate(u64, @as(u128, recip64) *% correction64 >> 63); + correction64 = @truncate(u64, ~(@as(u128, recip64) *% q63b >> 64) +% 1); + recip64 = @truncate(u64, @as(u128, recip64) *% correction64 >> 63); + correction64 = @truncate(u64, ~(@as(u128, recip64) *% q63b >> 64) +% 1); + recip64 = @truncate(u64, @as(u128, recip64) *% correction64 >> 63); + correction64 = @truncate(u64, ~(@as(u128, recip64) *% q63b >> 64) +% 1); + recip64 = @truncate(u64, @as(u128, recip64) *% correction64 >> 63); + + // The reciprocal may have overflowed to zero if the upper half of b is + // exactly 1.0. This would sabatoge the full-width final stage of the + // computation that follows, so we adjust the reciprocal down by one bit. + recip64 -%= 1; + + // We need to perform one more iteration to get us to 112 binary digits; + // The last iteration needs to happen with extra precision. + + // NOTE: This operation is equivalent to __multi3, which is not implemented + // in some architechures + var reciprocal: u128 = undefined; + var correction: u128 = undefined; + var dummy: u128 = undefined; + wideMultiply(u128, recip64, q63b, &dummy, &correction); + + correction = -%correction; + + const cHi = @truncate(u64, correction >> 64); + const cLo = @truncate(u64, correction); + + var r64cH: u128 = undefined; + var r64cL: u128 = undefined; + wideMultiply(u128, recip64, cHi, &dummy, &r64cH); + wideMultiply(u128, recip64, cLo, &dummy, &r64cL); + + reciprocal = r64cH + (r64cL >> 64); + + // Adjust the final 128-bit reciprocal estimate downward to ensure that it + // is strictly smaller than the infinitely precise exact reciprocal. Because + // the computation of the Newton-Raphson step is truncating at every step, + // this adjustment is small; most of the work is already done. + reciprocal -%= 2; + + // The numerical reciprocal is accurate to within 2^-112, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q127 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. The error in q is bounded away from 2^-63 (actually, we have + // many bits to spare, but this is all we need). + + // We need a 128 x 128 multiply high to compute q. + var quotient128: u128 = undefined; + var quotientLo: u128 = undefined; + wideMultiply(u128, aSignificand << 2, reciprocal, "ient128, "ientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // Right shift the quotient if it falls in the [1,2) range and adjust the + // exponent accordingly. + var quotient: u64 = if (quotient128 < (integerBit << 1)) b: { + quotientExponent -= 1; + break :b @intCast(u64, quotient128); + } else @intCast(u64, quotient128 >> 1); + + // We are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + var residual: u64 = -%(quotient *% q63b); + + const writtenExponent = quotientExponent + exponentBias; + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return @bitCast(T, infRep | quotientSign); + } else if (writtenExponent < 1) { + if (writtenExponent == 0) { + // Check whether the rounded result is normal. + if (residual > (bSignificand >> 1)) { // round + if (quotient == (integerBit - 1)) // If the rounded result is normal, return it + return @bitCast(T, @bitCast(Z, std.math.floatMin(T)) | quotientSign); + } + } + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return @bitCast(T, quotientSign); + } else { + const round = @boolToInt(residual > (bSignificand >> 1)); + // Insert the exponent + var absResult = quotient | (@intCast(Z, writtenExponent) << significandBits); + // Round + absResult +%= round; + // Insert the sign and return + return @bitCast(T, absResult | quotientSign | integerBit); + } +} + +test { + _ = @import("divxf3_test.zig"); +} diff --git a/lib/std/special/compiler_rt/divxf3_test.zig b/lib/std/special/compiler_rt/divxf3_test.zig new file mode 100644 index 0000000000..b79b90c6fb --- /dev/null +++ b/lib/std/special/compiler_rt/divxf3_test.zig @@ -0,0 +1,65 @@ +const std = @import("std"); +const math = std.math; +const testing = std.testing; + +const __divxf3 = @import("divxf3.zig").__divxf3; + +fn compareResult(result: f80, expected: u80) bool { + const rep = @bitCast(u80, result); + + if (rep == expected) return true; + // test other possible NaN representations (signal NaN) + if (math.isNan(result) and math.isNan(@bitCast(f80, expected))) return true; + + return false; +} + +fn expect__divxf3_result(a: f80, b: f80, expected: u80) !void { + const x = __divxf3(a, b); + const ret = compareResult(x, expected); + try testing.expect(ret == true); +} + +fn test__divxf3(a: f80, b: f80) !void { + const integerBit = 1 << math.floatFractionalBits(f80); + const x = __divxf3(a, b); + + // Next float (assuming normal, non-zero result) + const x_plus_eps = @bitCast(f80, (@bitCast(u80, x) + 1) | integerBit); + // Prev float (assuming normal, non-zero result) + const x_minus_eps = @bitCast(f80, (@bitCast(u80, x) - 1) | integerBit); + + // Make sure result is more accurate than the adjacent floats + const err_x = std.math.fabs(@mulAdd(f80, x, b, -a)); + const err_x_plus_eps = std.math.fabs(@mulAdd(f80, x_plus_eps, b, -a)); + const err_x_minus_eps = std.math.fabs(@mulAdd(f80, x_minus_eps, b, -a)); + + try testing.expect(err_x_minus_eps > err_x); + try testing.expect(err_x_plus_eps > err_x); +} + +test "divxf3" { + // qNaN / any = qNaN + try expect__divxf3_result(math.qnan_f80, 0x1.23456789abcdefp+5, 0x7fffC000000000000000); + // NaN / any = NaN + try expect__divxf3_result(math.nan_f80, 0x1.23456789abcdefp+5, 0x7fffC000000000000000); + // inf / any(except inf and nan) = inf + try expect__divxf3_result(math.inf(f80), 0x1.23456789abcdefp+5, 0x7fff8000000000000000); + // inf / inf = nan + try expect__divxf3_result(math.inf(f80), math.inf(f80), 0x7fffC000000000000000); + // inf / nan = nan + try expect__divxf3_result(math.inf(f80), math.nan(f80), 0x7fffC000000000000000); + + try test__divxf3(0x1.a23b45362464523375893ab4cdefp+5, 0x1.eedcbaba3a94546558237654321fp-1); + try test__divxf3(0x1.a2b34c56d745382f9abf2c3dfeffp-50, 0x1.ed2c3ba15935332532287654321fp-9); + try test__divxf3(0x1.2345f6aaaa786555f42432abcdefp+456, 0x1.edacbba9874f765463544dd3621fp+6400); + try test__divxf3(0x1.2d3456f789ba6322bc665544edefp-234, 0x1.eddcdba39f3c8b7a36564354321fp-4455); + try test__divxf3(0x1.2345f6b77b7a8953365433abcdefp+234, 0x1.edcba987d6bb3aa467754354321fp-4055); + try test__divxf3(0x1.a23b45362464523375893ab4cdefp+5, 0x1.a2b34c56d745382f9abf2c3dfeffp-50); + try test__divxf3(0x1.a23b45362464523375893ab4cdefp+5, 0x1.1234567890abcdef987654321123p0); + try test__divxf3(0x1.a23b45362464523375893ab4cdefp+5, 0x1.12394205810257120adae8929f23p+16); + try test__divxf3(0x1.a23b45362464523375893ab4cdefp+5, 0x1.febdcefa1231245f9abf2c3dfeffp-50); + + // Result rounds down to zero + try expect__divxf3_result(6.72420628622418701252535563464350521E-4932, 2.0, 0x0); +}