From b2950866b18916fe5e6e458b62ec294244fa2c2f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Mon, 18 Apr 2022 20:23:02 -0700 Subject: [PATCH] compiler_rt: Fix rounding/NaN handling for f80 add/sub There were a few minor bugs in the rounding behavior and Inf/NaN handling for the f80 __addxf3 and __subtf3 functions. This change updates the original generic implementation to correctly handle f80 floats, including the explicit integer bit. --- lib/std/special/compiler_rt/addXf3.zig | 223 +++----------------- lib/std/special/compiler_rt/addXf3_test.zig | 78 ++++++- 2 files changed, 108 insertions(+), 193 deletions(-) diff --git a/lib/std/special/compiler_rt/addXf3.zig b/lib/std/special/compiler_rt/addXf3.zig index 13758afce7..c53d60600d 100644 --- a/lib/std/special/compiler_rt/addXf3.zig +++ b/lib/std/special/compiler_rt/addXf3.zig @@ -3,6 +3,7 @@ // https://github.com/llvm/llvm-project/blob/02d85149a05cb1f6dc49f0ba7a2ceca53718ae17/compiler-rt/lib/builtins/fp_add_impl.inc const std = @import("std"); +const math = std.math; const builtin = @import("builtin"); const compiler_rt = @import("../compiler_rt.zig"); @@ -14,6 +15,16 @@ pub fn __adddf3(a: f64, b: f64) callconv(.C) f64 { return addXf3(f64, a, b); } +pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 { + return addXf3(f80, a, b); +} + +pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 { + var b_rep = std.math.break_f80(b); + b_rep.exp ^= 0x8000; + return __addxf3(a, std.math.make_f80(b_rep)); +} + pub fn __addtf3(a: f128, b: f128) callconv(.C) f128 { return addXf3(f128, a, b); } @@ -58,10 +69,10 @@ fn normalize(comptime T: type, significand: *std.meta.Int(.unsigned, @typeInfo(T const bits = @typeInfo(T).Float.bits; const Z = std.meta.Int(.unsigned, bits); const S = std.meta.Int(.unsigned, bits - @clz(Z, @as(Z, bits) - 1)); - const significandBits = std.math.floatMantissaBits(T); - const implicitBit = @as(Z, 1) << significandBits; + const fractionalBits = math.floatFractionalBits(T); + const integerBit = @as(Z, 1) << fractionalBits; - const shift = @clz(std.meta.Int(.unsigned, bits), significand.*) - @clz(Z, implicitBit); + const shift = @clz(std.meta.Int(.unsigned, bits), significand.*) - @clz(Z, integerBit); significand.* <<= @intCast(S, shift); return 1 - shift; } @@ -73,26 +84,26 @@ fn addXf3(comptime T: type, a: T, b: T) T { const S = std.meta.Int(.unsigned, bits - @clz(Z, @as(Z, bits) - 1)); const typeWidth = bits; - const significandBits = std.math.floatMantissaBits(T); - const exponentBits = std.math.floatExponentBits(T); + const significandBits = math.floatMantissaBits(T); + const fractionalBits = math.floatFractionalBits(T); + const exponentBits = math.floatExponentBits(T); const signBit = (@as(Z, 1) << (significandBits + exponentBits)); const maxExponent = ((1 << exponentBits) - 1); - const implicitBit = (@as(Z, 1) << significandBits); - const quietBit = implicitBit >> 1; - const significandMask = implicitBit - 1; + const integerBit = (@as(Z, 1) << fractionalBits); + const quietBit = integerBit >> 1; + const significandMask = (@as(Z, 1) << significandBits) - 1; const absMask = signBit - 1; - const exponentMask = absMask ^ significandMask; - const qnanRep = exponentMask | quietBit; + const qnanRep = @bitCast(Z, math.nan(T)) | quietBit; var aRep = @bitCast(Z, a); var bRep = @bitCast(Z, b); const aAbs = aRep & absMask; const bAbs = bRep & absMask; - const infRep = @bitCast(Z, std.math.inf(T)); + const infRep = @bitCast(Z, math.inf(T)); // Detect if a or b is zero, infinity, or NaN. if (aAbs -% @as(Z, 1) >= infRep - @as(Z, 1) or @@ -157,12 +168,12 @@ fn addXf3(comptime T: type, a: T, b: T) T { // implicit significand bit. (If we fell through from the denormal path it // was already set by normalize( ), but setting it twice won't hurt // anything.) - aSignificand = (aSignificand | implicitBit) << 3; - bSignificand = (bSignificand | implicitBit) << 3; + aSignificand = (aSignificand | integerBit) << 3; + bSignificand = (bSignificand | integerBit) << 3; // Shift the significand of b by the difference in exponents, with a sticky // bottom bit to get rounding correct. - const @"align" = @intCast(Z, aExponent - bExponent); + const @"align" = @intCast(u32, aExponent - bExponent); if (@"align" != 0) { if (@"align" < typeWidth) { const sticky = if (bSignificand << @intCast(S, typeWidth - @"align") != 0) @as(Z, 1) else 0; @@ -178,8 +189,8 @@ fn addXf3(comptime T: type, a: T, b: T) T { // If partial cancellation occured, we need to left-shift the result // and adjust the exponent: - if (aSignificand < implicitBit << 3) { - const shift = @intCast(i32, @clz(Z, aSignificand)) - @intCast(i32, @clz(std.meta.Int(.unsigned, bits), implicitBit << 3)); + if (aSignificand < integerBit << 3) { + const shift = @intCast(i32, @clz(Z, aSignificand)) - @intCast(i32, @clz(std.meta.Int(.unsigned, bits), integerBit << 3)); aSignificand <<= @intCast(S, shift); aExponent -= shift; } @@ -188,7 +199,7 @@ fn addXf3(comptime T: type, a: T, b: T) T { // If the addition carried up, we need to right-shift the result and // adjust the exponent: - if (aSignificand & (implicitBit << 4) != 0) { + if (aSignificand & (integerBit << 4) != 0) { const sticky = aSignificand & 1; aSignificand = aSignificand >> 1 | sticky; aExponent += 1; @@ -210,7 +221,7 @@ fn addXf3(comptime T: type, a: T, b: T) T { // Low three bits are round, guard, and sticky. const roundGuardSticky = aSignificand & 0x7; - // Shift the significand into place, and mask off the implicit bit. + // Shift the significand into place, and mask off the integer bit, if it's implicit. var result = (aSignificand >> 3) & significandMask; // Insert the exponent and sign. @@ -222,180 +233,14 @@ fn addXf3(comptime T: type, a: T, b: T) T { if (roundGuardSticky > 0x4) result += 1; if (roundGuardSticky == 0x4) result += result & 1; + // Restore any explicit integer bit, if it was rounded off + if (significandBits != fractionalBits) { + if ((result >> significandBits) != 0) result |= integerBit; + } + return @bitCast(T, result); } -fn normalize_f80(exp: *i32, significand: *u80) void { - const shift = @clz(u64, @truncate(u64, significand.*)); - significand.* = (significand.* << shift); - exp.* += -@as(i8, shift); -} - -pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 { - var a_rep = std.math.break_f80(a); - var b_rep = std.math.break_f80(b); - var a_exp: i32 = a_rep.exp & 0x7FFF; - var b_exp: i32 = b_rep.exp & 0x7FFF; - - const significand_bits = std.math.floatMantissaBits(f80); - const int_bit = 0x8000000000000000; - const significand_mask = 0x7FFFFFFFFFFFFFFF; - const qnan_bit = 0xC000000000000000; - const max_exp = 0x7FFF; - const sign_bit = 0x8000; - - // Detect if a or b is infinity, or NaN. - if (a_exp == max_exp) { - if (a_rep.fraction ^ int_bit == 0) { - if (b_exp == max_exp and (b_rep.fraction ^ int_bit == 0)) { - // +/-infinity + -/+infinity = qNaN - return std.math.qnan_f80; - } - // +/-infinity + anything = +/- infinity - return a; - } else { - std.debug.assert(a_rep.fraction & significand_mask != 0); - // NaN + anything = qNaN - a_rep.fraction |= qnan_bit; - return std.math.make_f80(a_rep); - } - } - if (b_exp == max_exp) { - if (b_rep.fraction ^ int_bit == 0) { - // anything + +/-infinity = +/-infinity - return b; - } else { - std.debug.assert(b_rep.fraction & significand_mask != 0); - // anything + NaN = qNaN - b_rep.fraction |= qnan_bit; - return std.math.make_f80(b_rep); - } - } - - const a_zero = (a_rep.fraction | @bitCast(u32, a_exp)) == 0; - const b_zero = (b_rep.fraction | @bitCast(u32, b_exp)) == 0; - if (a_zero) { - // zero + anything = anything - if (b_zero) { - // but we need to get the sign right for zero + zero - a_rep.exp &= b_rep.exp; - return std.math.make_f80(a_rep); - } else { - return b; - } - } else if (b_zero) { - // anything + zero = anything - return a; - } - - var a_int: u80 = a_rep.fraction | (@as(u80, a_rep.exp & max_exp) << significand_bits); - var b_int: u80 = b_rep.fraction | (@as(u80, b_rep.exp & max_exp) << significand_bits); - - // Swap a and b if necessary so that a has the larger absolute value. - if (b_int > a_int) { - const temp = a_rep; - a_rep = b_rep; - b_rep = temp; - } - - // Extract the exponent and significand from the (possibly swapped) a and b. - a_exp = a_rep.exp & max_exp; - b_exp = b_rep.exp & max_exp; - a_int = a_rep.fraction; - b_int = b_rep.fraction; - - // Normalize any denormals, and adjust the exponent accordingly. - normalize_f80(&a_exp, &a_int); - normalize_f80(&b_exp, &b_int); - - // The sign of the result is the sign of the larger operand, a. If they - // have opposite signs, we are performing a subtraction; otherwise addition. - const result_sign = a_rep.exp & sign_bit; - const subtraction = (a_rep.exp ^ b_rep.exp) & sign_bit != 0; - - // Shift the significands to give us round, guard and sticky, and or in the - // implicit significand bit. (If we fell through from the denormal path it - // was already set by normalize( ), but setting it twice won't hurt - // anything.) - a_int = a_int << 3; - b_int = b_int << 3; - - // Shift the significand of b by the difference in exponents, with a sticky - // bottom bit to get rounding correct. - const @"align" = @intCast(u80, a_exp - b_exp); - if (@"align" != 0) { - if (@"align" < 80) { - const sticky = if (b_int << @intCast(u7, 80 - @"align") != 0) @as(u80, 1) else 0; - b_int = (b_int >> @truncate(u7, @"align")) | sticky; - } else { - b_int = 1; // sticky; b is known to be non-zero. - } - } - if (subtraction) { - a_int -= b_int; - // If a == -b, return +zero. - if (a_int == 0) return 0.0; - - // If partial cancellation occurred, we need to left-shift the result - // and adjust the exponent: - if (a_int < int_bit << 3) { - const shift = @intCast(i32, @clz(u80, a_int)) - @intCast(i32, @clz(u80, @as(u80, int_bit) << 3)); - a_int <<= @intCast(u7, shift); - a_exp -= shift; - } - } else { // addition - a_int += b_int; - - // If the addition carried up, we need to right-shift the result and - // adjust the exponent: - if (a_int & (int_bit << 4) != 0) { - const sticky = a_int & 1; - a_int = a_int >> 1 | sticky; - a_exp += 1; - } - } - - // If we have overflowed the type, return +/- infinity: - if (a_exp >= max_exp) { - a_rep.exp = max_exp | result_sign; - a_rep.fraction = int_bit; // integer bit is set for +/-inf - return std.math.make_f80(a_rep); - } - - if (a_exp <= 0) { - // Result is denormal before rounding; the exponent is zero and we - // need to shift the significand. - const shift = @intCast(u80, 1 - a_exp); - const sticky = if (a_int << @intCast(u7, 80 - shift) != 0) @as(u1, 1) else 0; - a_int = a_int >> @intCast(u7, shift | sticky); - a_exp = 0; - } - - // Low three bits are round, guard, and sticky. - const round_guard_sticky = @truncate(u3, a_int); - - // Shift the significand into place. - a_int = @truncate(u64, a_int >> 3); - - // // Insert the exponent and sign. - a_int |= (@intCast(u80, a_exp) | result_sign) << significand_bits; - - // Final rounding. The result may overflow to infinity, but that is the - // correct result in that case. - if (round_guard_sticky > 0x4) a_int += 1; - if (round_guard_sticky == 0x4) a_int += a_int & 1; - - a_rep.fraction = @truncate(u64, a_int); - a_rep.exp = @truncate(u16, a_int >> significand_bits); - return std.math.make_f80(a_rep); -} - -pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 { - var b_rep = std.math.break_f80(b); - b_rep.exp ^= 0x8000; - return __addxf3(a, std.math.make_f80(b_rep)); -} - test { _ = @import("addXf3_test.zig"); } diff --git a/lib/std/special/compiler_rt/addXf3_test.zig b/lib/std/special/compiler_rt/addXf3_test.zig index 70eb203cee..a4a18b41e0 100644 --- a/lib/std/special/compiler_rt/addXf3_test.zig +++ b/lib/std/special/compiler_rt/addXf3_test.zig @@ -3,8 +3,9 @@ // https://github.com/llvm/llvm-project/blob/02d85149a05cb1f6dc49f0ba7a2ceca53718ae17/compiler-rt/test/builtins/Unit/addtf3_test.c // https://github.com/llvm/llvm-project/blob/02d85149a05cb1f6dc49f0ba7a2ceca53718ae17/compiler-rt/test/builtins/Unit/subtf3_test.c +const std = @import("std"); +const math = std.math; const qnan128 = @bitCast(f128, @as(u128, 0x7fff800000000000) << 64); -const inf128 = @bitCast(f128, @as(u128, 0x7fff000000000000) << 64); const __addtf3 = @import("addXf3.zig").__addtf3; @@ -37,13 +38,14 @@ test "addtf3" { try test__addtf3(@bitCast(f128, (@as(u128, 0x7fff000000000000) << 64) | @as(u128, 0x800030000000)), 0x1.23456789abcdefp+5, 0x7fff800000000000, 0x0); // inf + inf = inf - try test__addtf3(inf128, inf128, 0x7fff000000000000, 0x0); + try test__addtf3(math.inf(f128), math.inf(f128), 0x7fff000000000000, 0x0); // inf + any = inf - try test__addtf3(inf128, 0x1.2335653452436234723489432abcdefp+5, 0x7fff000000000000, 0x0); + try test__addtf3(math.inf(f128), 0x1.2335653452436234723489432abcdefp+5, 0x7fff000000000000, 0x0); // any + any try test__addtf3(0x1.23456734245345543849abcdefp+5, 0x1.edcba52449872455634654321fp-1, 0x40042afc95c8b579, 0x61e58dd6c51eb77c); + try test__addtf3(0x1.edcba52449872455634654321fp-1, 0x1.23456734245345543849abcdefp+5, 0x40042afc95c8b579, 0x61e58dd6c51eb77c); } const __subtf3 = @import("addXf3.zig").__subtf3; @@ -78,8 +80,76 @@ test "subtf3" { try test__subtf3(@bitCast(f128, (@as(u128, 0x7fff000000000000) << 64) | @as(u128, 0x800030000000)), 0x1.23456789abcdefp+5, 0x7fff800000000000, 0x0); // inf - any = inf - try test__subtf3(inf128, 0x1.23456789abcdefp+5, 0x7fff000000000000, 0x0); + try test__subtf3(math.inf(f128), 0x1.23456789abcdefp+5, 0x7fff000000000000, 0x0); // any + any try test__subtf3(0x1.234567829a3bcdef5678ade36734p+5, 0x1.ee9d7c52354a6936ab8d7654321fp-1, 0x40041b8af1915166, 0xa44a7bca780a166c); + try test__subtf3(0x1.ee9d7c52354a6936ab8d7654321fp-1, 0x1.234567829a3bcdef5678ade36734p+5, 0xc0041b8af1915166, 0xa44a7bca780a166c); +} + +const __addxf3 = @import("addXf3.zig").__addxf3; +const qnan80 = @bitCast(f80, @bitCast(u80, math.nan(f80)) | (1 << (math.floatFractionalBits(f80) - 1))); + +fn test__addxf3(a: f80, b: f80, expected: u80) !void { + const x = __addxf3(a, b); + const rep = @bitCast(u80, x); + + if (rep == expected) + return; + + if (math.isNan(@bitCast(f80, expected)) and math.isNan(x)) + return; // We don't currently test NaN payload propagation + + return error.TestFailed; +} + +test "addxf3" { + // NaN + any = NaN + try test__addxf3(qnan80, 0x1.23456789abcdefp+5, @bitCast(u80, qnan80)); + try test__addxf3(@bitCast(f80, @as(u80, 0x7fff_8000_8000_3000_0000)), 0x1.23456789abcdefp+5, @bitCast(u80, qnan80)); + + // any + NaN = NaN + try test__addxf3(0x1.23456789abcdefp+5, qnan80, @bitCast(u80, qnan80)); + try test__addxf3(0x1.23456789abcdefp+5, @bitCast(f80, @as(u80, 0x7fff_8000_8000_3000_0000)), @bitCast(u80, qnan80)); + + // NaN + inf = NaN + try test__addxf3(qnan80, math.inf(f80), @bitCast(u80, qnan80)); + + // inf + NaN = NaN + try test__addxf3(math.inf(f80), qnan80, @bitCast(u80, qnan80)); + + // inf + inf = inf + try test__addxf3(math.inf(f80), math.inf(f80), @bitCast(u80, math.inf(f80))); + + // inf + -inf = NaN + try test__addxf3(math.inf(f80), -math.inf(f80), @bitCast(u80, qnan80)); + + // -inf + inf = NaN + try test__addxf3(-math.inf(f80), math.inf(f80), @bitCast(u80, qnan80)); + + // inf + any = inf + try test__addxf3(math.inf(f80), 0x1.2335653452436234723489432abcdefp+5, @bitCast(u80, math.inf(f80))); + + // any + inf = inf + try test__addxf3(0x1.2335653452436234723489432abcdefp+5, math.inf(f80), @bitCast(u80, math.inf(f80))); + + // any + any + try test__addxf3(0x1.23456789abcdp+5, 0x1.dcba987654321p+5, 0x4005_BFFFFFFFFFFFC400); + try test__addxf3(0x1.23456734245345543849abcdefp+5, 0x1.edcba52449872455634654321fp-1, 0x4004_957E_4AE4_5ABC_B0F3); + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x1.0p-63, 0x3FFF_FFFFFFFFFFFFFFFF); // exact + try test__addxf3(0x1.ffff_ffff_ffff_fffep+0, 0x0.0p0, 0x3FFF_FFFFFFFFFFFFFFFF); // exact + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x1.4p-63, 0x3FFF_FFFFFFFFFFFFFFFF); // round down + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x1.8p-63, 0x4000_8000000000000000); // round up to even + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x1.cp-63, 0x4000_8000000000000000); // round up + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x2.0p-63, 0x4000_8000000000000000); // exact + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x2.1p-63, 0x4000_8000000000000000); // round down + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x3.0p-63, 0x4000_8000000000000000); // round down to even + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x3.1p-63, 0x4000_8000000000000001); // round up + try test__addxf3(0x1.ffff_ffff_ffff_fffcp+0, 0x4.0p-63, 0x4000_8000000000000001); // exact + + try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.0p-63, 0x3FFF_8800000000000000); // exact + try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.7p-63, 0x3FFF_8800000000000000); // round down + try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.8p-63, 0x3FFF_8800000000000000); // round down to even + try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.9p-63, 0x3FFF_8800000000000001); // round up + try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x2.0p-63, 0x3FFF_8800000000000001); // exact }