diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 9ff6ed3bbe..fb30f3c12c 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -326,51 +326,49 @@ static void end_float_token(Tokenize *t) { return; } - // A SoftFloat-3d float128 is represented internally as a standard - // quad-precision float with 15bit exponent and 113bit fractional. + // A SoftFloat-3e float128 is represented internally as a standard + // quad-precision float with 15bit exponent and 112bit fractional. union { uint64_t repr[2]; float128_t actual; } f_bits; if (bigint_cmp_zero(&t->significand) == CmpEQ) { f_bits.repr[0] = 0; f_bits.repr[1] = 0; } else { - // normalize the significand - if (t->radix == 10) { - zig_panic("TODO: decimal floats"); - } else { - int significand_magnitude_in_bin = 127 - bigint_clz(&t->significand, 128); - t->exponent_in_bin_or_dec += significand_magnitude_in_bin; - if (!(-16382 <= t->exponent_in_bin_or_dec && t->exponent_in_bin_or_dec <= 16383)) { - t->cur_tok->data.float_lit.overflow = true; - return; - } - - uint64_t sig_bits[2] = {0, 0}; - bigint_write_twos_complement(&t->significand, (uint8_t*) sig_bits, 128, false); - - const uint64_t shift = 112 - significand_magnitude_in_bin; - const uint64_t exp_shift = 48; - // Mask the sign bit to 0 since always non-negative lex - const uint64_t exp_mask = 0xffffull << exp_shift; - - // must be special-cased to avoid undefined behavior on shift == 64 - if (shift == 128) { - f_bits.repr[0] = 0; - f_bits.repr[1] = sig_bits[0]; - } else if (shift == 0) { - f_bits.repr[0] = sig_bits[0]; - f_bits.repr[1] = sig_bits[1]; - } else if (shift >= 64) { - f_bits.repr[0] = 0; - f_bits.repr[1] = sig_bits[0] << (shift - 64); - } else { - f_bits.repr[0] = sig_bits[0] << shift; - f_bits.repr[1] = (sig_bits[1] << shift) | (sig_bits[0] >> (64 - shift)); - } - - f_bits.repr[1] &= ~exp_mask; - f_bits.repr[1] |= (uint64_t)(t->exponent_in_bin_or_dec + 16383) << exp_shift; + int significand_magnitude_in_bin = 127 - bigint_clz(&t->significand, 128); + t->exponent_in_bin_or_dec += significand_magnitude_in_bin; + if (!(-16382 <= t->exponent_in_bin_or_dec && t->exponent_in_bin_or_dec <= 16383)) { + t->cur_tok->data.float_lit.overflow = true; + return; } + + // Shift bits of significand so they are left-justified at the 112-bit + // mark. We truncate excess bits and lose precision. No rounding. + // + // -16 <= shift <= 112 + // + // NOTE: The loss of precision could be considered a limitation of using + // 128-bit floats. In stage2 we should use an arbitrary precision + // float/rational type to represent these and avoid this. + const int shift = 112 - significand_magnitude_in_bin; + bigint_write_twos_complement(&t->significand, (uint8_t*) f_bits.repr, 128, false); + + if (shift >= 64) { + f_bits.repr[1] = f_bits.repr[0] << (shift - 64); + f_bits.repr[0] = 0; + } else if (shift > 0) { + f_bits.repr[1] = (f_bits.repr[1] << shift) | (f_bits.repr[0] >> (64 - shift)); + f_bits.repr[0] = f_bits.repr[0] << shift; + } else if (shift < 0) { + int positive_shift = -shift; + assert(positive_shift <= 16); + f_bits.repr[0] = (f_bits.repr[0] >> positive_shift) | (f_bits.repr[1] << (64 - positive_shift)); + f_bits.repr[1] = f_bits.repr[1] >> positive_shift; + } + + // Lexer separates negative sign from value so this is always non-negative. + const uint64_t exp_mask = 0xffffull << 48; + f_bits.repr[1] &= ~exp_mask; + f_bits.repr[1] |= (uint64_t)(t->exponent_in_bin_or_dec + 16383) << 48; } bigfloat_init_128(&t->cur_tok->data.float_lit.bigfloat, f_bits.actual); diff --git a/test/stage1/behavior/math.zig b/test/stage1/behavior/math.zig index 36e81e11ed..9b277ce91a 100644 --- a/test/stage1/behavior/math.zig +++ b/test/stage1/behavior/math.zig @@ -307,6 +307,88 @@ test "quad hex float literal parsing accurate" { // implied 1 is dropped, with an exponent of 0 (0x3fff) after biasing. const expected: u128 = 0x3fff1111222233334444555566667777; expect(@bitCast(u128, a) == expected); + + // non-normalized + const b: f128 = 0x11.111222233334444555566667777p-4; + expect(@bitCast(u128, b) == expected); + + const S = struct { + fn doTheTest() void { + { + var f: f128 = 0x1.2eab345678439abcdefea56782346p+5; + expect(@bitCast(u128, f) == 0x40042eab345678439abcdefea5678234); + } + { + var f: f128 = 0x1.edcb34a235253948765432134674fp-1; + expect(@bitCast(u128, f) == 0x3ffeedcb34a235253948765432134674); + } + { + var f: f128 = 0x1.353e45674d89abacc3a2ebf3ff4ffp-50; + expect(@bitCast(u128, f) == 0x3fcd353e45674d89abacc3a2ebf3ff4f); + } + { + var f: f128 = 0x1.ed8764648369535adf4be3214567fp-9; + expect(@bitCast(u128, f) == 0x3ff6ed8764648369535adf4be3214567); + } + const exp2ft = []f64{ + 0x1.6a09e667f3bcdp-1, + 0x1.7a11473eb0187p-1, + 0x1.8ace5422aa0dbp-1, + 0x1.9c49182a3f090p-1, + 0x1.ae89f995ad3adp-1, + 0x1.c199bdd85529cp-1, + 0x1.d5818dcfba487p-1, + 0x1.ea4afa2a490dap-1, + 0x1.0000000000000p+0, + 0x1.0b5586cf9890fp+0, + 0x1.172b83c7d517bp+0, + 0x1.2387a6e756238p+0, + 0x1.306fe0a31b715p+0, + 0x1.3dea64c123422p+0, + 0x1.4bfdad5362a27p+0, + 0x1.5ab07dd485429p+0, + 0x1.8p23, + 0x1.62e430p-1, + 0x1.ebfbe0p-3, + 0x1.c6b348p-5, + 0x1.3b2c9cp-7, + 0x1.0p127, + -0x1.0p-149, + }; + + const answers = []u64{ + 0x3fe6a09e667f3bcd, + 0x3fe7a11473eb0187, + 0x3fe8ace5422aa0db, + 0x3fe9c49182a3f090, + 0x3feae89f995ad3ad, + 0x3fec199bdd85529c, + 0x3fed5818dcfba487, + 0x3feea4afa2a490da, + 0x3ff0000000000000, + 0x3ff0b5586cf9890f, + 0x3ff172b83c7d517b, + 0x3ff2387a6e756238, + 0x3ff306fe0a31b715, + 0x3ff3dea64c123422, + 0x3ff4bfdad5362a27, + 0x3ff5ab07dd485429, + 0x4168000000000000, + 0x3fe62e4300000000, + 0x3fcebfbe00000000, + 0x3fac6b3480000000, + 0x3f83b2c9c0000000, + 0x47e0000000000000, + 0xb6a0000000000000, + }; + + for (exp2ft) |x, i| { + expect(@bitCast(u64, x) == answers[i]); + } + } + }; + S.doTheTest(); + comptime S.doTheTest(); } test "hex float literal within range" {