From 7aac21c6f59b70deea6ced617f7b6a550e92bab4 Mon Sep 17 00:00:00 2001 From: momumi <57862114+momumi@users.noreply.github.com> Date: Sun, 15 Mar 2020 11:37:36 +1000 Subject: [PATCH] allow `_` separators in number literals (stage 1) * Underscores `_` may be placed between two digits in an int/float literal * Consecutive underscores are not allowed * Fixed parsing bug in exponents of hexadecimal float literals. Exponents should always be base 10, but hex characters would be parsed inside the exponent and everything after them would be ignored. e.g. `0x1.0p1ab1` would be parsed as `0x1.0p1`. --- doc/langref.html.in | 11 ++ lib/std/special/compiler_rt/floatundisf.zig | 38 +++--- src/parse_f128.c | 79 ++++++++--- src/tokenizer.cpp | 137 ++++++++++++-------- test/compile_errors.zig | 96 ++++++++++++++ test/stage1/behavior/math.zig | 28 ++++ 6 files changed, 297 insertions(+), 92 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 3a7892fd45..616edd44eb 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -885,6 +885,12 @@ const hex_int = 0xff; const another_hex_int = 0xFF; const octal_int = 0o755; const binary_int = 0b11110000; + +// underscores may be placed between two digits as a visual separator +const one_billion = 1_000_000_000; +const binary_mask = 0b1_1111_1111; +const permissions = 0o7_5_5; +const big_address = 0xFF80_0000_0000_0000; {#code_end#} {#header_close#} {#header_open|Runtime Integer Values#} @@ -947,6 +953,11 @@ const yet_another = 123.0e+77; const hex_floating_point = 0x103.70p-5; const another_hex_float = 0x103.70; const yet_another_hex_float = 0x103.70P-5; + +// underscores may be placed between two digits as a visual separator +const lightspeed = 299_792_458.000_000; +const nanosecond = 0.000_000_001; +const more_hex = 0x1234_5678.9ABC_CDEFp-10; {#code_end#}

There is no syntax for NaN, infinity, or negative infinity. For these special values, diff --git a/lib/std/special/compiler_rt/floatundisf.zig b/lib/std/special/compiler_rt/floatundisf.zig index 41ff02daee..ff242721d6 100644 --- a/lib/std/special/compiler_rt/floatundisf.zig +++ b/lib/std/special/compiler_rt/floatundisf.zig @@ -69,23 +69,23 @@ test "floatundisf" { test__floatundisf(0, 0.0); test__floatundisf(1, 1.0); test__floatundisf(2, 2.0); - test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F); - test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F); - test__floatundisf(0x8000008000000000, 0x1p+63F); - test__floatundisf(0x8000010000000000, 0x1.000002p+63F); - test__floatundisf(0x8000000000000000, 0x1p+63F); - test__floatundisf(0x8000000000000001, 0x1p+63F); - test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F); - test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F); - test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F); - test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F); + test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62); + test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62); + test__floatundisf(0x8000008000000000, 0x1p+63); + test__floatundisf(0x8000010000000000, 0x1.000002p+63); + test__floatundisf(0x8000000000000000, 0x1p+63); + test__floatundisf(0x8000000000000001, 0x1p+63); + test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64); + test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64); + test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50); + 
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50); + test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50); } diff --git a/src/parse_f128.c b/src/parse_f128.c index cffb3796b4..9b5c287a3c 100644 --- a/src/parse_f128.c +++ b/src/parse_f128.c @@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok) int x; long long y; int neg = 0; - + c = shgetc(f); if (c=='+' || c=='-') { neg = (c=='-'); c = shgetc(f); if (c-'0'>=10U && pok) shunget(f); } - if (c-'0'>=10U) { + if (c-'0'>=10U && c!='_') { shunget(f); return LLONG_MIN; } - for (x=0; c-'0'<10U && xdata.int_lit.bigint, 0); - bigint_init_unsigned(&t.specified_exponent, 0); break; case DIGIT_NON_ZERO: t.state = TokenizeStateNumber; begin_token(&t, TokenIdIntLiteral); + t.is_trailing_underscore = false; t.radix = 10; - t.exp_add_amt = 1; - t.exponent_in_bin_or_dec = 0; bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c)); - bigint_init_unsigned(&t.specified_exponent, 0); break; case '"': begin_token(&t, TokenIdStringLiteral); @@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) { switch (c) { case 'b': t.radix = 2; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'o': t.radix = 8; - t.exp_add_amt = 3; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'x': t.radix = 16; - t.exp_add_amt = 4; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; default: // reinterpret as normal number @@ -1208,9 +1201,27 @@ 
void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case TokenizeStateNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + break; + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateNumber; + } + // fall through case TokenizeStateNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateNumberNoUnderscore; + break; + } if (c == '.') { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (t.radix != 16 && t.radix != 10) { invalid_char_error(&t, c); } @@ -1222,13 +1233,18 @@ void tokenize(Buf *buf, Tokenization *out) { invalid_char_error(&t, c); } t.state = TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } + if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1259,20 +1275,37 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } t.pos -= 1; - t.state = TokenizeStateFloatFraction; + t.state = TokenizeStateFloatFractionNoUnderscore; assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); continue; } + case TokenizeStateFloatFractionNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatFraction; + } + // fall through case TokenizeStateFloatFraction: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatFractionNoUnderscore; + break; + } if (is_exponent_signifier(c, t.radix)) { t.state = 
TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1282,46 +1315,47 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - t.exponent_in_bin_or_dec -= t.exp_add_amt; - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, t.radix); - - BigInt multiplied; - bigint_mul(&multiplied, &t.significand, &radix_bi); - - bigint_add(&t.significand, &multiplied, &digit_value_bi); - break; + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } + break; case TokenizeStateFloatExponentUnsigned: switch (c) { case '+': - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; case '-': - t.is_exp_negative = true; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; default: // reinterpret as normal exponent number t.pos -= 1; - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; continue; } break; + case TokenizeStateFloatExponentNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatExponentNumber; + } + // fall through case TokenizeStateFloatExponentNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; + break; + } uint32_t digit_value = 
get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1331,21 +1365,9 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, 10); - - BigInt multiplied; - bigint_mul(&multiplied, &t.specified_exponent, &radix_bi); - - bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi); + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } break; case TokenizeStateSawDash: @@ -1399,6 +1421,9 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateStart: case TokenizeStateError: break; + case TokenizeStateNumberNoUnderscore: + case TokenizeStateFloatFractionNoUnderscore: + case TokenizeStateFloatExponentNumberNoUnderscore: case TokenizeStateNumberDot: tokenize_error(&t, "unterminated number literal"); break; diff --git a/test/compile_errors.zig b/test/compile_errors.zig index f894a152a7..83fe1def62 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -389,6 +389,102 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { "tmp.zig:5:29: error: invalid token: '.'", }); + cases.add("invalid underscore placement in float literal - 1", + \\fn main() void { + \\ var bad: f128 = 0._0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 2", + \\fn main() void { + \\ var bad: f128 = 0_.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '.'", + }); + + cases.add("invalid underscore placement in float literal - 3", + \\fn main() void { + \\ var bad: f128 = 
0.0_; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 4", + \\fn main() void { + \\ var bad: f128 = 1.0e_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 5", + \\fn main() void { + \\ var bad: f128 = 1.0e+_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 6", + \\fn main() void { + \\ var bad: f128 = 1.0e-_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 7", + \\fn main() void { + \\ var bad: f128 = 1.0e-1_; + \\}) + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 9", + \\fn main() void { + \\ var bad: f128 = 1__0.0e-1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 10", + \\fn main() void { + \\ var bad: f128 = 1.0__0e-1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 11", + \\fn main() void { + \\ var bad: f128 = 1.0e-1__0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 12", + \\fn main() void { + \\ var bad: f128 = 0_x0.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: 'x'", + }); + + cases.add("invalid underscore placement in float literal - 13", + \\fn main() void { + \\ var bad: f128 = 0x_0.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + cases.add("var args without c calling conv", \\fn foo(args: ...) 
void {} \\comptime { diff --git a/test/stage1/behavior/math.zig b/test/stage1/behavior/math.zig index fb70fb7e44..b342597acf 100644 --- a/test/stage1/behavior/math.zig +++ b/test/stage1/behavior/math.zig @@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" { comptime S.doTheTest(); } +test "underscore separator parsing" { + expect(0_0_0_0 == 0); + expect(1_234_567 == 1234567); + expect(001_234_567 == 1234567); + expect(0_0_1_2_3_4_5_6_7 == 1234567); + + expect(0b0_0_0_0 == 0); + expect(0b1010_1010 == 0b10101010); + expect(0b0000_1010_1010 == 0b10101010); + expect(0b1_0_1_0_1_0_1_0 == 0b10101010); + + expect(0o0_0_0_0 == 0); + expect(0o1010_1010 == 0o10101010); + expect(0o0000_1010_1010 == 0o10101010); + expect(0o1_0_1_0_1_0_1_0 == 0o10101010); + + expect(0x0_0_0_0 == 0); + expect(0x1010_1010 == 0x10101010); + expect(0x0000_1010_1010 == 0x10101010); + expect(0x1_0_1_0_1_0_1_0 == 0x10101010); + + expect(123_456.789_000e1_0 == 123456.789000e10); + expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10); + + expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10); + expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10); +} + test "hex float literal within range" { const a = 0x1.0p16383; const b = 0x0.1p16387;