diff --git a/doc/langref.html.in b/doc/langref.html.in index 3a7892fd45..616edd44eb 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -885,6 +885,12 @@ const hex_int = 0xff; const another_hex_int = 0xFF; const octal_int = 0o755; const binary_int = 0b11110000; + +// underscores may be placed between two digits as a visual separator +const one_billion = 1_000_000_000; +const binary_mask = 0b1_1111_1111; +const permissions = 0o7_5_5; +const big_address = 0xFF80_0000_0000_0000; {#code_end#} {#header_close#} {#header_open|Runtime Integer Values#} @@ -947,6 +953,11 @@ const yet_another = 123.0e+77; const hex_floating_point = 0x103.70p-5; const another_hex_float = 0x103.70; const yet_another_hex_float = 0x103.70P-5; + +// underscores may be placed between two digits as a visual separator +const lightspeed = 299_792_458.000_000; +const nanosecond = 0.000_000_001; +const more_hex = 0x1234_5678.9ABC_CDEFp-10; {#code_end#}

There is no syntax for NaN, infinity, or negative infinity. For these special values, diff --git a/lib/std/special/compiler_rt/floatundisf.zig b/lib/std/special/compiler_rt/floatundisf.zig index 41ff02daee..ff242721d6 100644 --- a/lib/std/special/compiler_rt/floatundisf.zig +++ b/lib/std/special/compiler_rt/floatundisf.zig @@ -69,23 +69,23 @@ test "floatundisf" { test__floatundisf(0, 0.0); test__floatundisf(1, 1.0); test__floatundisf(2, 2.0); - test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F); - test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F); - test__floatundisf(0x8000008000000000, 0x1p+63F); - test__floatundisf(0x8000010000000000, 0x1.000002p+63F); - test__floatundisf(0x8000000000000000, 0x1p+63F); - test__floatundisf(0x8000000000000001, 0x1p+63F); - test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F); - test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F); - test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F); - test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F); + test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62); + test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62); + test__floatundisf(0x8000008000000000, 0x1p+63); + test__floatundisf(0x8000010000000000, 0x1.000002p+63); + test__floatundisf(0x8000000000000000, 0x1p+63); + test__floatundisf(0x8000000000000001, 0x1p+63); + test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64); + test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64); + test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50); + test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50); } diff --git a/src/parse_f128.c b/src/parse_f128.c index cffb3796b4..9b5c287a3c 100644 --- a/src/parse_f128.c +++ b/src/parse_f128.c @@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok) int x; long long y; int neg = 0; - + c = shgetc(f); if (c=='+' || c=='-') { neg = (c=='-'); c = shgetc(f); if (c-'0'>=10U && pok) shunget(f); } - if (c-'0'>=10U) { + if (c-'0'>=10U && c!='_') { shunget(f); return LLONG_MIN; } - for (x=0; c-'0'<10U && xdata.int_lit.bigint, 0); - bigint_init_unsigned(&t.specified_exponent, 0); break; case DIGIT_NON_ZERO: t.state = TokenizeStateNumber; begin_token(&t, TokenIdIntLiteral); + t.is_trailing_underscore = false; t.radix = 10; - t.exp_add_amt = 1; - t.exponent_in_bin_or_dec = 0; bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c)); - bigint_init_unsigned(&t.specified_exponent, 0); break; case '"': begin_token(&t, TokenIdStringLiteral); @@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) { switch (c) { case 'b': t.radix = 2; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'o': t.radix = 8; - t.exp_add_amt = 3; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'x': t.radix = 16; - t.exp_add_amt = 4; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; default: // reinterpret as normal number @@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case TokenizeStateNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + break; + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateNumber; + } + // fall through case TokenizeStateNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateNumberNoUnderscore; + break; + } if (c == '.') { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (t.radix != 16 && t.radix != 10) { invalid_char_error(&t, c); } @@ -1222,13 +1233,18 @@ void tokenize(Buf *buf, Tokenization *out) { invalid_char_error(&t, c); } t.state = TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } + if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1259,20 +1275,37 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } t.pos -= 1; - t.state = TokenizeStateFloatFraction; + t.state = TokenizeStateFloatFractionNoUnderscore; assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); continue; } + case TokenizeStateFloatFractionNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatFraction; + } + // fall through case TokenizeStateFloatFraction: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatFractionNoUnderscore; + break; + } if (is_exponent_signifier(c, t.radix)) { t.state = TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1282,46 +1315,47 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - t.exponent_in_bin_or_dec -= t.exp_add_amt; - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, t.radix); - - BigInt multiplied; - bigint_mul(&multiplied, &t.significand, &radix_bi); - - bigint_add(&t.significand, &multiplied, &digit_value_bi); - break; + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } + break; case TokenizeStateFloatExponentUnsigned: switch (c) { case '+': - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; case '-': - t.is_exp_negative = true; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; default: // reinterpret as normal exponent number t.pos -= 1; - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; continue; } break; + case TokenizeStateFloatExponentNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatExponentNumber; + } + // fall through case TokenizeStateFloatExponentNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; + break; + } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1331,21 +1365,9 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, 10); - - BigInt multiplied; - bigint_mul(&multiplied, &t.specified_exponent, &radix_bi); - - bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi); + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } break; case TokenizeStateSawDash: @@ -1399,6 +1421,9 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateStart: case TokenizeStateError: break; + case TokenizeStateNumberNoUnderscore: + case TokenizeStateFloatFractionNoUnderscore: + case TokenizeStateFloatExponentNumberNoUnderscore: case TokenizeStateNumberDot: tokenize_error(&t, "unterminated number literal"); break; diff --git a/test/compile_errors.zig b/test/compile_errors.zig index f894a152a7..83fe1def62 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -389,6 +389,102 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { "tmp.zig:5:29: error: invalid token: '.'", }); + cases.add("invalid underscore placement in float literal - 1", + \\fn main() void { + \\ var bad: f128 = 0._0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 2", + \\fn main() void { + \\ var bad: f128 = 0_.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '.'", + }); + + cases.add("invalid underscore placement in float literal - 3", + \\fn main() void { + \\ var bad: f128 = 0.0_; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 4", + \\fn main() void { + \\ var bad: f128 = 1.0e_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 5", + \\fn main() void { + \\ var bad: f128 = 1.0e+_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 6", + \\fn main() void { + \\ var bad: f128 = 1.0e-_1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 7", + \\fn main() void { + \\ var bad: f128 = 1.0e-1_; + \\}) + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 9", + \\fn main() void { + \\ var bad: f128 = 1__0.0e-1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 10", + \\fn main() void { + \\ var bad: f128 = 1.0__0e-1; + \\}) + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 11", + \\fn main() void { + \\ var bad: f128 = 1.0e-1__0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 12", + \\fn main() void { + \\ var bad: f128 = 0_x0.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: 'x'", + }); + + cases.add("invalid underscore placement in float literal - 13", + \\fn main() void { + \\ var bad: f128 = 0x_0.0; + \\}) + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + cases.add("var args without c calling conv", \\fn foo(args: ...) void {} \\comptime { diff --git a/test/stage1/behavior/math.zig b/test/stage1/behavior/math.zig index fb70fb7e44..b342597acf 100644 --- a/test/stage1/behavior/math.zig +++ b/test/stage1/behavior/math.zig @@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" { comptime S.doTheTest(); } +test "underscore separator parsing" { + expect(0_0_0_0 == 0); + expect(1_234_567 == 1234567); + expect(001_234_567 == 1234567); + expect(0_0_1_2_3_4_5_6_7 == 1234567); + + expect(0b0_0_0_0 == 0); + expect(0b1010_1010 == 0b10101010); + expect(0b0000_1010_1010 == 0b10101010); + expect(0b1_0_1_0_1_0_1_0 == 0b10101010); + + expect(0o0_0_0_0 == 0); + expect(0o1010_1010 == 0o10101010); + expect(0o0000_1010_1010 == 0o10101010); + expect(0o1_0_1_0_1_0_1_0 == 0o10101010); + + expect(0x0_0_0_0 == 0); + expect(0x1010_1010 == 0x10101010); + expect(0x0000_1010_1010 == 0x10101010); + expect(0x1_0_1_0_1_0_1_0 == 0x10101010); + + expect(123_456.789_000e1_0 == 123456.789000e10); + expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10); + + expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10); + expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10); +} + test "hex float literal within range" { const a = 0x1.0p16383; const b = 0x0.1p16387;