From 7aac21c6f59b70deea6ced617f7b6a550e92bab4 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 15 Mar 2020 11:37:36 +1000
Subject: [PATCH 1/7] allow `_` separators in number literals (stage 1)
* Underscores `_` may be placed between two digits of an int/float literal
* Consecutive underscores are not allowed
* Fixed a parsing bug in the exponents of hexadecimal float literals.
Exponents are always base 10, but hex digits were accepted inside the
exponent and everything after them was silently ignored, e.g.
`0x1.0p1ab1` was parsed as `0x1.0p1`.
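For example, the following all tokenize as ordinary int/float literals
(a minimal sketch in the style of the behavior tests added below,
where `expect` is `std.testing.expect`):

    const expect = @import("std").testing.expect;

    test "underscore separators" {
        expect(1_000_000 == 1000000);
        expect(0b1010_1010 == 0b10101010);
        expect(0xFF80_0000 == 0xFF800000);
        expect(1_0.0_1e1_0 == 10.01e10);
    }

whereas `1_`, `1__0` and `0x1.0p1ab1` are now tokenizer errors.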
---
doc/langref.html.in | 11 ++
lib/std/special/compiler_rt/floatundisf.zig | 38 +++---
src/parse_f128.c | 79 ++++++++---
src/tokenizer.cpp | 137 ++++++++++++--------
test/compile_errors.zig | 96 ++++++++++++++
test/stage1/behavior/math.zig | 28 ++++
6 files changed, 297 insertions(+), 92 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 3a7892fd45..616edd44eb 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -885,6 +885,12 @@ const hex_int = 0xff;
const another_hex_int = 0xFF;
const octal_int = 0o755;
const binary_int = 0b11110000;
+
+// underscores may be placed between two digits as a visual separator
+const one_billion = 1_000_000_000;
+const binary_mask = 0b1_1111_1111;
+const permissions = 0o7_5_5;
+const big_address = 0xFF80_0000_0000_0000;
{#code_end#}
{#header_close#}
{#header_open|Runtime Integer Values#}
@@ -947,6 +953,11 @@ const yet_another = 123.0e+77;
const hex_floating_point = 0x103.70p-5;
const another_hex_float = 0x103.70;
const yet_another_hex_float = 0x103.70P-5;
+
+// underscores may be placed between two digits as a visual separator
+const lightspeed = 299_792_458.000_000;
+const nanosecond = 0.000_000_001;
+const more_hex = 0x1234_5678.9ABC_CDEFp-10;
{#code_end#}
There is no syntax for NaN, infinity, or negative infinity. For these special values,
diff --git a/lib/std/special/compiler_rt/floatundisf.zig b/lib/std/special/compiler_rt/floatundisf.zig
index 41ff02daee..ff242721d6 100644
--- a/lib/std/special/compiler_rt/floatundisf.zig
+++ b/lib/std/special/compiler_rt/floatundisf.zig
@@ -69,23 +69,23 @@ test "floatundisf" {
test__floatundisf(0, 0.0);
test__floatundisf(1, 1.0);
test__floatundisf(2, 2.0);
- test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F);
- test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F);
- test__floatundisf(0x8000008000000000, 0x1p+63F);
- test__floatundisf(0x8000010000000000, 0x1.000002p+63F);
- test__floatundisf(0x8000000000000000, 0x1p+63F);
- test__floatundisf(0x8000000000000001, 0x1p+63F);
- test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F);
- test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F);
- test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F);
- test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F);
+ test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62);
+ test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62);
+ test__floatundisf(0x8000008000000000, 0x1p+63);
+ test__floatundisf(0x8000010000000000, 0x1.000002p+63);
+ test__floatundisf(0x8000000000000000, 0x1p+63);
+ test__floatundisf(0x8000000000000001, 0x1p+63);
+ test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64);
+ test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64);
+ test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50);
+ test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50);
}
diff --git a/src/parse_f128.c b/src/parse_f128.c
index cffb3796b4..9b5c287a3c 100644
--- a/src/parse_f128.c
+++ b/src/parse_f128.c
@@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok)
int x;
long long y;
int neg = 0;
-
+
c = shgetc(f);
if (c=='+' || c=='-') {
neg = (c=='-');
c = shgetc(f);
if (c-'0'>=10U && pok) shunget(f);
}
- if (c-'0'>=10U) {
+ if (c-'0'>=10U && c!='_') {
shunget(f);
return LLONG_MIN;
}
- for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f))
[...]
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
[...]
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0);
- bigint_init_unsigned(&t.specified_exponent, 0);
break;
case DIGIT_NON_ZERO:
t.state = TokenizeStateNumber;
begin_token(&t, TokenIdIntLiteral);
+ t.is_trailing_underscore = false;
t.radix = 10;
- t.exp_add_amt = 1;
- t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c));
- bigint_init_unsigned(&t.specified_exponent, 0);
break;
case '"':
begin_token(&t, TokenIdStringLiteral);
@@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case 'b':
t.radix = 2;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
case 'o':
t.radix = 8;
- t.exp_add_amt = 3;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
case 'x':
t.radix = 16;
- t.exp_add_amt = 4;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
default:
// reinterpret as normal number
@@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
+ case TokenizeStateNumberNoUnderscore:
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ break;
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateNumber;
+ }
+ // fall through
case TokenizeStateNumber:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateNumberNoUnderscore;
+ break;
+ }
if (c == '.') {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c);
}
@@ -1222,13 +1233,18 @@ void tokenize(Buf *buf, Tokenization *out) {
invalid_char_error(&t, c);
}
t.state = TokenizeStateFloatExponentUnsigned;
+ t.radix = 10; // exponent is always base 10
assert(t.cur_tok->id == TokenIdIntLiteral);
- bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
+
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1259,20 +1275,37 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
t.pos -= 1;
- t.state = TokenizeStateFloatFraction;
+ t.state = TokenizeStateFloatFractionNoUnderscore;
assert(t.cur_tok->id == TokenIdIntLiteral);
- bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
continue;
}
+ case TokenizeStateFloatFractionNoUnderscore:
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateFloatFraction;
+ }
+ // fall through
case TokenizeStateFloatFraction:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateFloatFractionNoUnderscore;
+ break;
+ }
if (is_exponent_signifier(c, t.radix)) {
t.state = TokenizeStateFloatExponentUnsigned;
+ t.radix = 10; // exponent is always base 10
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1282,46 +1315,47 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
- t.exponent_in_bin_or_dec -= t.exp_add_amt;
- if (t.radix == 10) {
- // For now we use strtod to parse decimal floats, so we just have to get to the
- // end of the token.
- break;
- }
- BigInt digit_value_bi;
- bigint_init_unsigned(&digit_value_bi, digit_value);
- BigInt radix_bi;
- bigint_init_unsigned(&radix_bi, t.radix);
-
- BigInt multiplied;
- bigint_mul(&multiplied, &t.significand, &radix_bi);
-
- bigint_add(&t.significand, &multiplied, &digit_value_bi);
- break;
+ // we use parse_f128 to generate the float literal, so just
+ // need to get to the end of the token
}
+ break;
case TokenizeStateFloatExponentUnsigned:
switch (c) {
case '+':
- t.is_exp_negative = false;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
case '-':
- t.is_exp_negative = true;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
default:
// reinterpret as normal exponent number
t.pos -= 1;
- t.is_exp_negative = false;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
continue;
}
break;
+ case TokenizeStateFloatExponentNumberNoUnderscore:
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateFloatExponentNumber;
+ }
+ // fall through
case TokenizeStateFloatExponentNumber:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
+ break;
+ }
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1331,21 +1365,9 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
- if (t.radix == 10) {
- // For now we use strtod to parse decimal floats, so we just have to get to the
- // end of the token.
- break;
- }
- BigInt digit_value_bi;
- bigint_init_unsigned(&digit_value_bi, digit_value);
- BigInt radix_bi;
- bigint_init_unsigned(&radix_bi, 10);
-
- BigInt multiplied;
- bigint_mul(&multiplied, &t.specified_exponent, &radix_bi);
-
- bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi);
+ // we use parse_f128 to generate the float literal, so just
+ // need to get to the end of the token
}
break;
case TokenizeStateSawDash:
@@ -1399,6 +1421,9 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateStart:
case TokenizeStateError:
break;
+ case TokenizeStateNumberNoUnderscore:
+ case TokenizeStateFloatFractionNoUnderscore:
+ case TokenizeStateFloatExponentNumberNoUnderscore:
case TokenizeStateNumberDot:
tokenize_error(&t, "unterminated number literal");
break;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index f894a152a7..83fe1def62 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -389,6 +389,102 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
"tmp.zig:5:29: error: invalid token: '.'",
});
+ cases.add("invalid underscore placement in float literal - 1",
+ \\fn main() void {
+ \\ var bad: f128 = 0._0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 2",
+ \\fn main() void {
+ \\ var bad: f128 = 0_.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '.'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 3",
+ \\fn main() void {
+ \\ var bad: f128 = 0.0_;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 4",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 5",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e+_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:26: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 6",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:26: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 7",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-1_;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 9",
+ \\fn main() void {
+ \\ var bad: f128 = 1__0.0e-1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 10",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0__0e-1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 11",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-1__0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 12",
+ \\fn main() void {
+ \\ var bad: f128 = 0_x0.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: 'x'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 13",
+ \\fn main() void {
+ \\ var bad: f128 = 0x_0.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
cases.add("var args without c calling conv",
\\fn foo(args: ...) void {}
\\comptime {
diff --git a/test/stage1/behavior/math.zig b/test/stage1/behavior/math.zig
index fb70fb7e44..b342597acf 100644
--- a/test/stage1/behavior/math.zig
+++ b/test/stage1/behavior/math.zig
@@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" {
comptime S.doTheTest();
}
+test "underscore separator parsing" {
+ expect(0_0_0_0 == 0);
+ expect(1_234_567 == 1234567);
+ expect(001_234_567 == 1234567);
+ expect(0_0_1_2_3_4_5_6_7 == 1234567);
+
+ expect(0b0_0_0_0 == 0);
+ expect(0b1010_1010 == 0b10101010);
+ expect(0b0000_1010_1010 == 0b10101010);
+ expect(0b1_0_1_0_1_0_1_0 == 0b10101010);
+
+ expect(0o0_0_0_0 == 0);
+ expect(0o1010_1010 == 0o10101010);
+ expect(0o0000_1010_1010 == 0o10101010);
+ expect(0o1_0_1_0_1_0_1_0 == 0o10101010);
+
+ expect(0x0_0_0_0 == 0);
+ expect(0x1010_1010 == 0x10101010);
+ expect(0x0000_1010_1010 == 0x10101010);
+ expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
+
+ expect(123_456.789_000e1_0 == 123456.789000e10);
+ expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
+
+ expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
+ expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
+}
+
test "hex float literal within range" {
const a = 0x1.0p16383;
const b = 0x0.1p16387;
From 925f71085269a8072ea9c49025a534fa47fbec44 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 15 Mar 2020 13:05:24 +1000
Subject: [PATCH 2/7] make parsing `0.0_e1` an error
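A trailing `_` immediately before the exponent marker slipped through
the first patch; it is now rejected in both the integer and fraction
parts. A sketch of the newly rejected forms (error text as reported
by the stage 1 tokenizer):

    const bad1 = 0.0_e1;   // error: invalid character: 'e'
    const bad2 = 0x0.0_p1; // error: invalid character: 'p'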
---
src/tokenizer.cpp | 8 ++++++++
test/compile_errors.zig | 8 ++++++++
2 files changed, 16 insertions(+)
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 73efae2037..22d63568bf 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -1229,6 +1229,10 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
}
if (is_exponent_signifier(c, t.radix)) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c);
}
@@ -1296,6 +1300,10 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
}
if (is_exponent_signifier(c, t.radix)) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
t.state = TokenizeStateFloatExponentUnsigned;
t.radix = 10; // exponent is always base 10
break;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 83fe1def62..2b40fec106 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -485,6 +485,14 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
"tmp.zig:2:23: error: invalid character: '_'",
});
+ cases.add("invalid underscore placement in float literal - 14",
+ \\fn main() void {
+ \\ var bad: f128 = 0x0.0_p1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:27: error: invalid character: 'p'",
+ });
+
cases.add("var args without c calling conv",
\\fn foo(args: ...) void {}
\\comptime {
From 47f7e6658077fb4f55c36e9c62ed5012ef8aace2 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 15 Mar 2020 23:42:29 +1000
Subject: [PATCH 3/7] add more test cases for invalid number literals
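These cover the base-10 exponent fix from the first patch as well as
trailing underscores in every radix, e.g. (expected stage 1 errors,
as asserted by the new cases):

    var bad: f128 = 0x1.0p1ab1; // error: invalid character: 'a'
    var bad2: u128 = 0o0010_;   // error: invalid character: ';'

The patch also drops the stray `)` after the closing brace in the
existing test sources (`\\})` becomes `\\}`).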
---
test/compile_errors.zig | 76 +++++++++++++++++++++++++++++++++--------
1 file changed, 62 insertions(+), 14 deletions(-)
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 2b40fec106..73bf60216e 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -384,15 +384,31 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
\\ var bad_float :f32 = 0.0;
\\ bad_float = bad_float + .20;
\\ std.debug.assert(bad_float < 1.0);
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:5:29: error: invalid token: '.'",
});
+ cases.add("invalid exponent in float literal - 1",
+ \\fn main() void {
+ \\ var bad: f128 = 0x1.0p1ab1;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: 'a'",
+ });
+
+ cases.add("invalid exponent in float literal - 2",
+ \\fn main() void {
+ \\ var bad: f128 = 0x1.0p50F;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:29: error: invalid character: 'F'",
+ });
+
cases.add("invalid underscore placement in float literal - 1",
\\fn main() void {
\\ var bad: f128 = 0._0;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
@@ -400,7 +416,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 2",
\\fn main() void {
\\ var bad: f128 = 0_.0;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '.'",
});
@@ -408,7 +424,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 3",
\\fn main() void {
\\ var bad: f128 = 0.0_;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: ';'",
});
@@ -416,7 +432,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 4",
\\fn main() void {
\\ var bad: f128 = 1.0e_1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
@@ -424,7 +440,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 5",
\\fn main() void {
\\ var bad: f128 = 1.0e+_1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
@@ -432,7 +448,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 6",
\\fn main() void {
\\ var bad: f128 = 1.0e-_1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
@@ -440,7 +456,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 7",
\\fn main() void {
\\ var bad: f128 = 1.0e-1_;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
@@ -448,7 +464,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 9",
\\fn main() void {
\\ var bad: f128 = 1__0.0e-1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
@@ -456,7 +472,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 10",
\\fn main() void {
\\ var bad: f128 = 1.0__0e-1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
@@ -464,7 +480,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 11",
\\fn main() void {
\\ var bad: f128 = 1.0e-1__0;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: '_'",
});
@@ -472,7 +488,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 12",
\\fn main() void {
\\ var bad: f128 = 0_x0.0;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: 'x'",
});
@@ -480,7 +496,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 13",
\\fn main() void {
\\ var bad: f128 = 0x_0.0;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
@@ -488,11 +504,43 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
cases.add("invalid underscore placement in float literal - 14",
\\fn main() void {
\\ var bad: f128 = 0x0.0_p1;
- \\})
+ \\}
, &[_][]const u8{
"tmp.zig:2:27: error: invalid character: 'p'",
});
+ cases.add("invalid underscore placement in int literal - 1",
+ \\fn main() void {
+ \\ var bad: u128 = 0010_;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:26: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in int literal - 2",
+ \\fn main() void {
+ \\ var bad: u128 = 0b0010_;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in int literal - 3",
+ \\fn main() void {
+ \\ var bad: u128 = 0o0010_;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in int literal - 4",
+ \\fn main() void {
+ \\ var bad: u128 = 0x0010_;
+ \\}
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: ';'",
+ });
+
cases.add("var args without c calling conv",
\\fn foo(args: ...) void {}
\\comptime {
From 138dab45248c718b8d38fa0a8eefe56f40fe617c Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 22 Mar 2020 10:35:19 +1000
Subject: [PATCH 4/7] add number `_` separators for stage 2 tokenizer
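Same approach as stage 1: every numeric state gets a paired
`...NoUnderscore` state that is entered after an `_` (or after a radix
prefix) and that only a digit of the current radix can leave. A
self-contained sketch of the idea, using a hypothetical
`validDecimal` helper that is not part of this patch:

    const expect = @import("std").testing.expect;

    // `saw_digit` plays the role of the `...NoUnderscore` states:
    // an `_` is only legal right after a digit, and the literal
    // must end on a digit.
    fn validDecimal(s: []const u8) bool {
        var saw_digit = false;
        for (s) |c| {
            switch (c) {
                '0'...'9' => saw_digit = true,
                '_' => {
                    if (!saw_digit) return false; // leading or doubled `_`
                    saw_digit = false;
                },
                else => return false,
            }
        }
        return saw_digit; // rejects a trailing `_` and the empty string
    }

    test "underscore placement" {
        expect(validDecimal("1_000"));
        expect(!validDecimal("1__0"));
        expect(!validDecimal("1_"));
    }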
---
lib/std/zig/tokenizer.zig | 419 +++++++++++++++++++++++++++++++++-----
1 file changed, 370 insertions(+), 49 deletions(-)
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index f6c71479e7..8c9b39d34f 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -387,17 +387,23 @@ pub const Tokenizer = struct {
DocComment,
ContainerDocComment,
Zero,
- IntegerLiteral,
- IntegerLiteralWithRadix,
- IntegerLiteralWithRadixHex,
- NumberDot,
+ IntegerLiteralDec,
+ IntegerLiteralDecNoUnderscore,
+ IntegerLiteralBin,
+ IntegerLiteralBinNoUnderscore,
+ IntegerLiteralOct,
+ IntegerLiteralOctNoUnderscore,
+ IntegerLiteralHex,
+ IntegerLiteralHexNoUnderscore,
+ NumberDotDec,
NumberDotHex,
- FloatFraction,
+ FloatFractionDec,
+ FloatFractionDecNoUnderscore,
FloatFractionHex,
+ FloatFractionHexNoUnderscore,
FloatExponentUnsigned,
- FloatExponentUnsignedHex,
FloatExponentNumber,
- FloatExponentNumberHex,
+ FloatExponentNumberNoUnderscore,
Ampersand,
Caret,
Percent,
@@ -550,7 +556,7 @@ pub const Tokenizer = struct {
result.id = Token.Id.IntegerLiteral;
},
'1'...'9' => {
- state = State.IntegerLiteral;
+ state = State.IntegerLiteralDec;
result.id = Token.Id.IntegerLiteral;
},
else => {
@@ -1048,55 +1054,122 @@ pub const Tokenizer = struct {
else => self.checkLiteralCharacter(),
},
State.Zero => switch (c) {
- 'b', 'o' => {
- state = State.IntegerLiteralWithRadix;
+ 'b' => {
+ state = State.IntegerLiteralBinNoUnderscore;
+ },
+ 'o' => {
+ state = State.IntegerLiteralOctNoUnderscore;
},
'x' => {
- state = State.IntegerLiteralWithRadixHex;
+ state = State.IntegerLiteralHexNoUnderscore;
},
else => {
// reinterpret as a normal number
self.index -= 1;
- state = State.IntegerLiteral;
+ state = State.IntegerLiteralDec;
},
},
- State.IntegerLiteral => switch (c) {
- '.' => {
- state = State.NumberDot;
+ State.IntegerLiteralBinNoUnderscore => switch (c) {
+ '0'...'1' => {
+ state = State.IntegerLiteralBin;
},
- 'p', 'P', 'e', 'E' => {
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
+ State.IntegerLiteralBin => switch (c) {
+ '_' => {
+ state = State.IntegerLiteralBinNoUnderscore;
+ },
+ '0'...'1' => {},
+ '2'...'9', 'a'...'z', 'A'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
+ else => break,
+ },
+ State.IntegerLiteralOctNoUnderscore => switch (c) {
+ '0'...'7' => {
+ state = State.IntegerLiteralOct;
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
+ State.IntegerLiteralOct => switch (c) {
+ '_' => {
+ state = State.IntegerLiteralOctNoUnderscore;
+ },
+ '0'...'7' => {},
+ '8'...'9', 'a'...'z', 'A'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
+ else => break,
+ },
+ State.IntegerLiteralDecNoUnderscore => switch (c) {
+ '0'...'9' => {
+ state = State.IntegerLiteralDec;
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
+ State.IntegerLiteralDec => switch (c) {
+ '_' => {
+ state = State.IntegerLiteralDecNoUnderscore;
+ },
+ '.' => {
+ state = State.NumberDotDec;
+ result.id = Token.Id.FloatLiteral;
+ },
+ 'e', 'E' => {
state = State.FloatExponentUnsigned;
+ result.id = Token.Id.FloatLiteral;
},
'0'...'9' => {},
- else => break,
- },
- State.IntegerLiteralWithRadix => switch (c) {
- '.' => {
- state = State.NumberDot;
+ 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => {
+ result.id = Token.Id.Invalid;
},
- '0'...'9' => {},
else => break,
},
- State.IntegerLiteralWithRadixHex => switch (c) {
+ State.IntegerLiteralHexNoUnderscore => switch (c) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {
+ state = State.IntegerLiteralHex;
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
+ State.IntegerLiteralHex => switch (c) {
+ '_' => {
+ state = State.IntegerLiteralHexNoUnderscore;
+ },
'.' => {
state = State.NumberDotHex;
+ result.id = Token.Id.FloatLiteral;
},
'p', 'P' => {
- state = State.FloatExponentUnsignedHex;
+ state = State.FloatExponentUnsigned;
+ result.id = Token.Id.FloatLiteral;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
+ 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
else => break,
},
- State.NumberDot => switch (c) {
+ State.NumberDotDec => switch (c) {
'.' => {
self.index -= 1;
state = State.Start;
break;
},
+ 'e', 'E' => {
+ state = State.FloatExponentUnsigned;
+ },
else => {
self.index -= 1;
result.id = Token.Id.FloatLiteral;
- state = State.FloatFraction;
+ state = State.FloatFractionDecNoUnderscore;
},
},
State.NumberDotHex => switch (c) {
@@ -1105,65 +1178,98 @@ pub const Tokenizer = struct {
state = State.Start;
break;
},
+ 'p', 'P' => {
+ state = State.FloatExponentUnsigned;
+ },
else => {
self.index -= 1;
result.id = Token.Id.FloatLiteral;
- state = State.FloatFractionHex;
+ state = State.FloatFractionHexNoUnderscore;
},
},
- State.FloatFraction => switch (c) {
+ State.FloatFractionDecNoUnderscore => switch (c) {
+ '0'...'9' => {
+ state = State.FloatFractionDec;
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
+ State.FloatFractionDec => switch (c) {
+ '_' => {
+ state = State.FloatFractionDecNoUnderscore;
+ },
'e', 'E' => {
state = State.FloatExponentUnsigned;
},
'0'...'9' => {},
+ 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
else => break,
},
+ State.FloatFractionHexNoUnderscore => switch (c) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {
+ state = State.FloatFractionHex;
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ },
+ },
State.FloatFractionHex => switch (c) {
+ '_' => {
+ state = State.FloatFractionHexNoUnderscore;
+ },
'p', 'P' => {
- state = State.FloatExponentUnsignedHex;
+ state = State.FloatExponentUnsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
+ 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
else => break,
},
State.FloatExponentUnsigned => switch (c) {
'+', '-' => {
- state = State.FloatExponentNumber;
+ state = State.FloatExponentNumberNoUnderscore;
},
else => {
// reinterpret as a normal exponent number
self.index -= 1;
- state = State.FloatExponentNumber;
+ state = State.FloatExponentNumberNoUnderscore;
},
},
- State.FloatExponentUnsignedHex => switch (c) {
- '+', '-' => {
- state = State.FloatExponentNumberHex;
+ State.FloatExponentNumberNoUnderscore => switch (c) {
+ '0'...'9' => {
+ state = State.FloatExponentNumber;
},
else => {
- // reinterpret as a normal exponent number
- self.index -= 1;
- state = State.FloatExponentNumberHex;
+ result.id = Token.Id.Invalid;
},
},
State.FloatExponentNumber => switch (c) {
+ '_' => {
+ state = State.FloatExponentNumberNoUnderscore;
+ },
'0'...'9' => {},
- else => break,
- },
- State.FloatExponentNumberHex => switch (c) {
- '0'...'9', 'a'...'f', 'A'...'F' => {},
+ 'a'...'z', 'A'...'Z' => {
+ result.id = Token.Id.Invalid;
+ },
else => break,
},
}
} else if (self.index == self.buffer.len) {
switch (state) {
State.Start,
- State.IntegerLiteral,
- State.IntegerLiteralWithRadix,
- State.IntegerLiteralWithRadixHex,
- State.FloatFraction,
+ State.IntegerLiteralDec,
+ State.IntegerLiteralBin,
+ State.IntegerLiteralOct,
+ State.IntegerLiteralHex,
+ State.NumberDotDec,
+ State.NumberDotHex,
+ State.FloatFractionDec,
State.FloatFractionHex,
State.FloatExponentNumber,
- State.FloatExponentNumberHex,
State.StringLiteral, // find this error later
State.MultilineStringLiteralLine,
State.Builtin,
@@ -1184,10 +1290,14 @@ pub const Tokenizer = struct {
result.id = Token.Id.ContainerDocComment;
},
- State.NumberDot,
- State.NumberDotHex,
+ State.IntegerLiteralDecNoUnderscore,
+ State.IntegerLiteralBinNoUnderscore,
+ State.IntegerLiteralOctNoUnderscore,
+ State.IntegerLiteralHexNoUnderscore,
+ State.FloatFractionDecNoUnderscore,
+ State.FloatFractionHexNoUnderscore,
+ State.FloatExponentNumberNoUnderscore,
State.FloatExponentUnsigned,
- State.FloatExponentUnsignedHex,
State.SawAtSign,
State.Backslash,
State.CharLiteral,
@@ -1585,6 +1695,217 @@ test "correctly parse pointer assignment" {
});
}
+test "tokenizer - number literals decimal" {
+ testTokenize("1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("2", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("3", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("4", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("5", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("6", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("7", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("8", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("9", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0a", &[_]Token.Id{ .Invalid });
+ testTokenize("9b", &[_]Token.Id{ .Invalid });
+ testTokenize("1z", &[_]Token.Id{ .Invalid });
+ testTokenize("1z_1", &[_]Token.Id{ .Invalid });
+ testTokenize("9z3", &[_]Token.Id{ .Invalid });
+
+ testTokenize("0_0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0001", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("01234567890", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("012_345_6789_0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{ .IntegerLiteral });
+
+ testTokenize("00_", &[_]Token.Id{ .Invalid });
+ testTokenize("0_0_", &[_]Token.Id{ .Invalid });
+ testTokenize("0__0", &[_]Token.Id{ .Invalid });
+ testTokenize("0_0f", &[_]Token.Id{ .Invalid });
+ testTokenize("0_0_f", &[_]Token.Id{ .Invalid });
+ testTokenize("1_,", &[_]Token.Id{ .Invalid });
+
+ testTokenize("1.", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0.0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("10.0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1e0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1e100", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.e100", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.0e100", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.0e+100", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.0e-100", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{ .FloatLiteral });
+
+ testTokenize("1e", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0p100", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Minus, .IntegerLiteral });
+ testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0_,", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0e,", &[_]Token.Id{ .Invalid });
+}
+
+
+test "tokenizer - number literals binary" {
+ testTokenize("0b0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b2", &[_]Token.Id{ .Invalid });
+ testTokenize("0b3", &[_]Token.Id{ .Invalid });
+ testTokenize("0b4", &[_]Token.Id{ .Invalid });
+ testTokenize("0b5", &[_]Token.Id{ .Invalid });
+ testTokenize("0b6", &[_]Token.Id{ .Invalid });
+ testTokenize("0b7", &[_]Token.Id{ .Invalid });
+ testTokenize("0b8", &[_]Token.Id{ .Invalid });
+ testTokenize("0b9", &[_]Token.Id{ .Invalid });
+ testTokenize("0ba", &[_]Token.Id{ .Invalid });
+ testTokenize("0bb", &[_]Token.Id{ .Invalid });
+ testTokenize("0bc", &[_]Token.Id{ .Invalid });
+ testTokenize("0bd", &[_]Token.Id{ .Invalid });
+ testTokenize("0be", &[_]Token.Id{ .Invalid });
+ testTokenize("0bf", &[_]Token.Id{ .Invalid });
+ testTokenize("0bz", &[_]Token.Id{ .Invalid });
+
+ testTokenize("0b0000_0000", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b1111_1111", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b10_10_10_10", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
+ testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
+
+ testTokenize("0B0", &[_]Token.Id{ .Invalid });
+ testTokenize("0b_", &[_]Token.Id{ .Invalid });
+ testTokenize("0b_0", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0b0__1", &[_]Token.Id{ .Invalid });
+ testTokenize("0b0_1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1e", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1p", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1e0", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0b1_,", &[_]Token.Id{ .Invalid });
+}
+
+test "tokenizer - number literals octal" {
+ testTokenize("0o0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o2", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o3", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o4", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o5", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o6", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o7", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o8", &[_]Token.Id{ .Invalid });
+ testTokenize("0o9", &[_]Token.Id{ .Invalid });
+ testTokenize("0oa", &[_]Token.Id{ .Invalid });
+ testTokenize("0ob", &[_]Token.Id{ .Invalid });
+ testTokenize("0oc", &[_]Token.Id{ .Invalid });
+ testTokenize("0od", &[_]Token.Id{ .Invalid });
+ testTokenize("0oe", &[_]Token.Id{ .Invalid });
+ testTokenize("0of", &[_]Token.Id{ .Invalid });
+ testTokenize("0oz", &[_]Token.Id{ .Invalid });
+
+ testTokenize("0o01234567", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o0123_4567", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o01_23_45_67", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
+ testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
+
+ testTokenize("0O0", &[_]Token.Id{ .Invalid });
+ testTokenize("0o_", &[_]Token.Id{ .Invalid });
+ testTokenize("0o_0", &[_]Token.Id{ .Invalid });
+ testTokenize("0o1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0o0__1", &[_]Token.Id{ .Invalid });
+ testTokenize("0o0_1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0o1e", &[_]Token.Id{ .Invalid });
+ testTokenize("0o1p", &[_]Token.Id{ .Invalid });
+ testTokenize("0o1e0", &[_]Token.Id{ .Invalid });
+ testTokenize("0o1p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0o_,", &[_]Token.Id{ .Invalid });
+}
+
+test "tokenizer - number literals hexadeciaml" {
+ testTokenize("0x0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x2", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x3", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x4", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x5", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x6", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x7", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x8", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x9", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xa", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xb", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xc", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xd", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xe", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xf", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xA", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xB", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xC", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xD", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xE", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0xF", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x0z", &[_]Token.Id{ .Invalid });
+ testTokenize("0xz", &[_]Token.Id{ .Invalid });
+
+ testTokenize("0x0123456789ABCDEF", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{ .IntegerLiteral });
+
+ testTokenize("0X0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x_", &[_]Token.Id{ .Invalid });
+ testTokenize("0x_1", &[_]Token.Id{ .Invalid });
+ testTokenize("0x1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0__1", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0_1_", &[_]Token.Id{ .Invalid });
+ testTokenize("0x_,", &[_]Token.Id{ .Invalid });
+
+ testTokenize("0x1.", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x1.0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xF.", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xF.0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xF.F", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xF.Fp0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xF.FP0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x1p0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xfp0", &[_]Token.Id{ .FloatLiteral });
+
+ testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x0.0p0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xff.ffp10", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xff.ffP10", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xff.p10", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xffp10", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{ .FloatLiteral });
+
+ testTokenize("0x1e", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x1e0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x1p", &[_]Token.Id{ .Invalid });
+ testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid });
+ testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0_.0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0._0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0_", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0_p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0._p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid });
+}
+
+
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| {
From 29324e6f393011ee010b80f2c88d946f082bef22 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 22 Mar 2020 12:41:11 +1000
Subject: [PATCH 5/7] fix formatting in tokenizer tests
---
lib/std/zig/tokenizer.zig | 361 +++++++++++++++++++-------------------
1 file changed, 180 insertions(+), 181 deletions(-)
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index 8c9b39d34f..6beaba0fe5 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -1696,216 +1696,215 @@ test "correctly parse pointer assignment" {
}
test "tokenizer - number literals decimal" {
- testTokenize("1", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("2", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("3", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("4", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("5", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("6", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("7", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("8", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("9", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0a", &[_]Token.Id{ .Invalid });
- testTokenize("9b", &[_]Token.Id{ .Invalid });
- testTokenize("1z", &[_]Token.Id{ .Invalid });
- testTokenize("1z_1", &[_]Token.Id{ .Invalid });
- testTokenize("9z3", &[_]Token.Id{ .Invalid });
+ testTokenize("0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("1", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("2", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("3", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("4", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("5", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("6", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("7", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("8", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("9", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0a", &[_]Token.Id{.Invalid});
+ testTokenize("9b", &[_]Token.Id{.Invalid});
+ testTokenize("1z", &[_]Token.Id{.Invalid});
+ testTokenize("1z_1", &[_]Token.Id{.Invalid});
+ testTokenize("9z3", &[_]Token.Id{.Invalid});
- testTokenize("0_0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0001", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("01234567890", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("012_345_6789_0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
- testTokenize("00_", &[_]Token.Id{ .Invalid });
- testTokenize("0_0_", &[_]Token.Id{ .Invalid });
- testTokenize("0__0", &[_]Token.Id{ .Invalid });
- testTokenize("0_0f", &[_]Token.Id{ .Invalid });
- testTokenize("0_0_f", &[_]Token.Id{ .Invalid });
- testTokenize("1_,", &[_]Token.Id{ .Invalid });
+ testTokenize("00_", &[_]Token.Id{.Invalid});
+ testTokenize("0_0_", &[_]Token.Id{.Invalid});
+ testTokenize("0__0", &[_]Token.Id{.Invalid});
+ testTokenize("0_0f", &[_]Token.Id{.Invalid});
+ testTokenize("0_0_f", &[_]Token.Id{.Invalid});
+ testTokenize("1_,", &[_]Token.Id{.Invalid});
- testTokenize("1.", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0.0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1.0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("10.0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1e0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1e100", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1.e100", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1.0e100", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1.0e+100", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1.0e-100", &[_]Token.Id{ .FloatLiteral });
- testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("1.", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
- testTokenize("1e", &[_]Token.Id{ .Invalid });
- testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid });
- testTokenize("1.0p100", &[_]Token.Id{ .Invalid });
+ testTokenize("1e", &[_]Token.Id{.Invalid});
+ testTokenize("1.0e1f0", &[_]Token.Id{.Invalid});
+ testTokenize("1.0p100", &[_]Token.Id{.Invalid});
testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Minus, .IntegerLiteral });
- testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid });
- testTokenize("1.0_,", &[_]Token.Id{ .Invalid });
- testTokenize("1.0e,", &[_]Token.Id{ .Invalid });
+ testTokenize("1.0p1f0", &[_]Token.Id{.Invalid});
+ testTokenize("1.0_,", &[_]Token.Id{.Invalid});
+ testTokenize("1.0e,", &[_]Token.Id{.Invalid});
}
-
test "tokenizer - number literals binary" {
- testTokenize("0b0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0b1", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0b2", &[_]Token.Id{ .Invalid });
- testTokenize("0b3", &[_]Token.Id{ .Invalid });
- testTokenize("0b4", &[_]Token.Id{ .Invalid });
- testTokenize("0b5", &[_]Token.Id{ .Invalid });
- testTokenize("0b6", &[_]Token.Id{ .Invalid });
- testTokenize("0b7", &[_]Token.Id{ .Invalid });
- testTokenize("0b8", &[_]Token.Id{ .Invalid });
- testTokenize("0b9", &[_]Token.Id{ .Invalid });
- testTokenize("0ba", &[_]Token.Id{ .Invalid });
- testTokenize("0bb", &[_]Token.Id{ .Invalid });
- testTokenize("0bc", &[_]Token.Id{ .Invalid });
- testTokenize("0bd", &[_]Token.Id{ .Invalid });
- testTokenize("0be", &[_]Token.Id{ .Invalid });
- testTokenize("0bf", &[_]Token.Id{ .Invalid });
- testTokenize("0bz", &[_]Token.Id{ .Invalid });
+ testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0b2", &[_]Token.Id{.Invalid});
+ testTokenize("0b3", &[_]Token.Id{.Invalid});
+ testTokenize("0b4", &[_]Token.Id{.Invalid});
+ testTokenize("0b5", &[_]Token.Id{.Invalid});
+ testTokenize("0b6", &[_]Token.Id{.Invalid});
+ testTokenize("0b7", &[_]Token.Id{.Invalid});
+ testTokenize("0b8", &[_]Token.Id{.Invalid});
+ testTokenize("0b9", &[_]Token.Id{.Invalid});
+ testTokenize("0ba", &[_]Token.Id{.Invalid});
+ testTokenize("0bb", &[_]Token.Id{.Invalid});
+ testTokenize("0bc", &[_]Token.Id{.Invalid});
+ testTokenize("0bd", &[_]Token.Id{.Invalid});
+ testTokenize("0be", &[_]Token.Id{.Invalid});
+ testTokenize("0bf", &[_]Token.Id{.Invalid});
+ testTokenize("0bz", &[_]Token.Id{.Invalid});
- testTokenize("0b0000_0000", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0b1111_1111", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0b10_10_10_10", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
- testTokenize("0B0", &[_]Token.Id{ .Invalid });
- testTokenize("0b_", &[_]Token.Id{ .Invalid });
- testTokenize("0b_0", &[_]Token.Id{ .Invalid });
- testTokenize("0b1_", &[_]Token.Id{ .Invalid });
- testTokenize("0b0__1", &[_]Token.Id{ .Invalid });
- testTokenize("0b0_1_", &[_]Token.Id{ .Invalid });
- testTokenize("0b1e", &[_]Token.Id{ .Invalid });
- testTokenize("0b1p", &[_]Token.Id{ .Invalid });
- testTokenize("0b1e0", &[_]Token.Id{ .Invalid });
- testTokenize("0b1p0", &[_]Token.Id{ .Invalid });
- testTokenize("0b1_,", &[_]Token.Id{ .Invalid });
+ testTokenize("0B0", &[_]Token.Id{.Invalid});
+ testTokenize("0b_", &[_]Token.Id{.Invalid});
+ testTokenize("0b_0", &[_]Token.Id{.Invalid});
+ testTokenize("0b1_", &[_]Token.Id{.Invalid});
+ testTokenize("0b0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
+ testTokenize("0b1e", &[_]Token.Id{.Invalid});
+ testTokenize("0b1p", &[_]Token.Id{.Invalid});
+ testTokenize("0b1e0", &[_]Token.Id{.Invalid});
+ testTokenize("0b1p0", &[_]Token.Id{.Invalid});
+ testTokenize("0b1_,", &[_]Token.Id{.Invalid});
}
test "tokenizer - number literals octal" {
- testTokenize("0o0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o1", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o2", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o3", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o4", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o5", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o6", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o7", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o8", &[_]Token.Id{ .Invalid });
- testTokenize("0o9", &[_]Token.Id{ .Invalid });
- testTokenize("0oa", &[_]Token.Id{ .Invalid });
- testTokenize("0ob", &[_]Token.Id{ .Invalid });
- testTokenize("0oc", &[_]Token.Id{ .Invalid });
- testTokenize("0od", &[_]Token.Id{ .Invalid });
- testTokenize("0oe", &[_]Token.Id{ .Invalid });
- testTokenize("0of", &[_]Token.Id{ .Invalid });
- testTokenize("0oz", &[_]Token.Id{ .Invalid });
+ testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o8", &[_]Token.Id{.Invalid});
+ testTokenize("0o9", &[_]Token.Id{.Invalid});
+ testTokenize("0oa", &[_]Token.Id{.Invalid});
+ testTokenize("0ob", &[_]Token.Id{.Invalid});
+ testTokenize("0oc", &[_]Token.Id{.Invalid});
+ testTokenize("0od", &[_]Token.Id{.Invalid});
+ testTokenize("0oe", &[_]Token.Id{.Invalid});
+ testTokenize("0of", &[_]Token.Id{.Invalid});
+ testTokenize("0oz", &[_]Token.Id{.Invalid});
- testTokenize("0o01234567", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o0123_4567", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o01_23_45_67", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
- testTokenize("0O0", &[_]Token.Id{ .Invalid });
- testTokenize("0o_", &[_]Token.Id{ .Invalid });
- testTokenize("0o_0", &[_]Token.Id{ .Invalid });
- testTokenize("0o1_", &[_]Token.Id{ .Invalid });
- testTokenize("0o0__1", &[_]Token.Id{ .Invalid });
- testTokenize("0o0_1_", &[_]Token.Id{ .Invalid });
- testTokenize("0o1e", &[_]Token.Id{ .Invalid });
- testTokenize("0o1p", &[_]Token.Id{ .Invalid });
- testTokenize("0o1e0", &[_]Token.Id{ .Invalid });
- testTokenize("0o1p0", &[_]Token.Id{ .Invalid });
- testTokenize("0o_,", &[_]Token.Id{ .Invalid });
+ testTokenize("0O0", &[_]Token.Id{.Invalid});
+ testTokenize("0o_", &[_]Token.Id{.Invalid});
+ testTokenize("0o_0", &[_]Token.Id{.Invalid});
+ testTokenize("0o1_", &[_]Token.Id{.Invalid});
+ testTokenize("0o0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
+ testTokenize("0o1e", &[_]Token.Id{.Invalid});
+ testTokenize("0o1p", &[_]Token.Id{.Invalid});
+ testTokenize("0o1e0", &[_]Token.Id{.Invalid});
+ testTokenize("0o1p0", &[_]Token.Id{.Invalid});
+ testTokenize("0o_,", &[_]Token.Id{.Invalid});
}
test "tokenizer - number literals hexadeciaml" {
- testTokenize("0x0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x1", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x2", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x3", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x4", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x5", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x6", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x7", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x8", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x9", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xa", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xb", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xc", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xd", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xe", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xf", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xA", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xB", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xC", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xD", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xE", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0xF", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x0z", &[_]Token.Id{ .Invalid });
- testTokenize("0xz", &[_]Token.Id{ .Invalid });
+ testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x0z", &[_]Token.Id{.Invalid});
+ testTokenize("0xz", &[_]Token.Id{.Invalid});
- testTokenize("0x0123456789ABCDEF", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{ .IntegerLiteral });
+ testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0X0", &[_]Token.Id{ .Invalid });
- testTokenize("0x_", &[_]Token.Id{ .Invalid });
- testTokenize("0x_1", &[_]Token.Id{ .Invalid });
- testTokenize("0x1_", &[_]Token.Id{ .Invalid });
- testTokenize("0x0__1", &[_]Token.Id{ .Invalid });
- testTokenize("0x0_1_", &[_]Token.Id{ .Invalid });
- testTokenize("0x_,", &[_]Token.Id{ .Invalid });
+ testTokenize("0X0", &[_]Token.Id{.Invalid});
+ testTokenize("0x_", &[_]Token.Id{.Invalid});
+ testTokenize("0x_1", &[_]Token.Id{.Invalid});
+ testTokenize("0x1_", &[_]Token.Id{.Invalid});
+ testTokenize("0x0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
+ testTokenize("0x_,", &[_]Token.Id{.Invalid});
- testTokenize("0x1.", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0x1.0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xF.", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xF.0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xF.F", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xF.Fp0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xF.FP0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0x1p0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xfp0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
- testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0x0.0p0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xff.ffp10", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xff.ffP10", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xff.p10", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xffp10", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{ .FloatLiteral });
- testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{ .FloatLiteral });
+ testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});
- testTokenize("0x1e", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x1e0", &[_]Token.Id{ .IntegerLiteral });
- testTokenize("0x1p", &[_]Token.Id{ .Invalid });
- testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid });
- testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid });
- testTokenize("0x0_.0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0._0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0_", &[_]Token.Id{ .Invalid });
- testTokenize("0x0_p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0._p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid });
- testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid });
+ testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
+ testTokenize("0x1p", &[_]Token.Id{.Invalid});
+ testTokenize("0xfp0z1", &[_]Token.Id{.Invalid});
+ testTokenize("0xff.ffpff", &[_]Token.Id{.Invalid});
+ testTokenize("0x0_.0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0._0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
+ testTokenize("0x0_p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0_.p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0._p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0_p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0._0p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0_p0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0p_0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0p+_0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0p-_0", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.0p0_", &[_]Token.Id{.Invalid});
}
-
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| {
From 8de45e51438d7f593825ae2134ab7dfc46f3bab2 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Sun, 22 Mar 2020 13:45:31 +1000
Subject: [PATCH 6/7] update parsing of int literals in self-hosted
* Update std.math.big.Int.setString() to ignore underscores and make it
case-insensitive
* Fix an issue in ir.zig where integer literals with leading zeroes (e.g.
`0001`) hit an `unreachable` switch arm
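For example (a minimal sketch taken from the new big.int test below; allocator
setup as in std.testing):

    var a = try Int.init(testing.allocator);
    defer a.deinit();
    try a.setString(16, "aB_cD_eF"); // underscores are skipped; digits may be upper or lower case
    testing.expect((try a.to(u32)) == 0xabcdef);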
---
lib/std/math/big/int.zig | 25 +++++++++++++++++++++++--
src-self-hosted/ir.zig | 7 +++++--
2 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 95d0764f68..3557a40798 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -373,6 +373,7 @@ pub const Int = struct {
const d = switch (ch) {
'0'...'9' => ch - '0',
'a'...'f' => (ch - 'a') + 0xa,
+ 'A'...'F' => (ch - 'A') + 0xa,
else => return error.InvalidCharForDigit,
};
@@ -393,8 +394,9 @@ pub const Int = struct {
/// Set self from the string representation `value`.
///
- /// value must contain only digits <= `base`. Base prefixes are not allowed (e.g. 0x43 should
- /// simply be 43).
+ /// `value` must contain only digits valid for `base`, matched case-insensitively. Base
+ /// prefixes are not allowed (e.g. 0x43 should simply be 43). Underscores in the input
+ /// string are ignored and can be used as digit separators.
///
/// Returns an error if memory could not be allocated or `value` has invalid digits for the
/// requested base.
@@ -415,6 +417,9 @@ pub const Int = struct {
try self.set(0);
for (value[i..]) |ch| {
+ if (ch == '_') {
+ continue;
+ }
const d = try charToDigit(ch, base);
const ap_d = Int.initFixed(([_]Limb{d})[0..]);
@@ -1582,6 +1587,22 @@ test "big.int string negative" {
testing.expect((try a.to(i32)) == -1023);
}
+test "big.int string set number with underscores" {
+ var a = try Int.init(testing.allocator);
+ defer a.deinit();
+
+ try a.setString(10, "__1_2_0_3_1_7_2_4_1_2_0_____9_1__2__4_7_8_1_2_4_1_2_9_0_8_4_7_1_2_4___");
+ testing.expect((try a.to(u128)) == 120317241209124781241290847124);
+}
+
+test "big.int string set case insensitive number" {
+ var a = try Int.init(testing.allocator);
+ defer a.deinit();
+
+ try a.setString(16, "aB_cD_eF");
+ testing.expect((try a.to(u32)) == 0xabcdef);
+}
+
test "big.int string set bad char error" {
var a = try Int.init(testing.allocator);
defer a.deinit();
diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig
index 2e65962d41..e3d13b43db 100644
--- a/src-self-hosted/ir.zig
+++ b/src-self-hosted/ir.zig
@@ -1311,13 +1311,16 @@ pub const Builder = struct {
var base: u8 = undefined;
var rest: []const u8 = undefined;
if (int_token.len >= 3 and int_token[0] == '0') {
+ rest = int_token[2..];
base = switch (int_token[1]) {
'b' => 2,
'o' => 8,
'x' => 16,
- else => unreachable,
+ else => {
+ base = 10;
+ rest = int_token;
+ },
};
- rest = int_token[2..];
} else {
base = 10;
rest = int_token;
From 2d18178c27060ff9b9b4a5b56941617dc47868d0 Mon Sep 17 00:00:00 2001
From: momumi <57862114+momumi@users.noreply.github.com>
Date: Mon, 23 Mar 2020 09:21:34 +1000
Subject: [PATCH 7/7] minor fixes and more tests for _ separators
* Make the tokenizer emit an Invalid token at the first invalid
character found in a number literal
* Add more parsing and tokenizer tests for number literals
* Fix an invalid switch statement in ir.zig (the `else` arm assigned to
`base` inside a block instead of yielding a value for the
`base = switch (...)` expression)
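For example (a sketch mirroring the updated tokenizer tests below), a number
literal now ends at the first bad character and the rest of the input is
tokenized on its own:

    testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral }); // "0b" is Invalid; "2" restarts as a literal
    testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier }); // "1" is Invalid; "z_1" is an identifier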
---
lib/std/zig/parser_test.zig | 69 +++++++++
lib/std/zig/tokenizer.zig | 275 ++++++++++++++++++++++--------------
src-self-hosted/ir.zig | 10 +-
3 files changed, 242 insertions(+), 112 deletions(-)
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index d00568e49f..00f6b33673 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -2800,6 +2800,75 @@ test "zig fmt: extern without container keyword returns error" {
);
}
+test "zig fmt: integer literals with underscore separators" {
+ try testTransform(
+ \\const
+ \\ x =
+ \\ 1_234_567
+ \\ +(0b0_1-0o7_0+0xff_FF ) + 0_0;
+ ,
+ \\const x = 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
+ \\
+ );
+}
+
+test "zig fmt: hex literals with underscore separators" {
+ try testTransform(
+ \\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 {
+ \\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
+ \\ for (c [ 0_0 .. ]) |_, i| {
+ \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
+ \\ }
+ \\ return c;
+ \\}
+ \\
+ \\
+ ,
+ \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
+ \\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
+ \\ for (c[0_0..]) |_, i| {
+ \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
+ \\ }
+ \\ return c;
+ \\}
+ \\
+ );
+}
+
+test "zig fmt: decimal float literals with underscore separators" {
+ try testTransform(
+ \\pub fn main() void {
+ \\ const a:f64=(10.0e-0+(10.e+0))+10_00.00_00e-2+00_00.00_10e+4;
+ \\ const b:f64=010.0--0_10.+0_1_0.0_0+1e2;
+ \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
+ \\}
+ ,
+ \\pub fn main() void {
+ \\ const a: f64 = (10.0e-0 + (10.e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
+ \\ const b: f64 = 010.0 - -0_10. + 0_1_0.0_0 + 1e2;
+ \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
+ \\}
+ \\
+ );
+}
+
+test "zig fmt: hexadeciaml float literals with underscore separators" {
+ try testTransform(
+ \\pub fn main() void {
+ \\ const a: f64 = (0x10.0p-0+(0x10.p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16;
+ \\ const b: f64 = 0x0010.0--0x00_10.+0x10.00+0x1p4;
+ \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
+ \\}
+ ,
+ \\pub fn main() void {
+ \\ const a: f64 = (0x10.0p-0 + (0x10.p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16;
+ \\ const b: f64 = 0x0010.0 - -0x00_10. + 0x10.00 + 0x1p4;
+ \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
+ \\}
+ \\
+ );
+}
+
const std = @import("std");
const mem = std.mem;
const warn = std.debug.warn;
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index 6beaba0fe5..6cb66595a7 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -418,6 +418,10 @@ pub const Tokenizer = struct {
SawAtSign,
};
+ fn isIdentifierChar(char: u8) bool {
+ return std.ascii.isAlNum(char) or char == '_';
+ }
+
pub fn next(self: *Tokenizer) Token {
if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null;
@@ -1063,11 +1067,17 @@ pub const Tokenizer = struct {
'x' => {
state = State.IntegerLiteralHexNoUnderscore;
},
- else => {
- // reinterpret as a normal number
+ '0'...'9', '_', '.', 'e', 'E' => {
+ // reinterpret as a decimal number
self.index -= 1;
state = State.IntegerLiteralDec;
},
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
+ },
},
State.IntegerLiteralBinNoUnderscore => switch (c) {
'0'...'1' => {
@@ -1075,6 +1085,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.IntegerLiteralBin => switch (c) {
@@ -1082,10 +1093,12 @@ pub const Tokenizer = struct {
state = State.IntegerLiteralBinNoUnderscore;
},
'0'...'1' => {},
- '2'...'9', 'a'...'z', 'A'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.IntegerLiteralOctNoUnderscore => switch (c) {
'0'...'7' => {
@@ -1093,6 +1106,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.IntegerLiteralOct => switch (c) {
@@ -1100,10 +1114,12 @@ pub const Tokenizer = struct {
state = State.IntegerLiteralOctNoUnderscore;
},
'0'...'7' => {},
- '8'...'9', 'a'...'z', 'A'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.IntegerLiteralDecNoUnderscore => switch (c) {
'0'...'9' => {
@@ -1111,6 +1127,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.IntegerLiteralDec => switch (c) {
@@ -1126,10 +1143,12 @@ pub const Tokenizer = struct {
result.id = Token.Id.FloatLiteral;
},
'0'...'9' => {},
- 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.IntegerLiteralHexNoUnderscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
@@ -1137,6 +1156,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.IntegerLiteralHex => switch (c) {
@@ -1152,10 +1172,12 @@ pub const Tokenizer = struct {
result.id = Token.Id.FloatLiteral;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
- 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.NumberDotDec => switch (c) {
'.' => {
@@ -1166,10 +1188,15 @@ pub const Tokenizer = struct {
'e', 'E' => {
state = State.FloatExponentUnsigned;
},
- else => {
- self.index -= 1;
+ '0'...'9' => {
result.id = Token.Id.FloatLiteral;
- state = State.FloatFractionDecNoUnderscore;
+ state = State.FloatFractionDec;
+ },
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
},
State.NumberDotHex => switch (c) {
@@ -1181,10 +1208,15 @@ pub const Tokenizer = struct {
'p', 'P' => {
state = State.FloatExponentUnsigned;
},
- else => {
- self.index -= 1;
+ '0'...'9', 'a'...'f', 'A'...'F' => {
result.id = Token.Id.FloatLiteral;
- state = State.FloatFractionHexNoUnderscore;
+ state = State.FloatFractionHex;
+ },
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
},
State.FloatFractionDecNoUnderscore => switch (c) {
@@ -1193,6 +1225,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.FloatFractionDec => switch (c) {
@@ -1203,10 +1236,12 @@ pub const Tokenizer = struct {
state = State.FloatExponentUnsigned;
},
'0'...'9' => {},
- 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.FloatFractionHexNoUnderscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
@@ -1214,6 +1249,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.FloatFractionHex => switch (c) {
@@ -1224,10 +1260,12 @@ pub const Tokenizer = struct {
state = State.FloatExponentUnsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
- 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
State.FloatExponentUnsigned => switch (c) {
'+', '-' => {
@@ -1245,6 +1283,7 @@ pub const Tokenizer = struct {
},
else => {
result.id = Token.Id.Invalid;
+ break;
},
},
State.FloatExponentNumber => switch (c) {
@@ -1252,10 +1291,12 @@ pub const Tokenizer = struct {
state = State.FloatExponentNumberNoUnderscore;
},
'0'...'9' => {},
- 'a'...'z', 'A'...'Z' => {
- result.id = Token.Id.Invalid;
+ else => {
+ if (isIdentifierChar(c)) {
+ result.id = Token.Id.Invalid;
+ }
+ break;
},
- else => break,
},
}
} else if (self.index == self.buffer.len) {
@@ -1706,11 +1747,11 @@ test "tokenizer - number literals decimal" {
testTokenize("7", &[_]Token.Id{.IntegerLiteral});
testTokenize("8", &[_]Token.Id{.IntegerLiteral});
testTokenize("9", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0a", &[_]Token.Id{.Invalid});
- testTokenize("9b", &[_]Token.Id{.Invalid});
- testTokenize("1z", &[_]Token.Id{.Invalid});
- testTokenize("1z_1", &[_]Token.Id{.Invalid});
- testTokenize("9z3", &[_]Token.Id{.Invalid});
+ testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
@@ -1720,15 +1761,17 @@ test "tokenizer - number literals decimal" {
testTokenize("00_", &[_]Token.Id{.Invalid});
testTokenize("0_0_", &[_]Token.Id{.Invalid});
- testTokenize("0__0", &[_]Token.Id{.Invalid});
- testTokenize("0_0f", &[_]Token.Id{.Invalid});
- testTokenize("0_0_f", &[_]Token.Id{.Invalid});
- testTokenize("1_,", &[_]Token.Id{.Invalid});
+ testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1.", &[_]Token.Id{.FloatLiteral});
testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
@@ -1736,34 +1779,47 @@ test "tokenizer - number literals decimal" {
testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
testTokenize("1e", &[_]Token.Id{.Invalid});
- testTokenize("1.0e1f0", &[_]Token.Id{.Invalid});
- testTokenize("1.0p100", &[_]Token.Id{.Invalid});
- testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Minus, .IntegerLiteral });
- testTokenize("1.0p1f0", &[_]Token.Id{.Invalid});
- testTokenize("1.0_,", &[_]Token.Id{.Invalid});
- testTokenize("1.0e,", &[_]Token.Id{.Invalid});
+ testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
+ testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
+ testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
+ testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
+ testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0e", &[_]Token.Id{.Invalid});
+ testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
+ testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
}
test "tokenizer - number literals binary" {
testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0b2", &[_]Token.Id{.Invalid});
- testTokenize("0b3", &[_]Token.Id{.Invalid});
- testTokenize("0b4", &[_]Token.Id{.Invalid});
- testTokenize("0b5", &[_]Token.Id{.Invalid});
- testTokenize("0b6", &[_]Token.Id{.Invalid});
- testTokenize("0b7", &[_]Token.Id{.Invalid});
- testTokenize("0b8", &[_]Token.Id{.Invalid});
- testTokenize("0b9", &[_]Token.Id{.Invalid});
- testTokenize("0ba", &[_]Token.Id{.Invalid});
- testTokenize("0bb", &[_]Token.Id{.Invalid});
- testTokenize("0bc", &[_]Token.Id{.Invalid});
- testTokenize("0bd", &[_]Token.Id{.Invalid});
- testTokenize("0be", &[_]Token.Id{.Invalid});
- testTokenize("0bf", &[_]Token.Id{.Invalid});
- testTokenize("0bz", &[_]Token.Id{.Invalid});
+ testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
@@ -1772,17 +1828,17 @@ test "tokenizer - number literals binary" {
testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
- testTokenize("0B0", &[_]Token.Id{.Invalid});
- testTokenize("0b_", &[_]Token.Id{.Invalid});
- testTokenize("0b_0", &[_]Token.Id{.Invalid});
+ testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1_", &[_]Token.Id{.Invalid});
- testTokenize("0b0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
- testTokenize("0b1e", &[_]Token.Id{.Invalid});
- testTokenize("0b1p", &[_]Token.Id{.Invalid});
- testTokenize("0b1e0", &[_]Token.Id{.Invalid});
- testTokenize("0b1p0", &[_]Token.Id{.Invalid});
- testTokenize("0b1_,", &[_]Token.Id{.Invalid});
+ testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
}
test "tokenizer - number literals octal" {
@@ -1794,15 +1850,15 @@ test "tokenizer - number literals octal" {
testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0o8", &[_]Token.Id{.Invalid});
- testTokenize("0o9", &[_]Token.Id{.Invalid});
- testTokenize("0oa", &[_]Token.Id{.Invalid});
- testTokenize("0ob", &[_]Token.Id{.Invalid});
- testTokenize("0oc", &[_]Token.Id{.Invalid});
- testTokenize("0od", &[_]Token.Id{.Invalid});
- testTokenize("0oe", &[_]Token.Id{.Invalid});
- testTokenize("0of", &[_]Token.Id{.Invalid});
- testTokenize("0oz", &[_]Token.Id{.Invalid});
+ testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
+ testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
@@ -1811,17 +1867,17 @@ test "tokenizer - number literals octal" {
testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
- testTokenize("0O0", &[_]Token.Id{.Invalid});
- testTokenize("0o_", &[_]Token.Id{.Invalid});
- testTokenize("0o_0", &[_]Token.Id{.Invalid});
+ testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1_", &[_]Token.Id{.Invalid});
- testTokenize("0o0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
- testTokenize("0o1e", &[_]Token.Id{.Invalid});
- testTokenize("0o1p", &[_]Token.Id{.Invalid});
- testTokenize("0o1e0", &[_]Token.Id{.Invalid});
- testTokenize("0o1p0", &[_]Token.Id{.Invalid});
- testTokenize("0o_,", &[_]Token.Id{.Invalid});
+ testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
}
test "tokenizer - number literals hexadeciaml" {
@@ -1847,21 +1903,21 @@ test "tokenizer - number literals hexadeciaml" {
testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0x0z", &[_]Token.Id{.Invalid});
- testTokenize("0xz", &[_]Token.Id{.Invalid});
+ testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
- testTokenize("0X0", &[_]Token.Id{.Invalid});
- testTokenize("0x_", &[_]Token.Id{.Invalid});
- testTokenize("0x_1", &[_]Token.Id{.Invalid});
+ testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x1_", &[_]Token.Id{.Invalid});
- testTokenize("0x0__1", &[_]Token.Id{.Invalid});
+ testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
- testTokenize("0x_,", &[_]Token.Id{.Invalid});
+ testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
@@ -1872,10 +1928,12 @@ test "tokenizer - number literals hexadeciaml" {
testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });
testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
+ testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
@@ -1888,21 +1946,24 @@ test "tokenizer - number literals hexadeciaml" {
testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x1p", &[_]Token.Id{.Invalid});
- testTokenize("0xfp0z1", &[_]Token.Id{.Invalid});
- testTokenize("0xff.ffpff", &[_]Token.Id{.Invalid});
- testTokenize("0x0_.0", &[_]Token.Id{.Invalid});
- testTokenize("0x0._0", &[_]Token.Id{.Invalid});
+ testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.p", &[_]Token.Id{.Invalid});
+ testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
+ testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
+ testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
- testTokenize("0x0_p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0_.p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0._p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0_p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0._0p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0_p0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0p_0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0p+_0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0p-_0", &[_]Token.Id{.Invalid});
- testTokenize("0x0.0p0_", &[_]Token.Id{.Invalid});
+ testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
+ testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
+ testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
}
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig
index e3d13b43db..63dc67a6fb 100644
--- a/src-self-hosted/ir.zig
+++ b/src-self-hosted/ir.zig
@@ -1312,15 +1312,15 @@ pub const Builder = struct {
var rest: []const u8 = undefined;
if (int_token.len >= 3 and int_token[0] == '0') {
rest = int_token[2..];
- base = switch (int_token[1]) {
- 'b' => 2,
- 'o' => 8,
- 'x' => 16,
+ switch (int_token[1]) {
+ 'b' => base = 2,
+ 'o' => base = 8,
+ 'x' => base = 16,
else => {
base = 10;
rest = int_token;
},
- };
+ }
} else {
base = 10;
rest = int_token;