From 5c6cd5e2c9e8b2d0feb0026bad7c201035a175b4 Mon Sep 17 00:00:00 2001
From: LemonBoy
Date: Sun, 27 Sep 2020 17:17:27 +0200
Subject: [PATCH] stage{1,2}: Fix parsing of range literals

stage1 was unable to parse ranges whose starting point was written in
binary/octal as the first dot in '...' was incorrectly interpreted as
decimal point.
stage2 forgot to reset the literal type to IntegerLiteral when it
discovered the dot was not a decimal point.

I've only stumbled across this bug because zig fmt keeps formatting the
ranges without any space around the ...
---
 lib/std/zig/tokenizer.zig | 9 +++++++++
 src/tokenizer.cpp         | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index 86968c73b2..e40483c022 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -1195,6 +1195,7 @@ pub const Tokenizer = struct {
                 },
                 .num_dot_hex => switch (c) {
                     '.' => {
+                        result.id = .IntegerLiteral;
                         self.index -= 1;
                         state = .start;
                         break;
@@ -1758,6 +1759,14 @@ test "correctly parse pointer assignment" {
     });
 }
 
+test "tokenizer - range literals" {
+    testTokenize("0...9", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("'0'...'9'", &[_]Token.Id{ .CharLiteral, .Ellipsis3, .CharLiteral });
+    testTokenize("0x00...0x09", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("0b00...0b11", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("0o00...0o11", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+}
+
 test "tokenizer - number literals decimal" {
     testTokenize("0", &[_]Token.Id{.IntegerLiteral});
     testTokenize("1", &[_]Token.Id{.IntegerLiteral});
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 4415bdf431..fa14dd40fa 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -1225,9 +1225,6 @@ void tokenize(Buf *buf, Tokenization *out) {
                             invalid_char_error(&t, c);
                             break;
                         }
-                        if (t.radix != 16 && t.radix != 10) {
-                            invalid_char_error(&t, c);
-                        }
                         t.state = TokenizeStateNumberDot;
                         break;
                     }
@@ -1281,6 +1278,9 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateStart;
                         continue;
                     }
+                    if (t.radix != 16 && t.radix != 10) {
+                        invalid_char_error(&t, c);
+                    }
                     t.pos -= 1;
                     t.state = TokenizeStateFloatFractionNoUnderscore;
                     assert(t.cur_tok->id == TokenIdIntLiteral);