From 3b09262c1252de0d9f946630e701be65bc5b2fc7 Mon Sep 17 00:00:00 2001
From: Ryan Liptak
Date: Mon, 20 Sep 2021 18:00:04 -0700
Subject: [PATCH] tokenizer: Fix index-out-of-bounds on unfinished unicode escapes before EOF

---
 lib/std/zig/tokenizer.zig | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index c24d6666f1..3ef6c9a6ba 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -772,6 +772,10 @@ pub const Tokenizer = struct {
                 },
 
                 .char_literal_unicode_escape_saw_u => switch (c) {
+                    0 => {
+                        result.tag = .invalid;
+                        break;
+                    },
                     '{' => {
                         state = .char_literal_unicode_escape;
                     },
@@ -782,6 +786,10 @@ pub const Tokenizer = struct {
                 },
 
                 .char_literal_unicode_escape => switch (c) {
+                    0 => {
+                        result.tag = .invalid;
+                        break;
+                    },
                     '0'...'9', 'a'...'f', 'A'...'F' => {},
                     '}' => {
                         state = .char_literal_end; // too many/few digits handled later
@@ -1922,8 +1930,10 @@ test "tokenizer - invalid builtin identifiers" {
     try testTokenize("@0()", &.{ .invalid, .integer_literal, .l_paren, .r_paren });
 }
 
-test "tokenizer - backslash before eof in string literal" {
+test "tokenizer - invalid token with unfinished escape right before eof" {
     try testTokenize("\"\\", &.{.invalid});
+    try testTokenize("'\\", &.{.invalid});
+    try testTokenize("'\\u", &.{.invalid});
 }
 
 fn testTokenize(source: [:0]const u8, expected_tokens: []const Token.Tag) !void {