From f07cba10a3b8bfe5fa75da9f67284c9b2869b261 Mon Sep 17 00:00:00 2001
From: r00ster91
Date: Fri, 12 Aug 2022 14:23:57 +0200
Subject: [PATCH 1/4] test(names): remove unnecessary "tokenizer - " prefix

---
 lib/std/zig/tokenizer.zig | 60 +++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index 5c6d732419..178249091e 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -1485,7 +1485,7 @@ test "line comment followed by top-level comptime" {
     });
 }
 
-test "tokenizer - unknown length pointer and then c pointer" {
+test "unknown length pointer and then c pointer" {
     try testTokenize(
         \\[*]u8
         \\[*c]u8
@@ -1502,7 +1502,7 @@
     });
 }
 
-test "tokenizer - code point literal with hex escape" {
+test "code point literal with hex escape" {
     try testTokenize(
         \\'\x1b'
     , &.{.char_literal});
@@ -1511,21 +1511,21 @@
     , &.{ .invalid, .invalid });
 }
 
-test "tokenizer - newline in char literal" {
+test "newline in char literal" {
     try testTokenize(
         \\'
         \\'
     , &.{ .invalid, .invalid });
 }
 
-test "tokenizer - newline in string literal" {
+test "newline in string literal" {
     try testTokenize(
         \\"
         \\"
     , &.{ .invalid, .string_literal });
 }
 
-test "tokenizer - code point literal with unicode escapes" {
+test "code point literal with unicode escapes" {
     // Valid unicode escapes
     try testTokenize(
         \\'\u{3}'
@@ -1575,13 +1575,13 @@
     , &.{ .invalid, .integer_literal, .invalid });
 }
 
-test "tokenizer - code point literal with unicode code point" {
+test "code point literal with unicode code point" {
     try testTokenize(
         \\'💩'
     , &.{.char_literal});
 }
 
-test "tokenizer - float literal e exponent" {
+test "float literal e exponent" {
     try testTokenize("a = 4.94065645841246544177e-324;\n", &.{
         .identifier,
         .equal,
@@ -1590,7 +1590,7 @@
     });
 }
 
-test "tokenizer - float literal p exponent" {
+test "float literal p exponent" {
     try testTokenize("a = 0x1.a827999fcef32p+1022;\n", &.{
         .identifier,
         .equal,
@@ -1599,11 +1599,11 @@
     });
 }
 
-test "tokenizer - chars" {
+test "chars" {
     try testTokenize("'c'", &.{.char_literal});
 }
 
-test "tokenizer - invalid token characters" {
+test "invalid token characters" {
     try testTokenize("#", &.{.invalid});
     try testTokenize("`", &.{.invalid});
     try testTokenize("'c", &.{.invalid});
@@ -1611,7 +1611,7 @@
     try testTokenize("''", &.{ .invalid, .invalid });
 }
 
-test "tokenizer - invalid literal/comment characters" {
+test "invalid literal/comment characters" {
     try testTokenize("\"\x00\"", &.{
         .string_literal,
         .invalid,
@@ -1627,12 +1627,12 @@
     });
 }
 
-test "tokenizer - utf8" {
+test "utf8" {
     try testTokenize("//\xc2\x80", &.{});
     try testTokenize("//\xf4\x8f\xbf\xbf", &.{});
 }
 
-test "tokenizer - invalid utf8" {
+test "invalid utf8" {
     try testTokenize("//\x80", &.{
         .invalid,
     });
@@ -1659,7 +1659,7 @@
     });
 }
 
-test "tokenizer - illegal unicode codepoints" {
+test "illegal unicode codepoints" {
     // unicode newline characters.U+0085, U+2028, U+2029
     try testTokenize("//\xc2\x84", &.{});
     try testTokenize("//\xc2\x85", &.{
@@ -1676,7 +1676,7 @@
     try testTokenize("//\xe2\x80\xaa", &.{});
 }
 
-test "tokenizer - string identifier and builtin fns" {
+test "string identifier and builtin fns" {
     try testTokenize(
         \\const @"if" = @import("std");
     , &.{
@@ -1691,7 +1691,7 @@
     });
 }
 
-test "tokenizer - multiline string literal with literal tab" {
+test "multiline string literal with literal tab" {
     try testTokenize(
         \\\\foo	bar
     , &.{
@@ -1699,7 +1699,7 @@
         .multiline_string_literal_line,
     });
 }
 
-test "tokenizer - comments with literal tab" {
+test "comments with literal tab" {
     try testTokenize(
         \\//foo	bar
         \\//!foo	bar
@@ -1715,14 +1715,14 @@
     });
 }
 
-test "tokenizer - pipe and then invalid" {
+test "pipe and then invalid" {
     try testTokenize("||=", &.{
         .pipe_pipe,
         .equal,
     });
 }
 
-test "tokenizer - line comment and doc comment" {
+test "line comment and doc comment" {
     try testTokenize("//", &.{});
     try testTokenize("// a / b", &.{});
     try testTokenize("// /", &.{});
@@ -1733,7 +1733,7 @@
     try testTokenize("//!!", &.{.container_doc_comment});
 }
 
-test "tokenizer - line comment followed by identifier" {
+test "line comment followed by identifier" {
     try testTokenize(
         \\ Unexpected,
         \\ // another
@@ -1746,7 +1746,7 @@
     });
 }
 
-test "tokenizer - UTF-8 BOM is recognized and skipped" {
+test "UTF-8 BOM is recognized and skipped" {
     try testTokenize("\xEF\xBB\xBFa;\n", &.{
         .identifier,
         .semicolon,
@@ -1788,7 +1788,7 @@ test "correctly parse pointer dereference followed by asterisk" {
     });
 }
 
-test "tokenizer - range literals" {
+test "range literals" {
     try testTokenize("0...9", &.{ .integer_literal, .ellipsis3, .integer_literal });
     try testTokenize("'0'...'9'", &.{ .char_literal, .ellipsis3, .char_literal });
     try testTokenize("0x00...0x09", &.{ .integer_literal, .ellipsis3, .integer_literal });
@@ -1796,7 +1796,7 @@
     try testTokenize("0b00...0b11", &.{ .integer_literal, .ellipsis3, .integer_literal });
     try testTokenize("0o00...0o11", &.{ .integer_literal, .ellipsis3, .integer_literal });
 }
 
-test "tokenizer - number literals decimal" {
+test "number literals decimal" {
     try testTokenize("0", &.{.integer_literal});
     try testTokenize("1", &.{.integer_literal});
     try testTokenize("2", &.{.integer_literal});
@@ -1863,7 +1863,7 @@
     try testTokenize("1.0e0_+", &.{ .invalid, .plus });
 }
 
-test "tokenizer - number literals binary" {
+test "number literals binary" {
     try testTokenize("0b0", &.{.integer_literal});
     try testTokenize("0b1", &.{.integer_literal});
     try testTokenize("0b2", &.{ .invalid, .integer_literal });
@@ -1902,7 +1902,7 @@
     try testTokenize("0b1_,", &.{ .invalid, .comma });
 }
 
-test "tokenizer - number literals octal" {
+test "number literals octal" {
     try testTokenize("0o0", &.{.integer_literal});
     try testTokenize("0o1", &.{.integer_literal});
     try testTokenize("0o2", &.{.integer_literal});
@@ -1941,7 +1941,7 @@
     try testTokenize("0o_,", &.{ .invalid, .identifier, .comma });
 }
 
-test "tokenizer - number literals hexadecimal" {
+test "number literals hexadecimal" {
     try testTokenize("0x0", &.{.integer_literal});
     try testTokenize("0x1", &.{.integer_literal});
     try testTokenize("0x2", &.{.integer_literal});
@@ -2029,22 +2029,22 @@
     try testTokenize("0x0.0p0_", &.{ .invalid, .eof });
 }
 
-test "tokenizer - multi line string literal with only 1 backslash" {
+test "multi line string literal with only 1 backslash" {
     try testTokenize("x \\\n;", &.{ .identifier, .invalid, .semicolon });
 }
 
-test "tokenizer - invalid builtin identifiers" {
+test "invalid builtin identifiers" {
     try testTokenize("@()", &.{ .invalid, .l_paren, .r_paren });
     try testTokenize("@0()", &.{ .invalid, .integer_literal, .l_paren, .r_paren });
 }
 
-test "tokenizer - invalid token with unfinished escape right before eof" {
+test "invalid token with unfinished escape right before eof" {
     try testTokenize("\"\\", &.{.invalid});
     try testTokenize("'\\", &.{.invalid});
     try testTokenize("'\\u", &.{.invalid});
 }
 
-test "tokenizer - saturating" {
+test "saturating" {
     try testTokenize("<<", &.{.angle_bracket_angle_bracket_left});
     try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe});
     try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal});
testTokenize("//\xe2\x80\xaa", &.{}); } -test "tokenizer - string identifier and builtin fns" { +test "string identifier and builtin fns" { try testTokenize( \\const @"if" = @import("std"); , &.{ @@ -1691,7 +1691,7 @@ test "tokenizer - string identifier and builtin fns" { }); } -test "tokenizer - multiline string literal with literal tab" { +test "multiline string literal with literal tab" { try testTokenize( \\\\foo bar , &.{ @@ -1699,7 +1699,7 @@ test "tokenizer - multiline string literal with literal tab" { }); } -test "tokenizer - comments with literal tab" { +test "comments with literal tab" { try testTokenize( \\//foo bar \\//!foo bar @@ -1715,14 +1715,14 @@ test "tokenizer - comments with literal tab" { }); } -test "tokenizer - pipe and then invalid" { +test "pipe and then invalid" { try testTokenize("||=", &.{ .pipe_pipe, .equal, }); } -test "tokenizer - line comment and doc comment" { +test "line comment and doc comment" { try testTokenize("//", &.{}); try testTokenize("// a / b", &.{}); try testTokenize("// /", &.{}); @@ -1733,7 +1733,7 @@ test "tokenizer - line comment and doc comment" { try testTokenize("//!!", &.{.container_doc_comment}); } -test "tokenizer - line comment followed by identifier" { +test "line comment followed by identifier" { try testTokenize( \\ Unexpected, \\ // another @@ -1746,7 +1746,7 @@ test "tokenizer - line comment followed by identifier" { }); } -test "tokenizer - UTF-8 BOM is recognized and skipped" { +test "UTF-8 BOM is recognized and skipped" { try testTokenize("\xEF\xBB\xBFa;\n", &.{ .identifier, .semicolon, @@ -1788,7 +1788,7 @@ test "correctly parse pointer dereference followed by asterisk" { }); } -test "tokenizer - range literals" { +test "range literals" { try testTokenize("0...9", &.{ .integer_literal, .ellipsis3, .integer_literal }); try testTokenize("'0'...'9'", &.{ .char_literal, .ellipsis3, .char_literal }); try testTokenize("0x00...0x09", &.{ .integer_literal, .ellipsis3, .integer_literal }); @@ -1796,7 +1796,7 @@ test "tokenizer - range literals" { try testTokenize("0o00...0o11", &.{ .integer_literal, .ellipsis3, .integer_literal }); } -test "tokenizer - number literals decimal" { +test "number literals decimal" { try testTokenize("0", &.{.integer_literal}); try testTokenize("1", &.{.integer_literal}); try testTokenize("2", &.{.integer_literal}); @@ -1863,7 +1863,7 @@ test "tokenizer - number literals decimal" { try testTokenize("1.0e0_+", &.{ .invalid, .plus }); } -test "tokenizer - number literals binary" { +test "number literals binary" { try testTokenize("0b0", &.{.integer_literal}); try testTokenize("0b1", &.{.integer_literal}); try testTokenize("0b2", &.{ .invalid, .integer_literal }); @@ -1902,7 +1902,7 @@ test "tokenizer - number literals binary" { try testTokenize("0b1_,", &.{ .invalid, .comma }); } -test "tokenizer - number literals octal" { +test "number literals octal" { try testTokenize("0o0", &.{.integer_literal}); try testTokenize("0o1", &.{.integer_literal}); try testTokenize("0o2", &.{.integer_literal}); @@ -1941,7 +1941,7 @@ test "tokenizer - number literals octal" { try testTokenize("0o_,", &.{ .invalid, .identifier, .comma }); } -test "tokenizer - number literals hexadecimal" { +test "number literals hexadecimal" { try testTokenize("0x0", &.{.integer_literal}); try testTokenize("0x1", &.{.integer_literal}); try testTokenize("0x2", &.{.integer_literal}); @@ -2029,22 +2029,22 @@ test "tokenizer - number literals hexadecimal" { try testTokenize("0x0.0p0_", &.{ .invalid, .eof }); } -test "tokenizer - multi line string 
literal with only 1 backslash" { +test "multi line string literal with only 1 backslash" { try testTokenize("x \\\n;", &.{ .identifier, .invalid, .semicolon }); } -test "tokenizer - invalid builtin identifiers" { +test "invalid builtin identifiers" { try testTokenize("@()", &.{ .invalid, .l_paren, .r_paren }); try testTokenize("@0()", &.{ .invalid, .integer_literal, .l_paren, .r_paren }); } -test "tokenizer - invalid token with unfinished escape right before eof" { +test "invalid token with unfinished escape right before eof" { try testTokenize("\"\\", &.{.invalid}); try testTokenize("'\\", &.{.invalid}); try testTokenize("'\\u", &.{.invalid}); } -test "tokenizer - saturating" { +test "saturating" { try testTokenize("<<", &.{.angle_bracket_angle_bracket_left}); try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe}); try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal}); From e3b3eab840d6fc055cdf12cde6d9077cb59b4022 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Fri, 12 Aug 2022 14:25:47 +0200 Subject: [PATCH 2/4] test(names): some renamings --- lib/std/zig/tokenizer.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index 178249091e..8cc221a8dc 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1469,8 +1469,8 @@ pub const Tokenizer = struct { } }; -test "tokenizer" { - try testTokenize("test", &.{.keyword_test}); +test "keywords" { + try testTokenize("test const else", &.{ .keyword_test, .keyword_const, .keyword_else }); } test "line comment followed by top-level comptime" { @@ -2044,7 +2044,7 @@ test "invalid token with unfinished escape right before eof" { try testTokenize("'\\u", &.{.invalid}); } -test "saturating" { +test "saturating operators" { try testTokenize("<<", &.{.angle_bracket_angle_bracket_left}); try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe}); try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal}); From 5490688d658973982d111ad5ad52bc497ef15d84 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Fri, 12 Aug 2022 14:28:22 +0200 Subject: [PATCH 3/4] refactor: use std.ascii functions --- lib/std/zig/tokenizer.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index 8cc221a8dc..89d4ee59d9 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1,5 +1,4 @@ const std = @import("../std.zig"); -const mem = std.mem; pub const Token = struct { tag: Tag, @@ -350,7 +349,7 @@ pub const Tokenizer = struct { pub fn init(buffer: [:0]const u8) Tokenizer { // Skip the UTF-8 BOM if present - const src_start = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else @as(usize, 0); + const src_start: usize = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0; return Tokenizer{ .buffer = buffer, .index = src_start, @@ -1433,8 +1432,8 @@ pub const Tokenizer = struct { fn getInvalidCharacterLength(self: *Tokenizer) u3 { const c0 = self.buffer[self.index]; - if (c0 < 0x80) { - if (c0 < 0x20 or c0 == 0x7f) { + if (std.ascii.isASCII(c0)) { + if (std.ascii.isCntrl(c0)) { // ascii control codes are never allowed // (note that \n was checked before we got here) return 1; From 83909651ea99eb45c67ead40b5fcb5773d1998d5 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sat, 13 Aug 2022 11:44:19 +0200 Subject: [PATCH 4/4] test: simplify testTokenize What this does is already done by `expectEqual`. 
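
`std.debug.panic` tore down the whole test binary on a mismatch, while
`expectEqual` prints both values and returns `error.TestExpectedEqual`,
so the failure is reported through the normal test runner. Call sites
are unchanged; for example (from the existing tests):

    test "keywords" {
        // On a tag mismatch this now fails with "expected ..., found ..."
        // and an error return trace instead of a panic.
        try testTokenize("test const else", &.{ .keyword_test, .keyword_const, .keyword_else });
    }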
---
 lib/std/zig/tokenizer.zig | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index 89d4ee59d9..eaa0ddd716 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -2061,17 +2061,14 @@ test "saturating operators" {
     try testTokenize("-|=", &.{.minus_pipe_equal});
 }
 
-fn testTokenize(source: [:0]const u8, expected_tokens: []const Token.Tag) !void {
+fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
     var tokenizer = Tokenizer.init(source);
-    for (expected_tokens) |expected_token_id| {
+    for (expected_token_tags) |expected_token_tag| {
         const token = tokenizer.next();
-        if (token.tag != expected_token_id) {
-            std.debug.panic("expected {s}, found {s}\n", .{
-                @tagName(expected_token_id), @tagName(token.tag),
-            });
-        }
+        try std.testing.expectEqual(expected_token_tag, token.tag);
     }
     const last_token = tokenizer.next();
     try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
     try std.testing.expectEqual(source.len, last_token.loc.start);
+    try std.testing.expectEqual(source.len, last_token.loc.end);
 }