From 0ea96c11ef5eca9cfd19233b6d51b00ba33da716 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 14 Jan 2020 21:24:39 +0200 Subject: [PATCH] disallow multiline strings in test and library names --- doc/langref.html.in | 10 +++++----- lib/std/zig/parse.zig | 26 +++++++++++++------------- lib/std/zig/parser_test.zig | 10 ---------- src/parser.cpp | 29 +++++++++++++++-------------- src/tokenizer.cpp | 19 +++---------------- src/tokenizer.hpp | 3 ++- 6 files changed, 38 insertions(+), 59 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 442e4ac52b..092e303013 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -10212,13 +10212,13 @@ ContainerMembers / ContainerField / -TestDecl <- KEYWORD_test STRINGLITERAL Block +TestDecl <- KEYWORD_test STRINGLITERALSINGLE Block TopLevelComptime <- KEYWORD_comptime BlockExpr TopLevelDecl - <- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / KEYWORD_inline)? FnProto (SEMICOLON / Block) - / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl + <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline)? FnProto (SEMICOLON / Block) + / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl / KEYWORD_usingnamespace Expr SEMICOLON FnProto <- FnCC? KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? LinkSection? EXCLAMATIONMARK? (KEYWORD_var / TypeExpr) @@ -10561,10 +10561,10 @@ INTEGER / "0o" [0-7]+ skip / "0x" hex+ skip / [0-9]+ skip +STRINGLITERALSINGLE <- "\"" string_char* "\"" skip STRINGLITERAL - <- "\"" string_char* "\"" skip + <- STRINGLITERALSINGLE / line_string skip - / line_cstring skip IDENTIFIER <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip / "@\"" string_char* "\"" skip diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 604de516c4..1b6afc1c1a 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -183,10 +183,10 @@ fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree) return &node.base; } -/// TestDecl <- KEYWORD_test STRINGLITERAL Block +/// TestDecl <- KEYWORD_test STRINGLITERALSINGLE Block fn parseTestDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { const test_token = eatToken(it, .Keyword_test) orelse return null; - const name_node = try expectNode(arena, it, tree, parseStringLiteral, AstError{ + const name_node = try expectNode(arena, it, tree, parseStringLiteralSingle, AstError{ .ExpectedStringLiteral = AstError.ExpectedStringLiteral{ .token = it.index }, }); const block_node = try expectNode(arena, it, tree, parseBlock, AstError{ @@ -225,15 +225,15 @@ fn parseTopLevelComptime(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?* } /// TopLevelDecl -/// <- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) -/// / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl +/// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) +/// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl /// / KEYWORD_usingnamespace Expr SEMICOLON fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { var lib_name: ?*Node = null; const extern_export_inline_token = blk: { if (eatToken(it, .Keyword_export)) |token| break :blk token; if (eatToken(it, .Keyword_extern)) |token| { - lib_name = try parseStringLiteral(arena, it, tree); + lib_name = try parseStringLiteralSingle(arena, it, tree); break :blk token; } if (eatToken(it, .Keyword_inline)) |token| break :blk token; @@ -285,12 +285,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node if (extern_export_inline_token) |token| { if (lib_name) |string_literal_node| - if (string_literal_node.cast(Node.StringLiteral)) |single| { - putBackToken(it, single.token); - } else if (string_literal_node.cast(Node.MultilineStringLiteral)) |multi| { - while (multi.lines.pop()) |line| - putBackToken(it, line); - } else unreachable; + putBackToken(it, string_literal_node.cast(Node.StringLiteral).?.token); putBackToken(it, token); return null; } @@ -2776,8 +2771,7 @@ fn createLiteral(arena: *Allocator, comptime T: type, token: TokenIndex) !*Node return &result.base; } -// string literal or multiline string literal -fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { +fn parseStringLiteralSingle(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { if (eatToken(it, .StringLiteral)) |token| { const node = try arena.create(Node.StringLiteral); node.* = Node.StringLiteral{ @@ -2785,6 +2779,12 @@ fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Nod }; return &node.base; } + return null; +} + +// string literal or multiline string literal +fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { + if (try parseStringLiteralSingle(arena, it, tree)) |node| return node; if (eatToken(it, .MultilineStringLiteralLine)) |first_line| { const node = try arena.create(Node.MultilineStringLiteral); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 1ebef6a114..c57540ade9 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -2721,16 +2721,6 @@ test "zig fmt: extern without container keyword returns error" { ); } -test "zig fmt: extern multiline lib name" { - try testError( - \\extern \\super - \\ \\long - \\ \\library - \\ \\name - \\ - ); -} - const std = @import("std"); const mem = std.mem; const warn = std.debug.warn; diff --git a/src/parser.cpp b/src/parser.cpp index f6f5811e63..0054c0a0c6 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -141,16 +141,9 @@ static void ast_error(ParseContext *pc, Token *token, const char *format, ...) { exit(EXIT_FAILURE); } -static Buf ast_token_str(Buf *input, Token *token) { - Buf str = BUF_INIT; - buf_init_from_mem(&str, buf_ptr(input) + token->start_pos, token->end_pos - token->start_pos); - return str; -} - ATTRIBUTE_NORETURN static void ast_invalid_token_error(ParseContext *pc, Token *token) { - Buf token_value = ast_token_str(pc->buf, token); - ast_error(pc, token, "invalid token: '%s'", buf_ptr(&token_value)); + ast_error(pc, token, "invalid token: '%s'", token_name(token->id)); } static AstNode *ast_create_node_no_line_info(ParseContext *pc, NodeType type) { @@ -213,7 +206,7 @@ static void put_back_token(ParseContext *pc) { static Buf *token_buf(Token *token) { if (token == nullptr) return nullptr; - assert(token->id == TokenIdStringLiteral || token->id == TokenIdSymbol); + assert(token->id == TokenIdStringLiteral || token->id == TokenIdMultilineStringLiteral || token->id == TokenIdSymbol); return &token->data.str_lit.str; } @@ -596,7 +589,7 @@ static AstNodeContainerDecl ast_parse_container_members(ParseContext *pc) { return res; } -// TestDecl <- KEYWORD_test STRINGLITERAL Block +// TestDecl <- KEYWORD_test STRINGLITERALSINGLE Block static AstNode *ast_parse_test_decl(ParseContext *pc) { Token *test = eat_token_if(pc, TokenIdKeywordTest); if (test == nullptr) @@ -630,8 +623,8 @@ static AstNode *ast_parse_top_level_comptime(ParseContext *pc) { } // TopLevelDecl -// <- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) -// / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl +// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) +// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl // / KEYWORD_use Expr SEMICOLON static AstNode *ast_parse_top_level_decl(ParseContext *pc, VisibMod visib_mod, Buf *doc_comments) { Token *first = eat_token_if(pc, TokenIdKeywordExport); @@ -1729,6 +1722,8 @@ static AstNode *ast_parse_primary_type_expr(ParseContext *pc) { return ast_create_node(pc, NodeTypeUnreachable, unreachable); Token *string_lit = eat_token_if(pc, TokenIdStringLiteral); + if (string_lit == nullptr) + string_lit = eat_token_if(pc, TokenIdMultilineStringLiteral); if (string_lit != nullptr) { AstNode *res = ast_create_node(pc, NodeTypeStringLiteral, string_lit); res->data.string_literal.buf = token_buf(string_lit); @@ -1957,7 +1952,9 @@ static AsmOutput *ast_parse_asm_output_item(ParseContext *pc) { Token *sym_name = expect_token(pc, TokenIdSymbol); expect_token(pc, TokenIdRBracket); - Token *str = expect_token(pc, TokenIdStringLiteral); + Token *str = eat_token_if(pc, TokenIdMultilineStringLiteral); + if (str == nullptr) + str = expect_token(pc, TokenIdStringLiteral); expect_token(pc, TokenIdLParen); Token *var_name = eat_token_if(pc, TokenIdSymbol); @@ -1999,7 +1996,9 @@ static AsmInput *ast_parse_asm_input_item(ParseContext *pc) { Token *sym_name = expect_token(pc, TokenIdSymbol); expect_token(pc, TokenIdRBracket); - Token *constraint = expect_token(pc, TokenIdStringLiteral); + Token *constraint = eat_token_if(pc, TokenIdMultilineStringLiteral); + if (constraint == nullptr) + constraint = expect_token(pc, TokenIdStringLiteral); expect_token(pc, TokenIdLParen); AstNode *expr = ast_expect(pc, ast_parse_expr); expect_token(pc, TokenIdRParen); @@ -2018,6 +2017,8 @@ static AstNode *ast_parse_asm_clobbers(ParseContext *pc) { ZigList clobber_list = ast_parse_list(pc, TokenIdComma, [](ParseContext *context) { Token *str = eat_token_if(context, TokenIdStringLiteral); + if (str == nullptr) + str = eat_token_if(context, TokenIdMultilineStringLiteral); if (str != nullptr) return token_buf(str); return (Buf*)nullptr; diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 9182c5227a..2aae048cdf 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -209,7 +209,6 @@ enum TokenizeState { TokenizeStateLineString, TokenizeStateLineStringEnd, TokenizeStateLineStringContinue, - TokenizeStateLineStringContinueC, TokenizeStateSawEq, TokenizeStateSawBang, TokenizeStateSawLessThan, @@ -266,7 +265,7 @@ static void set_token_id(Tokenize *t, Token *token, TokenId id) { } else if (id == TokenIdFloatLiteral) { bigfloat_init_32(&token->data.float_lit.bigfloat, 0.0f); token->data.float_lit.overflow = false; - } else if (id == TokenIdStringLiteral || id == TokenIdSymbol) { + } else if (id == TokenIdStringLiteral || id == TokenIdMultilineStringLiteral || id == TokenIdSymbol) { memset(&token->data.str_lit.str, 0, sizeof(Buf)); buf_resize(&token->data.str_lit.str, 0); } @@ -503,7 +502,7 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateSawSlash; break; case '\\': - begin_token(&t, TokenIdStringLiteral); + begin_token(&t, TokenIdMultilineStringLiteral); t.state = TokenizeStateSawBackslash; break; case '%': @@ -945,18 +944,6 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; - case TokenizeStateLineStringContinueC: - switch (c) { - case '\\': - t.state = TokenizeStateLineStringContinue; - break; - default: - t.pos -= 1; - end_token(&t); - t.state = TokenizeStateStart; - continue; - } - break; case TokenizeStateLineStringContinue: switch (c) { case '\\': @@ -1471,7 +1458,6 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateSawDotDot: case TokenizeStateSawBackslash: case TokenizeStateLineStringContinue: - case TokenizeStateLineStringContinueC: tokenize_error(&t, "unexpected EOF"); break; case TokenizeStateLineComment: @@ -1607,6 +1593,7 @@ const char * token_name(TokenId id) { case TokenIdStar: return "*"; case TokenIdStarStar: return "**"; case TokenIdStringLiteral: return "StringLiteral"; + case TokenIdMultilineStringLiteral: return "MultilineStringLiteral"; case TokenIdSymbol: return "Symbol"; case TokenIdTilde: return "~"; case TokenIdTimesEq: return "*="; diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index a893cc2373..3f025ca74a 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -124,6 +124,7 @@ enum TokenId { TokenIdStar, TokenIdStarStar, TokenIdStringLiteral, + TokenIdMultilineStringLiteral, TokenIdSymbol, TokenIdTilde, TokenIdTimesEq, @@ -165,7 +166,7 @@ struct Token { // TokenIdFloatLiteral TokenFloatLit float_lit; - // TokenIdStringLiteral or TokenIdSymbol + // TokenIdStringLiteral, TokenIdMultilineStringLiteral or TokenIdSymbol TokenStrLit str_lit; // TokenIdCharLiteral