From f06bce5ddaea368040560f584170aee2864fa399 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 2 Jun 2018 04:03:25 -0400 Subject: [PATCH] introduce [*] for unknown length pointers See #770 Currently it does not have any different behavior than `*` but it is now recommended to use `[*]` for unknown length pointers to be future-proof. Instead of [ * ] being separate tokens as the proposal suggested, this commit implements `[*]` as a single token. --- doc/langref.html.in | 2 +- src/all_types.hpp | 1 + src/parser.cpp | 5 +++-- src/tokenizer.cpp | 31 +++++++++++++++++++++++++++++- src/tokenizer.hpp | 1 + std/cstr.zig | 8 ++++---- std/zig/parse.zig | 2 +- std/zig/parser_test.zig | 7 +++++++ std/zig/tokenizer.zig | 42 ++++++++++++++++++++++++++++++++++++++--- 9 files changed, 87 insertions(+), 12 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index fb02e6277d..217f02777f 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -6450,7 +6450,7 @@ ContainerInitBody = list(StructLiteralField, ",") | list(Expression, ",") StructLiteralField = "." Symbol "=" Expression -PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await" +PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await" PrimaryExpression = Integer | Float | String | CharLiteral | KeywordLiteral | GroupedExpression | BlockExpression(BlockOrExpression) | Symbol | ("@" Symbol FnCallExpression) | ArrayType | FnProto | AsmExpression | ContainerDecl | ("continue" option(":" Symbol)) | ErrorSetDecl | PromiseType diff --git a/src/all_types.hpp b/src/all_types.hpp index d5906cae95..8e65cfc789 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -625,6 +625,7 @@ struct AstNodePrefixOpExpr { }; struct AstNodePointerType { + Token *star_token; AstNode *align_expr; BigInt *bit_offset_start; BigInt *bit_offset_end; diff --git a/src/parser.cpp b/src/parser.cpp index ef390a3a2e..6c900c3bfa 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1174,6 +1174,7 @@ static PrefixOp tok_to_prefix_op(Token *token) { static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, Token *star_tok) { AstNode *node = ast_create_node(pc, NodeTypePointerType, star_tok); + node->data.pointer_type.star_token = star_tok; Token *token = &pc->tokens->at(*token_index); if (token->id == TokenIdKeywordAlign) { @@ -1211,11 +1212,11 @@ static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, To /* PrefixOpExpression = PrefixOp ErrorSetExpr | SuffixOpExpression -PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await" +PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await" */ static AstNode *ast_parse_prefix_op_expr(ParseContext *pc, size_t *token_index, bool mandatory) { Token *token = &pc->tokens->at(*token_index); - if (token->id == TokenIdStar) { + if (token->id == TokenIdStar || token->id == TokenIdBracketStarBracket) { *token_index += 1; return ast_parse_pointer_type(pc, token_index, token); } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 365b35cdfd..badbd695ec 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -219,6 +219,8 @@ enum TokenizeState { TokenizeStateSawAtSign, TokenizeStateCharCode, TokenizeStateError, + TokenizeStateLBracket, + TokenizeStateLBracketStar, }; @@ -539,8 +541,8 @@ void tokenize(Buf *buf, Tokenization *out) { end_token(&t); break; case '[': + t.state = TokenizeStateLBracket; begin_token(&t, TokenIdLBracket); - end_token(&t); break; case ']': begin_token(&t, TokenIdRBracket); @@ -852,6 +854,30 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case TokenizeStateLBracket: + switch (c) { + case '*': + t.state = TokenizeStateLBracketStar; + set_token_id(&t, t.cur_tok, TokenIdBracketStarBracket); + break; + default: + // reinterpret as just an lbracket + t.pos -= 1; + end_token(&t); + t.state = TokenizeStateStart; + continue; + } + break; + case TokenizeStateLBracketStar: + switch (c) { + case ']': + end_token(&t); + t.state = TokenizeStateStart; + break; + default: + invalid_char_error(&t, c); + } + break; case TokenizeStateSawPlusPercent: switch (c) { case '=': @@ -1467,12 +1493,14 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateLineString: case TokenizeStateLineStringEnd: case TokenizeStateSawBarBar: + case TokenizeStateLBracket: end_token(&t); break; case TokenizeStateSawDotDot: case TokenizeStateSawBackslash: case TokenizeStateLineStringContinue: case TokenizeStateLineStringContinueC: + case TokenizeStateLBracketStar: tokenize_error(&t, "unexpected EOF"); break; case TokenizeStateLineComment: @@ -1509,6 +1537,7 @@ const char * token_name(TokenId id) { case TokenIdBitShiftRight: return ">>"; case TokenIdBitShiftRightEq: return ">>="; case TokenIdBitXorEq: return "^="; + case TokenIdBracketStarBracket: return "[*]"; case TokenIdCharLiteral: return "CharLiteral"; case TokenIdCmpEq: return "=="; case TokenIdCmpGreaterOrEq: return ">="; diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index b719293704..d659c0a772 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -28,6 +28,7 @@ enum TokenId { TokenIdBitShiftRight, TokenIdBitShiftRightEq, TokenIdBitXorEq, + TokenIdBracketStarBracket, TokenIdCharLiteral, TokenIdCmpEq, TokenIdCmpGreaterOrEq, diff --git a/std/cstr.zig b/std/cstr.zig index dfbfb8047f..d60adf8faa 100644 --- a/std/cstr.zig +++ b/std/cstr.zig @@ -9,13 +9,13 @@ pub const line_sep = switch (builtin.os) { else => "\n", }; -pub fn len(ptr: *const u8) usize { +pub fn len(ptr: [*]const u8) usize { var count: usize = 0; while (ptr[count] != 0) : (count += 1) {} return count; } -pub fn cmp(a: *const u8, b: *const u8) i8 { +pub fn cmp(a: [*]const u8, b: [*]const u8) i8 { var index: usize = 0; while (a[index] == b[index] and a[index] != 0) : (index += 1) {} if (a[index] > b[index]) { @@ -27,11 +27,11 @@ pub fn cmp(a: *const u8, b: *const u8) i8 { } } -pub fn toSliceConst(str: *const u8) []const u8 { +pub fn toSliceConst(str: [*]const u8) []const u8 { return str[0..len(str)]; } -pub fn toSlice(str: *u8) []u8 { +pub fn toSlice(str: [*]u8) []u8 { return str[0..len(str)]; } diff --git a/std/zig/parse.zig b/std/zig/parse.zig index 6adcf34c95..7faca8e11b 100644 --- a/std/zig/parse.zig +++ b/std/zig/parse.zig @@ -3292,7 +3292,7 @@ fn tokenIdToPrefixOp(id: @TagType(Token.Id)) ?ast.Node.PrefixOp.Op { Token.Id.Minus => ast.Node.PrefixOp.Op{ .Negation = void{} }, Token.Id.MinusPercent => ast.Node.PrefixOp.Op{ .NegationWrap = void{} }, Token.Id.Ampersand => ast.Node.PrefixOp.Op{ .AddressOf = void{} }, - Token.Id.Asterisk, Token.Id.AsteriskAsterisk => ast.Node.PrefixOp.Op{ + Token.Id.Asterisk, Token.Id.AsteriskAsterisk, Token.Id.BracketStarBracket => ast.Node.PrefixOp.Op{ .PtrType = ast.Node.PrefixOp.PtrInfo{ .align_info = null, .const_token = null, diff --git a/std/zig/parser_test.zig b/std/zig/parser_test.zig index bad677580c..c28a70b770 100644 --- a/std/zig/parser_test.zig +++ b/std/zig/parser_test.zig @@ -1,3 +1,10 @@ +test "zig fmt: pointer of unknown length" { + try testCanonical( + \\fn foo(ptr: [*]u8) void {} + \\ + ); +} + test "zig fmt: spaces around slice operator" { try testCanonical( \\var a = b[c..d]; diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig index 8378a9011d..b288a3adb7 100644 --- a/std/zig/tokenizer.zig +++ b/std/zig/tokenizer.zig @@ -143,6 +143,7 @@ pub const Token = struct { FloatLiteral, LineComment, DocComment, + BracketStarBracket, Keyword_align, Keyword_and, Keyword_asm, @@ -263,6 +264,8 @@ pub const Tokenizer = struct { Period, Period2, SawAtSign, + LBracket, + LBracketStar, }; pub fn next(self: *Tokenizer) Token { @@ -325,9 +328,7 @@ pub const Tokenizer = struct { break; }, '[' => { - result.id = Token.Id.LBracket; - self.index += 1; - break; + state = State.LBracket; }, ']' => { result.id = Token.Id.RBracket; @@ -429,6 +430,28 @@ pub const Tokenizer = struct { }, }, + State.LBracket => switch (c) { + '*' => { + state = State.LBracketStar; + }, + else => { + result.id = Token.Id.LBracket; + break; + }, + }, + + State.LBracketStar => switch (c) { + ']' => { + result.id = Token.Id.BracketStarBracket; + self.index += 1; + break; + }, + else => { + result.id = Token.Id.Invalid; + break; + }, + }, + State.Ampersand => switch (c) { '=' => { result.id = Token.Id.AmpersandEqual; @@ -1008,6 +1031,7 @@ pub const Tokenizer = struct { State.CharLiteralEscape2, State.CharLiteralEnd, State.StringLiteralBackslash, + State.LBracketStar, => { result.id = Token.Id.Invalid; }, @@ -1024,6 +1048,9 @@ pub const Tokenizer = struct { State.Slash => { result.id = Token.Id.Slash; }, + State.LBracket => { + result.id = Token.Id.LBracket; + }, State.Zero => { result.id = Token.Id.IntegerLiteral; }, @@ -1142,6 +1169,15 @@ test "tokenizer" { testTokenize("test", []Token.Id{Token.Id.Keyword_test}); } +test "tokenizer - unknown length pointer" { + testTokenize( + \\[*]u8 + , []Token.Id{ + Token.Id.BracketStarBracket, + Token.Id.Identifier, + }); +} + test "tokenizer - char literal with hex escape" { testTokenize( \\'\x1b'