From 4d8f9e2295bb672f70550b3bb5a4cb667f68bb70 Mon Sep 17 00:00:00 2001 From: Jimmi Holst Christensen Date: Sat, 31 Mar 2018 21:04:54 +0200 Subject: [PATCH] std.zig.parser now parses multi line strings --- std/zig/ast.zig | 21 +++++++++++++++++ std/zig/parser.zig | 52 +++++++++++++++++++++++++++++++++++++++---- std/zig/tokenizer.zig | 40 ++++++++++++++++++++++++++++++++- 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/std/zig/ast.zig b/std/zig/ast.zig index e64e5931c3..75003b06cd 100644 --- a/std/zig/ast.zig +++ b/std/zig/ast.zig @@ -20,6 +20,7 @@ pub const Node = struct { IntegerLiteral, FloatLiteral, StringLiteral, + MultilineStringLiteral, UndefinedLiteral, BuiltinCall, Call, @@ -40,6 +41,7 @@ pub const Node = struct { Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).iterate(index), Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).iterate(index), Id.StringLiteral => @fieldParentPtr(NodeStringLiteral, "base", base).iterate(index), + Id.MultilineStringLiteral => @fieldParentPtr(NodeMultilineStringLiteral, "base", base).iterate(index), Id.UndefinedLiteral => @fieldParentPtr(NodeUndefinedLiteral, "base", base).iterate(index), Id.BuiltinCall => @fieldParentPtr(NodeBuiltinCall, "base", base).iterate(index), Id.Call => @fieldParentPtr(NodeCall, "base", base).iterate(index), @@ -61,6 +63,7 @@ pub const Node = struct { Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).firstToken(), Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).firstToken(), Id.StringLiteral => @fieldParentPtr(NodeStringLiteral, "base", base).firstToken(), + Id.MultilineStringLiteral => @fieldParentPtr(NodeMultilineStringLiteral, "base", base).firstToken(), Id.UndefinedLiteral => @fieldParentPtr(NodeUndefinedLiteral, "base", base).firstToken(), Id.BuiltinCall => @fieldParentPtr(NodeBuiltinCall, "base", base).firstToken(), Id.Call => @fieldParentPtr(NodeCall, "base", base).firstToken(), @@ -82,6 +85,7 @@ pub const Node = struct { Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).lastToken(), Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).lastToken(), Id.StringLiteral => @fieldParentPtr(NodeStringLiteral, "base", base).lastToken(), + Id.MultilineStringLiteral => @fieldParentPtr(NodeMultilineStringLiteral, "base", base).lastToken(), Id.UndefinedLiteral => @fieldParentPtr(NodeUndefinedLiteral, "base", base).lastToken(), Id.BuiltinCall => @fieldParentPtr(NodeBuiltinCall, "base", base).lastToken(), Id.Call => @fieldParentPtr(NodeCall, "base", base).lastToken(), @@ -587,6 +591,23 @@ pub const NodeStringLiteral = struct { } }; +pub const NodeMultilineStringLiteral = struct { + base: Node, + tokens: ArrayList(Token), + + pub fn iterate(self: &NodeMultilineStringLiteral, index: usize) ?&Node { + return null; + } + + pub fn firstToken(self: &NodeMultilineStringLiteral) Token { + return self.tokens.at(0); + } + + pub fn lastToken(self: &NodeMultilineStringLiteral) Token { + return self.tokens.at(self.tokens.len - 1); + } +}; + pub const NodeUndefinedLiteral = struct { base: Node, token: Token, diff --git a/std/zig/parser.zig b/std/zig/parser.zig index 1bad72aea4..fa09a26dbb 100644 --- a/std/zig/parser.zig +++ b/std/zig/parser.zig @@ -456,6 +456,30 @@ pub const Parser = struct { try stack.append(State.AfterOperand); continue; }, + Token.Id.MultilineStringLiteralLine => { + const node = try arena.create(ast.NodeMultilineStringLiteral); + *node = ast.NodeMultilineStringLiteral { + .base = self.initNode(ast.Node.Id.MultilineStringLiteral), + .tokens = ArrayList(Token).init(arena), + }; + try node.tokens.append(token); + + while (true) { + const multiline_str = self.getNextToken(); + if (multiline_str.id != Token.Id.MultilineStringLiteralLine) { + self.putBackToken(multiline_str); + break; + } + + try node.tokens.append(multiline_str); + } + + try stack.append(State { + .Operand = &node.base + }); + try stack.append(State.AfterOperand); + continue; + }, else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)), } @@ -1427,6 +1451,20 @@ pub const Parser = struct { const string_literal = @fieldParentPtr(ast.NodeStringLiteral, "base", base); try stream.print("{}", self.tokenizer.getTokenSlice(string_literal.token)); }, + ast.Node.Id.MultilineStringLiteral => { + const multiline_str_literal = @fieldParentPtr(ast.NodeMultilineStringLiteral, "base", base); + try stream.print("\n"); + + var i : usize = 0; + indent += 4; + while (i < multiline_str_literal.tokens.len) : (i += 1) { + const t = multiline_str_literal.tokens.at(i); + try stream.writeByteNTimes(' ', indent); + try stream.print("{}", self.tokenizer.getTokenSlice(t)); + } + try stream.writeByteNTimes(' ', indent); + indent -= 4; + }, ast.Node.Id.UndefinedLiteral => { const undefined_literal = @fieldParentPtr(ast.NodeUndefinedLiteral, "base", base); try stream.print("{}", self.tokenizer.getTokenSlice(undefined_literal.token)); @@ -1806,6 +1844,16 @@ test "zig fmt: extern function" { ); } +test "zig fmt: multiline string" { + try testCanonical( + \\const s = + \\ \\ something + \\ \\ something else + \\ ; + \\ + ); +} + test "zig fmt: values" { try testCanonical( \\test "values" { @@ -1813,10 +1861,6 @@ test "zig fmt: values" { \\ 1.0; \\ "string"; \\ c"cstring"; - \\ \\ Multi - \\ \\ line - \\ \\ string - \\ ; \\ 'c'; \\ true; \\ false; diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig index 7a13d89975..5647fcb866 100644 --- a/std/zig/tokenizer.zig +++ b/std/zig/tokenizer.zig @@ -72,6 +72,7 @@ pub const Token = struct { Invalid, Identifier, StringLiteral: StrLitKind, + MultilineStringLiteralLine: StrLitKind, StringIdentifier, Eof, Builtin, @@ -225,6 +226,9 @@ pub const Tokenizer = struct { C, StringLiteral, StringLiteralBackslash, + MultilineStringLiteralLine, + MultilineStringLiteralLineBackslash, + Backslash, Equal, Bang, Pipe, @@ -352,6 +356,10 @@ pub const Tokenizer = struct { '^' => { state = State.Caret; }, + '\\' => { + state = State.Backslash; + result.id = Token.Id { .MultilineStringLiteralLine = Token.StrLitKind.Normal }; + }, '{' => { result.id = Token.Id.LBrace; self.index += 1; @@ -532,8 +540,17 @@ pub const Tokenizer = struct { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => break, }, + State.Backslash => switch (c) { + '\\' => { + state = State.MultilineStringLiteralLine; + }, + else => break, + }, State.C => switch (c) { - '\\' => @panic("TODO"), + '\\' => { + state = State.Backslash; + result.id = Token.Id { .MultilineStringLiteralLine = Token.StrLitKind.C }; + }, '"' => { state = State.StringLiteral; result.id = Token.Id { .StringLiteral = Token.StrLitKind.C }; @@ -562,6 +579,24 @@ pub const Tokenizer = struct { }, }, + State.MultilineStringLiteralLine => switch (c) { + '\\' => { + state = State.MultilineStringLiteralLineBackslash; + }, + '\n' => { + self.index += 1; + break; + }, + else => self.checkLiteralCharacter(), + }, + + State.MultilineStringLiteralLineBackslash => switch (c) { + '\n' => break, // Look for this error later. + else => { + state = State.MultilineStringLiteralLine; + }, + }, + State.Bang => switch (c) { '=' => { result.id = Token.Id.BangEqual; @@ -811,6 +846,7 @@ pub const Tokenizer = struct { State.FloatFraction, State.FloatExponentNumber, State.StringLiteral, // find this error later + State.MultilineStringLiteralLine, State.Builtin => {}, State.Identifier => { @@ -825,6 +861,8 @@ pub const Tokenizer = struct { State.NumberDot, State.FloatExponentUnsigned, State.SawAtSign, + State.Backslash, + State.MultilineStringLiteralLineBackslash, State.StringLiteralBackslash => { result.id = Token.Id.Invalid; },