From 795a5039995a1a23ba00d15488565f1a79d3f25b Mon Sep 17 00:00:00 2001
From: Vexu
Date: Sun, 5 Jan 2020 19:28:14 +0200
Subject: [PATCH] std-c tokenizer always add newline token

---
 lib/std/c/parse.zig     |  38 +++++-----
 lib/std/c/tokenizer.zig | 162 ++++++++++++++++++++++++----------------
 2 files changed, 117 insertions(+), 83 deletions(-)

diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig
index 79790fd0ff..e5082d06c0 100644
--- a/lib/std/c/parse.zig
+++ b/lib/std/c/parse.zig
@@ -797,38 +797,42 @@ const Parser = struct {
     fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex {
         while (true) {
-            const next_tok = parser.it.next() orelse return null;
-            if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) {
-                if (next_tok.id == id) {
+            switch ((parser.it.next() orelse return null).id) {
+                .LineComment, .MultiLineComment, .Nl => continue,
+                else => |next_id| if (next_id == id) {
                     return parser.it.index;
-                }
-                _ = parser.it.prev();
-                return null;
+                } else {
+                    _ = parser.it.prev();
+                    return null;
+                },
             }
         }
     }
 
     fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex {
         while (true) {
-            const next_tok = parser.it.next() orelse return error.ParseError;
-            if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) {
-                if (next_tok.id != id) {
-                    try parser.tree.errors.push(.{
+            switch ((parser.it.next() orelse return error.ParseError).id) {
+                .LineComment, .MultiLineComment, .Nl => continue,
+                else => |next_id| if (next_id != id) {
+                    return parser.err(.{
                         .ExpectedToken = .{ .token = parser.it.index, .expected_id = id },
                     });
-                    return error.ParseError;
-                }
-                return parser.it.index;
+                } else {
+                    return parser.it.index;
+                },
             }
         }
     }
 
     fn putBackToken(parser: *Parser, putting_back: TokenIndex) void {
         while (true) {
             const prev_tok = parser.it.prev() orelse return;
-            if (prev_tok.id == .LineComment or prev_tok.id == .MultiLineComment) continue;
-            assert(parser.it.list.at(putting_back) == prev_tok);
-            return;
+            switch (prev_tok.id) {
+                .LineComment, .MultiLineComment, .Nl => continue,
+                else => {
+                    assert(parser.it.list.at(putting_back) == prev_tok);
+                    return;
+                },
+            }
         }
     }
diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig
index 4e74b97018..92c139f3c2 100644
--- a/lib/std/c/tokenizer.zig
+++ b/lib/std/c/tokenizer.zig
@@ -449,20 +449,12 @@ pub const Tokenizer = struct {
             switch (state) {
                 .Start => switch (c) {
                     '\n' => {
-                        if (!self.pp_directive) {
-                            result.start = self.index + 1;
-                            continue;
-                        }
                         self.pp_directive = false;
                         result.id = .Nl;
                         self.index += 1;
                         break;
                     },
                     '\r' => {
-                        if (!self.pp_directive) {
-                            result.start = self.index + 1;
-                            continue;
-                        }
                         state = .Cr;
                     },
                     '"' => {
@@ -612,11 +604,14 @@
                 },
                 .BackSlash => switch (c) {
                     '\n' => {
-                        state = .Start;
+                        state = if (string) .AfterStringLiteral else .Start;
                     },
                     '\r' => {
                         state = .BackSlashCr;
                     },
+                    '\t', '\x0B', '\x0C', ' ' => {
+                        // TODO warn
+                    },
                     else => {
                         result.id = .Invalid;
                         break;
@@ -624,7 +619,7 @@
                 },
                 .BackSlashCr => switch (c) {
                     '\n' => {
-                        state = .Start;
+                        state = if (string) .AfterStringLiteral else .Start;
                     },
                     else => {
                         result.id = .Invalid;
@@ -700,7 +695,14 @@
                     '"' => {
                         state = .StringLiteral;
                     },
-                    '\n'...'\r', ' ' => {},
+                    '\\' => {
+                        state = .BackSlash;
+                    },
+                    '\n', '\r' => {
+                        if (self.pp_directive)
+                            break;
+                    },
+                    '\t', '\x0B', '\x0C', ' ' => {},
                     else => {
                         break;
                     },
@@ -1314,60 +1316,64 @@ test "operators" {
         \\ , & && &= ? < <= <<
         \\ <<= > >= >> >>= ~ # ##
         \\
-    ,
-    &[_]Token.Id{
-        .Bang,
-        .BangEqual,
-        .Pipe,
-        .PipePipe,
-        .PipeEqual,
-        .Equal,
-        .EqualEqual,
-        .LParen,
-        .RParen,
-        .LBrace,
-        .RBrace,
-        .LBracket,
-        .RBracket,
-        .Period,
-        .Period,
-        .Period,
-        .Ellipsis,
-        .Caret,
-        .CaretEqual,
-        .Plus,
-        .PlusPlus,
-        .PlusEqual,
-        .Minus,
-        .MinusMinus,
-        .MinusEqual,
-        .Asterisk,
-        .AsteriskEqual,
-        .Percent,
-        .PercentEqual,
-        .Arrow,
-        .Colon,
-        .Semicolon,
-        .Slash,
-        .SlashEqual,
-        .Comma,
-        .Ampersand,
-        .AmpersandAmpersand,
-        .AmpersandEqual,
-        .QuestionMark,
-        .AngleBracketLeft,
-        .AngleBracketLeftEqual,
-        .AngleBracketAngleBracketLeft,
-        .AngleBracketAngleBracketLeftEqual,
-        .AngleBracketRight,
-        .AngleBracketRightEqual,
-        .AngleBracketAngleBracketRight,
-        .AngleBracketAngleBracketRightEqual,
-        .Tilde,
-        .Hash,
-        .HashHash,
-    },
-    );
+    , &[_]Token.Id{
+        .Bang,
+        .BangEqual,
+        .Pipe,
+        .PipePipe,
+        .PipeEqual,
+        .Equal,
+        .EqualEqual,
+        .Nl,
+        .LParen,
+        .RParen,
+        .LBrace,
+        .RBrace,
+        .LBracket,
+        .RBracket,
+        .Period,
+        .Period,
+        .Period,
+        .Ellipsis,
+        .Nl,
+        .Caret,
+        .CaretEqual,
+        .Plus,
+        .PlusPlus,
+        .PlusEqual,
+        .Minus,
+        .MinusMinus,
+        .MinusEqual,
+        .Nl,
+        .Asterisk,
+        .AsteriskEqual,
+        .Percent,
+        .PercentEqual,
+        .Arrow,
+        .Colon,
+        .Semicolon,
+        .Slash,
+        .SlashEqual,
+        .Nl,
+        .Comma,
+        .Ampersand,
+        .AmpersandAmpersand,
+        .AmpersandEqual,
+        .QuestionMark,
+        .AngleBracketLeft,
+        .AngleBracketLeftEqual,
+        .AngleBracketAngleBracketLeft,
+        .Nl,
+        .AngleBracketAngleBracketLeftEqual,
+        .AngleBracketRight,
+        .AngleBracketRightEqual,
+        .AngleBracketAngleBracketRight,
+        .AngleBracketAngleBracketRightEqual,
+        .Tilde,
+        .Hash,
+        .HashHash,
+        .Nl,
+    });
 }
 
 test "keywords" {
         .Keyword_continue,
         .Keyword_default,
         .Keyword_do,
+        .Nl,
         .Keyword_double,
         .Keyword_else,
         .Keyword_enum,
         .Keyword_extern,
         .Keyword_float,
         .Keyword_for,
         .Keyword_goto,
         .Keyword_if,
         .Keyword_int,
+        .Nl,
         .Keyword_long,
         .Keyword_register,
         .Keyword_return,
         .Keyword_short,
         .Keyword_signed,
         .Keyword_sizeof,
         .Keyword_static,
+        .Nl,
         .Keyword_struct,
         .Keyword_switch,
         .Keyword_typedef,
         .Keyword_union,
         .Keyword_unsigned,
         .Keyword_void,
         .Keyword_volatile,
+        .Nl,
         .Keyword_while,
         .Keyword_bool,
         .Keyword_complex,
         .Keyword_imaginary,
         .Keyword_inline,
         .Keyword_restrict,
         .Keyword_alignas,
+        .Nl,
         .Keyword_alignof,
         .Keyword_atomic,
         .Keyword_generic,
         .Keyword_noreturn,
         .Keyword_static_assert,
         .Keyword_thread_local,
+        .Nl,
     });
 }
 
 test "line continuation" {
         \\ bar
         \\"foo\
         \\ bar"
-        \\
+        \\#define "foo"
+        \\ "bar"
+        \\#define "foo" \
+        \\ "bar"
     , &[_]Token.Id{
         .Hash,
         .Keyword_define,
         .Identifier,
         .Identifier,
         .Nl,
         .{ .StringLiteral = .None },
+        .Hash,
+        .Keyword_define,
+        .{ .StringLiteral = .None },
+        .Nl,
+        .{ .StringLiteral = .None },
+        .Hash,
+        .Keyword_define,
+        .{ .StringLiteral = .None },
     });
 }
 
 test "string prefix" {
         .{ .StringLiteral = .Utf32 },
         .{ .StringLiteral = .Wide },
         .{ .CharLiteral = .None },
+        .Nl,
         .{ .CharLiteral = .Utf16 },
+        .Nl,
         .{ .CharLiteral = .Utf32 },
+        .Nl,
         .{ .CharLiteral = .Wide },
+        .Nl,
     });
 }
 
 test "num suffixes" {
         .{ .FloatLiteral = .None },
         .{ .FloatLiteral = .None },
         .{ .FloatLiteral = .None },
+        .Nl,
         .{ .IntegerLiteral = .L },
         .{ .IntegerLiteral = .LU },
         .{ .IntegerLiteral = .LL },
         .{ .IntegerLiteral = .LLU },
         .{ .IntegerLiteral = .None },
+        .Nl,
         .{ .IntegerLiteral = .U },
         .{ .IntegerLiteral = .LU },
         .{ .IntegerLiteral = .LLU },
         .{ .IntegerLiteral = .None },
+        .Nl,
     });
 }
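
After this change the tokenizer emits a .Nl token for every line ending, not only at the end of a preprocessor directive, which is why the parser helpers above now skip .Nl together with the comment tokens. The following Zig sketch illustrates the new contract in the style of the tests in lib/std/c/tokenizer.zig; it is not part of the patch, it assumes the same expectTokens helper those tests use, and it assumes the final source line carries no trailing newline:

test "nl emitted outside preprocessor directives" {
    // Sketch only: uses the Token.Id tags that appear in this patch.
    expectTokens(
        \\int a;
        \\int b;
    , &[_]Token.Id{
        .Keyword_int,
        .Identifier,
        .Semicolon,
        .Nl, // previously only produced inside a #directive
        .Keyword_int,
        .Identifier,
        .Semicolon,
    });
}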