std-c tokenizer always add newline token

This commit is contained in:
Vexu 2020-01-05 19:28:14 +02:00
parent f934f9b419
commit 795a503999
No known key found for this signature in database
GPG Key ID: 59AEB8936E16A6AC
2 changed files with 117 additions and 83 deletions

View File

@ -797,38 +797,42 @@ const Parser = struct {
/// Consumes the next significant token when its id equals `id` and returns
/// `parser.it.index` as it stands after that token was consumed.
/// Line comments, multi-line comments and newline tokens are skipped while
/// looking for the significant token.
/// Returns null when the token stream is exhausted, or when the next
/// significant token has a different id (that token is put back first).
fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex {
    while (true) {
        const next_tok = parser.it.next() orelse return null;
        // Dispatch on the token's id; the iterator yields the token itself,
        // which cannot be matched against id cases directly.
        switch (next_tok.id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => {
                if (next_tok.id == id) {
                    return parser.it.index;
                }
                // Mismatch: step back over the token that was just read.
                _ = parser.it.prev();
                return null;
            },
        }
    }
}
/// Consumes the next significant token and returns `parser.it.index` when
/// its id equals `id`.
/// Line comments, multi-line comments and newline tokens are skipped while
/// looking for the significant token.
/// Fails with `error.ParseError` when the token stream is exhausted. On an
/// id mismatch the result of `parser.err` is returned with an
/// `ExpectedToken` payload.
/// NOTE(review): `parser.err` is defined outside this view; presumably it
/// records the error and yields `error.ParseError` - confirm.
fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex {
    while (true) {
        // The return type is an error union, so running out of tokens must
        // surface as an error rather than null.
        const next_tok = parser.it.next() orelse return error.ParseError;
        switch (next_tok.id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => {
                if (next_tok.id != id) {
                    return parser.err(.{
                        .ExpectedToken = .{ .token = parser.it.index, .expected_id = id },
                    });
                }
                return parser.it.index;
            },
        }
    }
}
/// Rewinds the token iterator to undo the consumption of the token at index
/// `putting_back`.
/// Steps backwards over line comments, multi-line comments and newline
/// tokens; on reaching the first other token it asserts that this is the
/// token stored at `putting_back` and stops.
/// Returns silently when the iterator cannot step back any further.
fn putBackToken(parser: *Parser, putting_back: TokenIndex) void {
    while (true) {
        // Putting a token back means moving backwards, so use prev() and
        // keep the token bound for the assertion below.
        const prev_tok = parser.it.prev() orelse return;
        switch (prev_tok.id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => {
                assert(parser.it.list.at(putting_back) == prev_tok);
                return;
            },
        }
    }
}

View File

@ -449,20 +449,12 @@ pub const Tokenizer = struct {
switch (state) {
.Start => switch (c) {
'\n' => {
if (!self.pp_directive) {
result.start = self.index + 1;
continue;
}
self.pp_directive = false;
result.id = .Nl;
self.index += 1;
break;
},
'\r' => {
if (!self.pp_directive) {
result.start = self.index + 1;
continue;
}
state = .Cr;
},
'"' => {
@ -612,11 +604,14 @@ pub const Tokenizer = struct {
},
.BackSlash => switch (c) {
'\n' => {
state = .Start;
state = if (string) .AfterStringLiteral else .Start;
},
'\r' => {
state = .BackSlashCr;
},
'\t', '\x0B', '\x0C', ' ' => {
// TODO warn
},
else => {
result.id = .Invalid;
break;
@ -624,7 +619,7 @@ pub const Tokenizer = struct {
},
.BackSlashCr => switch (c) {
'\n' => {
state = .Start;
state = if (string) .AfterStringLiteral else .Start;
},
else => {
result.id = .Invalid;
@ -700,7 +695,14 @@ pub const Tokenizer = struct {
'"' => {
state = .StringLiteral;
},
'\n'...'\r', ' ' => {},
'\\' => {
state = .BackSlash;
},
'\n', '\r' => {
if (self.pp_directive)
break;
},
'\t', '\x0B', '\x0C', ' ' => {},
else => {
break;
},
@ -1314,60 +1316,64 @@ test "operators" {
\\ , & && &= ? < <= <<
\\ <<= > >= >> >>= ~ # ##
\\
,
&[_]Token.Id{
.Bang,
.BangEqual,
.Pipe,
.PipePipe,
.PipeEqual,
.Equal,
.EqualEqual,
.LParen,
.RParen,
.LBrace,
.RBrace,
.LBracket,
.RBracket,
.Period,
.Period,
.Period,
.Ellipsis,
.Caret,
.CaretEqual,
.Plus,
.PlusPlus,
.PlusEqual,
.Minus,
.MinusMinus,
.MinusEqual,
.Asterisk,
.AsteriskEqual,
.Percent,
.PercentEqual,
.Arrow,
.Colon,
.Semicolon,
.Slash,
.SlashEqual,
.Comma,
.Ampersand,
.AmpersandAmpersand,
.AmpersandEqual,
.QuestionMark,
.AngleBracketLeft,
.AngleBracketLeftEqual,
.AngleBracketAngleBracketLeft,
.AngleBracketAngleBracketLeftEqual,
.AngleBracketRight,
.AngleBracketRightEqual,
.AngleBracketAngleBracketRight,
.AngleBracketAngleBracketRightEqual,
.Tilde,
.Hash,
.HashHash,
},
);
, &[_]Token.Id{
.Bang,
.BangEqual,
.Pipe,
.PipePipe,
.PipeEqual,
.Equal,
.EqualEqual,
.Nl,
.LParen,
.RParen,
.LBrace,
.RBrace,
.LBracket,
.RBracket,
.Period,
.Period,
.Period,
.Ellipsis,
.Nl,
.Caret,
.CaretEqual,
.Plus,
.PlusPlus,
.PlusEqual,
.Minus,
.MinusMinus,
.MinusEqual,
.Nl,
.Asterisk,
.AsteriskEqual,
.Percent,
.PercentEqual,
.Arrow,
.Colon,
.Semicolon,
.Slash,
.SlashEqual,
.Nl,
.Comma,
.Ampersand,
.AmpersandAmpersand,
.AmpersandEqual,
.QuestionMark,
.AngleBracketLeft,
.AngleBracketLeftEqual,
.AngleBracketAngleBracketLeft,
.Nl,
.AngleBracketAngleBracketLeftEqual,
.AngleBracketRight,
.AngleBracketRightEqual,
.AngleBracketAngleBracketRight,
.AngleBracketAngleBracketRightEqual,
.Tilde,
.Hash,
.HashHash,
.Nl,
});
}
test "keywords" {
@ -1388,6 +1394,7 @@ test "keywords" {
.Keyword_continue,
.Keyword_default,
.Keyword_do,
.Nl,
.Keyword_double,
.Keyword_else,
.Keyword_enum,
@ -1397,6 +1404,7 @@ test "keywords" {
.Keyword_goto,
.Keyword_if,
.Keyword_int,
.Nl,
.Keyword_long,
.Keyword_register,
.Keyword_return,
@ -1404,6 +1412,7 @@ test "keywords" {
.Keyword_signed,
.Keyword_sizeof,
.Keyword_static,
.Nl,
.Keyword_struct,
.Keyword_switch,
.Keyword_typedef,
@ -1411,6 +1420,7 @@ test "keywords" {
.Keyword_unsigned,
.Keyword_void,
.Keyword_volatile,
.Nl,
.Keyword_while,
.Keyword_bool,
.Keyword_complex,
@ -1418,12 +1428,14 @@ test "keywords" {
.Keyword_inline,
.Keyword_restrict,
.Keyword_alignas,
.Nl,
.Keyword_alignof,
.Keyword_atomic,
.Keyword_generic,
.Keyword_noreturn,
.Keyword_static_assert,
.Keyword_thread_local,
.Nl,
});
}
@ -1469,7 +1481,10 @@ test "line continuation" {
\\ bar
\\"foo\
\\ bar"
\\
\\#define "foo"
\\ "bar"
\\#define "foo" \
\\ "bar"
, &[_]Token.Id{
.Hash,
.Keyword_define,
@ -1477,6 +1492,14 @@ test "line continuation" {
.Identifier,
.Nl,
.{ .StringLiteral = .None },
.Hash,
.Keyword_define,
.{ .StringLiteral = .None },
.Nl,
.{ .StringLiteral = .None },
.Hash,
.Keyword_define,
.{ .StringLiteral = .None },
});
}
@ -1499,9 +1522,13 @@ test "string prefix" {
.{ .StringLiteral = .Utf32 },
.{ .StringLiteral = .Wide },
.{ .CharLiteral = .None },
.Nl,
.{ .CharLiteral = .Utf16 },
.Nl,
.{ .CharLiteral = .Utf32 },
.Nl,
.{ .CharLiteral = .Wide },
.Nl,
});
}
@ -1517,15 +1544,18 @@ test "num suffixes" {
.{ .FloatLiteral = .None },
.{ .FloatLiteral = .None },
.{ .FloatLiteral = .None },
.Nl,
.{ .IntegerLiteral = .L },
.{ .IntegerLiteral = .LU },
.{ .IntegerLiteral = .LL },
.{ .IntegerLiteral = .LLU },
.{ .IntegerLiteral = .None },
.Nl,
.{ .IntegerLiteral = .U },
.{ .IntegerLiteral = .LU },
.{ .IntegerLiteral = .LLU },
.{ .IntegerLiteral = .None },
.Nl,
});
}