std-c tokenizer line continuation, tests and fixes

This commit is contained in:
Vexu 2020-01-04 11:23:19 +02:00
parent c221593d7d
commit a5d1fb1e49
No known key found for this signature in database
GPG Key ID: 59AEB8936E16A6AC

View File

@ -265,13 +265,17 @@ pub const Tokenizer = struct {
var state: enum { var state: enum {
Start, Start,
Cr, Cr,
BackSlash,
BackSlashCr,
u, u,
u8, u8,
U, U,
L, L,
StringLiteral, StringLiteral,
CharLiteralStart,
CharLiteral, CharLiteral,
EscapeSequence, EscapeSequence,
CrEscape,
OctalEscape, OctalEscape,
HexEscape, HexEscape,
UnicodeEscape, UnicodeEscape,
@ -344,7 +348,7 @@ pub const Tokenizer = struct {
}, },
'\'' => { '\'' => {
result.id = .{ .CharLiteral = .None }; result.id = .{ .CharLiteral = .None };
state = .CharLiteral; state = .CharLiteralStart;
}, },
'u' => { 'u' => {
state = .u; state = .u;
@ -464,6 +468,9 @@ pub const Tokenizer = struct {
'1'...'9' => { '1'...'9' => {
state = .IntegerLiteral; state = .IntegerLiteral;
}, },
'\\' => {
state = .BackSlash;
},
else => { else => {
result.start = self.index + 1; result.start = self.index + 1;
}, },
@ -480,13 +487,34 @@ pub const Tokenizer = struct {
break; break;
}, },
}, },
// A backslash was seen in the .Start state; only a line ending may follow.
.BackSlash => switch (c) {
    '\n' => {
        // The backslash-newline pair is a line continuation and belongs to
        // no token: move the token start past it, the same way the .Start
        // state does for characters it skips, so the next token's span
        // does not begin at the backslash.
        result.start = self.index + 1;
        state = .Start;
    },
    '\r' => {
        // Possibly a CRLF line ending; .BackSlashCr checks for the '\n'.
        state = .BackSlashCr;
    },
    else => {
        // A backslash followed by anything else is not a continuation.
        result.id = .Invalid;
        break;
    },
},
// A backslash followed by '\r' was seen; a '\n' must complete the CRLF.
.BackSlashCr => switch (c) {
    '\n' => {
        // The backslash-CRLF sequence is a line continuation and belongs to
        // no token: move the token start past it so the next token's span
        // does not begin at the backslash.
        result.start = self.index + 1;
        state = .Start;
    },
    else => {
        // A lone '\r' after the backslash is not a valid line ending here.
        result.id = .Invalid;
        break;
    },
},
.u => switch (c) { .u => switch (c) {
'8' => { '8' => {
state = .u8; state = .u8;
}, },
'\'' => { '\'' => {
result.id = .{ .CharLiteral = .Utf16 }; result.id = .{ .CharLiteral = .Utf16 };
state = .CharLiteral; state = .CharLiteralStart;
}, },
'\"' => { '\"' => {
result.id = .{ .StringLiteral = .Utf16 }; result.id = .{ .StringLiteral = .Utf16 };
@ -508,7 +536,7 @@ pub const Tokenizer = struct {
.U => switch (c) { .U => switch (c) {
'\'' => { '\'' => {
result.id = .{ .CharLiteral = .Utf32 }; result.id = .{ .CharLiteral = .Utf32 };
state = .CharLiteral; state = .CharLiteralStart;
}, },
'\"' => { '\"' => {
result.id = .{ .StringLiteral = .Utf32 }; result.id = .{ .StringLiteral = .Utf32 };
@ -521,7 +549,7 @@ pub const Tokenizer = struct {
.L => switch (c) { .L => switch (c) {
'\'' => { '\'' => {
result.id = .{ .CharLiteral = .Wide }; result.id = .{ .CharLiteral = .Wide };
state = .CharLiteral; state = .CharLiteralStart;
}, },
'\"' => { '\"' => {
result.id = .{ .StringLiteral = .Wide }; result.id = .{ .StringLiteral = .Wide };
@ -546,7 +574,7 @@ pub const Tokenizer = struct {
}, },
else => {}, else => {},
}, },
.CharLiteral => switch (c) { .CharLiteralStart => switch (c) {
'\\' => { '\\' => {
string = false; string = false;
state = .EscapeSequence; state = .EscapeSequence;
@ -555,10 +583,32 @@ pub const Tokenizer = struct {
result.id = .Invalid; result.id = .Invalid;
break; break;
}, },
else => {
state = .CharLiteral;
},
},
.CharLiteral => switch (c) {
'\\' => {
string = false;
state = .EscapeSequence;
},
'\'' => {
self.index += 1;
break;
},
'\n' => {
result.id = .Invalid;
break;
},
else => {}, else => {},
}, },
.EscapeSequence => switch (c) { .EscapeSequence => switch (c) {
'\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v' => {}, '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', '\n' => {
state = if (string) .StringLiteral else .CharLiteral;
},
'\r' => {
state = .CrEscape;
},
'0'...'7' => { '0'...'7' => {
counter = 1; counter = 1;
state = .OctalEscape; state = .OctalEscape;
@ -579,6 +629,15 @@ pub const Tokenizer = struct {
break; break;
}, },
}, },
// Entered from .EscapeSequence when a backslash inside a literal is followed
// by '\r'. Only '\n' is accepted here; it resumes the literal being scanned.
.CrEscape => {
    if (c != '\n') {
        result.id = .Invalid;
        break;
    }
    // `string` records whether the escape started in a string or a char literal.
    state = if (string) .StringLiteral else .CharLiteral;
},
.OctalEscape => switch (c) { .OctalEscape => switch (c) {
'0'...'7' => { '0'...'7' => {
counter += 1; counter += 1;
@ -1056,10 +1115,14 @@ pub const Tokenizer = struct {
}, },
.Cr, .Cr,
.BackSlash,
.BackSlashCr,
.Period2, .Period2,
.StringLiteral, .StringLiteral,
.CharLiteralStart,
.CharLiteral, .CharLiteral,
.EscapeSequence, .EscapeSequence,
.CrEscape,
.OctalEscape, .OctalEscape,
.HexEscape, .HexEscape,
.UnicodeEscape, .UnicodeEscape,
@ -1269,6 +1332,72 @@ test "preprocessor keywords" {
}); });
} }
// A backslash immediately followed by a newline must not split tokens: the
// #define below continues onto the second line, and the string literal that
// spans two lines is reported as a single StringLiteral token.
test "line continuation" {
    const source =
        \\#define foo \
        \\ bar
        \\"foo\
        \\ bar"
        \\
    ;
    const expected = [_]Token.Id{
        .Hash,
        .Keyword_define,
        .Identifier,
        .Identifier,
        .Nl,
        .{ .StringLiteral = .None },
    };
    expectTokens(source, &expected);
}
// Each string and character literal prefix (none, u, u8, U, L) must be
// reflected in the payload of the resulting literal token.
test "string prefix" {
    const source =
        \\"foo"
        \\u"foo"
        \\u8"foo"
        \\U"foo"
        \\L"foo"
        \\'foo'
        \\u'foo'
        \\U'foo'
        \\L'foo'
        \\
    ;
    const expected = [_]Token.Id{
        // String literals, in source order.
        .{ .StringLiteral = .None },
        .{ .StringLiteral = .Utf16 },
        .{ .StringLiteral = .Utf8 },
        .{ .StringLiteral = .Utf32 },
        .{ .StringLiteral = .Wide },
        // Character literals, in source order.
        .{ .CharLiteral = .None },
        .{ .CharLiteral = .Utf16 },
        .{ .CharLiteral = .Utf32 },
        .{ .CharLiteral = .Wide },
    };
    expectTokens(source, &expected);
}
// Numeric literal suffixes must be carried in the token payload. The integer
// cases expect the same tag for either suffix order (`0lu` and `1ul` both
// yield .LU).
test "num suffixes" {
    const source =
        \\ 1.0f 1.0L 1.0 .0 1.
        \\ 0l 0lu 0ll 0llu 0
        \\ 1u 1ul 1ull 1
        \\
    ;
    const expected = [_]Token.Id{
        // First line: floating-point literals.
        .{ .FloatLiteral = .F },
        .{ .FloatLiteral = .L },
        .{ .FloatLiteral = .None },
        .{ .FloatLiteral = .None },
        .{ .FloatLiteral = .None },
        // Second line: integers with the `l` part written first.
        .{ .IntegerLiteral = .L },
        .{ .IntegerLiteral = .LU },
        .{ .IntegerLiteral = .LL },
        .{ .IntegerLiteral = .LLU },
        .{ .IntegerLiteral = .None },
        // Third line: integers with the `u` part written first.
        .{ .IntegerLiteral = .U },
        .{ .IntegerLiteral = .LU },
        .{ .IntegerLiteral = .LLU },
        .{ .IntegerLiteral = .None },
    };
    expectTokens(source, &expected);
}
fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer{ var tokenizer = Tokenizer{
.source = &Source{ .source = &Source{