From 26bf410b061b9d6d18e4945417ddec62d7486e9c Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 02:00:29 +0200 Subject: [PATCH] std-c finish tokenizer --- lib/std/c/tokenizer.zig | 166 ++++++++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 58 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 034b7637fb..a5f2ad770d 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -7,21 +7,15 @@ pub const Source = struct { }; pub const Token = struct { - id: Id, - num_suffix: NumSuffix = .None, - start: usize, - end: usize, - source: *Source, - - pub const Id = enum { + id: union(enum) { Invalid, Eof, Nl, Identifier, - StringLiteral, - CharLiteral, - IntegerLiteral, - FloatLiteral, + StringLiteral: StrKind, + CharLiteral: StrKind, + IntegerLiteral: NumSuffix, + FloatLiteral: NumSuffix, Bang, BangEqual, Pipe, @@ -74,7 +68,10 @@ pub const Token = struct { MultiLineComment, Hash, HashHash, - }; + }, + start: usize, + end: usize, + source: *Source, pub const NumSuffix = enum { None, @@ -85,6 +82,14 @@ pub const Token = struct { LL, LLU, }; + + pub const StrKind = enum { + None, + Wide, + Utf8, + Utf16, + Utf32, + }; }; pub const Tokenizer = struct { @@ -102,6 +107,10 @@ pub const Tokenizer = struct { var state: enum { Start, Cr, + u, + u8, + U, + L, StringLiteral, CharLiteral, EscapeSequence, @@ -162,13 +171,23 @@ pub const Tokenizer = struct { result.start = self.index + 1; }, '"' => { + result.id = .{ .StringLiteral = .None }; state = .StringLiteral; - result.id = .StringLiteral; }, '\'' => { + result.id = .{ .CharLiteral = .None }; state = .CharLiteral; }, - 'a'...'z', 'A'...'Z', '_' => { + 'u' => { + state = .u; + }, + 'U' => { + state = .U; + }, + 'L' => { + state = .L; + }, + 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => { state = .Identifier; result.id = .Identifier; }, @@ -268,11 +287,9 @@ pub const Tokenizer = struct { }, '0' => { state = .Zero; - result.id = .IntegerLiteral; }, '1'...'9' => { state = .IntegerLiteral; - result.id = .IntegerLiteral; }, else => { result.id = .Invalid; @@ -291,14 +308,63 @@ pub const Tokenizer = struct { break; }, }, - // TODO l"" u"" U"" u8"" + .u => switch (c) { + '8' => { + state = .u8; + }, + '\'' => { + result.id = .{ .CharLiteral = .Utf16 }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Utf16 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .u8 => switch (c) { + '\"' => { + result.id = .{ .StringLiteral = .Utf8 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .U => switch (c) { + '\'' => { + result.id = .{ .CharLiteral = .Utf32 }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Utf32 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .L => switch (c) { + '\'' => { + result.id = .{ .CharLiteral = .Wide }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Wide }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, .StringLiteral => switch (c) { '\\' => { string = true; state = .EscapeSequence; }, '"' => { - result.id = .StringLiteral; self.index += 1; break; }, @@ -308,7 +374,6 @@ pub const Tokenizer = struct { }, else => {}, }, - // TODO l'' u'' U'' .CharLiteral => switch (c) { '\\' => { string = false; @@ -683,7 +748,7 @@ pub const Tokenizer = struct { state = .IntegerSuffixL; }, else => { - result.id = .IntegerLiteral; + result.id = .{ .IntegerLiteral = .None }; break; }, }, @@ -692,8 +757,7 @@ pub const Tokenizer = struct { state = .IntegerSuffixUL; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .U; + result.id = .{ .IntegerLiteral = .U }; break; }, }, @@ -702,40 +766,34 @@ pub const Tokenizer = struct { state = .IntegerSuffixLL; }, 'u', 'U' => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; + result.id = .{ .IntegerLiteral = .LU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .L; + result.id = .{ .IntegerLiteral = .L }; break; }, }, .IntegerSuffixLL => switch (c) { 'u', 'U' => { - result.id = .IntegerLiteral; - result.num_suffix = .LLU; + result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .LL; + result.id = .{ .IntegerLiteral = .LL }; break; }, }, .IntegerSuffixUL => switch (c) { 'l', 'L' => { - result.id = .IntegerLiteral; - result.num_suffix = .LLU; + result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; + result.id = .{ .IntegerLiteral = .LU }; break; }, }, @@ -782,19 +840,17 @@ pub const Tokenizer = struct { }, .FloatSuffix => switch (c) { 'l', 'L' => { - result.id = .FloatLiteral; - result.num_suffix = .L; + result.id = .{ .FloatLiteral = .L }; self.index += 1; break; }, 'f', 'F' => { - result.id = .FloatLiteral; - result.num_suffix = .F; + result.id = .{ .FloatLiteral = .F }; self.index += 1; break; }, else => { - result.id = .FloatLiteral; + result.id = .{ .FloatLiteral = .None }; break; }, }, @@ -802,7 +858,7 @@ pub const Tokenizer = struct { } else if (self.index == self.source.buffer.len) { switch (state) { .Start => {}, - .Identifier => { + .u, .u8, .U, .L, .Identifier => { result.id = .Identifier; }, @@ -822,25 +878,19 @@ pub const Tokenizer = struct { .FloatExponentDigits, => result.id = .Invalid, - .IntegerLiteralOct, .IntegerLiteralBinary, .IntegerLiteralHex, .IntegerLiteral, .IntegerSuffix, .Zero => result.id = .IntegerLiteral, - .IntegerSuffixU => { - result.id = .IntegerLiteral; - result.num_suffix = .U; - }, - .IntegerSuffixL => { - result.id = .IntegerLiteral; - result.num_suffix = .L; - }, - .IntegerSuffixLL => { - result.id = .IntegerLiteral; - result.num_suffix = .LL; - }, - .IntegerSuffixUL => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; - }, + .IntegerLiteralOct, + .IntegerLiteralBinary, + .IntegerLiteralHex, + .IntegerLiteral, + .IntegerSuffix, + .Zero, + => result.id = .{ .IntegerLiteral = .None }, + .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U }, + .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L }, + .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL }, + .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU }, - .FloatSuffix => result.id = .FloatLiteral, + .FloatSuffix => result.id = .{ .FloatLiteral = .None }, .Equal => result.id = .Equal, .Bang => result.id = .Bang, .Minus => result.id = .Minus,