Merge pull request #873 from zig-lang/self-hosted-parser

Self-hosted parser completion
This commit is contained in:
Andrew Kelley 2018-04-10 22:47:18 -04:00 committed by GitHub
commit 64d96ad703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 5275 additions and 628 deletions

View File

@ -14,26 +14,43 @@ pub fn panic(msg: []const u8, error_return_trace: ?&builtin.StackTrace) noreturn
} }
} }
// Note that memset does not return `dest`, like the libc API. export fn memset(dest: ?&u8, c: u8, n: usize) ?&u8 {
// The semantics of memset is dictated by the corresponding
// LLVM intrinsics, not by the libc API.
export fn memset(dest: ?&u8, c: u8, n: usize) void {
@setRuntimeSafety(false); @setRuntimeSafety(false);
var index: usize = 0; var index: usize = 0;
while (index != n) : (index += 1) while (index != n) : (index += 1)
(??dest)[index] = c; (??dest)[index] = c;
return dest;
} }
// Note that memcpy does not return `dest`, like the libc API. export fn memcpy(noalias dest: ?&u8, noalias src: ?&const u8, n: usize) ?&u8 {
// The semantics of memcpy is dictated by the corresponding
// LLVM intrinsics, not by the libc API.
export fn memcpy(noalias dest: ?&u8, noalias src: ?&const u8, n: usize) void {
@setRuntimeSafety(false); @setRuntimeSafety(false);
var index: usize = 0; var index: usize = 0;
while (index != n) : (index += 1) while (index != n) : (index += 1)
(??dest)[index] = (??src)[index]; (??dest)[index] = (??src)[index];
return dest;
}
export fn memmove(dest: ?&u8, src: ?&const u8, n: usize) ?&u8 {
@setRuntimeSafety(false);
if (@ptrToInt(dest) < @ptrToInt(src)) {
var index: usize = 0;
while (index != n) : (index += 1) {
(??dest)[index] = (??src)[index];
}
} else {
var index = n;
while (index != 0) {
index -= 1;
(??dest)[index] = (??src)[index];
}
}
return dest;
} }
comptime { comptime {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -5,8 +5,6 @@ pub const Token = struct {
id: Id, id: Id,
start: usize, start: usize,
end: usize, end: usize,
line: usize,
column: usize,
const KeywordId = struct { const KeywordId = struct {
bytes: []const u8, bytes: []const u8,
@ -17,14 +15,18 @@ pub const Token = struct {
KeywordId{.bytes="align", .id = Id.Keyword_align}, KeywordId{.bytes="align", .id = Id.Keyword_align},
KeywordId{.bytes="and", .id = Id.Keyword_and}, KeywordId{.bytes="and", .id = Id.Keyword_and},
KeywordId{.bytes="asm", .id = Id.Keyword_asm}, KeywordId{.bytes="asm", .id = Id.Keyword_asm},
KeywordId{.bytes="async", .id = Id.Keyword_async},
KeywordId{.bytes="await", .id = Id.Keyword_await},
KeywordId{.bytes="break", .id = Id.Keyword_break}, KeywordId{.bytes="break", .id = Id.Keyword_break},
KeywordId{.bytes="catch", .id = Id.Keyword_catch}, KeywordId{.bytes="catch", .id = Id.Keyword_catch},
KeywordId{.bytes="cancel", .id = Id.Keyword_cancel},
KeywordId{.bytes="comptime", .id = Id.Keyword_comptime}, KeywordId{.bytes="comptime", .id = Id.Keyword_comptime},
KeywordId{.bytes="const", .id = Id.Keyword_const}, KeywordId{.bytes="const", .id = Id.Keyword_const},
KeywordId{.bytes="continue", .id = Id.Keyword_continue}, KeywordId{.bytes="continue", .id = Id.Keyword_continue},
KeywordId{.bytes="defer", .id = Id.Keyword_defer}, KeywordId{.bytes="defer", .id = Id.Keyword_defer},
KeywordId{.bytes="else", .id = Id.Keyword_else}, KeywordId{.bytes="else", .id = Id.Keyword_else},
KeywordId{.bytes="enum", .id = Id.Keyword_enum}, KeywordId{.bytes="enum", .id = Id.Keyword_enum},
KeywordId{.bytes="errdefer", .id = Id.Keyword_errdefer},
KeywordId{.bytes="error", .id = Id.Keyword_error}, KeywordId{.bytes="error", .id = Id.Keyword_error},
KeywordId{.bytes="export", .id = Id.Keyword_export}, KeywordId{.bytes="export", .id = Id.Keyword_export},
KeywordId{.bytes="extern", .id = Id.Keyword_extern}, KeywordId{.bytes="extern", .id = Id.Keyword_extern},
@ -39,10 +41,12 @@ pub const Token = struct {
KeywordId{.bytes="or", .id = Id.Keyword_or}, KeywordId{.bytes="or", .id = Id.Keyword_or},
KeywordId{.bytes="packed", .id = Id.Keyword_packed}, KeywordId{.bytes="packed", .id = Id.Keyword_packed},
KeywordId{.bytes="pub", .id = Id.Keyword_pub}, KeywordId{.bytes="pub", .id = Id.Keyword_pub},
KeywordId{.bytes="resume", .id = Id.Keyword_resume},
KeywordId{.bytes="return", .id = Id.Keyword_return}, KeywordId{.bytes="return", .id = Id.Keyword_return},
KeywordId{.bytes="section", .id = Id.Keyword_section}, KeywordId{.bytes="section", .id = Id.Keyword_section},
KeywordId{.bytes="stdcallcc", .id = Id.Keyword_stdcallcc}, KeywordId{.bytes="stdcallcc", .id = Id.Keyword_stdcallcc},
KeywordId{.bytes="struct", .id = Id.Keyword_struct}, KeywordId{.bytes="struct", .id = Id.Keyword_struct},
KeywordId{.bytes="suspend", .id = Id.Keyword_suspend},
KeywordId{.bytes="switch", .id = Id.Keyword_switch}, KeywordId{.bytes="switch", .id = Id.Keyword_switch},
KeywordId{.bytes="test", .id = Id.Keyword_test}, KeywordId{.bytes="test", .id = Id.Keyword_test},
KeywordId{.bytes="this", .id = Id.Keyword_this}, KeywordId{.bytes="this", .id = Id.Keyword_this},
@ -72,7 +76,8 @@ pub const Token = struct {
Invalid, Invalid,
Identifier, Identifier,
StringLiteral: StrLitKind, StringLiteral: StrLitKind,
StringIdentifier, MultilineStringLiteralLine: StrLitKind,
CharLiteral,
Eof, Eof,
Builtin, Builtin,
Bang, Bang,
@ -81,6 +86,7 @@ pub const Token = struct {
PipeEqual, PipeEqual,
Equal, Equal,
EqualEqual, EqualEqual,
EqualAngleBracketRight,
BangEqual, BangEqual,
LParen, LParen,
RParen, RParen,
@ -89,6 +95,8 @@ pub const Token = struct {
PercentEqual, PercentEqual,
LBrace, LBrace,
RBrace, RBrace,
LBracket,
RBracket,
Period, Period,
Ellipsis2, Ellipsis2,
Ellipsis3, Ellipsis3,
@ -132,7 +140,10 @@ pub const Token = struct {
Keyword_align, Keyword_align,
Keyword_and, Keyword_and,
Keyword_asm, Keyword_asm,
Keyword_async,
Keyword_await,
Keyword_break, Keyword_break,
Keyword_cancel,
Keyword_catch, Keyword_catch,
Keyword_comptime, Keyword_comptime,
Keyword_const, Keyword_const,
@ -140,6 +151,7 @@ pub const Token = struct {
Keyword_defer, Keyword_defer,
Keyword_else, Keyword_else,
Keyword_enum, Keyword_enum,
Keyword_errdefer,
Keyword_error, Keyword_error,
Keyword_export, Keyword_export,
Keyword_extern, Keyword_extern,
@ -154,10 +166,12 @@ pub const Token = struct {
Keyword_or, Keyword_or,
Keyword_packed, Keyword_packed,
Keyword_pub, Keyword_pub,
Keyword_resume,
Keyword_return, Keyword_return,
Keyword_section, Keyword_section,
Keyword_stdcallcc, Keyword_stdcallcc,
Keyword_struct, Keyword_struct,
Keyword_suspend,
Keyword_switch, Keyword_switch,
Keyword_test, Keyword_test,
Keyword_this, Keyword_this,
@ -176,28 +190,34 @@ pub const Token = struct {
pub const Tokenizer = struct { pub const Tokenizer = struct {
buffer: []const u8, buffer: []const u8,
index: usize, index: usize,
line: usize,
column: usize,
pending_invalid_token: ?Token, pending_invalid_token: ?Token,
pub const LineLocation = struct { pub const Location = struct {
line: usize,
column: usize,
line_start: usize, line_start: usize,
line_end: usize, line_end: usize,
}; };
pub fn getTokenLocation(self: &Tokenizer, token: &const Token) LineLocation { pub fn getTokenLocation(self: &Tokenizer, start_index: usize, token: &const Token) Location {
var loc = LineLocation { var loc = Location {
.line_start = 0, .line = 0,
.column = 0,
.line_start = start_index,
.line_end = self.buffer.len, .line_end = self.buffer.len,
}; };
for (self.buffer) |c, i| { for (self.buffer[start_index..]) |c, i| {
if (i == token.start) { if (i + start_index == token.start) {
loc.line_end = i; loc.line_end = i + start_index;
while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {} while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
return loc; return loc;
} }
if (c == '\n') { if (c == '\n') {
loc.line += 1;
loc.column = 0;
loc.line_start = i + 1; loc.line_start = i + 1;
} else {
loc.column += 1;
} }
} }
return loc; return loc;
@ -212,8 +232,6 @@ pub const Tokenizer = struct {
return Tokenizer { return Tokenizer {
.buffer = buffer, .buffer = buffer,
.index = 0, .index = 0,
.line = 0,
.column = 0,
.pending_invalid_token = null, .pending_invalid_token = null,
}; };
} }
@ -225,6 +243,12 @@ pub const Tokenizer = struct {
C, C,
StringLiteral, StringLiteral,
StringLiteralBackslash, StringLiteralBackslash,
MultilineStringLiteralLine,
MultilineStringLiteralLineBackslash,
CharLiteral,
CharLiteralBackslash,
CharLiteralEnd,
Backslash,
Equal, Equal,
Bang, Bang,
Pipe, Pipe,
@ -261,26 +285,22 @@ pub const Tokenizer = struct {
self.pending_invalid_token = null; self.pending_invalid_token = null;
return token; return token;
} }
const start_index = self.index;
var state = State.Start; var state = State.Start;
var result = Token { var result = Token {
.id = Token.Id.Eof, .id = Token.Id.Eof,
.start = self.index, .start = self.index,
.end = undefined, .end = undefined,
.line = self.line,
.column = self.column,
}; };
while (self.index < self.buffer.len) { while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index]; const c = self.buffer[self.index];
switch (state) { switch (state) {
State.Start => switch (c) { State.Start => switch (c) {
' ' => { ' ' => {
result.start = self.index + 1; result.start = self.index + 1;
result.column += 1;
}, },
'\n' => { '\n' => {
result.start = self.index + 1; result.start = self.index + 1;
result.line += 1;
result.column = 0;
}, },
'c' => { 'c' => {
state = State.C; state = State.C;
@ -290,6 +310,9 @@ pub const Tokenizer = struct {
state = State.StringLiteral; state = State.StringLiteral;
result.id = Token.Id { .StringLiteral = Token.StrLitKind.Normal }; result.id = Token.Id { .StringLiteral = Token.StrLitKind.Normal };
}, },
'\'' => {
state = State.CharLiteral;
},
'a'...'b', 'd'...'z', 'A'...'Z', '_' => { 'a'...'b', 'd'...'z', 'A'...'Z', '_' => {
state = State.Identifier; state = State.Identifier;
result.id = Token.Id.Identifier; result.id = Token.Id.Identifier;
@ -316,6 +339,16 @@ pub const Tokenizer = struct {
self.index += 1; self.index += 1;
break; break;
}, },
'[' => {
result.id = Token.Id.LBracket;
self.index += 1;
break;
},
']' => {
result.id = Token.Id.RBracket;
self.index += 1;
break;
},
';' => { ';' => {
result.id = Token.Id.Semicolon; result.id = Token.Id.Semicolon;
self.index += 1; self.index += 1;
@ -352,6 +385,10 @@ pub const Tokenizer = struct {
'^' => { '^' => {
state = State.Caret; state = State.Caret;
}, },
'\\' => {
state = State.Backslash;
result.id = Token.Id { .MultilineStringLiteralLine = Token.StrLitKind.Normal };
},
'{' => { '{' => {
result.id = Token.Id.LBrace; result.id = Token.Id.LBrace;
self.index += 1; self.index += 1;
@ -396,7 +433,7 @@ pub const Tokenizer = struct {
State.SawAtSign => switch (c) { State.SawAtSign => switch (c) {
'"' => { '"' => {
result.id = Token.Id.StringIdentifier; result.id = Token.Id.Identifier;
state = State.StringLiteral; state = State.StringLiteral;
}, },
else => { else => {
@ -532,8 +569,17 @@ pub const Tokenizer = struct {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => break, else => break,
}, },
State.Backslash => switch (c) {
'\\' => {
state = State.MultilineStringLiteralLine;
},
else => break,
},
State.C => switch (c) { State.C => switch (c) {
'\\' => @panic("TODO"), '\\' => {
state = State.Backslash;
result.id = Token.Id { .MultilineStringLiteralLine = Token.StrLitKind.C };
},
'"' => { '"' => {
state = State.StringLiteral; state = State.StringLiteral;
result.id = Token.Id { .StringLiteral = Token.StrLitKind.C }; result.id = Token.Id { .StringLiteral = Token.StrLitKind.C };
@ -562,6 +608,64 @@ pub const Tokenizer = struct {
}, },
}, },
State.CharLiteral => switch (c) {
'\\' => {
state = State.CharLiteralBackslash;
},
'\'' => {
result.id = Token.Id.Invalid;
break;
},
else => {
if (c < 0x20 or c == 0x7f) {
result.id = Token.Id.Invalid;
break;
}
state = State.CharLiteralEnd;
}
},
State.CharLiteralBackslash => switch (c) {
'\n' => {
result.id = Token.Id.Invalid;
break;
},
else => {
state = State.CharLiteralEnd;
},
},
State.CharLiteralEnd => switch (c) {
'\'' => {
result.id = Token.Id.CharLiteral;
self.index += 1;
break;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.MultilineStringLiteralLine => switch (c) {
'\\' => {
state = State.MultilineStringLiteralLineBackslash;
},
'\n' => {
self.index += 1;
break;
},
else => self.checkLiteralCharacter(),
},
State.MultilineStringLiteralLineBackslash => switch (c) {
'\n' => break, // Look for this error later.
else => {
state = State.MultilineStringLiteralLine;
},
},
State.Bang => switch (c) { State.Bang => switch (c) {
'=' => { '=' => {
result.id = Token.Id.BangEqual; result.id = Token.Id.BangEqual;
@ -597,6 +701,11 @@ pub const Tokenizer = struct {
self.index += 1; self.index += 1;
break; break;
}, },
'>' => {
result.id = Token.Id.EqualAngleBracketRight;
self.index += 1;
break;
},
else => { else => {
result.id = Token.Id.Equal; result.id = Token.Id.Equal;
break; break;
@ -794,14 +903,6 @@ pub const Tokenizer = struct {
else => break, else => break,
}, },
} }
self.index += 1;
if (c == '\n') {
self.line += 1;
self.column = 0;
} else {
self.column += 1;
}
} else if (self.index == self.buffer.len) { } else if (self.index == self.buffer.len) {
switch (state) { switch (state) {
State.Start, State.Start,
@ -811,6 +912,7 @@ pub const Tokenizer = struct {
State.FloatFraction, State.FloatFraction,
State.FloatExponentNumber, State.FloatExponentNumber,
State.StringLiteral, // find this error later State.StringLiteral, // find this error later
State.MultilineStringLiteralLine,
State.Builtin => {}, State.Builtin => {},
State.Identifier => { State.Identifier => {
@ -825,6 +927,11 @@ pub const Tokenizer = struct {
State.NumberDot, State.NumberDot,
State.FloatExponentUnsigned, State.FloatExponentUnsigned,
State.SawAtSign, State.SawAtSign,
State.Backslash,
State.MultilineStringLiteralLineBackslash,
State.CharLiteral,
State.CharLiteralBackslash,
State.CharLiteralEnd,
State.StringLiteralBackslash => { State.StringLiteralBackslash => {
result.id = Token.Id.Invalid; result.id = Token.Id.Invalid;
}, },
@ -894,6 +1001,7 @@ pub const Tokenizer = struct {
}, },
} }
} }
if (result.id == Token.Id.Eof) { if (result.id == Token.Id.Eof) {
if (self.pending_invalid_token) |token| { if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null; self.pending_invalid_token = null;
@ -917,8 +1025,6 @@ pub const Tokenizer = struct {
.id = Token.Id.Invalid, .id = Token.Id.Invalid,
.start = self.index, .start = self.index,
.end = self.index + invalid_length, .end = self.index + invalid_length,
.line = self.line,
.column = self.column,
}; };
} }
@ -968,9 +1074,16 @@ test "tokenizer" {
}); });
} }
test "tokenizer - chars" {
testTokenize("'c'", []Token.Id {Token.Id.CharLiteral});
}
test "tokenizer - invalid token characters" { test "tokenizer - invalid token characters" {
testTokenize("#", []Token.Id{Token.Id.Invalid}); testTokenize("#", []Token.Id{Token.Id.Invalid});
testTokenize("`", []Token.Id{Token.Id.Invalid}); testTokenize("`", []Token.Id{Token.Id.Invalid});
testTokenize("'c", []Token.Id {Token.Id.Invalid});
testTokenize("'", []Token.Id {Token.Id.Invalid});
testTokenize("''", []Token.Id {Token.Id.Invalid, Token.Id.Invalid});
} }
test "tokenizer - invalid literal/comment characters" { test "tokenizer - invalid literal/comment characters" {
@ -1022,7 +1135,7 @@ test "tokenizer - string identifier and builtin fns" {
, ,
[]Token.Id{ []Token.Id{
Token.Id.Keyword_const, Token.Id.Keyword_const,
Token.Id.StringIdentifier, Token.Id.Identifier,
Token.Id.Equal, Token.Id.Equal,
Token.Id.Builtin, Token.Id.Builtin,
Token.Id.LParen, Token.Id.LParen,