From 08d531143f0b373cbc54e037fa526fb00d9db398 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 5 Dec 2017 00:20:23 -0500
Subject: [PATCH] parser skeleton

---
 src-self-hosted/main.zig | 228 +++++++++++++++++++++++++++++----------
 1 file changed, 173 insertions(+), 55 deletions(-)

diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig
index 4e040840b7..de44a4652f 100644
--- a/src-self-hosted/main.zig
+++ b/src-self-hosted/main.zig
@@ -5,60 +5,66 @@ const heap = @import("std").heap;
 const warn = @import("std").debug.warn;
 const assert = @import("std").debug.assert;
 const mem = @import("std").mem;
+const ArrayList = @import("std").ArrayList;
+
 const Token = struct {
     id: Id,
     start: usize,
     end: usize,
 
-    const Keyword = enum {
-        @"align",
-        @"and",
-        @"asm",
-        @"break",
-        @"coldcc",
-        @"comptime",
-        @"const",
-        @"continue",
-        @"defer",
-        @"else",
-        @"enum",
-        @"error",
-        @"export",
-        @"extern",
-        @"false",
-        @"fn",
-        @"for",
-        @"goto",
-        @"if",
-        @"inline",
-        @"nakedcc",
-        @"noalias",
-        @"null",
-        @"or",
-        @"packed",
-        @"pub",
-        @"return",
-        @"stdcallcc",
-        @"struct",
-        @"switch",
-        @"test",
-        @"this",
-        @"true",
-        @"undefined",
-        @"union",
-        @"unreachable",
-        @"use",
-        @"var",
-        @"volatile",
-        @"while",
+    const KeywordId = struct {
+        bytes: []const u8,
+        id: Id,
     };
 
-    fn getKeyword(bytes: []const u8) -> ?Keyword {
-        comptime var i = 0;
-        inline while (i < @memberCount(Keyword)) : (i += 1) {
-            if (mem.eql(u8, @memberName(Keyword, i), bytes)) {
-                return Keyword(i);
+    const keywords = []KeywordId {
+        KeywordId{.bytes="align", .id = Id {.Keyword_align = {}}},
+        KeywordId{.bytes="and", .id = Id {.Keyword_and = {}}},
+        KeywordId{.bytes="asm", .id = Id {.Keyword_asm = {}}},
+        KeywordId{.bytes="break", .id = Id {.Keyword_break = {}}},
+        KeywordId{.bytes="coldcc", .id = Id {.Keyword_coldcc = {}}},
+        KeywordId{.bytes="comptime", .id = Id {.Keyword_comptime = {}}},
+        KeywordId{.bytes="const", .id = Id {.Keyword_const = {}}},
+        KeywordId{.bytes="continue", .id = Id {.Keyword_continue = {}}},
+        KeywordId{.bytes="defer", .id = Id {.Keyword_defer = {}}},
+        KeywordId{.bytes="else", .id = Id {.Keyword_else = {}}},
+        KeywordId{.bytes="enum", .id = Id {.Keyword_enum = {}}},
+        KeywordId{.bytes="error", .id = Id {.Keyword_error = {}}},
+        KeywordId{.bytes="export", .id = Id {.Keyword_export = {}}},
+        KeywordId{.bytes="extern", .id = Id {.Keyword_extern = {}}},
+        KeywordId{.bytes="false", .id = Id {.Keyword_false = {}}},
+        KeywordId{.bytes="fn", .id = Id {.Keyword_fn = {}}},
+        KeywordId{.bytes="for", .id = Id {.Keyword_for = {}}},
+        KeywordId{.bytes="goto", .id = Id {.Keyword_goto = {}}},
+        KeywordId{.bytes="if", .id = Id {.Keyword_if = {}}},
+        KeywordId{.bytes="inline", .id = Id {.Keyword_inline = {}}},
+        KeywordId{.bytes="nakedcc", .id = Id {.Keyword_nakedcc = {}}},
+        KeywordId{.bytes="noalias", .id = Id {.Keyword_noalias = {}}},
+        KeywordId{.bytes="null", .id = Id {.Keyword_null = {}}},
+        KeywordId{.bytes="or", .id = Id {.Keyword_or = {}}},
+        KeywordId{.bytes="packed", .id = Id {.Keyword_packed = {}}},
+        KeywordId{.bytes="pub", .id = Id {.Keyword_pub = {}}},
+        KeywordId{.bytes="return", .id = Id {.Keyword_return = {}}},
+        KeywordId{.bytes="stdcallcc", .id = Id {.Keyword_stdcallcc = {}}},
+        KeywordId{.bytes="struct", .id = Id {.Keyword_struct = {}}},
+        KeywordId{.bytes="switch", .id = Id {.Keyword_switch = {}}},
+        KeywordId{.bytes="test", .id = Id {.Keyword_test = {}}},
+        KeywordId{.bytes="this", .id = Id {.Keyword_this = {}}},
+        KeywordId{.bytes="true", .id = Id {.Keyword_true = {}}},
+        KeywordId{.bytes="undefined", .id = Id {.Keyword_undefined = {}}},
+        KeywordId{.bytes="union", .id = Id {.Keyword_union = {}}},
+        KeywordId{.bytes="unreachable", .id = Id {.Keyword_unreachable = {}}},
+        KeywordId{.bytes="use", .id = Id {.Keyword_use = {}}},
+        KeywordId{.bytes="var", .id = Id {.Keyword_var = {}}},
+        KeywordId{.bytes="volatile", .id = Id {.Keyword_volatile = {}}},
+        KeywordId{.bytes="while", .id = Id {.Keyword_while = {}}},
+    };
+
+    fn getKeyword(bytes: []const u8) -> ?Id {
+        for (keywords) |kw| {
+            if (mem.eql(u8, kw.bytes, bytes)) {
+                return kw.id;
             }
         }
         return null;
@@ -69,7 +75,6 @@ const Token = struct {
     const Id = union(enum) {
         Invalid,
         Identifier,
-        Keyword: Keyword,
         StringLiteral: StrLitKind,
         Eof,
         Builtin,
@@ -83,6 +88,46 @@ const Token = struct {
         Period,
         Minus,
         Arrow,
+        Keyword_align,
+        Keyword_and,
+        Keyword_asm,
+        Keyword_break,
+        Keyword_coldcc,
+        Keyword_comptime,
+        Keyword_const,
+        Keyword_continue,
+        Keyword_defer,
+        Keyword_else,
+        Keyword_enum,
+        Keyword_error,
+        Keyword_export,
+        Keyword_extern,
+        Keyword_false,
+        Keyword_fn,
+        Keyword_for,
+        Keyword_goto,
+        Keyword_if,
+        Keyword_inline,
+        Keyword_nakedcc,
+        Keyword_noalias,
+        Keyword_null,
+        Keyword_or,
+        Keyword_packed,
+        Keyword_pub,
+        Keyword_return,
+        Keyword_stdcallcc,
+        Keyword_struct,
+        Keyword_switch,
+        Keyword_test,
+        Keyword_this,
+        Keyword_true,
+        Keyword_undefined,
+        Keyword_union,
+        Keyword_unreachable,
+        Keyword_use,
+        Keyword_var,
+        Keyword_volatile,
+        Keyword_while,
     };
 };
 
@@ -193,8 +238,8 @@ const Tokenizer = struct {
                 State.Identifier => switch (c) {
                     'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                     else => {
-                        if (Token.getKeyword(self.buffer[result.start..self.index])) |keyword_id| {
-                            result.id = Token.Id { .Keyword = keyword_id };
+                        if (Token.getKeyword(self.buffer[result.start..self.index])) |id| {
+                            result.id = id;
                         }
                         break;
                     },
@@ -251,6 +296,73 @@ const Tokenizer = struct {
     }
 };
 
+const AstNode = struct {
+
+};
+
+const Parser = struct {
+    tokenizer: &Tokenizer,
+    allocator: &mem.Allocator,
+
+    fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator) -> Parser {
+        return Parser {
+            .tokenizer = tokenizer,
+            .allocator = allocator,
+        };
+    }
+
+    const StackFrame = struct {
+
+    };
+
+    const State = enum {
+        TopLevel,
+        Expression,
+    };
+
+    fn parse(self: &Parser) -> %void {
+        var stack = ArrayList(StackFrame).init(self.allocator);
+        defer stack.deinit();
+
+        var state = State.TopLevel;
+        while (true) {
+            const token = self.tokenizer.next();
+            switch (state) {
+                State.TopLevel => switch (token.id) {
+                    Token.Id.Keyword_pub => {
+                        const next_token = self.tokenizer.next();
+                        switch (next_token.id) {
+                            Token.Id.Keyword_fn => {
+                                const fn_name = self.tokenizer.next();
+                                if (@TagType(Token.Id)(fn_name.id) != Token.Id.Identifier) {
+                                    @panic("parse error");
+                                }
+
+                                const lparen = self.tokenizer.next();
+                                if (@TagType(Token.Id)(lparen.id) != Token.Id.LParen) {
+                                    @panic("parse error");
+                                }
+                            },
+                            Token.Id.Keyword_const => @panic("TODO"),
+                            Token.Id.Keyword_var => @panic("TODO"),
+                            Token.Id.Keyword_use => @panic("TODO"),
+                            else => @panic("parse error"),
+                        }
+                    },
+                    Token.Id.Keyword_const => @panic("TODO"),
+                    Token.Id.Keyword_var => @panic("TODO"),
+                    Token.Id.Keyword_fn => @panic("TODO"),
+                    Token.Id.Keyword_export => @panic("TODO"),
+                    Token.Id.Keyword_use => @panic("TODO"),
+                    Token.Id.Keyword_comptime => @panic("TODO"),
+                    else => @panic("parse error"),
+                },
+                State.Expression => @panic("TODO"),
+            }
+        }
+    }
+};
+
 pub fn main() -> %void {
     main2() %% |err| {
@@ -271,12 +383,18 @@ pub fn main2() -> %void {
 
     warn("{}", target_file_buf);
 
-    var tokenizer = Tokenizer.init(target_file_buf);
-    while (true) {
-        const token = tokenizer.next();
-        tokenizer.dump(token);
-        if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
-            break;
+    {
+        var tokenizer = Tokenizer.init(target_file_buf);
+        while (true) {
+            const token = tokenizer.next();
+            tokenizer.dump(token);
+            if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
+                break;
+            }
         }
     }
+
+    var tokenizer = Tokenizer.init(target_file_buf);
+    var parser = Parser.init(&tokenizer, allocator);
+    %return parser.parse();
 }