From d431b0fb99342f1e2629f5dd8bf1ca578a5e5d9a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 8 Dec 2017 23:15:43 -0500 Subject: [PATCH] parse a simple variable declaration --- src-self-hosted/main.zig | 440 +++++++++++++++++++++++++++++++++++---- std/mem.zig | 11 + 2 files changed, 406 insertions(+), 45 deletions(-) diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index d8b403c066..680de4d041 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -88,6 +88,8 @@ const Token = struct { Period, Minus, Arrow, + Colon, + Slash, Keyword_align, Keyword_and, Keyword_asm, @@ -135,6 +137,37 @@ const Tokenizer = struct { buffer: []const u8, index: usize, + pub const Location = struct { + line: usize, + column: usize, + line_start: usize, + line_end: usize, + }; + + pub fn getTokenLocation(self: &Tokenizer, token: &const Token) -> Location { + var loc = Location { + .line = 0, + .column = 0, + .line_start = 0, + .line_end = 0, + }; + for (self.buffer) |c, i| { + if (i == token.start) { + loc.line_end = i; + while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {} + return loc; + } + if (c == '\n') { + loc.line += 1; + loc.column = 0; + loc.line_start = i; + } else { + loc.column += 1; + } + } + return loc; + } + pub fn dump(self: &Tokenizer, token: &const Token) { warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]); } @@ -154,6 +187,8 @@ const Tokenizer = struct { StringLiteral, StringLiteralBackslash, Minus, + Slash, + LineComment, }; pub fn next(self: &Tokenizer) -> Token { @@ -206,6 +241,11 @@ const Tokenizer = struct { self.index += 1; break; }, + ':' => { + result.id = Token.Id.Colon; + self.index += 1; + break; + }, '%' => { result.id = Token.Id.Percent; self.index += 1; @@ -229,6 +269,9 @@ const Tokenizer = struct { '-' => { state = State.Minus; }, + '/' => { + state = State.Slash; + }, else => { result.id = Token.Id.Invalid; self.index += 1; @@ -289,78 +332,363 @@ const Tokenizer = struct { break; }, }, + State.Slash => switch (c) { + '/' => { + result.id = undefined; + state = State.LineComment; + }, + else => { + result.id = Token.Id.Slash; + break; + }, + }, + State.LineComment => switch (c) { + '\n' => { + state = State.Start; + result = Token { + .id = Token.Id.Eof, + .start = self.index + 1, + .end = undefined, + }; + }, + else => {}, + }, } } result.end = self.index; + // TODO check state when returning EOF return result; } }; -const AstNode = struct { - +const Visibility = enum { + Private, + Pub, + Export, }; +const Mutability = enum { + Const, + Var, +}; + +const AstNode = struct { + id: Id, + + const Id = enum { + Root, + VarDecl, + Identifier, + }; + + fn iterate(base: &AstNode, index: usize) -> ?&AstNode { + return switch (base.id) { + Id.Root => @fieldParentPtr(AstNodeRoot, "base", base).iterate(index), + Id.VarDecl => @fieldParentPtr(AstNodeVarDecl, "base", base).iterate(index), + Id.Identifier => @fieldParentPtr(AstNodeIdentifier, "base", base).iterate(index), + }; + } +}; + +const AstNodeRoot = struct { + base: AstNode, + decls: ArrayList(&AstNode), + + fn iterate(self: &AstNodeRoot, index: usize) -> ?&AstNode { + if (index < self.decls.len) { + return self.decls.items[index]; + } + return null; + } +}; + +const AstNodeVarDecl = struct { + base: AstNode, + visib: Visibility, + name_token: Token, + eq_token: Token, + mut: Mutability, + is_comptime: bool, + type_node: ?&AstNode, + align_node: ?&AstNode, + init_node: ?&AstNode, + + fn iterate(self: &AstNodeVarDecl, index: usize) -> ?&AstNode { + var i = index; + + if (self.type_node) |type_node| { + if (i < 1) return type_node; + i -= 1; + } + + if (self.align_node) |align_node| { + if (i < 1) return align_node; + i -= 1; + } + + if (self.init_node) |init_node| { + if (i < 1) return init_node; + i -= 1; + } + + return null; + } +}; + +const AstNodeIdentifier = struct { + base: AstNode, + name_token: Token, + + fn iterate(self: &AstNodeIdentifier, index: usize) -> ?&AstNode { + return null; + } +}; + +error ParseError; + const Parser = struct { tokenizer: &Tokenizer, allocator: &mem.Allocator, + put_back_tokens: [1]Token, + put_back_count: usize, + source_file_name: []const u8, - fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator) -> Parser { + fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator, source_file_name: []const u8) -> Parser { return Parser { .tokenizer = tokenizer, .allocator = allocator, + .put_back_tokens = undefined, + .put_back_count = 0, + .source_file_name = source_file_name, }; } - const StackFrame = struct { - - }; - - const State = enum { + const State = union(enum) { TopLevel, - Expression, + TopLevelModifier: Visibility, + Expression: &?&AstNode, + GroupedExpression: &?&AstNode, + PrimaryExpression: &?&AstNode, + TypeExpr: &?&AstNode, + VarDecl: &AstNodeVarDecl, + VarDeclAlign: &AstNodeVarDecl, + VarDeclEq: &AstNodeVarDecl, + ExpectSemicolon, }; - fn parse(self: &Parser) -> %void { - var stack = ArrayList(StackFrame).init(self.allocator); + pub fn parse(self: &Parser) -> %&AstNode { + var stack = ArrayList(State).init(self.allocator); defer stack.deinit(); - var state = State.TopLevel; - while (true) { - const token = self.tokenizer.next(); - switch (state) { - State.TopLevel => switch (token.id) { - Token.Id.Keyword_pub => { - const next_token = self.tokenizer.next(); - switch (next_token.id) { - Token.Id.Keyword_fn => { - const fn_name = self.tokenizer.next(); - if (fn_name.id != Token.Id.Identifier) { - @panic("parse error"); - } + %return stack.append(State.TopLevel); - const lparen = self.tokenizer.next(); - if (lparen.id != Token.Id.LParen) { - @panic("parse error"); - } - }, - Token.Id.Keyword_const => @panic("TODO"), - Token.Id.Keyword_var => @panic("TODO"), - Token.Id.Keyword_use => @panic("TODO"), - else => @panic("parse error"), - } - }, - Token.Id.Keyword_const => @panic("TODO"), - Token.Id.Keyword_var => @panic("TODO"), - Token.Id.Keyword_fn => @panic("TODO"), - Token.Id.Keyword_export => @panic("TODO"), - Token.Id.Keyword_use => @panic("TODO"), - Token.Id.Keyword_comptime => @panic("TODO"), - else => @panic("parse error"), + const root_node = %return self.createRoot(); + // TODO %defer self.freeAst(); + + while (true) { + // This gives us 1 free append that can't fail + const state = stack.pop(); + + switch (state) { + State.TopLevel => { + const token = self.getNextToken(); + switch (token.id) { + Token.Id.Keyword_pub => { + stack.append(State {.TopLevelModifier = Visibility.Pub }) %% unreachable; + continue; + }, + Token.Id.Keyword_export => { + stack.append(State {.TopLevelModifier = Visibility.Export }) %% unreachable; + continue; + }, + Token.Id.Keyword_const => { + stack.append(State.TopLevel) %% unreachable; + const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Const, false); + %return root_node.decls.append(&var_decl_node.base); + %return stack.append(State { .VarDecl = var_decl_node }); + continue; + }, + Token.Id.Keyword_var => { + stack.append(State.TopLevel) %% unreachable; + const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Var, false); + %return root_node.decls.append(&var_decl_node.base); + %return stack.append(State { .VarDecl = var_decl_node }); + continue; + }, + Token.Id.Eof => return &root_node.base, + else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)), + } }, - State.Expression => @panic("TODO"), + State.TopLevelModifier => |visib| { + const token = self.getNextToken(); + switch (token.id) { + Token.Id.Keyword_const => { + stack.append(State.TopLevel) %% unreachable; + const var_decl_node = %return self.createVarDecl(visib, Mutability.Const, false); + %return root_node.decls.append(&var_decl_node.base); + %return stack.append(State { .VarDecl = var_decl_node }); + continue; + }, + Token.Id.Keyword_var => { + stack.append(State.TopLevel) %% unreachable; + const var_decl_node = %return self.createVarDecl(visib, Mutability.Var, false); + %return root_node.decls.append(&var_decl_node.base); + %return stack.append(State { .VarDecl = var_decl_node }); + continue; + }, + else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)), + } + }, + State.VarDecl => |var_decl| { + var_decl.name_token = %return self.eatToken(Token.Id.Identifier); + stack.append(State { .VarDeclAlign = var_decl }) %% unreachable; + + const next_token = self.getNextToken(); + if (next_token.id == Token.Id.Colon) { + %return stack.append(State { .TypeExpr = &var_decl.type_node }); + continue; + } + + self.putBackToken(next_token); + continue; + }, + State.VarDeclAlign => |var_decl| { + stack.append(State { .VarDeclEq = var_decl }) %% unreachable; + + const next_token = self.getNextToken(); + if (next_token.id == Token.Id.Keyword_align) { + %return stack.append(State { .GroupedExpression = &var_decl.align_node }); + continue; + } + + self.putBackToken(next_token); + continue; + }, + State.VarDeclEq => |var_decl| { + var_decl.eq_token = %return self.eatToken(Token.Id.Equal); + stack.append(State.ExpectSemicolon) %% unreachable; + %return stack.append(State { + .Expression = &var_decl.init_node, + }); + continue; + }, + State.ExpectSemicolon => { + _ = %return self.eatToken(Token.Id.Semicolon); + continue; + }, + State.Expression => |result_ptr| { + // TODO this should not jump straight to primary expression + stack.append(State {.PrimaryExpression = result_ptr}) %% unreachable; + continue; + }, + State.PrimaryExpression => |result_ptr| { + const token = self.getNextToken(); + switch (token.id) { + Token.Id.Identifier => { + const identifier = %return self.createIdentifier(token); + *result_ptr = &identifier.base; + continue; + }, + else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)), + } + }, + State.TypeExpr => @panic("TODO"), + State.GroupedExpression => @panic("TODO"), } + unreachable; } } + + fn createRoot(self: &Parser) -> %&AstNodeRoot { + const node = %return self.allocator.create(AstNodeRoot); + %defer self.allocator.destroy(node); + + *node = AstNodeRoot { + .base = AstNode {.id = AstNode.Id.Root}, + .decls = ArrayList(&AstNode).init(self.allocator), + }; + return node; + } + + fn createVarDecl(self: &Parser, visib: Visibility, mut: Mutability, is_comptime: bool) -> %&AstNodeVarDecl { + const node = %return self.allocator.create(AstNodeVarDecl); + %defer self.allocator.destroy(node); + + *node = AstNodeVarDecl { + .base = AstNode {.id = AstNode.Id.VarDecl}, + .visib = visib, + .mut = mut, + .is_comptime = is_comptime, + .type_node = null, + .align_node = null, + .init_node = null, + // initialized later + .name_token = undefined, + .eq_token = undefined, + }; + return node; + } + + fn createIdentifier(self: &Parser, name_token: &const Token) -> %&AstNodeIdentifier { + const node = %return self.allocator.create(AstNodeIdentifier); + %defer self.allocator.destroy(node); + + *node = AstNodeIdentifier { + .base = AstNode {.id = AstNode.Id.Identifier}, + .name_token = *name_token, + }; + return node; + } + + fn parseError(self: &Parser, token: &const Token, comptime fmt: []const u8, args: ...) -> error { + const loc = self.tokenizer.getTokenLocation(token); + warn("{}:{}:{}: error: " ++ fmt ++ "\n", self.source_file_name, loc.line + 1, loc.column + 1, args); + warn("{}\n", self.tokenizer.buffer[loc.line_start..loc.line_end]); + { + var i: usize = 0; + while (i < loc.column) : (i += 1) { + warn(" "); + } + } + { + const caret_count = token.end - token.start; + var i: usize = 0; + while (i < caret_count) : (i += 1) { + warn("~"); + } + } + warn("\n"); + return error.ParseError; + } + + fn expectToken(self: &Parser, token: &const Token, id: @TagType(Token.Id)) -> %void { + if (token.id != id) { + return self.parseError(token, "expected {}, found {}", @tagName(id), @tagName(token.id)); + } + } + + fn eatToken(self: &Parser, id: @TagType(Token.Id)) -> %Token { + const token = self.getNextToken(); + %return self.expectToken(token, id); + return token; + } + + fn putBackToken(self: &Parser, token: &const Token) { + self.put_back_tokens[self.put_back_count] = *token; + self.put_back_count += 1; + } + + fn getNextToken(self: &Parser) -> Token { + return if (self.put_back_count != 0) { + const put_back_index = self.put_back_count - 1; + const put_back_token = self.put_back_tokens[put_back_index]; + self.put_back_count = put_back_index; + put_back_token + } else { + self.tokenizer.next() + }; + } + }; @@ -384,8 +712,11 @@ pub fn main2() -> %void { const target_file_buf = %return io.readFileAlloc(target_file, allocator); + warn("====input:====\n"); + warn("{}", target_file_buf); + warn("====tokenization:====\n"); { var tokenizer = Tokenizer.init(target_file_buf); while (true) { @@ -397,7 +728,26 @@ pub fn main2() -> %void { } } + warn("====parse:====\n"); + var tokenizer = Tokenizer.init(target_file_buf); - var parser = Parser.init(&tokenizer, allocator); - %return parser.parse(); + var parser = Parser.init(&tokenizer, allocator, target_file); + const node = %return parser.parse(); + + + render(node, 0); +} + +fn render(node: &AstNode, indent: usize) { + { + var i: usize = 0; + while (i < indent) : (i += 1) { + warn(" "); + } + } + warn("{}\n", @tagName(node.id)); + var i: usize = 0; + while (node.iterate(i)) |child| : (i += 1) { + render(child, indent + 2); + } } diff --git a/std/mem.zig b/std/mem.zig index e99675d248..b62451687f 100644 --- a/std/mem.zig +++ b/std/mem.zig @@ -8,6 +8,7 @@ pub const Cmp = math.Cmp; pub const Allocator = struct { /// Allocate byte_count bytes and return them in a slice, with the /// slice's pointer aligned at least to alignment bytes. + /// The returned newly allocated memory is undefined. allocFn: fn (self: &Allocator, byte_count: usize, alignment: u29) -> %[]u8, /// If `new_byte_count > old_mem.len`: @@ -17,6 +18,8 @@ pub const Allocator = struct { /// If `new_byte_count <= old_mem.len`: /// * this function must return successfully. /// * alignment <= alignment of old_mem.ptr + /// + /// The returned newly allocated memory is undefined. reallocFn: fn (self: &Allocator, old_mem: []u8, new_byte_count: usize, alignment: u29) -> %[]u8, /// Guaranteed: `old_mem.len` is the same as what was returned from `allocFn` or `reallocFn` @@ -40,6 +43,10 @@ pub const Allocator = struct { { const byte_count = %return math.mul(usize, @sizeOf(T), n); const byte_slice = %return self.allocFn(self, byte_count, alignment); + // This loop should get optimized out in ReleaseFast mode + for (byte_slice) |*byte| { + *byte = undefined; + } return ([]align(alignment) T)(@alignCast(alignment, byte_slice)); } @@ -56,6 +63,10 @@ pub const Allocator = struct { const byte_count = %return math.mul(usize, @sizeOf(T), n); const byte_slice = %return self.reallocFn(self, ([]u8)(old_mem), byte_count, alignment); + // This loop should get optimized out in ReleaseFast mode + for (byte_slice[old_mem.len..]) |*byte| { + *byte = undefined; + } return ([]T)(@alignCast(alignment, byte_slice)); }