diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c8deec0d7..811ff18f07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,14 +21,15 @@ include_directories( ) set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt") -set(PARSER_CPP "${CMAKE_BINARY_DIR}/parser.cpp") +set(PARSER_GENERATED_CPP "${CMAKE_BINARY_DIR}/parser_generated.cpp") set(ZIG_SOURCES "${CMAKE_SOURCE_DIR}/src/main.cpp" "${CMAKE_SOURCE_DIR}/src/util.cpp" "${CMAKE_SOURCE_DIR}/src/buffer.cpp" "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp" - ${PARSER_CPP} + "${CMAKE_SOURCE_DIR}/src/parser.cpp" + ${PARSER_GENERATED_CPP} ) set(PARSERGEN_SOURCES @@ -68,8 +69,8 @@ set_target_properties(parsergen PROPERTIES add_custom_command( - OUTPUT ${PARSER_CPP} - COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_CPP} + OUTPUT ${PARSER_GENERATED_CPP} + COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_GENERATED_CPP} DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) diff --git a/README.md b/README.md index 4054a94516..60449464fb 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ readable, safe, optimal, and concise code to solve any computing problem. * Eliminate the need for C headers (when using zig internally). * Ability to declare dependencies as Git URLS with commit locking (can provide a tag or sha1). - * Rust-style enums. + * Tagged union enum type. * Opinionated when it makes life easier. - Tab character in source code is a compile error. - Whitespace at the end of line is a compile error. @@ -32,23 +32,29 @@ readable, safe, optimal, and concise code to solve any computing problem. * Hello, world. - Build AST - Code Gen + - Produce .o file. + * Produce executable file instead of .o file. + * Add debugging symbols. + * Debug/Release mode. * C style comments. * Unit tests. * Simple .so library * How should the Widget use case be solved? In Genesis I'm using C++ and inheritance. 
-## Grammar +### Primitive Numeric Types: -``` -Root : FnDecl* -FnDecl : TokenFn TokenSymbol TokenLParen list(ParamDecl, TokenComma, 0) TokenRParen (TokenArrow Type)? Block -ParamDecl : TokenSymbol TokenColon Type -Type : TokenSymbol | PointerType -PointerType : TokenStar (TokenConst | TokenMut) Type -Block : TokenLBrace Statement* Expression? TokenRBrace -Statement : ExpressionStatement | ReturnStatement -ExpressionStatement : Expression TokenSemicolon -ReturnStatement : TokenReturn Expression TokenSemicolon -Expression : TokenNumber | TokenString | FnCall -FnCall : TokenSymbol TokenLParen list(Expression, TokenComma, 0) TokenRParen -``` +zig | C equivalent | Description +-------|--------------|------------------------------- + i8 | int8_t | signed 8-bit integer + u8 | uint8_t | unsigned 8-bit integer + i16 | int16_t | signed 16-bit integer + u16 | uint16_t | unsigned 16-bit integer + i32 | int32_t | signed 32-bit integer + u32 | uint32_t | unsigned 32-bit integer + i64 | int64_t | signed 64-bit integer + u64 | uint64_t | unsigned 64-bit integer + f32 | float | 32-bit IEEE 754 floating point + f64 | double | 64-bit IEEE 754 floating point + f128 | long double | 128-bit IEEE 754 floating point + isize | ssize_t | signed pointer sized integer + usize | size_t | unsigned pointer sized integer diff --git a/src/main.cpp b/src/main.cpp index c313c7918a..eb79b61f1b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -50,82 +49,6 @@ static Buf *fetch_file(FILE *f) { return buf; } -void ast_error(Token *token, const char *format, ...) 
{ - int line = token->start_line + 1; - int column = token->start_column + 1; - - va_list ap; - va_start(ap, format); - fprintf(stderr, "Error: Line %d, column %d: ", line, column); - vfprintf(stderr, format, ap); - fprintf(stderr, "\n"); - va_end(ap); - exit(EXIT_FAILURE); -} - -static const char *node_type_str(NodeType node_type) { - switch (node_type) { - case NodeTypeRoot: - return "Root"; - case NodeTypeFnDecl: - return "FnDecl"; - case NodeTypeParamDecl: - return "ParamDecl"; - case NodeTypeType: - return "Type"; - case NodeTypePointerType: - return "PointerType"; - case NodeTypeBlock: - return "Block"; - case NodeTypeStatement: - return "Statement"; - case NodeTypeExpressionStatement: - return "ExpressionStatement"; - case NodeTypeReturnStatement: - return "ReturnStatement"; - case NodeTypeExpression: - return "Expression"; - case NodeTypeFnCall: - return "FnCall"; - } - zig_panic("unreachable"); -} - -static void ast_print(AstNode *node, int indent) { - for (int i = 0; i < indent; i += 1) { - fprintf(stderr, " "); - } - - switch (node->type) { - case NodeTypeRoot: - fprintf(stderr, "%s\n", node_type_str(node->type)); - for (int i = 0; i < node->data.root.fn_decls.length; i += 1) { - AstNode *child = node->data.root.fn_decls.at(i); - ast_print(child, indent + 2); - } - break; - case NodeTypeFnDecl: - { - Buf *name_buf = &node->data.fn_decl.name; - fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf)); - - for (int i = 0; i < node->data.fn_decl.params.length; i += 1) { - AstNode *child = node->data.fn_decl.params.at(i); - ast_print(child, indent + 2); - } - - ast_print(node->data.fn_decl.return_type, indent + 2); - - ast_print(node->data.fn_decl.body, indent + 2); - - break; - } - default: - fprintf(stderr, "%s\n", node_type_str(node->type)); - break; - } -} - char cur_dir[1024]; int main(int argc, char **argv) { diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000000..f5da8addeb --- /dev/null +++ 
b/src/parser.cpp @@ -0,0 +1,80 @@ +#include "parser.hpp" + +#include +#include + +void ast_error(Token *token, const char *format, ...) { + int line = token->start_line + 1; + int column = token->start_column + 1; + + va_list ap; + va_start(ap, format); + fprintf(stderr, "Error: Line %d, column %d: ", line, column); + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(EXIT_FAILURE); +} + +const char *node_type_str(NodeType node_type) { + switch (node_type) { + case NodeTypeRoot: + return "Root"; + case NodeTypeFnDecl: + return "FnDecl"; + case NodeTypeParamDecl: + return "ParamDecl"; + case NodeTypeType: + return "Type"; + case NodeTypePointerType: + return "PointerType"; + case NodeTypeBlock: + return "Block"; + case NodeTypeStatement: + return "Statement"; + case NodeTypeExpressionStatement: + return "ExpressionStatement"; + case NodeTypeReturnStatement: + return "ReturnStatement"; + case NodeTypeExpression: + return "Expression"; + case NodeTypeFnCall: + return "FnCall"; + } + zig_panic("unreachable"); +} + +void ast_print(AstNode *node, int indent) { + for (int i = 0; i < indent; i += 1) { + fprintf(stderr, " "); + } + + switch (node->type) { + case NodeTypeRoot: + fprintf(stderr, "%s\n", node_type_str(node->type)); + for (int i = 0; i < node->data.root.fn_decls.length; i += 1) { + AstNode *child = node->data.root.fn_decls.at(i); + ast_print(child, indent + 2); + } + break; + case NodeTypeFnDecl: + { + Buf *name_buf = &node->data.fn_decl.name; + fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf)); + + for (int i = 0; i < node->data.fn_decl.params.length; i += 1) { + AstNode *child = node->data.fn_decl.params.at(i); + ast_print(child, indent + 2); + } + + ast_print(node->data.fn_decl.return_type, indent + 2); + + ast_print(node->data.fn_decl.body, indent + 2); + + break; + } + default: + fprintf(stderr, "%s\n", node_type_str(node->type)); + break; + } +} diff --git a/src/parser.hpp b/src/parser.hpp index 
0f15228d01..db2f5eb623 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -82,6 +82,11 @@ struct AstNode { __attribute__ ((format (printf, 2, 3))) void ast_error(Token *token, const char *format, ...); +// This function is provided by generated code, generated by parsergen.cpp AstNode * ast_parse(Buf *buf, ZigList *tokens); +const char *node_type_str(NodeType node_type); + +void ast_print(AstNode *node, int indent); + #endif diff --git a/src/parsergen.cpp b/src/parsergen.cpp index 4fcf54032c..a812f990c0 100644 --- a/src/parsergen.cpp +++ b/src/parsergen.cpp @@ -190,12 +190,17 @@ struct RuleNode { enum ParserStateType { ParserStateTypeError, ParserStateTypeOk, + ParserStateTypeCapture, }; struct ParserStateError { Buf *msg; }; +struct ParserStateCapture { + Buf *body; +}; + struct ParserState { ParserStateType type; // One for each token ID. @@ -203,6 +208,7 @@ struct ParserState { int index; union { ParserStateError error; + ParserStateCapture capture; }; }; @@ -278,6 +284,8 @@ static void gen(Gen *g, RuleNode *node) { RuleNode *child = node->tuple.children.at(i); gen(g, child); } + g->cur_state->type = ParserStateTypeCapture; + g->cur_state->capture.body = &node->tuple.body; } break; case RuleNodeTypeMany: @@ -598,7 +606,8 @@ int main(int argc, char **argv) { g.cur_state = create_state(&g, ParserStateTypeOk); gen(&g, g.root); - fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n"); + fprintf(out_f, "/* This file is generated by parsergen.cpp */\n"); + fprintf(out_f, "\n"); fprintf(out_f, "#include \"src/parser.hpp\"\n"); fprintf(out_f, "#include \n"); @@ -616,6 +625,17 @@ int main(int argc, char **argv) { fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n", buf_ptr(&token->name), token->id); } + fprintf(out_f, "\n"); + + /* TODO + fprintf(out_f, "struct ParserGenNode{\n"); + fprintf(out_f, " union {\n"); + fprintf(out_f, " [%d];\n", biggest_tuple_len); + fprintf(out_f, " Token *token;\n"); + fprintf(out_f, " };\n"); + 
fprintf(out_f, "};\n"); + fprintf(out_f, "\n"); + */ fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList *tokens) {\n"); @@ -644,7 +664,6 @@ int main(int argc, char **argv) { for (int i = 0; i < g.transition_table.length; i += 1) { ParserState *state = g.transition_table.at(i); fprintf(out_f, " case %d:\n", i); - fprintf(out_f, " fprintf(stderr, \"state = %%d\\n\", state);\n"); switch (state->type) { case ParserStateTypeError: fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg)); @@ -655,6 +674,10 @@ int main(int argc, char **argv) { state->index, g.transition_table.length); fprintf(out_f, " state = transition[%d][token->id];\n", state->index); break; + case ParserStateTypeCapture: + // TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body)); + fprintf(out_f, " state = transition[%d][token->id];\n", state->index); + break; } fprintf(out_f, " break;\n"); } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 9a54776cc5..fd3872519c 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -1,3 +1,10 @@ +/* + * Copyright (c) 2015 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + #include "tokenizer.hpp" #include "util.hpp" diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index 4904bb0382..5baf9bc231 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -1,3 +1,10 @@ +/* + * Copyright (c) 2015 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + #ifndef ZIG_TOKENIZER_HPP #define ZIG_TOKENIZER_HPP diff --git a/test/hello.zig b/test/hello.zig index bfcb87ff9c..4a90c871be 100644 --- a/test/hello.zig +++ b/test/hello.zig @@ -1,6 +1,4 @@ - - -fn main(argc: isize, argv: *mut u8) -> isize { +fn main(argc: i32, argv: *mut u8) -> i32 { puts("Hello, world!\n"); return 0; }