mirror of
https://github.com/ziglang/zig.git
synced 2025-12-24 15:13:08 +00:00
progress toward more complex parser gen
This commit is contained in:
parent
c36cd9d313
commit
174baa49bd
@ -21,14 +21,15 @@ include_directories(
|
|||||||
)
|
)
|
||||||
|
|
||||||
set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt")
|
set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt")
|
||||||
set(PARSER_CPP "${CMAKE_BINARY_DIR}/parser.cpp")
|
set(PARSER_GENERATED_CPP "${CMAKE_BINARY_DIR}/parser_generated.cpp")
|
||||||
|
|
||||||
set(ZIG_SOURCES
|
set(ZIG_SOURCES
|
||||||
"${CMAKE_SOURCE_DIR}/src/main.cpp"
|
"${CMAKE_SOURCE_DIR}/src/main.cpp"
|
||||||
"${CMAKE_SOURCE_DIR}/src/util.cpp"
|
"${CMAKE_SOURCE_DIR}/src/util.cpp"
|
||||||
"${CMAKE_SOURCE_DIR}/src/buffer.cpp"
|
"${CMAKE_SOURCE_DIR}/src/buffer.cpp"
|
||||||
"${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
|
"${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
|
||||||
${PARSER_CPP}
|
"${CMAKE_SOURCE_DIR}/src/parser.cpp"
|
||||||
|
${PARSER_GENERATED_CPP}
|
||||||
)
|
)
|
||||||
|
|
||||||
set(PARSERGEN_SOURCES
|
set(PARSERGEN_SOURCES
|
||||||
@ -68,8 +69,8 @@ set_target_properties(parsergen PROPERTIES
|
|||||||
|
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${PARSER_CPP}
|
OUTPUT ${PARSER_GENERATED_CPP}
|
||||||
COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_CPP}
|
COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_GENERATED_CPP}
|
||||||
DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES}
|
DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES}
|
||||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||||
)
|
)
|
||||||
|
|||||||
36
README.md
36
README.md
@ -19,7 +19,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
|
|||||||
* Eliminate the need for C headers (when using zig internally).
|
* Eliminate the need for C headers (when using zig internally).
|
||||||
* Ability to declare dependencies as Git URLs with commit locking (can
|
* Ability to declare dependencies as Git URLs with commit locking (can
|
||||||
provide a tag or sha1).
|
provide a tag or sha1).
|
||||||
* Rust-style enums.
|
* Tagged union enum type.
|
||||||
* Opinionated when it makes life easier.
|
* Opinionated when it makes life easier.
|
||||||
- Tab character in source code is a compile error.
|
- Tab character in source code is a compile error.
|
||||||
- Whitespace at the end of line is a compile error.
|
- Whitespace at the end of line is a compile error.
|
||||||
@ -32,23 +32,29 @@ readable, safe, optimal, and concise code to solve any computing problem.
|
|||||||
* Hello, world.
|
* Hello, world.
|
||||||
- Build AST
|
- Build AST
|
||||||
- Code Gen
|
- Code Gen
|
||||||
|
- Produce .o file.
|
||||||
|
* Produce executable file instead of .o file.
|
||||||
|
* Add debugging symbols.
|
||||||
|
* Debug/Release mode.
|
||||||
* C style comments.
|
* C style comments.
|
||||||
* Unit tests.
|
* Unit tests.
|
||||||
* Simple .so library
|
* Simple .so library
|
||||||
* How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.
|
* How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.
|
||||||
|
|
||||||
## Grammar
|
### Primitive Numeric Types:
|
||||||
|
|
||||||
```
|
zig | C equivalent | Description
|
||||||
Root : FnDecl*
|
-------|--------------|-------------------------------
|
||||||
FnDecl : TokenFn TokenSymbol TokenLParen list(ParamDecl, TokenComma, 0) TokenRParen (TokenArrow Type)? Block
|
i8 | int8_t | signed 8-bit integer
|
||||||
ParamDecl : TokenSymbol TokenColon Type
|
u8 | uint8_t | unsigned 8-bit integer
|
||||||
Type : TokenSymbol | PointerType
|
i16 | int16_t | signed 16-bit integer
|
||||||
PointerType : TokenStar (TokenConst | TokenMut) Type
|
u16 | uint16_t | unsigned 16-bit integer
|
||||||
Block : TokenLBrace Statement* Expression? TokenRBrace
|
i32 | int32_t | signed 32-bit integer
|
||||||
Statement : ExpressionStatement | ReturnStatement
|
u32 | uint32_t | unsigned 32-bit integer
|
||||||
ExpressionStatement : Expression TokenSemicolon
|
i64 | int64_t | signed 64-bit integer
|
||||||
ReturnStatement : TokenReturn Expression TokenSemicolon
|
u64 | uint64_t | unsigned 64-bit integer
|
||||||
Expression : TokenNumber | TokenString | FnCall
|
f32 | float | 32-bit IEEE 754 floating point
|
||||||
FnCall : TokenSymbol TokenLParen list(Expression, TokenComma, 0) TokenRParen
|
f64 | double | 64-bit IEEE 754 floating point
|
||||||
```
|
f128 | long double | 128-bit IEEE 754 floating point
|
||||||
|
isize | ssize_t | signed pointer sized integer
|
||||||
|
usize | size_t | unsigned pointer sized integer
|
||||||
|
|||||||
77
src/main.cpp
77
src/main.cpp
@ -15,7 +15,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdarg.h>
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -50,82 +49,6 @@ static Buf *fetch_file(FILE *f) {
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_error(Token *token, const char *format, ...) {
|
|
||||||
int line = token->start_line + 1;
|
|
||||||
int column = token->start_column + 1;
|
|
||||||
|
|
||||||
va_list ap;
|
|
||||||
va_start(ap, format);
|
|
||||||
fprintf(stderr, "Error: Line %d, column %d: ", line, column);
|
|
||||||
vfprintf(stderr, format, ap);
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
va_end(ap);
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char *node_type_str(NodeType node_type) {
|
|
||||||
switch (node_type) {
|
|
||||||
case NodeTypeRoot:
|
|
||||||
return "Root";
|
|
||||||
case NodeTypeFnDecl:
|
|
||||||
return "FnDecl";
|
|
||||||
case NodeTypeParamDecl:
|
|
||||||
return "ParamDecl";
|
|
||||||
case NodeTypeType:
|
|
||||||
return "Type";
|
|
||||||
case NodeTypePointerType:
|
|
||||||
return "PointerType";
|
|
||||||
case NodeTypeBlock:
|
|
||||||
return "Block";
|
|
||||||
case NodeTypeStatement:
|
|
||||||
return "Statement";
|
|
||||||
case NodeTypeExpressionStatement:
|
|
||||||
return "ExpressionStatement";
|
|
||||||
case NodeTypeReturnStatement:
|
|
||||||
return "ReturnStatement";
|
|
||||||
case NodeTypeExpression:
|
|
||||||
return "Expression";
|
|
||||||
case NodeTypeFnCall:
|
|
||||||
return "FnCall";
|
|
||||||
}
|
|
||||||
zig_panic("unreachable");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ast_print(AstNode *node, int indent) {
|
|
||||||
for (int i = 0; i < indent; i += 1) {
|
|
||||||
fprintf(stderr, " ");
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (node->type) {
|
|
||||||
case NodeTypeRoot:
|
|
||||||
fprintf(stderr, "%s\n", node_type_str(node->type));
|
|
||||||
for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
|
|
||||||
AstNode *child = node->data.root.fn_decls.at(i);
|
|
||||||
ast_print(child, indent + 2);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case NodeTypeFnDecl:
|
|
||||||
{
|
|
||||||
Buf *name_buf = &node->data.fn_decl.name;
|
|
||||||
fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
|
|
||||||
|
|
||||||
for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
|
|
||||||
AstNode *child = node->data.fn_decl.params.at(i);
|
|
||||||
ast_print(child, indent + 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
ast_print(node->data.fn_decl.return_type, indent + 2);
|
|
||||||
|
|
||||||
ast_print(node->data.fn_decl.body, indent + 2);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
fprintf(stderr, "%s\n", node_type_str(node->type));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char cur_dir[1024];
|
char cur_dir[1024];
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|||||||
80
src/parser.cpp
Normal file
80
src/parser.cpp
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
void ast_error(Token *token, const char *format, ...) {
|
||||||
|
int line = token->start_line + 1;
|
||||||
|
int column = token->start_column + 1;
|
||||||
|
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, format);
|
||||||
|
fprintf(stderr, "Error: Line %d, column %d: ", line, column);
|
||||||
|
vfprintf(stderr, format, ap);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
va_end(ap);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *node_type_str(NodeType node_type) {
|
||||||
|
switch (node_type) {
|
||||||
|
case NodeTypeRoot:
|
||||||
|
return "Root";
|
||||||
|
case NodeTypeFnDecl:
|
||||||
|
return "FnDecl";
|
||||||
|
case NodeTypeParamDecl:
|
||||||
|
return "ParamDecl";
|
||||||
|
case NodeTypeType:
|
||||||
|
return "Type";
|
||||||
|
case NodeTypePointerType:
|
||||||
|
return "PointerType";
|
||||||
|
case NodeTypeBlock:
|
||||||
|
return "Block";
|
||||||
|
case NodeTypeStatement:
|
||||||
|
return "Statement";
|
||||||
|
case NodeTypeExpressionStatement:
|
||||||
|
return "ExpressionStatement";
|
||||||
|
case NodeTypeReturnStatement:
|
||||||
|
return "ReturnStatement";
|
||||||
|
case NodeTypeExpression:
|
||||||
|
return "Expression";
|
||||||
|
case NodeTypeFnCall:
|
||||||
|
return "FnCall";
|
||||||
|
}
|
||||||
|
zig_panic("unreachable");
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_print(AstNode *node, int indent) {
|
||||||
|
for (int i = 0; i < indent; i += 1) {
|
||||||
|
fprintf(stderr, " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (node->type) {
|
||||||
|
case NodeTypeRoot:
|
||||||
|
fprintf(stderr, "%s\n", node_type_str(node->type));
|
||||||
|
for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
|
||||||
|
AstNode *child = node->data.root.fn_decls.at(i);
|
||||||
|
ast_print(child, indent + 2);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case NodeTypeFnDecl:
|
||||||
|
{
|
||||||
|
Buf *name_buf = &node->data.fn_decl.name;
|
||||||
|
fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
|
||||||
|
|
||||||
|
for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
|
||||||
|
AstNode *child = node->data.fn_decl.params.at(i);
|
||||||
|
ast_print(child, indent + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
ast_print(node->data.fn_decl.return_type, indent + 2);
|
||||||
|
|
||||||
|
ast_print(node->data.fn_decl.body, indent + 2);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "%s\n", node_type_str(node->type));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -82,6 +82,11 @@ struct AstNode {
|
|||||||
__attribute__ ((format (printf, 2, 3)))
|
__attribute__ ((format (printf, 2, 3)))
|
||||||
void ast_error(Token *token, const char *format, ...);
|
void ast_error(Token *token, const char *format, ...);
|
||||||
|
|
||||||
|
// This function is provided by generated code, generated by parsergen.cpp
|
||||||
AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens);
|
AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens);
|
||||||
|
|
||||||
|
const char *node_type_str(NodeType node_type);
|
||||||
|
|
||||||
|
void ast_print(AstNode *node, int indent);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -190,12 +190,17 @@ struct RuleNode {
|
|||||||
enum ParserStateType {
|
enum ParserStateType {
|
||||||
ParserStateTypeError,
|
ParserStateTypeError,
|
||||||
ParserStateTypeOk,
|
ParserStateTypeOk,
|
||||||
|
ParserStateTypeCapture,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ParserStateError {
|
struct ParserStateError {
|
||||||
Buf *msg;
|
Buf *msg;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ParserStateCapture {
|
||||||
|
Buf *body;
|
||||||
|
};
|
||||||
|
|
||||||
struct ParserState {
|
struct ParserState {
|
||||||
ParserStateType type;
|
ParserStateType type;
|
||||||
// One for each token ID.
|
// One for each token ID.
|
||||||
@ -203,6 +208,7 @@ struct ParserState {
|
|||||||
int index;
|
int index;
|
||||||
union {
|
union {
|
||||||
ParserStateError error;
|
ParserStateError error;
|
||||||
|
ParserStateCapture capture;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -278,6 +284,8 @@ static void gen(Gen *g, RuleNode *node) {
|
|||||||
RuleNode *child = node->tuple.children.at(i);
|
RuleNode *child = node->tuple.children.at(i);
|
||||||
gen(g, child);
|
gen(g, child);
|
||||||
}
|
}
|
||||||
|
g->cur_state->type = ParserStateTypeCapture;
|
||||||
|
g->cur_state->capture.body = &node->tuple.body;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case RuleNodeTypeMany:
|
case RuleNodeTypeMany:
|
||||||
@ -598,7 +606,8 @@ int main(int argc, char **argv) {
|
|||||||
g.cur_state = create_state(&g, ParserStateTypeOk);
|
g.cur_state = create_state(&g, ParserStateTypeOk);
|
||||||
gen(&g, g.root);
|
gen(&g, g.root);
|
||||||
|
|
||||||
fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n");
|
fprintf(out_f, "/* This file is generated by parsergen.cpp */\n");
|
||||||
|
fprintf(out_f, "\n");
|
||||||
fprintf(out_f, "#include \"src/parser.hpp\"\n");
|
fprintf(out_f, "#include \"src/parser.hpp\"\n");
|
||||||
fprintf(out_f, "#include <stdio.h>\n");
|
fprintf(out_f, "#include <stdio.h>\n");
|
||||||
|
|
||||||
@ -616,6 +625,17 @@ int main(int argc, char **argv) {
|
|||||||
fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
|
fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
|
||||||
buf_ptr(&token->name), token->id);
|
buf_ptr(&token->name), token->id);
|
||||||
}
|
}
|
||||||
|
fprintf(out_f, "\n");
|
||||||
|
|
||||||
|
/* TODO
|
||||||
|
fprintf(out_f, "struct ParserGenNode{\n");
|
||||||
|
fprintf(out_f, " union {\n");
|
||||||
|
fprintf(out_f, " [%d];\n", biggest_tuple_len);
|
||||||
|
fprintf(out_f, " Token *token;\n");
|
||||||
|
fprintf(out_f, " };\n");
|
||||||
|
fprintf(out_f, "};\n");
|
||||||
|
fprintf(out_f, "\n");
|
||||||
|
*/
|
||||||
|
|
||||||
fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
|
fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
|
||||||
|
|
||||||
@ -644,7 +664,6 @@ int main(int argc, char **argv) {
|
|||||||
for (int i = 0; i < g.transition_table.length; i += 1) {
|
for (int i = 0; i < g.transition_table.length; i += 1) {
|
||||||
ParserState *state = g.transition_table.at(i);
|
ParserState *state = g.transition_table.at(i);
|
||||||
fprintf(out_f, " case %d:\n", i);
|
fprintf(out_f, " case %d:\n", i);
|
||||||
fprintf(out_f, " fprintf(stderr, \"state = %%d\\n\", state);\n");
|
|
||||||
switch (state->type) {
|
switch (state->type) {
|
||||||
case ParserStateTypeError:
|
case ParserStateTypeError:
|
||||||
fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
|
fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
|
||||||
@ -655,6 +674,10 @@ int main(int argc, char **argv) {
|
|||||||
state->index, g.transition_table.length);
|
state->index, g.transition_table.length);
|
||||||
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
||||||
break;
|
break;
|
||||||
|
case ParserStateTypeCapture:
|
||||||
|
// TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body));
|
||||||
|
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
fprintf(out_f, " break;\n");
|
fprintf(out_f, " break;\n");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,3 +1,10 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 Andrew Kelley
|
||||||
|
*
|
||||||
|
* This file is part of zig, which is MIT licensed.
|
||||||
|
* See http://opensource.org/licenses/MIT
|
||||||
|
*/
|
||||||
|
|
||||||
#include "tokenizer.hpp"
|
#include "tokenizer.hpp"
|
||||||
#include "util.hpp"
|
#include "util.hpp"
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,10 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 Andrew Kelley
|
||||||
|
*
|
||||||
|
* This file is part of zig, which is MIT licensed.
|
||||||
|
* See http://opensource.org/licenses/MIT
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef ZIG_TOKENIZER_HPP
|
#ifndef ZIG_TOKENIZER_HPP
|
||||||
#define ZIG_TOKENIZER_HPP
|
#define ZIG_TOKENIZER_HPP
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,4 @@
|
|||||||
|
fn main(argc: i32, argv: *mut u8) -> i32 {
|
||||||
|
|
||||||
fn main(argc: isize, argv: *mut u8) -> isize {
|
|
||||||
puts("Hello, world!\n");
|
puts("Hello, world!\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user