mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
tokenizing hello.zig
This commit is contained in:
parent
5f48463bdd
commit
34f8d80eac
31
README.md
31
README.md
@ -1,5 +1,32 @@
|
||||
# zig lang
|
||||
|
||||
C upgrade.
|
||||
An experiment in writing a low-level programming language with the intent to
|
||||
replace C. Zig intends to be a small language, yet powerful enough to write
|
||||
readable, safe, optimal, and concise code to solve any computing problem.
|
||||
|
||||
Start with C.
|
||||
## Goals
|
||||
|
||||
* Ability to run arbitrary code at compile time and generate code.
|
||||
* Completely compatible with C libraries with no wrapper necessary.
|
||||
* Creating a C library should be a primary use case. Should be easy to export
|
||||
an auto-generated .h file.
|
||||
* Generics such as containers.
|
||||
* Do not depend on libc.
|
||||
* First class error code support.
|
||||
* Include documentation generator.
|
||||
* Eliminate the need for make, cmake, etc.
|
||||
* Friendly toward package maintainers.
|
||||
* Eliminate the need for C headers (when using zig internally).
|
||||
* Ability to declare dependencies as Git URLs with commit locking (can
|
||||
provide a tag or sha1).
|
||||
* Rust-style enums.
|
||||
* Opinionated when it makes life easier.
|
||||
- Tab character in source code is a compile error.
|
||||
- Whitespace at the end of line is a compile error.
|
||||
* Resilient to parsing errors to make IDE integration work well.
|
||||
* Source code is UTF-8.
|
||||
|
||||
## Roadmap
|
||||
|
||||
* Hello, world.
|
||||
* How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.
|
||||
|
||||
@ -23,3 +23,24 @@ Buf *buf_sprintf(const char *format, ...) {
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
void buf_appendf(Buf *buf, const char *format, ...) {
|
||||
va_list ap, ap2;
|
||||
va_start(ap, format);
|
||||
va_copy(ap2, ap);
|
||||
|
||||
int len1 = vsnprintf(nullptr, 0, format, ap);
|
||||
assert(len1 >= 0);
|
||||
|
||||
size_t required_size = len1 + 1;
|
||||
|
||||
int orig_len = buf_len(buf);
|
||||
|
||||
buf_resize(buf, orig_len + required_size);
|
||||
|
||||
int len2 = vsnprintf(buf_ptr(buf) + orig_len, required_size, format, ap2);
|
||||
assert(len2 == len1);
|
||||
|
||||
va_end(ap2);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
@ -93,6 +93,9 @@ static inline void buf_append_char(Buf *buf, uint8_t c) {
|
||||
buf_append_mem(buf, (const char *)&c, 1);
|
||||
}
|
||||
|
||||
void buf_appendf(Buf *buf, const char *format, ...)
|
||||
__attribute__ ((format (printf, 2, 3)));
|
||||
|
||||
static inline bool buf_eql_mem(Buf *buf, const char *mem, int mem_len) {
|
||||
if (buf_len(buf) != mem_len)
|
||||
return false;
|
||||
|
||||
293
src/main.cpp
293
src/main.cpp
@ -129,7 +129,6 @@ static Buf *fetch_file(FILE *f) {
|
||||
|
||||
|
||||
enum TokenId {
|
||||
TokenIdDirective,
|
||||
TokenIdSymbol,
|
||||
TokenIdLParen,
|
||||
TokenIdRParen,
|
||||
@ -141,6 +140,9 @@ enum TokenId {
|
||||
TokenIdSemicolon,
|
||||
TokenIdNumberLiteral,
|
||||
TokenIdPlus,
|
||||
TokenIdColon,
|
||||
TokenIdArrow,
|
||||
TokenIdDash,
|
||||
};
|
||||
|
||||
struct Token {
|
||||
@ -153,14 +155,10 @@ struct Token {
|
||||
|
||||
enum TokenizeState {
|
||||
TokenizeStateStart,
|
||||
TokenizeStateDirective,
|
||||
TokenizeStateDirectiveName,
|
||||
TokenizeStateIncludeQuote,
|
||||
TokenizeStateDirectiveEnd,
|
||||
TokenizeStateInclude,
|
||||
TokenizeStateSymbol,
|
||||
TokenizeStateString,
|
||||
TokenizeStateNumber,
|
||||
TokenizeStateString,
|
||||
TokenizeStateSawDash,
|
||||
};
|
||||
|
||||
struct Tokenize {
|
||||
@ -171,11 +169,7 @@ struct Tokenize {
|
||||
int line;
|
||||
int column;
|
||||
Token *cur_tok;
|
||||
Buf *directive_name;
|
||||
Buf *cur_dir_path;
|
||||
uint8_t unquote_char;
|
||||
int quote_start_pos;
|
||||
Buf *include_path;
|
||||
ZigList<char *> *include_paths;
|
||||
};
|
||||
|
||||
@ -217,68 +211,6 @@ static void end_token(Tokenize *t) {
|
||||
t->cur_tok = nullptr;
|
||||
}
|
||||
|
||||
static void put_back(Tokenize *t, int count) {
|
||||
t->pos -= count;
|
||||
}
|
||||
|
||||
static void begin_directive(Tokenize *t) {
|
||||
t->state = TokenizeStateDirective;
|
||||
begin_token(t, TokenIdDirective);
|
||||
assert(!t->directive_name);
|
||||
t->directive_name = buf_alloc();
|
||||
}
|
||||
|
||||
static bool find_and_include_file(Tokenize *t, char *dir_path, char *file_path) {
|
||||
Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path);
|
||||
|
||||
FILE *f = fopen(buf_ptr(full_path), "rb");
|
||||
if (!f)
|
||||
return false;
|
||||
|
||||
Buf *contents = fetch_file(f);
|
||||
|
||||
buf_splice_buf(t->buf, t->pos, t->pos, contents);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void render_include(Tokenize *t, Buf *target_path, char unquote_char) {
|
||||
if (unquote_char == '"') {
|
||||
if (find_and_include_file(t, buf_ptr(t->cur_dir_path), buf_ptr(target_path)))
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < t->include_paths->length; i += 1) {
|
||||
char *include_path = t->include_paths->at(i);
|
||||
if (find_and_include_file(t, include_path, buf_ptr(target_path)))
|
||||
return;
|
||||
}
|
||||
tokenize_error(t, "include path \"%s\" not found", buf_ptr(target_path));
|
||||
}
|
||||
|
||||
static void end_directive(Tokenize *t) {
|
||||
end_token(t);
|
||||
if (t->include_path) {
|
||||
render_include(t, t->include_path, t->unquote_char);
|
||||
t->include_path = nullptr;
|
||||
}
|
||||
t->state = TokenizeStateStart;
|
||||
}
|
||||
|
||||
static void end_directive_name(Tokenize *t) {
|
||||
if (buf_eql_str(t->directive_name, "include")) {
|
||||
t->state = TokenizeStateInclude;
|
||||
t->directive_name = nullptr;
|
||||
} else {
|
||||
tokenize_error(t, "invalid directive name: \"%s\"", buf_ptr(t->directive_name));
|
||||
}
|
||||
}
|
||||
|
||||
static void end_symbol(Tokenize *t) {
|
||||
put_back(t, 1);
|
||||
end_token(t);
|
||||
t->state = TokenizeStateStart;
|
||||
}
|
||||
|
||||
static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *cur_dir_path) {
|
||||
Tokenize t = {0};
|
||||
t.tokens = allocate<ZigList<Token>>(1);
|
||||
@ -300,9 +232,6 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
|
||||
t.state = TokenizeStateNumber;
|
||||
begin_token(&t, TokenIdNumberLiteral);
|
||||
break;
|
||||
case '#':
|
||||
begin_directive(&t);
|
||||
break;
|
||||
case '"':
|
||||
begin_token(&t, TokenIdStringLiteral);
|
||||
t.state = TokenizeStateString;
|
||||
@ -335,88 +264,31 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
|
||||
begin_token(&t, TokenIdSemicolon);
|
||||
end_token(&t);
|
||||
break;
|
||||
case ':':
|
||||
begin_token(&t, TokenIdColon);
|
||||
end_token(&t);
|
||||
break;
|
||||
case '+':
|
||||
begin_token(&t, TokenIdPlus);
|
||||
end_token(&t);
|
||||
break;
|
||||
case '-':
|
||||
begin_token(&t, TokenIdDash);
|
||||
t.state = TokenizeStateSawDash;
|
||||
break;
|
||||
default:
|
||||
tokenize_error(&t, "invalid character: '%c'", c);
|
||||
}
|
||||
break;
|
||||
case TokenizeStateDirective:
|
||||
switch (c) {
|
||||
case '\n':
|
||||
end_directive_name(&t);
|
||||
end_directive(&t);
|
||||
break;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case 0xb:
|
||||
break;
|
||||
case SYMBOL_CHAR:
|
||||
t.state = TokenizeStateDirectiveName;
|
||||
buf_append_char(t.directive_name, c);
|
||||
break;
|
||||
default:
|
||||
tokenize_error(&t, "invalid directive character: '%c'", c);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateDirectiveName:
|
||||
switch (c) {
|
||||
case WHITESPACE:
|
||||
end_directive_name(&t);
|
||||
break;
|
||||
case SYMBOL_CHAR:
|
||||
buf_append_char(t.directive_name, c);
|
||||
break;
|
||||
default:
|
||||
tokenize_error(&t, "invalid directive name character: '%c'", c);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateInclude:
|
||||
switch (c) {
|
||||
case WHITESPACE:
|
||||
break;
|
||||
case '<':
|
||||
case '"':
|
||||
t.state = TokenizeStateIncludeQuote;
|
||||
t.quote_start_pos = t.pos;
|
||||
t.unquote_char = (c == '<') ? '>' : '"';
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateIncludeQuote:
|
||||
if (c == t.unquote_char) {
|
||||
t.include_path = buf_slice(t.buf, t.quote_start_pos + 1, t.pos);
|
||||
t.state = TokenizeStateDirectiveEnd;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateDirectiveEnd:
|
||||
switch (c) {
|
||||
case '\n':
|
||||
end_directive(&t);
|
||||
break;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case 0xb:
|
||||
break;
|
||||
default:
|
||||
tokenize_error(&t, "expected whitespace or newline: '%c'", c);
|
||||
}
|
||||
break;
|
||||
case TokenizeStateSymbol:
|
||||
switch (c) {
|
||||
case SYMBOL_CHAR:
|
||||
break;
|
||||
default:
|
||||
end_symbol(&t);
|
||||
break;
|
||||
t.pos -= 1;
|
||||
end_token(&t);
|
||||
t.state = TokenizeStateStart;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateString:
|
||||
@ -434,7 +306,22 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
|
||||
case DIGIT:
|
||||
break;
|
||||
default:
|
||||
end_symbol(&t);
|
||||
t.pos -= 1;
|
||||
end_token(&t);
|
||||
t.state = TokenizeStateStart;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateSawDash:
|
||||
switch (c) {
|
||||
case '>':
|
||||
t.cur_tok->id = TokenIdArrow;
|
||||
end_token(&t);
|
||||
t.state = TokenizeStateStart;
|
||||
break;
|
||||
default:
|
||||
end_token(&t);
|
||||
t.state = TokenizeStateStart;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -450,30 +337,17 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
|
||||
switch (t.state) {
|
||||
case TokenizeStateStart:
|
||||
break;
|
||||
case TokenizeStateDirective:
|
||||
end_directive(&t);
|
||||
break;
|
||||
case TokenizeStateDirectiveName:
|
||||
end_directive_name(&t);
|
||||
end_directive(&t);
|
||||
break;
|
||||
case TokenizeStateInclude:
|
||||
tokenize_error(&t, "missing include path");
|
||||
break;
|
||||
case TokenizeStateSymbol:
|
||||
end_symbol(&t);
|
||||
end_token(&t);
|
||||
break;
|
||||
case TokenizeStateString:
|
||||
tokenize_error(&t, "unterminated string");
|
||||
break;
|
||||
case TokenizeStateNumber:
|
||||
end_symbol(&t);
|
||||
end_token(&t);
|
||||
break;
|
||||
case TokenizeStateIncludeQuote:
|
||||
tokenize_error(&t, "unterminated include path");
|
||||
break;
|
||||
case TokenizeStateDirectiveEnd:
|
||||
end_directive(&t);
|
||||
case TokenizeStateSawDash:
|
||||
end_token(&t);
|
||||
break;
|
||||
}
|
||||
assert(!t.cur_tok);
|
||||
@ -482,7 +356,6 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
|
||||
|
||||
static const char * token_name(Token *token) {
|
||||
switch (token->id) {
|
||||
case TokenIdDirective: return "Directive";
|
||||
case TokenIdSymbol: return "Symbol";
|
||||
case TokenIdLParen: return "LParen";
|
||||
case TokenIdRParen: return "RParen";
|
||||
@ -494,6 +367,9 @@ static const char * token_name(Token *token) {
|
||||
case TokenIdSemicolon: return "Semicolon";
|
||||
case TokenIdNumberLiteral: return "NumberLiteral";
|
||||
case TokenIdPlus: return "Plus";
|
||||
case TokenIdColon: return "Colon";
|
||||
case TokenIdArrow: return "Arrow";
|
||||
case TokenIdDash: return "Dash";
|
||||
}
|
||||
return "(invalid token)";
|
||||
}
|
||||
@ -507,6 +383,83 @@ static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
|
||||
}
|
||||
}
|
||||
|
||||
enum NodeType {
|
||||
NodeTypeRoot,
|
||||
};
|
||||
|
||||
struct AstNode {
|
||||
enum NodeType type;
|
||||
ZigList<AstNode *> children;
|
||||
};
|
||||
|
||||
enum AstState {
|
||||
AstStateStart,
|
||||
};
|
||||
|
||||
struct BuildAst {
|
||||
Buf *buf;
|
||||
AstNode *root;
|
||||
AstState state;
|
||||
int line;
|
||||
int column;
|
||||
};
|
||||
|
||||
__attribute__ ((format (printf, 2, 3)))
|
||||
static void ast_error(BuildAst *b, const char *format, ...) {
|
||||
int line = b->line + 1;
|
||||
int column = b->column + 1;
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
fprintf(stderr, "Error: Line %d, column %d: ", line, column);
|
||||
vfprintf(stderr, format, ap);
|
||||
fprintf(stderr, "\n");
|
||||
va_end(ap);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) {
|
||||
size_t str_len = strlen(str);
|
||||
if (str_len != mem_len)
|
||||
return false;
|
||||
return memcmp(mem, str, mem_len) == 0;
|
||||
}
|
||||
|
||||
|
||||
static AstNode *build_ast(Buf *buf, ZigList<Token> *tokens) {
|
||||
BuildAst b = {0};
|
||||
b.buf = buf;
|
||||
b.root = allocate<AstNode>(1);
|
||||
b.root->type = NodeTypeRoot;
|
||||
|
||||
for (int i = 0; i < tokens->length; i += 1) {
|
||||
Token *token = &tokens->at(i);
|
||||
const char *token_str = buf_ptr(buf) + token->start_pos;
|
||||
int token_len = token->end_pos - token->start_pos;
|
||||
b.line = token->start_line;
|
||||
b.column = token->start_column;
|
||||
switch (b.state) {
|
||||
case AstStateStart:
|
||||
if (mem_eql_str(token_str, token_len, "fn")) {
|
||||
zig_panic("TODO fn");
|
||||
} else {
|
||||
Buf msg = {0};
|
||||
buf_append_str(&msg, "unexpected symbol: '");
|
||||
buf_append_mem(&msg, token_str, token_len);
|
||||
buf_append_str(&msg, "'");
|
||||
ast_error(&b, "%s", buf_ptr(&msg));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return b.root;
|
||||
}
|
||||
|
||||
static void print_ast(AstNode *node) {
|
||||
zig_panic("TODO");
|
||||
}
|
||||
|
||||
char cur_dir[1024];
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
@ -559,18 +512,18 @@ int main(int argc, char **argv) {
|
||||
|
||||
Buf *in_data = fetch_file(in_f);
|
||||
|
||||
fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data));
|
||||
fprintf(stderr, "Original source:\n");
|
||||
fprintf(stderr, "----------------\n");
|
||||
fprintf(stderr, "%s\n", buf_ptr(in_data));
|
||||
|
||||
ZigList<Token> *tokens = tokenize(in_data, &include_paths, cur_dir_path);
|
||||
|
||||
fprintf(stderr, "\nTokens:\n");
|
||||
fprintf(stderr, "---------\n");
|
||||
print_tokens(in_data, tokens);
|
||||
|
||||
/*
|
||||
Buf *preprocessed_source = preprocess(in_data, tokens, &include_paths, cur_dir_path);
|
||||
|
||||
fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source));
|
||||
*/
|
||||
AstNode *root = build_ast(in_data, tokens);
|
||||
print_ast(root);
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
int add(int a, int b) {
|
||||
return a + b;
|
||||
pub fn add(a: int, b: int) -> int {
|
||||
a + b
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include <stdio.h>
|
||||
#include "add.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
fprintf(stderr, "hello: %d", add(1, 2));
|
||||
|
||||
fn main(argc: int, argv: *mut char) -> int {
|
||||
puts("Hello, world!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user