From e71521335ac656f94892ee049cc4814ecb772b30 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 5 Aug 2015 20:26:58 -0700
Subject: [PATCH] preprocessor detects #include directives

---
Notes on this revision of the patch:
- buf_slice() asserts start <= end instead of start < buf_len(in_buf), so a
  reversed range can no longer reach memcpy() with a negative length.
- render_directive() also handles a directive whose name runs to the end of
  the directive; previously such a directive was dropped without a diagnostic.
- parse_and_render_include() initializes its two state locals.
- buffer.hpp includes assert.h and string.h for assert()/memcpy()/strlen().
- Comments were added to the new functions; hunk counts and the diffstat
  were updated to match.
- Indentation, blank lines and template arguments were reconstructed from a
  whitespace-collapsed copy. The system header names on the bare "#include"
  context lines of src/main.cpp could not be recovered, so those hunks need
  the real names filled in before this applies.

 src/buffer.hpp |  95 ++++++++++++++++++++
 src/list.hpp   |   2 +-
 src/main.cpp   | 238 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 291 insertions(+), 44 deletions(-)
 create mode 100644 src/buffer.hpp

diff --git a/src/buffer.hpp b/src/buffer.hpp
new file mode 100644
index 0000000000..f463ab776d
--- /dev/null
+++ b/src/buffer.hpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+#ifndef ZIG_BUFFER_HPP
+#define ZIG_BUFFER_HPP
+
+#include "list.hpp"
+
+#include <assert.h>
+#include <string.h>
+
+// Growable byte string. The list always holds one byte more than the logical
+// length, and that last byte is kept at 0 so buf_ptr() is usable as a C string.
+struct Buf {
+    ZigList<char> list;
+};
+
+// Logical length in bytes, excluding the trailing 0.
+static inline int buf_len(Buf *buf) {
+    return buf->list.length - 1;
+}
+
+// Pointer to the first byte of the contents.
+static inline char *buf_ptr(Buf *buf) {
+    return buf->list.items;
+}
+
+// Sets the logical length to new_len and rewrites the trailing 0.
+static inline void buf_resize(Buf *buf, int new_len) {
+    buf->list.resize(new_len + 1);
+    buf->list.at(buf_len(buf)) = 0;
+}
+
+// Allocates an empty buffer.
+static inline Buf *buf_alloc(void) {
+    Buf *buf = allocate<Buf>(1);
+    buf_resize(buf, 0);
+    return buf;
+}
+
+// Allocates a buffer with logical length `size`, for the caller to fill.
+static inline Buf *buf_alloc_fixed(int size) {
+    Buf *buf = allocate<Buf>(1);
+    buf_resize(buf, size);
+    return buf;
+}
+
+// Releases the underlying list storage; the Buf struct itself is not freed.
+static inline void buf_deinit(Buf *buf) {
+    buf->list.deinit();
+}
+
+// Allocates a buffer holding a copy of the `len` bytes at `ptr`.
+static inline Buf *buf_from_mem(char *ptr, int len) {
+    Buf *buf = allocate<Buf>(1);
+    buf->list.resize(len + 1);
+    memcpy(buf_ptr(buf), ptr, len);
+    buf->list.at(buf_len(buf)) = 0;
+    return buf;
+}
+
+// Allocates a buffer holding a copy of in_buf[start, end).
+// An empty range is allowed, including one positioned at the end of in_buf.
+static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
+    assert(start >= 0);
+    assert(start <= end);
+    assert(end <= buf_len(in_buf));
+    Buf *out_buf = allocate<Buf>(1);
+    out_buf->list.resize(end - start + 1);
+    memcpy(buf_ptr(out_buf), buf_ptr(in_buf) + start, end - start);
+    out_buf->list.at(buf_len(out_buf)) = 0;
+    return out_buf;
+}
+
+// Appends str_len bytes of str; pass -1 to take the length from strlen(str).
+static inline void buf_append_str(Buf *buf, const char *str, int str_len) {
+    if (str_len == -1)
+        str_len = strlen(str);
+
+    int old_len = buf_len(buf);
+    buf_resize(buf, old_len + str_len);
+    memcpy(buf_ptr(buf) + old_len, str, str_len);
+    buf->list.at(buf_len(buf)) = 0;
+}
+
+// Appends the contents of append_buf to buf.
+static inline void buf_append_buf(Buf *buf, Buf *append_buf) {
+    buf_append_str(buf, buf_ptr(append_buf), buf_len(append_buf));
+}
+
+#endif
diff --git a/src/list.hpp b/src/list.hpp
index 434ff9ffe6..43b5370d13 100644
--- a/src/list.hpp
+++ b/src/list.hpp
@@ -15,7 +15,7 @@
 template<typename T>
 struct ZigList {
     void deinit() {
-        deallocate(items);
+        free(items);
     }
     void append(T item) {
         ensure_capacity(length + 1);
diff --git a/src/main.cpp b/src/main.cpp
index 7393952e05..50d0eff383 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,6 +8,8 @@
 #include "config.h"
 #include "util.hpp"
 #include "list.hpp"
+#include "buffer.hpp"
+
 #include
 #include
 #include
@@ -19,25 +21,6 @@
 #include
 #include
 
-struct Buf {
-    int len;
-    char ptr[0];
-};
-
-static Buf *alloc_buf(int size) {
-    Buf *buf = (Buf *)allocate_nonzero(sizeof(Buf) + size + 1);
-    buf->len = size;
-    buf->ptr[buf->len] = 0;
-    return buf;
-}
-
-/*
-static void fprint_buf(FILE *f, Buf *buf) {
-    if (fwrite(buf->ptr, 1, buf->len, f))
-        zig_panic("error writing: %s", strerror(errno));
-}
-*/
-
 static int usage(char *arg0) {
     fprintf(stderr, "Usage: %s --output outfile code.zig\n"
         "Other options:\n"
@@ -46,7 +29,7 @@ static int usage(char *arg0) {
     return EXIT_FAILURE;
 }
 
-static struct Buf *fetch_file(FILE *f) {
+static Buf *fetch_file(FILE *f) {
     int fd = fileno(f);
     struct stat st;
     if (fstat(fd, &st))
@@ -56,9 +39,9 @@ static struct Buf *fetch_file(FILE *f) {
         zig_panic("file too big");
     int size = (int)big_size;
 
-    Buf *buf = alloc_buf(size);
-    size_t amt_read = fread(buf->ptr, 1, buf->len, f);
-    if (amt_read != (size_t)buf->len)
+    Buf *buf = buf_alloc_fixed(size);
+    size_t amt_read = fread(buf_ptr(buf), 1, buf_len(buf), f);
+    if (amt_read != (size_t)buf_len(buf))
         zig_panic("error reading: %s", strerror(errno));
 
     return buf;
@@ -138,6 +121,12 @@ static struct Buf *fetch_file(FILE *f) {
     case 'Y': \
     case 'Z'
 
+#define SYMBOL_CHAR \
+    ALPHA: \
+    case DIGIT: \
+    case '_'
+
+
 enum TokenId {
     TokenIdDirective,
     TokenIdSymbol,
@@ -157,6 +146,8 @@ struct Token {
     TokenId id;
     int start_pos;
     int end_pos;
+    int start_line;
+    int start_column;
 };
 
 enum TokenizeState {
@@ -178,10 +169,21 @@ struct Tokenize {
 
 __attribute__ ((format (printf, 2, 3)))
 static void tokenize_error(Tokenize *t, const char *format, ...) {
+    int line;
+    int column;
+    if (t->cur_tok) {
+        line = t->cur_tok->start_line + 1;
+        column = t->cur_tok->start_column + 1;
+    } else {
+        line = t->line + 1;
+        column = t->column + 1;
+    }
+
     va_list ap;
     va_start(ap, format);
-    fprintf(stderr, "Error. Line %d, column %d: ", t->line + 1, t->column + 1);
+    fprintf(stderr, "Error: Line %d, column %d: ", line, column);
     vfprintf(stderr, format, ap);
+    fprintf(stderr, "\n");
     va_end(ap);
     exit(EXIT_FAILURE);
 }
@@ -190,6 +192,8 @@ static void begin_token(Tokenize *t, TokenId id) {
     assert(!t->cur_tok);
     t->tokens->add_one();
     Token *token = &t->tokens->last();
+    token->start_line = t->line;
+    token->start_column = t->column;
     token->id = id;
     token->start_pos = t->pos;
     t->cur_tok = token;
@@ -205,11 +209,24 @@ static void put_back(Tokenize *t, int count) {
     t->pos -= count;
 }
 
+static void end_directive(Tokenize *t) {
+    assert(t->cur_tok);
+    t->cur_tok->end_pos = t->pos;
+    t->cur_tok = nullptr;
+    t->state = TokenizeStateStart;
+}
+
+static void end_symbol(Tokenize *t) {
+    put_back(t, 1);
+    end_token(t);
+    t->state = TokenizeStateStart;
+}
+
 static ZigList<Token> *tokenize(Buf *buf) {
     Tokenize t = {0};
     t.tokens = allocate<ZigList<Token>>(1);
-    for (t.pos = 0; t.pos < buf->len; t.pos += 1) {
-        uint8_t c = buf->ptr[t.pos];
+    for (t.pos = 0; t.pos < buf_len(buf); t.pos += 1) {
+        uint8_t c = buf_ptr(buf)[t.pos];
         switch (t.state) {
             case TokenizeStateStart:
                 switch (c) {
@@ -232,7 +249,7 @@ static ZigList<Token> *tokenize(Buf *buf) {
                         end_token(&t);
                         break;
                     case ')':
-                        begin_token(&t, TokenIdLParen);
+                        begin_token(&t, TokenIdRParen);
                         end_token(&t);
                         break;
                     case ',':
@@ -269,22 +286,15 @@ static ZigList<Token> *tokenize(Buf *buf) {
                 break;
             case TokenizeStateDirective:
                 if (c == '\n') {
-                    assert(t.cur_tok);
-                    t.cur_tok->end_pos = t.pos;
-                    t.cur_tok = nullptr;
-                    t.state = TokenizeStateStart;
+                    end_directive(&t);
                 }
                 break;
             case TokenizeStateSymbol:
                 switch (c) {
-                    case ALPHA:
-                    case DIGIT:
-                    case '_':
+                    case SYMBOL_CHAR:
                         break;
                     default:
-                        put_back(&t, 1);
-                        end_token(&t);
-                        t.state = TokenizeStateStart;
+                        end_symbol(&t);
                         break;
                 }
                 break;
@@ -303,9 +313,7 @@ static ZigList<Token> *tokenize(Buf *buf) {
                     case DIGIT:
                         break;
                     default:
-                        put_back(&t, 1);
-                        end_token(&t);
-                        t.state = TokenizeStateStart;
+                        end_symbol(&t);
                         break;
                 }
                 break;
@@ -317,6 +325,24 @@ static ZigList<Token> *tokenize(Buf *buf) {
             t.column += 1;
         }
     }
+    // EOF
+    switch (t.state) {
+        case TokenizeStateStart:
+            break;
+        case TokenizeStateDirective:
+            end_directive(&t);
+            break;
+        case TokenizeStateSymbol:
+            end_symbol(&t);
+            break;
+        case TokenizeStateString:
+            tokenize_error(&t, "unterminated string");
+            break;
+        case TokenizeStateNumber:
+            end_symbol(&t);
+            break;
+    }
+    assert(!t.cur_tok);
     return t.tokens;
 }
 
@@ -342,11 +368,132 @@ static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
     for (int i = 0; i < tokens->length; i += 1) {
         Token *token = &tokens->at(i);
         printf("%s ", token_name(token));
-        fwrite(buf->ptr + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
+        fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
         printf("\n");
     }
 }
 
+struct Preprocess {
+    Buf *out_buf;
+    Buf *in_buf;
+    Token *token;
+};
+
+// Prints a message prefixed with the current token's 1-based line and column,
+// then exits with EXIT_FAILURE.
+__attribute__ ((format (printf, 2, 3)))
+static void preprocess_error(Preprocess *p, const char *format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    fprintf(stderr, "Error: Line %d, column %d: ", p->token->start_line + 1, p->token->start_column + 1);
+    vfprintf(stderr, format, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(EXIT_FAILURE);
+}
+
+enum IncludeState {
+    IncludeStateStart,
+    IncludeStateQuote,
+};
+
+// Currently only logs the include path and its closing delimiter to stderr.
+static void render_include(Preprocess *p, Buf *include_path, char unquote_char) {
+    fprintf(stderr, "render_include \"%s\" '%c'\n", buf_ptr(include_path), unquote_char);
+}
+
+// Scans directive_buf from pos for a <path> or "path" operand and hands the
+// path to render_include. Reports an error if no complete operand is found.
+static void parse_and_render_include(Preprocess *p, Buf *directive_buf, int pos) {
+    int state = IncludeStateStart;
+    char unquote_char = 0;
+    int quote_start_pos = 0;
+    for (; pos < buf_len(directive_buf); pos += 1) {
+        uint8_t c = buf_ptr(directive_buf)[pos];
+        switch (state) {
+            case IncludeStateStart:
+                switch (c) {
+                    case WHITESPACE:
+                        break;
+                    case '<':
+                    case '"':
+                        state = IncludeStateQuote;
+                        quote_start_pos = pos;
+                        unquote_char = (c == '<') ? '>' : '"';
+                        break;
+
+                }
+                break;
+            case IncludeStateQuote:
+                if (c == unquote_char) {
+                    Buf *include_path = buf_slice(directive_buf, quote_start_pos + 1, pos);
+                    render_include(p, include_path, unquote_char);
+                    return;
+                }
+                break;
+        }
+    }
+    preprocess_error(p, "include directive missing path");
+}
+
+// Dispatches on the directive name, i.e. the symbol characters following '#'.
+// The name may run to the end of the directive ("#include" with no operand).
+static void render_directive(Preprocess *p, Buf *directive_buf) {
+    int name_end = 1;
+    for (; name_end < buf_len(directive_buf); name_end += 1) {
+        bool is_symbol_char;
+        switch ((uint8_t)buf_ptr(directive_buf)[name_end]) {
+            case SYMBOL_CHAR:
+                is_symbol_char = true;
+                break;
+            default:
+                is_symbol_char = false;
+                break;
+        }
+        if (!is_symbol_char)
+            break;
+    }
+    Buf *directive_name = buf_from_mem(buf_ptr(directive_buf) + 1, name_end - 1);
+    if (strcmp(buf_ptr(directive_name), "include") == 0) {
+        parse_and_render_include(p, directive_buf, name_end);
+    } else {
+        preprocess_error(p, "invalid directive: \"%s\"", buf_ptr(directive_name));
+    }
+}
+
+// Renders the current token into out_buf: directives are interpreted, every
+// other token is copied followed by a newline (after ; { }) or a space.
+static void render_token(Preprocess *p) {
+    Buf *token_buf = buf_slice(p->in_buf, p->token->start_pos, p->token->end_pos);
+    switch (p->token->id) {
+        case TokenIdDirective:
+            render_directive(p, token_buf);
+            break;
+        default:
+            buf_append_buf(p->out_buf, token_buf);
+            if (p->token->id == TokenIdSemicolon ||
+                p->token->id == TokenIdLBrace ||
+                p->token->id == TokenIdRBrace)
+            {
+                buf_append_str(p->out_buf, "\n", -1);
+            } else {
+                buf_append_str(p->out_buf, " ", -1);
+            }
+    }
+}
+
+// Renders every token in order and returns the accumulated output buffer.
+static Buf *preprocess(Buf *in_buf, ZigList<Token> *tokens) {
+    Preprocess p = {0};
+    p.out_buf = buf_alloc();
+    p.in_buf = in_buf;
+    for (int i = 0; i < tokens->length; i += 1) {
+        p.token = &tokens->at(i);
+        render_token(&p);
+    }
+    return p.out_buf;
+}
+
 int main(int argc, char **argv) {
     char *arg0 = argv[0];
     char *in_file = NULL;
@@ -386,14 +533,19 @@ int main(int argc, char **argv) {
         zig_panic("unable to open %s for reading: %s\n", in_file, strerror(errno));
     }
 
-    struct Buf *in_data = fetch_file(in_f);
+    Buf *in_data = fetch_file(in_f);
 
-    fprintf(stderr, "%s\n", in_data->ptr);
+    fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data));
 
     ZigList<Token> *tokens = tokenize(in_data);
 
+    fprintf(stderr, "\nTokens:\n");
     print_tokens(in_data, tokens);
 
+    Buf *preprocessed_source = preprocess(in_data, tokens);
+
+    fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source));
+
 
     return EXIT_SUCCESS;
 }