From b5d9584e6fc9e113435e0d555c2df62379cdcb8b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 10 Sep 2017 00:20:09 -0400 Subject: [PATCH] support parens in C macros closes #454 --- src/c_tokenizer.cpp | 18 +++++ src/c_tokenizer.hpp | 3 + src/parsec.cpp | 168 ++++++++++++++++++++++++-------------------- test/parsec.zig | 6 ++ 4 files changed, 119 insertions(+), 76 deletions(-) diff --git a/src/c_tokenizer.cpp b/src/c_tokenizer.cpp index 920bcacf90..0988c659b8 100644 --- a/src/c_tokenizer.cpp +++ b/src/c_tokenizer.cpp @@ -114,6 +114,9 @@ static void begin_token(CTokenize *ctok, CTokId id) { case CTokIdCharLit: case CTokIdNumLitFloat: case CTokIdMinus: + case CTokIdLParen: + case CTokIdRParen: + case CTokIdEOF: break; } } @@ -214,6 +217,18 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { ctok->state = CTokStateFloat; buf_init_from_str(&ctok->buf, "0."); break; + case '(': + begin_token(ctok, CTokIdLParen); + end_token(ctok); + break; + case ')': + begin_token(ctok, CTokIdRParen); + end_token(ctok); + break; + case '-': + begin_token(ctok, CTokIdMinus); + end_token(ctok); + break; default: return mark_error(ctok); } @@ -738,4 +753,7 @@ found_end_of_macro: } assert(ctok->cur_tok == nullptr); + + begin_token(ctok, CTokIdEOF); + end_token(ctok); } diff --git a/src/c_tokenizer.hpp b/src/c_tokenizer.hpp index 573391239f..6267ea8764 100644 --- a/src/c_tokenizer.hpp +++ b/src/c_tokenizer.hpp @@ -18,6 +18,9 @@ enum CTokId { CTokIdNumLitFloat, CTokIdSymbol, CTokIdMinus, + CTokIdLParen, + CTokIdRParen, + CTokIdEOF, }; enum CNumLitSuffix { diff --git a/src/parsec.cpp b/src/parsec.cpp index 742db4e610..32462407ee 100644 --- a/src/parsec.cpp +++ b/src/parsec.cpp @@ -2469,6 +2469,74 @@ static void render_macros(Context *c) { } } +static AstNode *parse_ctok_num_lit(Context *c, CTokenize *ctok, size_t *tok_i, bool negate) { + CTok *tok = &ctok->tokens.at(*tok_i); + if (tok->id == CTokIdNumLitInt) { + *tok_i += 1; + switch (tok->data.num_lit_int.suffix) { + case CNumLitSuffixNone: + return trans_create_node_unsigned_negative(c, tok->data.num_lit_int.x, negate); + case CNumLitSuffixL: + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_long"); + case CNumLitSuffixU: + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_uint"); + case CNumLitSuffixLU: + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulong"); + case CNumLitSuffixLL: + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_longlong"); + case CNumLitSuffixLLU: + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulonglong"); + } + zig_unreachable(); + } else if (tok->id == CTokIdNumLitFloat) { + *tok_i += 1; + double value = negate ? -tok->data.num_lit_float : tok->data.num_lit_float; + return trans_create_node_float_lit(c, value); + } + return nullptr; +} + +static AstNode *parse_ctok(Context *c, CTokenize *ctok, size_t *tok_i) { + CTok *tok = &ctok->tokens.at(*tok_i); + switch (tok->id) { + case CTokIdCharLit: + *tok_i += 1; + return trans_create_node_unsigned(c, tok->data.char_lit); + case CTokIdStrLit: + *tok_i += 1; + return trans_create_node_str_lit_c(c, buf_create_from_buf(&tok->data.str_lit)); + case CTokIdMinus: + *tok_i += 1; + return parse_ctok_num_lit(c, ctok, tok_i, true); + case CTokIdNumLitInt: + case CTokIdNumLitFloat: + return parse_ctok_num_lit(c, ctok, tok_i, false); + case CTokIdSymbol: + { + *tok_i += 1; + Buf *symbol_name = buf_create_from_buf(&tok->data.symbol); + return trans_create_node_symbol(c, symbol_name); + } + case CTokIdLParen: + { + *tok_i += 1; + AstNode *inner_node = parse_ctok(c, ctok, tok_i); + + CTok *next_tok = &ctok->tokens.at(*tok_i); + if (next_tok->id != CTokIdRParen) { + return nullptr; + } + *tok_i += 1; + return inner_node; + } + case CTokIdEOF: + case CTokIdRParen: + // not able to make sense of this + return nullptr; + } + zig_unreachable(); +} + static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) { tokenize_c_macro(ctok, (const uint8_t *)char_ptr); @@ -2476,81 +2544,29 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch return; } - bool negate = false; - for (size_t i = 0; i < ctok->tokens.length; i += 1) { - bool is_first = (i == 0); - bool is_last = (i == ctok->tokens.length - 1); - CTok *tok = &ctok->tokens.at(i); - switch (tok->id) { - case CTokIdCharLit: - if (is_last && is_first) { - AstNode *node = trans_create_node_unsigned(c, tok->data.char_lit); - c->macro_table.put(name, node); - } - return; - case CTokIdStrLit: - if (is_last && is_first) { - AstNode *node = trans_create_node_str_lit_c(c, buf_create_from_buf(&tok->data.str_lit)); - c->macro_table.put(name, node); - } - return; - case CTokIdNumLitInt: - if (is_last) { - AstNode *node; - switch (tok->data.num_lit_int.suffix) { - case CNumLitSuffixNone: - node = trans_create_node_unsigned_negative(c, tok->data.num_lit_int.x, negate); - break; - case CNumLitSuffixL: - node = trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, - "c_long"); - break; - case CNumLitSuffixU: - node = trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, - "c_uint"); - break; - case CNumLitSuffixLU: - node = trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, - "c_ulong"); - break; - case CNumLitSuffixLL: - node = trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, - "c_longlong"); - break; - case CNumLitSuffixLLU: - node = trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, - "c_ulonglong"); - break; - } - c->macro_table.put(name, node); - } - return; - case CTokIdNumLitFloat: - if (is_last) { - double value = negate ? -tok->data.num_lit_float : tok->data.num_lit_float; - AstNode *node = trans_create_node_float_lit(c, value); - c->macro_table.put(name, node); - } - return; - case CTokIdSymbol: - if (is_last && is_first) { - // if it equals itself, ignore. for example, from stdio.h: - // #define stdin stdin - Buf *symbol_name = buf_create_from_buf(&tok->data.symbol); - if (buf_eql_buf(name, symbol_name)) { - return; - } - c->macro_symbols.append({name, symbol_name}); - return; - } - case CTokIdMinus: - if (is_first) { - negate = true; - break; - } else { - return; - } + size_t tok_i = 0; + CTok *name_tok = &ctok->tokens.at(tok_i); + assert(name_tok->id == CTokIdSymbol && buf_eql_buf(&name_tok->data.symbol, name)); + tok_i += 1; + + AstNode *result_node = parse_ctok(c, ctok, &tok_i); + if (result_node == nullptr) { + return; + } + CTok *eof_tok = &ctok->tokens.at(tok_i); + if (eof_tok->id != CTokIdEOF) { + return; + } + if (result_node->type == NodeTypeSymbol) { + // if it equals itself, ignore. for example, from stdio.h: + // #define stdin stdin + Buf *symbol_name = result_node->data.symbol_expr.symbol; + if (buf_eql_buf(name, symbol_name)) { + return; } + c->macro_symbols.append({name, symbol_name}); + } else { + c->macro_table.put(name, result_node); } } @@ -2613,8 +2629,8 @@ static void process_preprocessor_entities(Context *c, ASTUnit &unit) { continue; } - const char *end_c = c->source_manager->getCharacterData(end_loc); - process_macro(c, &ctok, name, end_c); + const char *begin_c = c->source_manager->getCharacterData(begin_loc); + process_macro(c, &ctok, name, begin_c); } } } diff --git a/test/parsec.zig b/test/parsec.zig index c0e7689bb3..96f1cceafa 100644 --- a/test/parsec.zig +++ b/test/parsec.zig @@ -296,4 +296,10 @@ pub fn addCases(cases: &tests.ParseCContext) { , \\pub const FOO_CHAR = 63; ); + + cases.add("macro with parens around negative number", + \\#define LUA_GLOBALSINDEX (-10002) + , + \\pub const LUA_GLOBALSINDEX = -10002; + ); }