diff --git a/src/ast_render.cpp b/src/ast_render.cpp index 6b88579b49..d39f858ed1 100644 --- a/src/ast_render.cpp +++ b/src/ast_render.cpp @@ -239,10 +239,80 @@ static bool is_node_void(AstNode *node) { return false; } -static bool is_printable(uint8_t c) { +static bool is_alpha_under(uint8_t c) { return (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'A') || - (c >= '0' && c <= '9'); + (c >= 'A' && c <= 'Z') || c == '_'; +} + +static bool is_digit(uint8_t c) { + return (c >= '0' && c <= '9'); +} + +static bool is_printable(uint8_t c) { + return is_alpha_under(c) || is_digit(c); +} + +static void string_literal_escape(Buf *source, Buf *dest) { + buf_resize(dest, 0); + for (int i = 0; i < buf_len(source); i += 1) { + uint8_t c = *((uint8_t*)buf_ptr(source) + i); + if (is_printable(c)) { + buf_append_char(dest, c); + } else if (c == '\'') { + buf_append_str(dest, "\\'"); + } else if (c == '"') { + buf_append_str(dest, "\\\""); + } else if (c == '\\') { + buf_append_str(dest, "\\\\"); + } else if (c == '\a') { + buf_append_str(dest, "\\a"); + } else if (c == '\b') { + buf_append_str(dest, "\\b"); + } else if (c == '\f') { + buf_append_str(dest, "\\f"); + } else if (c == '\n') { + buf_append_str(dest, "\\n"); + } else if (c == '\r') { + buf_append_str(dest, "\\r"); + } else if (c == '\t') { + buf_append_str(dest, "\\t"); + } else if (c == '\v') { + buf_append_str(dest, "\\v"); + } else { + buf_appendf(dest, "\\x%x", (int)c); + } + } +} + +static bool is_valid_bare_symbol(Buf *symbol) { + if (buf_len(symbol) == 0) { + return false; + } + uint8_t first_char = *buf_ptr(symbol); + if (!is_alpha_under(first_char)) { + return false; + } + for (int i = 1; i < buf_len(symbol); i += 1) { + uint8_t c = *((uint8_t*)buf_ptr(symbol) + i); + if (!is_alpha_under(c) && !is_digit(c)) { + return false; + } + } + return true; +} + +static void print_symbol(AstRender *ar, Buf *symbol) { + if (is_zig_keyword(symbol)) { + fprintf(ar->f, "@\"%s\"", buf_ptr(symbol)); + return; + } + if 
(is_valid_bare_symbol(symbol)) { + fprintf(ar->f, "%s", buf_ptr(symbol)); + return; + } + Buf escaped = BUF_INIT; + string_literal_escape(symbol, &escaped); + fprintf(ar->f, "@\"%s\"", buf_ptr(&escaped)); } static void render_node(AstRender *ar, AstNode *node) { @@ -268,20 +338,22 @@ static void render_node(AstRender *ar, AstNode *node) { break; case NodeTypeFnProto: { - const char *fn_name = buf_ptr(&node->data.fn_proto.name); const char *pub_str = visib_mod_string(node->data.fn_proto.top_level_decl.visib_mod); const char *extern_str = extern_string(node->data.fn_proto.is_extern); const char *inline_str = inline_string(node->data.fn_proto.is_inline); - fprintf(ar->f, "%s%s%sfn %s(", pub_str, inline_str, extern_str, fn_name); + fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str); + print_symbol(ar, &node->data.fn_proto.name); + fprintf(ar->f, "("); int arg_count = node->data.fn_proto.params.length; bool is_var_args = node->data.fn_proto.is_var_args; for (int arg_i = 0; arg_i < arg_count; arg_i += 1) { AstNode *param_decl = node->data.fn_proto.params.at(arg_i); assert(param_decl->type == NodeTypeParamDecl); - const char *arg_name = buf_ptr(&param_decl->data.param_decl.name); if (buf_len(&param_decl->data.param_decl.name) > 0) { const char *noalias_str = param_decl->data.param_decl.is_noalias ? 
"noalias " : ""; - fprintf(ar->f, "%s%s: ", noalias_str, arg_name); + fprintf(ar->f, "%s", noalias_str); + print_symbol(ar, &param_decl->data.param_decl.name); + fprintf(ar->f, ": "); } render_node(ar, param_decl->data.param_decl.type); @@ -345,9 +417,10 @@ static void render_node(AstRender *ar, AstNode *node) { { const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod); const char *extern_str = extern_string(node->data.variable_declaration.is_extern); - const char *var_name = buf_ptr(&node->data.variable_declaration.symbol); const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const); - fprintf(ar->f, "%s%s%s %s", pub_str, extern_str, const_or_var, var_name); + fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var); + print_symbol(ar, &node->data.variable_declaration.symbol); + if (node->data.variable_declaration.type) { fprintf(ar->f, ": "); render_node(ar, node->data.variable_declaration.type); @@ -495,9 +568,8 @@ static void render_node(AstRender *ar, AstNode *node) { for (int field_i = 0; field_i < node->data.struct_decl.fields.length; field_i += 1) { AstNode *field_node = node->data.struct_decl.fields.at(field_i); assert(field_node->type == NodeTypeStructField); - const char *field_name = buf_ptr(&field_node->data.struct_field.name); print_indent(ar); - fprintf(ar->f, "%s", field_name); + print_symbol(ar, &field_node->data.struct_field.name); if (!is_node_void(field_node->data.struct_field.type)) { fprintf(ar->f, ": "); render_node(ar, field_node->data.struct_field.type); diff --git a/src/parseh.cpp b/src/parseh.cpp index 9806f3cfb3..4ae426f1c7 100644 --- a/src/parseh.cpp +++ b/src/parseh.cpp @@ -11,7 +11,6 @@ #include "error.hpp" #include "parser.hpp" #include "all_types.hpp" -#include "tokenizer.hpp" #include "c_tokenizer.hpp" #include "analyze.hpp" @@ -1265,10 +1264,6 @@ static void render_macros(Context *c) { } static void process_macro(Context *c, CTokenize *ctok, Buf *name, const 
char *char_ptr) { - if (is_zig_keyword(name)) { - return; - } - tokenize_c_macro(ctok, (const uint8_t *)char_ptr); if (ctok->error) { diff --git a/src/parser.cpp b/src/parser.cpp index 29a65521d7..d73bacead2 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -87,10 +87,6 @@ static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) { return node; } -static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) { - buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos); -} - static void parse_asm_template(ParseContext *pc, AstNode *node) { Buf *asm_template = &node->data.asm_expr.asm_template; @@ -277,6 +273,8 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool // detect c string literal enum State { + StatePre, + StateSkipQuot, StateStart, StateEscape, StateHex1, @@ -285,90 +283,100 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool buf_resize(buf, 0); - State state = StateStart; - bool skip_quote; + State state = StatePre; SrcPos pos = {token->start_line, token->start_column}; int hex_value = 0; for (int i = token->start_pos; i < token->end_pos - 1; i += 1) { uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i); - if (i == token->start_pos) { - skip_quote = (c == 'c'); - if (out_c_str) { - *out_c_str = skip_quote; - } else if (skip_quote) { - ast_error(pc, token, "C string literal not allowed here"); - } - } else if (skip_quote) { - skip_quote = false; - } else { - switch (state) { - case StateStart: - if (c == '\\') { - state = StateEscape; - } else { - buf_append_char(buf, c); - if (offset_map) offset_map->append(pos); - } - break; - case StateEscape: - switch (c) { - case '\\': - buf_append_char(buf, '\\'); - if (offset_map) offset_map->append(pos); - state = StateStart; - break; - case 'r': - buf_append_char(buf, '\r'); - if (offset_map) offset_map->append(pos); - state = StateStart; - break; - case 'n': - buf_append_char(buf, '\n'); - if 
(offset_map) offset_map->append(pos); - state = StateStart; - break; - case 't': - buf_append_char(buf, '\t'); - if (offset_map) offset_map->append(pos); - state = StateStart; - break; - case '"': - buf_append_char(buf, '"'); - if (offset_map) offset_map->append(pos); - state = StateStart; - break; - case 'x': - state = StateHex1; - break; - default: - ast_error(pc, token, "invalid escape character"); - break; - } - break; - case StateHex1: - { - int hex_digit = get_hex_digit(c); - if (hex_digit == -1) { - ast_error(pc, token, "invalid hex digit: '%c'", c); - } - hex_value = hex_digit * 16; - state = StateHex2; + switch (state) { + case StatePre: + switch (c) { + case '@': + state = StateSkipQuot; break; - } - case StateHex2: - { - int hex_digit = get_hex_digit(c); - if (hex_digit == -1) { - ast_error(pc, token, "invalid hex digit: '%c'", c); + case 'c': + if (out_c_str) { + *out_c_str = true; + } else { + ast_error(pc, token, "C string literal not allowed here"); } - hex_value += hex_digit; - assert(hex_value >= 0 && hex_value <= 255); - buf_append_char(buf, hex_value); + state = StateSkipQuot; + break; + case '"': state = StateStart; break; + default: + ast_error(pc, token, "invalid string character"); + } + break; + case StateSkipQuot: + state = StateStart; + break; + case StateStart: + if (c == '\\') { + state = StateEscape; + } else { + buf_append_char(buf, c); + if (offset_map) offset_map->append(pos); + } + break; + case StateEscape: + switch (c) { + case '\\': + buf_append_char(buf, '\\'); + if (offset_map) offset_map->append(pos); + state = StateStart; + break; + case 'r': + buf_append_char(buf, '\r'); + if (offset_map) offset_map->append(pos); + state = StateStart; + break; + case 'n': + buf_append_char(buf, '\n'); + if (offset_map) offset_map->append(pos); + state = StateStart; + break; + case 't': + buf_append_char(buf, '\t'); + if (offset_map) offset_map->append(pos); + state = StateStart; + break; + case '"': + buf_append_char(buf, '"'); + if 
(offset_map) offset_map->append(pos); + state = StateStart; + break; + case 'x': + state = StateHex1; + break; + default: + ast_error(pc, token, "invalid escape character"); + } + break; + case StateHex1: + { + int hex_digit = get_hex_digit(c); + if (hex_digit == -1) { + ast_error(pc, token, "invalid hex digit: '%c'", c); } - } + hex_value = hex_digit * 16; + state = StateHex2; + break; + } + case StateHex2: + { + int hex_digit = get_hex_digit(c); + if (hex_digit == -1) { + ast_error(pc, token, "invalid hex digit: '%c'", c); + } + hex_value += hex_digit; + assert(hex_value >= 0 && hex_value <= 255); + buf_append_char(buf, hex_value); + state = StateStart; + break; + } } if (c == '\n') { pos.line += 1; @@ -381,6 +389,17 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool if (offset_map) offset_map->append(pos); } +static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) { + uint8_t *first_char = (uint8_t *)buf_ptr(pc->buf) + token->start_pos; + bool at_sign = *first_char == '@'; + if (at_sign) { + parse_string_literal(pc, token, buf, nullptr, nullptr); + } else { + buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos); + } +} + + static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix, int skip_index, bool *overflow) { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 700090eec0..246bf6ba68 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -159,6 +159,7 @@ enum TokenizeState { TokenizeStateSawDot, TokenizeStateSawDotDot, TokenizeStateSawQuestionMark, + TokenizeStateSawAtSign, TokenizeStateError, }; @@ -429,7 +430,7 @@ void tokenize(Buf *buf, Tokenization *out) { break; case '@': begin_token(&t, TokenIdAtSign); - end_token(&t); + t.state = TokenizeStateSawAtSign; break; case '-': begin_token(&t, TokenIdDash); @@ -858,6 +859,19 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case 
TokenizeStateSawAtSign: + switch (c) { + case '"': + t.cur_tok->id = TokenIdSymbol; + t.state = TokenizeStateString; + break; + default: + t.pos -= 1; + end_token(&t); + t.state = TokenizeStateStart; + continue; + } + break; case TokenizeStateFirstR: switch (c) { case '"': @@ -1131,6 +1145,7 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateSawGreaterThanGreaterThan: case TokenizeStateSawDot: case TokenizeStateSawQuestionMark: + case TokenizeStateSawAtSign: end_token(&t); break; case TokenizeStateSawDotDot: diff --git a/test/run_tests.cpp b/test/run_tests.cpp index a08b3ea047..618e75d717 100644 --- a/test/run_tests.cpp +++ b/test/run_tests.cpp @@ -1394,6 +1394,14 @@ void foo(void (__cdecl *fn_ptr)(void)); add_parseh_case("comment after integer literal", R"SOURCE( #define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ )SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;"); + + add_parseh_case("zig keywords in C code", R"SOURCE( +struct type { + int defer; +}; + )SOURCE", 2, R"(export struct struct_type { + @"defer": c_int, +})", R"(pub const @"type" = struct_type;)"); } static void run_self_hosted_test(void) { diff --git a/test/self_hosted.zig b/test/self_hosted.zig index 3bf19b61f5..0673301092 100644 --- a/test/self_hosted.zig +++ b/test/self_hosted.zig @@ -1295,3 +1295,7 @@ struct EmptyStruct { #static_eval_enable(false) fn method(es: EmptyStruct) -> i32 { 1234 } } + + +#attribute("test") +fn @"weird function name"() { }