add syntax to allow symbols to have arbitrary strings as names

This commit is contained in:
Andrew Kelley 2016-04-22 15:24:18 -07:00
parent 35362f8137
commit 8187396f64
6 changed files with 208 additions and 95 deletions

View File

@ -239,10 +239,80 @@ static bool is_node_void(AstNode *node) {
return false;
}
// Returns true when `c` is an ASCII letter (either case) or an underscore.
// Digits are deliberately excluded so this can be used to validate the first
// character of a bare identifier.
static bool is_alpha_under(uint8_t c) {
    return (c >= 'a' && c <= 'z') ||
        (c >= 'A' && c <= 'Z') || c == '_';
}
// Returns true when `c` is one of the ASCII decimal digits '0' through '9'.
static bool is_digit(uint8_t c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
// Returns true when `c` may be copied verbatim by string_literal_escape:
// a character accepted by is_alpha_under() or by is_digit().
static bool is_printable(uint8_t c) {
    if (is_alpha_under(c)) {
        return true;
    }
    return is_digit(c);
}
// Replaces the contents of `dest` with an escaped copy of `source`, suitable
// for placing between double quotes in rendered source.
//
// Bytes accepted by is_printable() are copied unchanged. Every other byte is
// written using only the escapes that parse_string_literal understands:
// \" \\ \n \r \t, and \xHH for everything else.
static void string_literal_escape(Buf *source, Buf *dest) {
    buf_resize(dest, 0);
    for (int i = 0; i < buf_len(source); i += 1) {
        uint8_t c = *((uint8_t*)buf_ptr(source) + i);
        if (is_printable(c)) {
            buf_append_char(dest, c);
            continue;
        }
        switch (c) {
            case '"':
                buf_append_str(dest, "\\\"");
                break;
            case '\\':
                buf_append_str(dest, "\\\\");
                break;
            case '\n':
                buf_append_str(dest, "\\n");
                break;
            case '\r':
                buf_append_str(dest, "\\r");
                break;
            case '\t':
                buf_append_str(dest, "\\t");
                break;
            default:
                // The parser always consumes exactly two hex digits after
                // "\x", so pad to two digits; a bare "\x7" would swallow the
                // following character as the second digit. Escapes such as
                // \' \a \b \f \v are rejected by the parser as invalid, so
                // those bytes take the hex form as well.
                buf_appendf(dest, "\\x%02x", (int)c);
                break;
        }
    }
}
// Returns true when `symbol` can be written as a plain identifier: it is
// non-empty, starts with a letter or underscore, and every following
// character is a letter, underscore, or decimal digit.
static bool is_valid_bare_symbol(Buf *symbol) {
    if (buf_len(symbol) == 0)
        return false;

    const uint8_t *bytes = (const uint8_t *)buf_ptr(symbol);
    if (!is_alpha_under(bytes[0]))
        return false;

    for (int idx = 1; idx < buf_len(symbol); idx += 1) {
        uint8_t ch = bytes[idx];
        bool allowed = is_alpha_under(ch) || is_digit(ch);
        if (!allowed)
            return false;
    }
    return true;
}
// Writes `symbol` to the renderer's output stream as an identifier.
//
//  - A name that is_zig_keyword() reports as a keyword is wrapped in @"..."
//    and written as-is.
//  - A name that is a valid bare identifier is written unchanged.
//  - Any other name is escaped with string_literal_escape() and wrapped in
//    @"...".
static void print_symbol(AstRender *ar, Buf *symbol) {
    if (is_zig_keyword(symbol)) {
        fprintf(ar->f, "@\"%s\"", buf_ptr(symbol));
    } else if (is_valid_bare_symbol(symbol)) {
        fprintf(ar->f, "%s", buf_ptr(symbol));
    } else {
        // NOTE(review): this temporary buffer is not released in this function.
        Buf quoted_body = BUF_INIT;
        string_literal_escape(symbol, &quoted_body);
        fprintf(ar->f, "@\"%s\"", buf_ptr(&quoted_body));
    }
}
static void render_node(AstRender *ar, AstNode *node) {
@ -268,20 +338,22 @@ static void render_node(AstRender *ar, AstNode *node) {
break;
case NodeTypeFnProto:
{
const char *fn_name = buf_ptr(&node->data.fn_proto.name);
const char *pub_str = visib_mod_string(node->data.fn_proto.top_level_decl.visib_mod);
const char *extern_str = extern_string(node->data.fn_proto.is_extern);
const char *inline_str = inline_string(node->data.fn_proto.is_inline);
fprintf(ar->f, "%s%s%sfn %s(", pub_str, inline_str, extern_str, fn_name);
fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str);
print_symbol(ar, &node->data.fn_proto.name);
fprintf(ar->f, "(");
int arg_count = node->data.fn_proto.params.length;
bool is_var_args = node->data.fn_proto.is_var_args;
for (int arg_i = 0; arg_i < arg_count; arg_i += 1) {
AstNode *param_decl = node->data.fn_proto.params.at(arg_i);
assert(param_decl->type == NodeTypeParamDecl);
const char *arg_name = buf_ptr(&param_decl->data.param_decl.name);
if (buf_len(&param_decl->data.param_decl.name) > 0) {
const char *noalias_str = param_decl->data.param_decl.is_noalias ? "noalias " : "";
fprintf(ar->f, "%s%s: ", noalias_str, arg_name);
fprintf(ar->f, "%s", noalias_str);
print_symbol(ar, &param_decl->data.param_decl.name);
fprintf(ar->f, ": ");
}
render_node(ar, param_decl->data.param_decl.type);
@ -345,9 +417,10 @@ static void render_node(AstRender *ar, AstNode *node) {
{
const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod);
const char *extern_str = extern_string(node->data.variable_declaration.is_extern);
const char *var_name = buf_ptr(&node->data.variable_declaration.symbol);
const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const);
fprintf(ar->f, "%s%s%s %s", pub_str, extern_str, const_or_var, var_name);
fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var);
print_symbol(ar, &node->data.variable_declaration.symbol);
if (node->data.variable_declaration.type) {
fprintf(ar->f, ": ");
render_node(ar, node->data.variable_declaration.type);
@ -495,9 +568,8 @@ static void render_node(AstRender *ar, AstNode *node) {
for (int field_i = 0; field_i < node->data.struct_decl.fields.length; field_i += 1) {
AstNode *field_node = node->data.struct_decl.fields.at(field_i);
assert(field_node->type == NodeTypeStructField);
const char *field_name = buf_ptr(&field_node->data.struct_field.name);
print_indent(ar);
fprintf(ar->f, "%s", field_name);
print_symbol(ar, &field_node->data.struct_field.name);
if (!is_node_void(field_node->data.struct_field.type)) {
fprintf(ar->f, ": ");
render_node(ar, field_node->data.struct_field.type);

View File

@ -11,7 +11,6 @@
#include "error.hpp"
#include "parser.hpp"
#include "all_types.hpp"
#include "tokenizer.hpp"
#include "c_tokenizer.hpp"
#include "analyze.hpp"
@ -1265,10 +1264,6 @@ static void render_macros(Context *c) {
}
static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) {
if (is_zig_keyword(name)) {
return;
}
tokenize_c_macro(ctok, (const uint8_t *)char_ptr);
if (ctok->error) {

View File

@ -87,10 +87,6 @@ static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) {
return node;
}
// Initializes `buf` with the raw source bytes spanned by `token`, i.e. the
// range [start_pos, end_pos) of the parse context's source buffer.
// NOTE(review): another definition of ast_buf_from_token appears further down
// in this listing; confirm which one is meant to remain.
static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
    char *token_text = buf_ptr(pc->buf) + token->start_pos;
    buf_init_from_mem(buf, token_text, token->end_pos - token->start_pos);
}
static void parse_asm_template(ParseContext *pc, AstNode *node) {
Buf *asm_template = &node->data.asm_expr.asm_template;
@ -277,6 +273,8 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
// detect c string literal
enum State {
StatePre,
StateSkipQuot,
StateStart,
StateEscape,
StateHex1,
@ -285,90 +283,100 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
buf_resize(buf, 0);
State state = StateStart;
bool skip_quote;
State state = StatePre;
SrcPos pos = {token->start_line, token->start_column};
int hex_value = 0;
for (int i = token->start_pos; i < token->end_pos - 1; i += 1) {
uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
if (i == token->start_pos) {
skip_quote = (c == 'c');
if (out_c_str) {
*out_c_str = skip_quote;
} else if (skip_quote) {
ast_error(pc, token, "C string literal not allowed here");
}
} else if (skip_quote) {
skip_quote = false;
} else {
switch (state) {
case StateStart:
if (c == '\\') {
state = StateEscape;
} else {
buf_append_char(buf, c);
if (offset_map) offset_map->append(pos);
}
break;
case StateEscape:
switch (c) {
case '\\':
buf_append_char(buf, '\\');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'r':
buf_append_char(buf, '\r');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'n':
buf_append_char(buf, '\n');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 't':
buf_append_char(buf, '\t');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case '"':
buf_append_char(buf, '"');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'x':
state = StateHex1;
break;
default:
ast_error(pc, token, "invalid escape character");
break;
}
break;
case StateHex1:
{
int hex_digit = get_hex_digit(c);
if (hex_digit == -1) {
ast_error(pc, token, "invalid hex digit: '%c'", c);
}
hex_value = hex_digit * 16;
state = StateHex2;
switch (state) {
case StatePre:
switch (c) {
case '@':
state = StateSkipQuot;
break;
}
case StateHex2:
{
int hex_digit = get_hex_digit(c);
if (hex_digit == -1) {
ast_error(pc, token, "invalid hex digit: '%c'", c);
case 'c':
if (out_c_str) {
*out_c_str = true;
} else {
ast_error(pc, token, "C string literal not allowed here");
}
hex_value += hex_digit;
assert(hex_value >= 0 && hex_value <= 255);
buf_append_char(buf, hex_value);
state = StateSkipQuot;
break;
case '"':
state = StateStart;
break;
default:
ast_error(pc, token, "invalid string character");
}
break;
case StateSkipQuot:
state = StateStart;
break;
case StateStart:
if (c == '\\') {
state = StateEscape;
} else {
buf_append_char(buf, c);
if (offset_map) offset_map->append(pos);
}
break;
case StateEscape:
switch (c) {
case '\\':
buf_append_char(buf, '\\');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'r':
buf_append_char(buf, '\r');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'n':
buf_append_char(buf, '\n');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 't':
buf_append_char(buf, '\t');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case '"':
buf_append_char(buf, '"');
if (offset_map) offset_map->append(pos);
state = StateStart;
break;
case 'x':
state = StateHex1;
break;
default:
ast_error(pc, token, "invalid escape character");
}
break;
case StateHex1:
{
int hex_digit = get_hex_digit(c);
if (hex_digit == -1) {
ast_error(pc, token, "invalid hex digit: '%c'", c);
}
}
hex_value = hex_digit * 16;
state = StateHex2;
break;
}
case StateHex2:
{
int hex_digit = get_hex_digit(c);
if (hex_digit == -1) {
ast_error(pc, token, "invalid hex digit: '%c'", c);
}
hex_value += hex_digit;
assert(hex_value >= 0 && hex_value <= 255);
buf_append_char(buf, hex_value);
state = StateStart;
break;
}
}
if (c == '\n') {
pos.line += 1;
@ -381,6 +389,17 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
if (offset_map) offset_map->append(pos);
}
// Fills `buf` with the name carried by `token`.
//
// A token whose first source byte is '@' is decoded through
// parse_string_literal (with no C-string flag and no offset map requested).
// Any other token is copied verbatim from the source range
// [start_pos, end_pos).
static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
    char *token_text = buf_ptr(pc->buf) + token->start_pos;
    if ((uint8_t)*token_text == '@') {
        parse_string_literal(pc, token, buf, nullptr, nullptr);
        return;
    }
    buf_init_from_mem(buf, token_text, token->end_pos - token->start_pos);
}
static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
int skip_index, bool *overflow)
{

View File

@ -159,6 +159,7 @@ enum TokenizeState {
TokenizeStateSawDot,
TokenizeStateSawDotDot,
TokenizeStateSawQuestionMark,
TokenizeStateSawAtSign,
TokenizeStateError,
};
@ -429,7 +430,7 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
case '@':
begin_token(&t, TokenIdAtSign);
end_token(&t);
t.state = TokenizeStateSawAtSign;
break;
case '-':
begin_token(&t, TokenIdDash);
@ -858,6 +859,19 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
case TokenizeStateSawAtSign:
switch (c) {
case '"':
t.cur_tok->id = TokenIdSymbol;
t.state = TokenizeStateString;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateFirstR:
switch (c) {
case '"':
@ -1131,6 +1145,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawGreaterThanGreaterThan:
case TokenizeStateSawDot:
case TokenizeStateSawQuestionMark:
case TokenizeStateSawAtSign:
end_token(&t);
break;
case TokenizeStateSawDotDot:

View File

@ -1394,6 +1394,14 @@ void foo(void (__cdecl *fn_ptr)(void));
add_parseh_case("comment after integer literal", R"SOURCE(
#define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
)SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;");
add_parseh_case("zig keywords in C code", R"SOURCE(
struct type {
int defer;
};
)SOURCE", 2, R"(export struct struct_type {
@"defer": c_int,
})", R"(pub const @"type" = struct_type;)");
}
static void run_self_hosted_test(void) {

View File

@ -1295,3 +1295,7 @@ struct EmptyStruct {
#static_eval_enable(false)
fn method(es: EmptyStruct) -> i32 { 1234 }
}
// Test: a function declared with the @"..." symbol syntax, using a name that
// contains spaces and so could not be written as a bare identifier.
#attribute("test")
fn @"weird function name"() { }