stage1: memoize strings in the AST

Currently, stage1 runs astgen for every comptime function call,
resulting in identifier strings being allocated multiple times,
wasting memory. As a workaround until the code is adjusted to
make astgen run only once per source node, we memoize the
result into the AST.

 * Rename `ir_gen_*` to `astgen_*`
   - Oops, meant to do this in a separate commit. My bad.
 * tokenizer: avoid using designated initializer syntax.
   MSVC does not support it.
This commit is contained in:
Andrew Kelley 2021-05-28 15:22:03 -07:00
parent 52b3daa90e
commit f5d4fe3e17
5 changed files with 489 additions and 467 deletions

View File

@ -1123,6 +1123,14 @@ struct AstNodeContainerInitExpr {
ContainerInitKind kind;
};
// Per-node cache for an identifier's name. The accessor that reads
// node->data.identifier.name decodes the token only when this field is
// null and returns the stored Buf otherwise.
struct AstNodeIdentifier {
// Cached identifier string. The accessor tests it against nullptr, so it is
// expected to start out null — TODO confirm nodes are zero-initialized.
Buf *name;
};
// Per-node storage for an enum literal's name, held in AstNode's `data`
// member as `enum_literal`.
// NOTE(review): the code that fills this field is not visible in this view;
// presumably it is memoized the same way as the identifier name — confirm.
struct AstNodeEnumLiteral {
Buf *name;
};
// Payload struct carrying a single bool; by its name, the value of a boolean
// literal node.
struct AstNodeBoolLiteral {
bool value;
};
@ -1204,6 +1212,12 @@ struct AstNode {
AstNodeAwaitExpr await_expr;
AstNodeSuspend suspend;
AstNodeAnyFrameType anyframe_type;
// These are part of an astgen workaround to use less memory by
// memoizing into the AST. Once astgen is modified to only run once
// per corresponding source node, this workaround can be removed.
AstNodeIdentifier identifier;
AstNodeEnumLiteral enum_literal;
} data;
// This is a function for use in the debugger to print

File diff suppressed because it is too large Load Diff

View File

@ -202,6 +202,19 @@ static void put_back_token(ParseContext *pc) {
pc->current_token -= 1;
}
// Decodes the string literal token at index `token` into a newly allocated
// Buf and returns it. The token must be a TokenIdStringLiteral (asserted).
// A literal that fails to decode currently aborts through zig_panic.
static Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token) {
    assert(root_struct->token_ids[token] == TokenIdStringLiteral);

    // First byte of the token inside the file's source text.
    const char *literal_start =
        buf_ptr(root_struct->source_code) + root_struct->token_locs[token].offset;

    Buf *result = buf_alloc();
    size_t bad_index; // receives the failing position; not reported yet
    Error err = source_string_literal_buf(literal_start, result, &bad_index);
    if (err) {
        zig_panic("TODO handle string literal parse error");
    }
    return result;
}
static Buf *token_buf(ParseContext *pc, TokenIndex token) {
if (token == 0)
return nullptr;
@ -3465,19 +3478,6 @@ Error source_char_literal(const char *source, uint32_t *result, size_t *bad_inde
}
// Decodes the string literal token at index `token` into a newly allocated
// Buf and returns it. The token must be a TokenIdStringLiteral (asserted).
// A literal that fails to decode currently aborts through zig_panic.
// NOTE(review): a static definition with the same name and body appears
// earlier in this view; presumably this is the copy the change removes —
// confirm against the real file.
Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token) {
    assert(root_struct->token_ids[token] == TokenIdStringLiteral);

    // First byte of the token inside the file's source text.
    const char *literal_start =
        buf_ptr(root_struct->source_code) + root_struct->token_locs[token].offset;

    Buf *result = buf_alloc();
    size_t bad_index; // receives the failing position; not reported yet
    Error err = source_string_literal_buf(literal_start, result, &bad_index);
    if (err) {
        zig_panic("TODO handle string literal parse error");
    }
    return result;
}
Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token) {
Error err;
const char *source = buf_ptr(root_struct->source_code);
@ -3515,14 +3515,15 @@ Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token) {
// Returns the identifier string for an identifier node by decoding the node's
// main token with token_identifier_buf. The node type is asserted.
// NOTE(review): this view has lost its diff markers. The memoizing lines
// further down that read node->data.identifier.name presumably replace the
// two statements after the assert — confirm against the real file.
Buf *node_identifier_buf(AstNode *node) {
assert(node->type == NodeTypeIdentifier);
// The root struct reached through the node's owner is what the token
// helper needs to locate the token.
RootStruct *root_struct = node->owner->data.structure.root_struct;
return token_identifier_buf(root_struct, node->main_token);
}
// Returns the string literal for a string literal node by decoding the node's
// main token with token_string_literal_buf. The node type is asserted.
// NOTE(review): this view has lost its diff markers. The statements after the
// first `return` are unreachable as written and operate on
// node->data.identifier, so they presumably belong to node_identifier_buf
// — confirm against the real file.
Buf *node_string_literal_buf(AstNode *node) {
assert(node->type == NodeTypeStringLiteral);
RootStruct *root_struct = node->owner->data.structure.root_struct;
return token_string_literal_buf(root_struct, node->main_token);
// Currently, stage1 runs astgen for every comptime function call,
// resulting in the allocation here wasting memory. As a workaround until
// the code is adjusted to make astgen run only once per source node,
// we memoize the result into the AST here.
// Decode the token only when no name is stored yet; otherwise reuse it.
if (node->data.identifier.name == nullptr) {
RootStruct *root_struct = node->owner->data.structure.root_struct;
node->data.identifier.name = token_identifier_buf(root_struct, node->main_token);
}
return node->data.identifier.name;
}
void token_number_literal_bigint(RootStruct *root_struct, BigInt *result, TokenIndex token) {

View File

@ -19,10 +19,8 @@ void ast_print(AstNode *node, int indent);
void ast_visit_node_children(AstNode *node, void (*visit)(AstNode **, void *context), void *context);
Buf *node_identifier_buf(AstNode *node);
Buf *node_string_literal_buf(AstNode *node);
Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token);
Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token);
void token_number_literal_bigint(RootStruct *root_struct, BigInt *result, TokenIndex token);

View File

@ -291,11 +291,11 @@ static void tokenize_error(Tokenize *t, const char *format, ...) {
// Starts a new token: appends its id to the output id list and appends a
// location (byte offset, line, column) taken from the tokenizer's current
// state to the output location list.
// NOTE(review): this view has lost its diff markers, so both the
// designated-initializer append and the field-by-field append appear here.
// The commit message says the designated-initializer form is being avoided,
// so presumably only the `tok_loc` version is live — confirm against the
// real file.
static void begin_token(Tokenize *t, TokenId id) {
t->out->ids.append(id);
t->out->locs.append({
.offset = (uint32_t) t->pos,
.line = t->line,
.column = t->column,
});
// Fill the location field by field rather than with a designated
// initializer, which per the commit message MSVC does not support.
TokenLoc tok_loc;
tok_loc.offset = (uint32_t) t->pos;
tok_loc.line = t->line;
tok_loc.column = t->column;
t->out->locs.append(tok_loc);
}
static void cancel_token(Tokenize *t) {