mirror of
https://github.com/ziglang/zig.git
synced 2025-12-27 08:33:15 +00:00
stage1: memoize strings in the AST
Currently, stage1 runs astgen for every comptime function call, resulting in identifier strings being allocated multiple times, wasting memory. As a workaround until the code is adjusted to make astgen run only once per source node, we memoize the result into the AST. * Rename `ir_gen_*` to `astgen_*` - Oops, meant to do this in a separate commit. My bad. * tokenizer: avoid using designated initializer syntax. MSVC does not support it.
This commit is contained in:
parent
52b3daa90e
commit
f5d4fe3e17
@ -1123,6 +1123,14 @@ struct AstNodeContainerInitExpr {
|
||||
ContainerInitKind kind;
|
||||
};
|
||||
|
||||
struct AstNodeIdentifier {
|
||||
Buf *name;
|
||||
};
|
||||
|
||||
struct AstNodeEnumLiteral {
|
||||
Buf *name;
|
||||
};
|
||||
|
||||
// Payload for a bool literal AST node (`true` / `false`).
struct AstNodeBoolLiteral {
    bool value;
};
@ -1204,6 +1212,12 @@ struct AstNode {
|
||||
AstNodeAwaitExpr await_expr;
|
||||
AstNodeSuspend suspend;
|
||||
AstNodeAnyFrameType anyframe_type;
|
||||
|
||||
// These are part of an astgen workaround to use less memory by
|
||||
// memoizing into the AST. Once astgen is modified to only run once
|
||||
// per corresponding source, this workaround can be removed.
|
||||
AstNodeIdentifier identifier;
|
||||
AstNodeEnumLiteral enum_literal;
|
||||
} data;
|
||||
|
||||
// This is a function for use in the debugger to print
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -202,6 +202,19 @@ static void put_back_token(ParseContext *pc) {
|
||||
pc->current_token -= 1;
|
||||
}
|
||||
|
||||
// Decodes the string literal at `token` into a freshly allocated Buf.
// The caller owns the returned buffer. Panics on a malformed literal
// (proper error reporting is still a TODO upstream).
static Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token) {
    assert(root_struct->token_ids[token] == TokenIdStringLiteral);

    size_t byte_offset = root_struct->token_locs[token].offset;
    const char *source = buf_ptr(root_struct->source_code);

    Buf *result = buf_alloc();
    size_t bad_index;
    Error err = source_string_literal_buf(source + byte_offset, result, &bad_index);
    if (err) {
        zig_panic("TODO handle string literal parse error");
    }
    return result;
}
||||
static Buf *token_buf(ParseContext *pc, TokenIndex token) {
|
||||
if (token == 0)
|
||||
return nullptr;
|
||||
@ -3465,19 +3478,6 @@ Error source_char_literal(const char *source, uint32_t *result, size_t *bad_inde
|
||||
}
|
||||
|
||||
|
||||
// Decodes the string literal at `token` into a newly allocated Buf and
// returns it; ownership passes to the caller. A malformed literal is a
// hard panic for now (TODO: surface a real parse error).
Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token) {
    assert(root_struct->token_ids[token] == TokenIdStringLiteral);

    const char *literal_start = buf_ptr(root_struct->source_code) +
        root_struct->token_locs[token].offset;

    Buf *str = buf_alloc();
    size_t bad_index;
    Error err = source_string_literal_buf(literal_start, str, &bad_index);
    if (err) {
        zig_panic("TODO handle string literal parse error");
    }
    return str;
}
||||
Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token) {
|
||||
Error err;
|
||||
const char *source = buf_ptr(root_struct->source_code);
|
||||
@ -3515,14 +3515,15 @@ Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token) {
|
||||
|
||||
// Returns the name of an identifier AST node as a Buf.
//
// Currently, stage1 runs astgen for every comptime function call, so
// decoding the identifier here on every call would allocate the same
// string repeatedly. As a workaround until astgen runs only once per
// source node, memoize the result into the AST (AstNodeIdentifier::name).
Buf *node_identifier_buf(AstNode *node) {
    assert(node->type == NodeTypeIdentifier);
    if (node->data.identifier.name == nullptr) {
        RootStruct *root_struct = node->owner->data.structure.root_struct;
        node->data.identifier.name = token_identifier_buf(root_struct, node->main_token);
    }
    return node->data.identifier.name;
}
||||
// Parses and returns the contents of a string literal AST node.
// Ownership of the returned Buf passes to the caller.
//
// NOTE(review): the original span contained identifier-memoization code
// pasted after the `return` statement — unreachable here, and it read
// `node->data.identifier` fields of a string-literal node (wrong union
// member). That residue is removed; the memoization belongs in
// node_identifier_buf.
Buf *node_string_literal_buf(AstNode *node) {
    assert(node->type == NodeTypeStringLiteral);
    RootStruct *root_struct = node->owner->data.structure.root_struct;
    return token_string_literal_buf(root_struct, node->main_token);
}
void token_number_literal_bigint(RootStruct *root_struct, BigInt *result, TokenIndex token) {
|
||||
|
||||
@ -19,10 +19,8 @@ void ast_print(AstNode *node, int indent);
|
||||
void ast_visit_node_children(AstNode *node, void (*visit)(AstNode **, void *context), void *context);
|
||||
|
||||
Buf *node_identifier_buf(AstNode *node);
|
||||
Buf *node_string_literal_buf(AstNode *node);
|
||||
|
||||
Buf *token_identifier_buf(RootStruct *root_struct, TokenIndex token);
|
||||
Buf *token_string_literal_buf(RootStruct *root_struct, TokenIndex token);
|
||||
|
||||
void token_number_literal_bigint(RootStruct *root_struct, BigInt *result, TokenIndex token);
|
||||
|
||||
|
||||
@ -291,11 +291,11 @@ static void tokenize_error(Tokenize *t, const char *format, ...) {
|
||||
|
||||
// Starts a new token at the tokenizer's current position, recording its
// id and source location. One entry is appended to `ids` and exactly one
// matching entry to `locs`.
//
// Fix: the original appended the location TWICE — once via a
// designated-initializer temporary and once via `tok_loc` — leaving
// `locs` with two entries per token and desynchronized from `ids`.
// Keep only the explicit-assignment form: designated initializers are a
// C99 feature that MSVC's C++ compiler does not support.
static void begin_token(Tokenize *t, TokenId id) {
    t->out->ids.append(id);

    TokenLoc tok_loc;
    tok_loc.offset = (uint32_t)t->pos;
    tok_loc.line = t->line;
    tok_loc.column = t->column;
    t->out->locs.append(tok_loc);
}
|
||||
static void cancel_token(Tokenize *t) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user