From f6eecfe5f486b250de33a8129b1c85198be43280 Mon Sep 17 00:00:00 2001 From: Josh Wolfe Date: Mon, 7 Dec 2015 08:29:19 -0700 Subject: [PATCH] getting started on array types --- README.md | 6 +++-- src/analyze.cpp | 55 ++++++++++++++++++++++++++++++++++++++++--- src/analyze.hpp | 1 + src/codegen.cpp | 12 +++++----- src/parser.cpp | 35 +++++++++++++++++++-------- src/parser.hpp | 2 ++ src/semantic_info.hpp | 3 +++ src/tokenizer.cpp | 10 ++++++++ src/tokenizer.hpp | 2 ++ src/util.cpp | 7 ++++++ src/util.hpp | 4 ++++ 11 files changed, 116 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index f5f6af7c2e..b8ab541244 100644 --- a/README.md +++ b/README.md @@ -144,9 +144,11 @@ ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen) ParamDecl : token(Symbol) token(Colon) Type -Type : token(Symbol) | PointerType | token(Unreachable) +Type : token(Symbol) | token(Unreachable) | token(Void) | PointerType | ArrayType -PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type +PointerType : token(Star) (token(Const) | token(Mut)) Type + +ArrayType : token(LBracket) Type token(Semicolon) Expression token(RBracket) Block : token(LBrace) list(option(Statement), token(Semicolon)) token(RBrace) diff --git a/src/analyze.cpp b/src/analyze.cpp index a626649323..c62aea8bcc 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -48,18 +48,23 @@ static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node) } } +TypeTableEntry *new_type_table_entry() { /* Allocates one TypeTableEntry and calls init(2) on its arrays_by_size map before returning it. */ + TypeTableEntry *entry = allocate<TypeTableEntry>(1); + entry->arrays_by_size.init(2); + return entry; +} + TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool is_const) { TypeTableEntry **parent_pointer = is_const ? &child_type->pointer_const_parent : &child_type->pointer_mut_parent; - const char *const_or_mut_str = is_const ? 
"const" : "mut"; if (*parent_pointer) { return *parent_pointer; } else { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMPointerType(child_type->type_ref, 0); buf_resize(&entry->name, 0); - buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type->name)); + buf_appendf(&entry->name, "*%s %s", is_const ? "const" : "mut", buf_ptr(&child_type->name)); entry->di_type = LLVMZigCreateDebugPointerType(g->dbuilder, child_type->di_type, g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name)); g->type_table.put(&entry->name, entry); @@ -68,6 +73,28 @@ TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool } } +static TypeTableEntry *get_array_type(CodeGen *g, TypeTableEntry *child_type, int array_size) { /* Returns the entry for "array of child_type with array_size elements": reuses the one stored in child_type->arrays_by_size when present, otherwise creates it, names it "[child; size]" and records it in g->type_table and in that map. */ + auto existing_entry = child_type->arrays_by_size.maybe_get(array_size); + if (existing_entry) { + return existing_entry->value; + } else { + TypeTableEntry *entry = new_type_table_entry(); + entry->type_ref = LLVMArrayType(child_type->type_ref, array_size); + buf_resize(&entry->name, 0); + buf_appendf(&entry->name, "[%s; %d]", buf_ptr(&child_type->name), array_size); /* array_size is an int, hence %d */ + //entry->di_type = LLVMZigCreateDebugArrayType(g->dbuilder, ..., buf_ptr(&entry->name)); // TODO + + g->type_table.put(&entry->name, entry); + child_type->arrays_by_size.put(array_size, entry); + return entry; + } +} + +static int parse_int(Buf *number) { /* Converts the text held in number to an int with atoi, which gives no indication of malformed or out-of-range input. */ + // TODO: think about integer size of array sizes + return atoi(buf_ptr(number)); +} + static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node) { assert(node->type == NodeTypeType); assert(!node->codegen_node); @@ -98,6 +125,28 @@ static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node) { type_node->entry = get_pointer_to_type(g, child_type, node->data.type.is_const); return type_node->entry; } + case AstNodeTypeTypeArray: + { + resolve_type(g, node->data.type.child_type); + TypeTableEntry *child_type = 
node->data.type.child_type->codegen_node->data.type_node.entry; + if (child_type == g->builtin_types.entry_unreachable) { /* reports the error but does not return; the array type is still built below */ + add_node_error(g, node, + buf_create_from_str("array of unreachable not allowed")); + } + + AstNode *size_node = node->data.type.array_size; + int size; // TODO: think about integer size of array sizes + if (size_node->type != NodeTypeNumberLiteral) { /* only a number literal node is accepted as the array size */ + add_node_error(g, size_node, + buf_create_from_str("array size must be literal number")); + size = -1; /* placeholder after the error; get_array_type below is still called with this value */ + } else { + size = parse_int(&size_node->data.number); + } + + type_node->entry = get_array_type(g, child_type, size); // TODO + return type_node->entry; + } } zig_unreachable(); } diff --git a/src/analyze.hpp b/src/analyze.hpp index 839b88f53c..b398d8e22c 100644 --- a/src/analyze.hpp +++ b/src/analyze.hpp @@ -18,6 +18,7 @@ struct BlockContext; void semantic_analyze(CodeGen *g); void add_node_error(CodeGen *g, AstNode *node, Buf *msg); +TypeTableEntry *new_type_table_entry(); /* defined in analyze.cpp; returns a newly allocated entry */ TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool is_const); LocalVariableTableEntry *find_local_variable(BlockContext *context, Buf *name); diff --git a/src/codegen.cpp b/src/codegen.cpp index e1a5525e37..ce3eb174ef 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -781,12 +781,12 @@ static void do_code_gen(CodeGen *g) { static void define_primitive_types(CodeGen *g) { { // if this type is anywhere in the AST, we should never hit codegen. 
- TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); /* every builtin goes through new_type_table_entry(), which also initializes arrays_by_size; a bare allocate does not */ buf_init_from_str(&entry->name, "(invalid)"); g->builtin_types.entry_invalid = entry; } { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMInt1Type(); buf_init_from_str(&entry->name, "bool"); entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 1, 8, @@ -795,7 +795,7 @@ static void define_primitive_types(CodeGen *g) { g->builtin_types.entry_bool = entry; } { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMInt8Type(); buf_init_from_str(&entry->name, "u8"); entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 8, 8, @@ -805,7 +805,7 @@ static void define_primitive_types(CodeGen *g) { } g->builtin_types.entry_string_literal = get_pointer_to_type(g, g->builtin_types.entry_u8, true); { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMInt32Type(); buf_init_from_str(&entry->name, "i32"); entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 32, 32, @@ -814,7 +814,7 @@ static void define_primitive_types(CodeGen *g) { g->builtin_types.entry_i32 = entry; } { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMVoidType(); buf_init_from_str(&entry->name, "void"); entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 0, 0, @@ -823,7 +823,7 @@ static void define_primitive_types(CodeGen *g) { g->builtin_types.entry_void = entry; } { - TypeTableEntry *entry = allocate<TypeTableEntry>(1); + TypeTableEntry *entry = new_type_table_entry(); entry->type_ref = LLVMVoidType(); buf_init_from_str(&entry->name, "unreachable"); entry->di_type = g->builtin_types.entry_void->di_type; diff --git a/src/parser.cpp b/src/parser.cpp index fc47c7ad64..d6f9b96a8d 100644 --- 
a/src/parser.cpp +++ b/src/parser.cpp @@ -180,6 +180,13 @@ void ast_print(AstNode *node, int indent) { ast_print(node->data.type.child_type, indent + 2); break; } + case AstNodeTypeTypeArray: /* prints "ArrayType", then the element type and the size node, each at indent + 2 */ + { + fprintf(stderr, "ArrayType\n"); + ast_print(node->data.type.child_type, indent + 2); + ast_print(node->data.type.array_size, indent + 2); + break; + } } break; case NodeTypeReturnExpr: @@ -448,8 +455,9 @@ static void ast_parse_directives(ParseContext *pc, int *token_index, /* -Type : token(Symbol) | PointerType | token(Unreachable) -PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type; +Type : token(Symbol) | token(Unreachable) | token(Void) | PointerType | ArrayType +PointerType : token(Star) (token(Const) | token(Mut)) Type +ArrayType : token(LBracket) Type token(Semicolon) Expression token(RBracket) */ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) { Token *token = &pc->tokens->at(token_index); @@ -463,12 +471,6 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token } else if (token->id == TokenIdKeywordVoid) { node->data.type.type = AstNodeTypeTypePrimitive; buf_init_from_str(&node->data.type.primitive_name, "void"); - } else if (token->id == TokenIdKeywordTrue) { - node->data.type.type = AstNodeTypeTypePrimitive; - buf_init_from_str(&node->data.type.primitive_name, "true"); - } else if (token->id == TokenIdKeywordFalse) { - node->data.type.type = AstNodeTypeTypePrimitive; - buf_init_from_str(&node->data.type.primitive_name, "false"); } else if (token->id == TokenIdSymbol) { node->data.type.type = AstNodeTypeTypePrimitive; ast_buf_from_token(pc, token, &node->data.type.primitive_name); @@ -485,6 +487,20 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token } node->data.type.child_type = ast_parse_type(pc, token_index, &token_index); + } else if (token->id == TokenIdLBracket) { + node->data.type.type = AstNodeTypeTypeArray; + + 
node->data.type.child_type = ast_parse_type(pc, token_index, &token_index); /* element type of the array */ + + Token *semicolon_token = &pc->tokens->at(token_index); + token_index += 1; + ast_expect_token(pc, semicolon_token, TokenIdSemicolon); /* a semicolon separates the element type from the size */ + + node->data.type.array_size = ast_parse_expression(pc, &token_index, true); /* the size is parsed as a general expression */ + + Token *rbracket_token = &pc->tokens->at(token_index); + token_index += 1; + ast_expect_token(pc, rbracket_token, TokenIdRBracket); /* closing bracket of the array type */ } else { ast_invalid_token_error(pc, token); } @@ -494,8 +510,7 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token } /* -ParamDecl : token(Symbol) token(Colon) Type { -}; +ParamDecl : token(Symbol) token(Colon) Type */ static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) { Token *param_name = &pc->tokens->at(token_index); diff --git a/src/parser.hpp b/src/parser.hpp index f029991878..adac6e7043 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -81,12 +81,14 @@ struct AstNodeParamDecl { enum AstNodeTypeType { AstNodeTypeTypePrimitive, AstNodeTypeTypePointer, + AstNodeTypeTypeArray, /* array type; its node carries child_type and array_size */ }; struct AstNodeType { AstNodeTypeType type; Buf primitive_name; AstNode *child_type; + AstNode *array_size; /* size expression node; assigned by ast_parse_type in the array branch */ bool is_const; }; diff --git a/src/semantic_info.hpp b/src/semantic_info.hpp index 47cb20e92c..0ab0c01b1c 100644 --- a/src/semantic_info.hpp +++ b/src/semantic_info.hpp @@ -23,8 +23,11 @@ struct TypeTableEntry { bool pointer_is_const; int user_defined_id; Buf name; + + // use these fields to make sure we don't duplicate type table entries for the same type TypeTableEntry *pointer_const_parent; TypeTableEntry *pointer_mut_parent; + HashMap arrays_by_size; /* array types whose element type is this entry, looked up by size in get_array_type. NOTE(review): HashMap appears here without template arguments; presumably int keys, TypeTableEntry pointer values, int_hash and int_eq - confirm */ }; struct ImportTableEntry { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index c98078fdf9..5244e4174b 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -259,6 +259,14 @@ void tokenize(Buf *buf, Tokenization *out) { begin_token(&t, TokenIdRBrace); end_token(&t); break; + case '[': /* the token is begun and ended on this same character */ + begin_token(&t, TokenIdLBracket); + 
end_token(&t); + break; + case ']': /* the token is begun and ended on this same character */ + begin_token(&t, TokenIdRBracket); + end_token(&t); + break; case ';': begin_token(&t, TokenIdSemicolon); end_token(&t); @@ -601,6 +609,8 @@ static const char * token_name(Token *token) { case TokenIdStar: return "Star"; case TokenIdLBrace: return "LBrace"; case TokenIdRBrace: return "RBrace"; + case TokenIdLBracket: return "LBracket"; + case TokenIdRBracket: return "RBracket"; case TokenIdStringLiteral: return "StringLiteral"; case TokenIdSemicolon: return "Semicolon"; case TokenIdNumberLiteral: return "NumberLiteral"; diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index d6cd120bd8..f0e4e5613e 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -36,6 +36,8 @@ enum TokenId { TokenIdStar, TokenIdLBrace, TokenIdRBrace, + TokenIdLBracket, + TokenIdRBracket, TokenIdStringLiteral, TokenIdSemicolon, TokenIdNumberLiteral, diff --git a/src/util.cpp b/src/util.cpp index 341b1c354b..cb87ab6243 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -19,3 +19,10 @@ void zig_panic(const char *format, ...) { va_end(ap); abort(); } + +uint32_t int_hash(int i) { /* Returns i converted to uint32_t; a value conversion is used so no pointer to i is reinterpreted. */ + return static_cast<uint32_t>(i); +} +bool int_eq(int a, int b) { /* Returns whether a and b are equal. */ + return a == b; +} diff --git a/src/util.hpp b/src/util.hpp index 74fcf85020..d5729e1ba3 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -9,6 +9,7 @@ #define ZIG_UTIL_HPP #include +#include /* NOTE(review): the header name is missing on this added line; presumably stdint.h, since uint32_t is newly used in this header - confirm */ #include #include @@ -78,4 +79,7 @@ static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) return memcmp(mem, str, mem_len) == 0; } +uint32_t int_hash(int i); /* defined in util.cpp */ +bool int_eq(int a, int b); /* defined in util.cpp */ + #endif