From 925c805d4b2431b1c3140ba9c9dd7fc81de8a7d7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 24 Nov 2015 13:37:14 -0700 Subject: [PATCH] add unreachable expression now creating .o file from hello.zig correctly --- README.md | 6 +-- src/codegen.cpp | 99 +++++++++++++++++++++++++++++++++-------------- src/main.cpp | 4 +- src/parser.cpp | 18 +++++++-- src/parser.hpp | 1 + src/tokenizer.cpp | 3 ++ src/tokenizer.hpp | 1 + test/hello.zig | 4 +- 8 files changed, 95 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 27842c1fac..6c8611a366 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,6 @@ readable, safe, optimal, and concise code to solve any computing problem. ## Roadmap - * Hello, world. - - Produce .o file. * Produce executable file instead of .o file. * Add debugging symbols. * Debug/Release mode. @@ -87,7 +85,7 @@ ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen) ParamDecl : token(Symbol) token(Colon) Type -Type : token(Symbol) | PointerType +Type : token(Symbol) | PointerType | token(Unreachable) PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type @@ -99,7 +97,7 @@ ExpressionStatement : Expression token(Semicolon) ReturnStatement : token(Return) Expression token(Semicolon) -Expression : token(Number) | token(String) | FnCall +Expression : token(Number) | token(String) | token(Unreachable) | FnCall FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ``` diff --git a/src/codegen.cpp b/src/codegen.cpp index 0577e98c59..5669408be5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -26,9 +26,14 @@ struct CodeGen { HashMap str_table; }; +struct TypeNode { + LLVMTypeRef type_ref; + bool is_unreachable; +}; + struct CodeGenNode { union { - LLVMTypeRef type_ref; // for NodeTypeType + TypeNode type_node; // for NodeTypeType } data; }; @@ -54,9 +59,16 @@ static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) { static LLVMTypeRef to_llvm_type(AstNode *type_node) { assert(type_node->type == NodeTypeType); assert(type_node->codegen_node); - assert(type_node->codegen_node->data.type_ref); + assert(type_node->codegen_node->data.type_node.type_ref); - return type_node->codegen_node->data.type_ref; + return type_node->codegen_node->data.type_node.type_ref; +} + + +static bool type_is_unreachable(AstNode *type_node) { + assert(type_node->type == NodeTypeType); + return type_node->data.type.type == AstNodeTypeTypePrimitive && + buf_eql_str(&type_node->data.type.primitive_name, "unreachable"); } static void analyze_node(CodeGen *g, AstNode *node) { @@ -83,13 +95,18 @@ static void analyze_node(CodeGen *g, AstNode *node) { AstNode *param_type = param_node->data.param_decl.type; fn_param_values[param_i] = to_llvm_type(param_type); } - LLVMTypeRef return_type = to_llvm_type(fn_proto->data.fn_proto.return_type); + AstNode *return_type_node = fn_proto->data.fn_proto.return_type; + LLVMTypeRef return_type = to_llvm_type(return_type_node); LLVMTypeRef fn_type = LLVMFunctionType(return_type, fn_param_values, params->length, 0); LLVMValueRef fn_val = LLVMAddFunction(g->mod, buf_ptr(name), fn_type); LLVMSetLinkage(fn_val, LLVMExternalLinkage); LLVMSetFunctionCallConv(fn_val, LLVMCCallConv); + if (type_is_unreachable(return_type_node)) { + LLVMAddFunctionAttr(fn_val, LLVMNoReturnAttribute); + } + FnTableEntry *fn_table_entry = allocate(1); fn_table_entry->fn_value = fn_val; fn_table_entry->proto_node = fn_proto; @@ -131,33 +148,43 @@ static void analyze_node(CodeGen *g, AstNode *node) { analyze_node(g, node->data.param_decl.type); break; case NodeTypeType: - node->codegen_node = allocate(1); - switch (node->data.type.type) { - case AstNodeTypeTypePrimitive: - { - Buf *name = &node->data.type.primitive_name; - if (buf_eql_str(name, "u8")) { - node->codegen_node->data.type_ref = LLVMInt8Type(); - } else if (buf_eql_str(name, "i32")) { - node->codegen_node->data.type_ref = LLVMInt32Type(); - } else if (buf_eql_str(name, "void")) { - node->codegen_node->data.type_ref = LLVMVoidType(); - } else { - add_node_error(g, node, - buf_sprintf("invalid type name: '%s'", buf_ptr(name))); - node->codegen_node->data.type_ref = LLVMInt8Type(); + { + node->codegen_node = allocate(1); + TypeNode *type_node = &node->codegen_node->data.type_node; + switch (node->data.type.type) { + case AstNodeTypeTypePrimitive: + { + Buf *name = &node->data.type.primitive_name; + if (buf_eql_str(name, "u8")) { + type_node->type_ref = LLVMInt8Type(); + } else if (buf_eql_str(name, "i32")) { + type_node->type_ref = LLVMInt32Type(); + } else if (buf_eql_str(name, "void")) { + type_node->type_ref = LLVMVoidType(); + } else if (buf_eql_str(name, "unreachable")) { + type_node->type_ref = LLVMVoidType(); + type_node->is_unreachable = true; + } else { + add_node_error(g, node, + buf_sprintf("invalid type name: '%s'", buf_ptr(name))); + type_node->type_ref = LLVMVoidType(); + } + break; } - break; - } - case AstNodeTypeTypePointer: - { - analyze_node(g, node->data.type.child_type); - node->codegen_node->data.type_ref = LLVMPointerType( - node->data.type.child_type->codegen_node->data.type_ref, 0); - break; - } + case AstNodeTypeTypePointer: + { + analyze_node(g, node->data.type.child_type); + TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node; + if (child_type_node->is_unreachable) { + add_node_error(g, node, + buf_create_from_str("pointer to unreachable not allowed")); + } + type_node->type_ref = LLVMPointerType(child_type_node->type_ref, 0); + break; + } + } + break; } - break; case NodeTypeBlock: for (int i = 0; i < node->data.block.statements.length; i += 1) { AstNode *child = node->data.block.statements.at(i); @@ -183,6 +210,8 @@ static void analyze_node(CodeGen *g, AstNode *node) { case AstNodeExpressionTypeFnCall: analyze_node(g, node->data.expression.data.fn_call); break; + case AstNodeExpressionTypeUnreachable: + break; } break; case NodeTypeFnCall: @@ -235,7 +264,11 @@ static LLVMValueRef gen_fn_call(CodeGen *g, AstNode *fn_call_node) { LLVMValueRef result = LLVMBuildCall(g->builder, fn_table_entry->fn_value, param_values, actual_param_count, ""); - return result; + if (type_is_unreachable(fn_table_entry->proto_node->data.fn_proto.return_type)) { + return LLVMBuildUnreachable(g->builder); + } else { + return result; + } } static LLVMValueRef find_or_create_string(CodeGen *g, Buf *str) { @@ -280,6 +313,8 @@ static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node) { } case AstNodeExpressionTypeFnCall: return gen_fn_call(g, expr_node->data.expression.data.fn_call); + case AstNodeExpressionTypeUnreachable: + return LLVMBuildUnreachable(g->builder); } zig_unreachable(); } @@ -333,6 +368,10 @@ void code_gen(CodeGen *g) { LLVMTypeRef function_type = LLVMFunctionType(ret_type, param_types, fn_proto->params.length, 0); LLVMValueRef fn = LLVMAddFunction(g->mod, buf_ptr(&fn_proto->name), function_type); + if (type_is_unreachable(fn_proto->return_type)) { + LLVMAddFunctionAttr(fn, LLVMNoReturnAttribute); + } + LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn, "entry"); LLVMPositionBuilderAtEnd(g->builder, entry_block); diff --git a/src/main.cpp b/src/main.cpp index 6ef75d2094..bb2ed78a6c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -102,7 +102,8 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi } else { for (int i = 0; i < errors->length; i += 1) { ErrorMsg *err = &errors->at(i); - fprintf(stderr, "Error: Line %d, column %d: %s\n", err->line_start, err->column_start, + fprintf(stderr, "Error: Line %d, column %d: %s\n", + err->line_start + 1, err->column_start + 1, buf_ptr(err->msg)); } return 1; @@ -115,6 +116,7 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi fprintf(stderr, "\nLink:\n"); fprintf(stderr, "------------------\n"); code_gen_link(codegen, false, out_file); + fprintf(stderr, "OK\n"); return 0; } diff --git a/src/parser.cpp b/src/parser.cpp index 72052e414a..9de20e9c9c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -236,7 +236,7 @@ static void ast_expect_token(ParseContext *pc, Token *token, TokenId token_id) { } /* -Type : token(Symbol) | PointerType; +Type : token(Symbol) | PointerType | token(Unreachable) PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type; */ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) { @@ -245,7 +245,10 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token AstNode *node = ast_create_node(NodeTypeType, token); - if (token->id == TokenIdSymbol) { + if (token->id == TokenIdKeywordUnreachable) { + node->data.type.type = AstNodeTypeTypePrimitive; + buf_init_from_str(&node->data.type.primitive_name, "unreachable"); + } else if (token->id == TokenIdSymbol) { node->data.type.type = AstNodeTypeTypePrimitive; ast_buf_from_token(pc, token, &node->data.type.primitive_name); } else if (token->id == TokenIdStar) { @@ -373,10 +376,16 @@ static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_to return node; } +/* +Expression : token(Number) | token(String) | token(Unreachable) | FnCall +*/ static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) { Token *token = &pc->tokens->at(token_index); AstNode *node = ast_create_node(NodeTypeExpression, token); - if (token->id == TokenIdSymbol) { + if (token->id == TokenIdKeywordUnreachable) { + node->data.expression.type = AstNodeExpressionTypeUnreachable; + token_index += 1; + } else if (token->id == TokenIdSymbol) { node->data.expression.type = AstNodeExpressionTypeFnCall; node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index); } else if (token->id == TokenIdNumberLiteral) { @@ -402,7 +411,7 @@ ExpressionStatement : Expression token(Semicolon) ; ReturnStatement : token(Return) Expression token(Semicolon) ; -Expression : token(Number) | token(String) | FnCall ; +Expression : token(Number) | token(String) | token(Unreachable) | FnCall FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ; */ @@ -420,6 +429,7 @@ static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_ ast_expect_token(pc, semicolon, TokenIdSemicolon); } else if (token->id == TokenIdSymbol || token->id == TokenIdStringLiteral || + token->id == TokenIdKeywordUnreachable || token->id == TokenIdNumberLiteral) { node->data.statement.type = AstNodeStatementTypeExpression; diff --git a/src/parser.hpp b/src/parser.hpp index 501749eea9..b82c0b6ec8 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -94,6 +94,7 @@ enum AstNodeExpressionType { AstNodeExpressionTypeNumber, AstNodeExpressionTypeString, AstNodeExpressionTypeFnCall, + AstNodeExpressionTypeUnreachable, }; struct AstNodeExpression { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index fafc5b1f38..fd28f73b8c 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -152,6 +152,8 @@ static void end_token(Tokenize *t) { t->cur_tok->id = TokenIdKeywordConst; } else if (mem_eql_str(token_mem, token_len, "extern")) { t->cur_tok->id = TokenIdKeywordExtern; + } else if (mem_eql_str(token_mem, token_len, "unreachable")) { + t->cur_tok->id = TokenIdKeywordUnreachable; } t->cur_tok = nullptr; @@ -311,6 +313,7 @@ static const char * token_name(Token *token) { case TokenIdKeywordMut: return "Mut"; case TokenIdKeywordReturn: return "Return"; case TokenIdKeywordExtern: return "Extern"; + case TokenIdKeywordUnreachable: return "Unreachable"; case TokenIdLParen: return "LParen"; case TokenIdRParen: return "RParen"; case TokenIdComma: return "Comma"; diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index 5d3a619ef9..a7ce7c637f 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -18,6 +18,7 @@ enum TokenId { TokenIdKeywordMut, TokenIdKeywordConst, TokenIdKeywordExtern, + TokenIdKeywordUnreachable, TokenIdLParen, TokenIdRParen, TokenIdComma, diff --git a/test/hello.zig b/test/hello.zig index 1d96b23766..6c52f81108 100644 --- a/test/hello.zig +++ b/test/hello.zig @@ -1,9 +1,9 @@ extern { fn puts(s: *mut u8) -> i32; - fn exit(code: i32); + fn exit(code: i32) -> unreachable; } -fn _start() { +fn _start() -> unreachable { puts("Hello, world!"); exit(0); }