diff --git a/README.md b/README.md index df6e0db00f..c69ba73404 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,6 @@ readable, safe, optimal, and concise code to solve any computing problem. ## Roadmap * Hello, world. - - Build AST - Code Gen - Produce .o file. * Produce executable file instead of .o file. diff --git a/src/codegen.cpp b/src/codegen.cpp index 70f4acfde8..16a3ee8a53 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3,10 +3,25 @@ #include +#include + struct CodeGen { AstNode *root; HashMap fn_decls; ZigList errors; + LLVMBuilderRef builder; + HashMap external_fns; +}; + +struct ExpressionNode { + AstNode *type_node; +}; + +struct CodeGenNode { + union { + LLVMTypeRef type_ref; // for NodeTypeType + ExpressionNode expr; // for NodeTypeExpression + } data; }; CodeGen *create_codegen(AstNode *root) { @@ -55,31 +70,165 @@ static void analyze_node(CodeGen *g, AstNode *node) { analyze_node(g, node->data.param_decl.type); break; case NodeTypeType: - break; - case NodeTypePointerType: + node->codegen_node = allocate(1); + switch (node->data.type.type) { + case AstNodeTypeTypePrimitive: + { + Buf *name = &node->data.type.primitive_name; + if (buf_eql_str(name, "u8")) { + node->codegen_node->data.type_ref = LLVMInt8Type(); + } else if (buf_eql_str(name, "i32")) { + node->codegen_node->data.type_ref = LLVMInt32Type(); + } else { + add_node_error(g, node, + buf_sprintf("invalid type name: '%s'", buf_ptr(name))); + } + break; + } + case AstNodeTypeTypePointer: + { + analyze_node(g, node->data.type.child_type); + node->codegen_node->data.type_ref = LLVMPointerType( + node->data.type.child_type->codegen_node->data.type_ref, 0); + break; + } + } break; case NodeTypeBlock: + for (int i = 0; i < node->data.block.statements.length; i += 1) { + AstNode *child = node->data.block.statements.at(i); + analyze_node(g, child); + } break; case NodeTypeStatement: - break; - case NodeTypeExpressionStatement: - break; - case NodeTypeReturnStatement: + switch (node->data.statement.type) { + case AstNodeStatementTypeExpression: + analyze_node(g, node->data.statement.data.expr.expression); + break; + case AstNodeStatementTypeReturn: + analyze_node(g, node->data.statement.data.retrn.expression); + break; + } break; case NodeTypeExpression: + switch (node->data.expression.type) { + case AstNodeExpressionTypeNumber: + break; + case AstNodeExpressionTypeString: + break; + case AstNodeExpressionTypeFnCall: + analyze_node(g, node->data.expression.data.fn_call); + break; + } break; case NodeTypeFnCall: + for (int i = 0; i < node->data.fn_call.params.length; i += 1) { + AstNode *child = node->data.fn_call.params.at(i); + analyze_node(g, child); + } break; } } + +/* TODO external fn + LLVMTypeRef puts_param_types[] = {LLVMPointerType(LLVMInt8Type(), 0)}; + LLVMTypeRef puts_type = LLVMFunctionType(LLVMInt32Type(), puts_param_types, 1, 0); + LLVMValueRef puts_fn = LLVMAddFunction(mod, "puts", puts_type); + LLVMSetLinkage(puts_fn, LLVMExternalLinkage); + */ + void semantic_analyze(CodeGen *g) { // Pass 1. analyze_node(g, g->root); } -void code_gen(CodeGen *g) { +static LLVMTypeRef to_llvm_type(AstNode *type_node) { + assert(type_node->type == NodeTypeType); + assert(type_node->codegen_node); + return type_node->codegen_node->data.type_ref; +} + +static LLVMValueRef gen_fn_call(CodeGen *g, AstNode *fn_call_node) { + assert(fn_call_node->type == NodeTypeFnCall); + + zig_panic("TODO support external fn declarations"); + //LLVMTypeRef fn_type = LLVMFunctionType(LLVMVoidType(), ); + + // resolve function name + //LLVMValueRef result = LLVMBuildCall(g->builder, + + + //return value; +} + +static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node) { + assert(expr_node->type == NodeTypeExpression); + switch (expr_node->data.expression.type) { + case AstNodeExpressionTypeNumber: + zig_panic("TODO number expr"); + break; + case AstNodeExpressionTypeString: + zig_panic("TODO string expr"); + break; + case AstNodeExpressionTypeFnCall: + return gen_fn_call(g, expr_node->data.expression.data.fn_call); + } + zig_unreachable(); +} + +static void gen_block(CodeGen *g, AstNode *block_node) { + assert(block_node->type == NodeTypeBlock); + + for (int i = 0; i < block_node->data.block.statements.length; i += 1) { + AstNode *statement_node = block_node->data.block.statements.at(i); + assert(statement_node->type == NodeTypeStatement); + switch (statement_node->data.statement.type) { + case AstNodeStatementTypeReturn: + { + AstNode *expr_node = statement_node->data.statement.data.retrn.expression; + LLVMValueRef value = gen_expr(g, expr_node); + LLVMBuildRet(g->builder, value); + break; + } + case AstNodeStatementTypeExpression: + { + AstNode *expr_node = statement_node->data.statement.data.expr.expression; + gen_expr(g, expr_node); + break; + } + } + } +} + +void code_gen(CodeGen *g) { + LLVMModuleRef mod = LLVMModuleCreateWithName("ZigModule"); + g->builder = LLVMCreateBuilder(); + + + for (int fn_decl_i = 0; fn_decl_i < g->root->data.root.fn_decls.length; fn_decl_i += 1) { + AstNode *fn_decl_node = g->root->data.root.fn_decls.at(fn_decl_i); + AstNodeFnDecl *fn_decl = &fn_decl_node->data.fn_decl; + + LLVMTypeRef ret_type = to_llvm_type(fn_decl->return_type); + LLVMTypeRef *param_types = allocate(fn_decl->params.length); + for (int param_decl_i = 0; param_decl_i < fn_decl->params.length; param_decl_i += 1) { + AstNode *param_node = fn_decl->params.at(param_decl_i); + assert(param_node->type == NodeTypeParamDecl); + AstNode *type_node = param_node->data.param_decl.type; + param_types[param_decl_i] = to_llvm_type(type_node); + } + LLVMTypeRef function_type = LLVMFunctionType(ret_type, param_types, fn_decl->params.length, 0); + LLVMValueRef fn = LLVMAddFunction(mod, buf_ptr(&fn_decl->name), function_type); + + LLVMBasicBlockRef entry = LLVMAppendBasicBlock(fn, "entry"); + LLVMPositionBuilderAtEnd(g->builder, entry); + + gen_block(g, fn_decl->body); + } + + LLVMDumpModule(mod); } ZigList *codegen_error_messages(CodeGen *g) { diff --git a/src/parser.cpp b/src/parser.cpp index ac1d9caf3d..4de1b9c2d4 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -26,16 +26,10 @@ const char *node_type_str(NodeType node_type) { return "ParamDecl"; case NodeTypeType: return "Type"; - case NodeTypePointerType: - return "PointerType"; case NodeTypeBlock: return "Block"; case NodeTypeStatement: return "Statement"; - case NodeTypeExpressionStatement: - return "ExpressionStatement"; - case NodeTypeReturnStatement: - return "ReturnStatement"; case NodeTypeExpression: return "Expression"; case NodeTypeFnCall: diff --git a/src/parser.hpp b/src/parser.hpp index 7a00fb88f1..4f4595cc6b 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -6,17 +6,15 @@ #include "tokenizer.hpp" struct AstNode; +struct CodeGenNode; enum NodeType { NodeTypeRoot, NodeTypeFnDecl, NodeTypeParamDecl, NodeTypeType, - NodeTypePointerType, NodeTypeBlock, NodeTypeStatement, - NodeTypeExpressionStatement, - NodeTypeReturnStatement, NodeTypeExpression, NodeTypeFnCall, }; @@ -99,6 +97,7 @@ struct AstNode { AstNode *parent; int line; int column; + CodeGenNode *codegen_node; union { AstNodeRoot root; AstNodeFnDecl fn_decl; diff --git a/test/hello.zig b/test/hello.zig index 4a90c871be..ea84b4f21a 100644 --- a/test/hello.zig +++ b/test/hello.zig @@ -1,4 +1,4 @@ -fn main(argc: i32, argv: *mut u8) -> i32 { +fn main(argc: i32, argv: *mut *mut u8) -> i32 { puts("Hello, world!\n"); return 0; }