parseh command, parses a C .h file and produces extern decls

This commit is contained in:
Andrew Kelley 2015-12-03 15:59:14 -07:00
parent 174e58a05f
commit ad3f98c615
6 changed files with 503 additions and 83 deletions

View File

@ -16,6 +16,9 @@ find_package(llvm)
include_directories(${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBDIRS})
find_package(clang)
include_directories(${CLANG_INCLUDE_DIR})
include_directories(
${CMAKE_SOURCE_DIR}
${CMAKE_BINARY_DIR}
@ -33,6 +36,7 @@ set(ZIG_SOURCES
"${CMAKE_SOURCE_DIR}/src/util.cpp"
"${CMAKE_SOURCE_DIR}/src/errmsg.cpp"
"${CMAKE_SOURCE_DIR}/src/zig_llvm.cpp"
"${CMAKE_SOURCE_DIR}/src/parseh.cpp"
)
set(TEST_SOURCES
@ -64,6 +68,7 @@ set_target_properties(zig PROPERTIES
COMPILE_FLAGS ${EXE_CFLAGS})
target_link_libraries(zig LINK_PUBLIC
${LLVM_LIBRARIES}
${CLANG_LIBRARY}
)
install(TARGETS zig DESTINATION bin)

16
cmake/Findclang.cmake Normal file
View File

@ -0,0 +1,16 @@
# Copyright (c) 2015 Andrew Kelley
# This file is MIT licensed.
# See http://opensource.org/licenses/MIT
# Find module for libclang (the clang C API).
# Result variables:
# CLANG_FOUND - set by find_package_handle_standard_args when both values below were found
# CLANG_INCLUDE_DIR - directory that contains clang-c/Index.h
# CLANG_LIBRARY - path of the library named "clang"
# Locate the header directory by looking for the libclang C API header.
find_path(CLANG_INCLUDE_DIR NAMES clang-c/Index.h)
# Locate the library itself.
find_library(CLANG_LIBRARY NAMES clang)
# Report success/failure in the standard way, based on the two cache variables.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CLANG DEFAULT_MSG CLANG_LIBRARY CLANG_INCLUDE_DIR)
# Keep the cache entries out of the default (non-advanced) cache view.
mark_as_advanced(CLANG_INCLUDE_DIR CLANG_LIBRARY)

View File

@ -59,6 +59,7 @@ static inline void buf_deinit(Buf *buf) {
}
static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) {
assert(len >= 0);
buf->list.resize(len + 1);
memcpy(buf_ptr(buf), ptr, len);
buf->list.at(buf_len(buf)) = 0;
@ -73,6 +74,7 @@ static inline void buf_init_from_buf(Buf *buf, Buf *other) {
}
static inline Buf *buf_create_from_mem(const char *ptr, int len) {
assert(len >= 0);
Buf *buf = allocate<Buf>(1);
buf_init_from_mem(buf, ptr, len);
return buf;
@ -82,6 +84,10 @@ static inline Buf *buf_create_from_str(const char *str) {
return buf_create_from_mem(str, strlen(str));
}
// Creates a new Buf holding the bytes of `buf`, by handing its pointer and
// length to buf_create_from_mem.
static inline Buf *buf_create_from_buf(Buf *buf) {
    const char *src_bytes = buf_ptr(buf);
    int src_len = buf_len(buf);
    return buf_create_from_mem(src_bytes, src_len);
}
static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
assert(in_buf->list.length);
assert(start >= 0);

View File

@ -10,15 +10,17 @@
#include "codegen.hpp"
#include "os.hpp"
#include "error.hpp"
#include "parseh.hpp"
#include <stdio.h>
static int usage(const char *arg0) {
fprintf(stderr, "Usage: %s [command] [options] target\n"
fprintf(stderr, "Usage: %s [command] [options]\n"
"Commands:\n"
" build create executable, object, or library from target\n"
" version print version number and exit\n"
"Optional Options:\n"
" parseh convert a c header file to zig extern declarations\n"
"Command: build target\n"
" --release build with optimizations on and debug protection off\n"
" --static output will be statically linked\n"
" --strip exclude debug symbols\n"
@ -27,11 +29,15 @@ static int usage(const char *arg0) {
" --output [file] override destination path\n"
" --verbose turn on compiler debug output\n"
" --color [auto|off|on] enable or disable colored error messages\n"
"Command: parseh target\n"
" -isystem [dir] add additional search path for other .h files\n"
" -dirafter [dir] same as -isystem but do it last\n"
" -B[prefix] set the C compiler data path\n"
, arg0);
return EXIT_FAILURE;
}
static int version(void) {
static int version(const char *arg0, int argc, char **argv) {
printf("%s\n", ZIG_VERSION_STRING);
return EXIT_SUCCESS;
}
@ -48,62 +54,11 @@ struct Build {
ErrColor color;
};
static int build(const char *arg0, Build *b) {
static int build(const char *arg0, int argc, char **argv) {
int err;
if (!b->in_file)
return usage(arg0);
Buf in_file_buf = BUF_INIT;
buf_init_from_str(&in_file_buf, b->in_file);
Buf root_source_dir = BUF_INIT;
Buf root_source_code = BUF_INIT;
Buf root_source_name = BUF_INIT;
if (buf_eql_str(&in_file_buf, "-")) {
os_get_cwd(&root_source_dir);
if ((err = os_fetch_file(stdin, &root_source_code))) {
fprintf(stderr, "unable to read stdin: %s\n", err_str(err));
return 1;
}
buf_init_from_str(&root_source_name, "");
} else {
os_path_split(&in_file_buf, &root_source_dir, &root_source_name);
if ((err = os_fetch_file_path(buf_create_from_str(b->in_file), &root_source_code))) {
fprintf(stderr, "unable to open '%s': %s\n", b->in_file, err_str(err));
return 1;
}
}
CodeGen *g = codegen_create(&root_source_dir);
codegen_set_build_type(g, b->release ? CodeGenBuildTypeRelease : CodeGenBuildTypeDebug);
codegen_set_strip(g, b->strip);
codegen_set_is_static(g, b->is_static);
if (b->out_type != OutTypeUnknown)
codegen_set_out_type(g, b->out_type);
if (b->out_name)
codegen_set_out_name(g, buf_create_from_str(b->out_name));
codegen_set_verbose(g, b->verbose);
codegen_set_errmsg_color(g, b->color);
codegen_add_root_code(g, &root_source_name, &root_source_code);
codegen_link(g, b->out_file);
return 0;
}
enum Cmd {
CmdNone,
CmdBuild,
CmdVersion,
};
int main(int argc, char **argv) {
char *arg0 = argv[0];
Build b = {0};
Cmd cmd = CmdNone;
for (int i = 1; i < argc; i += 1) {
for (int i = 0; i < argc; i += 1) {
char *arg = argv[i];
if (arg[0] == '-' && arg[1] == '-') {
if (strcmp(arg, "--release") == 0) {
@ -148,40 +103,109 @@ int main(int argc, char **argv) {
return usage(arg0);
}
}
} else if (cmd == CmdNone) {
} else if (!b.in_file) {
b.in_file = arg;
} else {
return usage(arg0);
}
}
if (!b.in_file)
return usage(arg0);
Buf in_file_buf = BUF_INIT;
buf_init_from_str(&in_file_buf, b.in_file);
Buf root_source_dir = BUF_INIT;
Buf root_source_code = BUF_INIT;
Buf root_source_name = BUF_INIT;
if (buf_eql_str(&in_file_buf, "-")) {
os_get_cwd(&root_source_dir);
if ((err = os_fetch_file(stdin, &root_source_code))) {
fprintf(stderr, "unable to read stdin: %s\n", err_str(err));
return 1;
}
buf_init_from_str(&root_source_name, "");
} else {
os_path_split(&in_file_buf, &root_source_dir, &root_source_name);
if ((err = os_fetch_file_path(buf_create_from_str(b.in_file), &root_source_code))) {
fprintf(stderr, "unable to open '%s': %s\n", b.in_file, err_str(err));
return 1;
}
}
CodeGen *g = codegen_create(&root_source_dir);
codegen_set_build_type(g, b.release ? CodeGenBuildTypeRelease : CodeGenBuildTypeDebug);
codegen_set_strip(g, b.strip);
codegen_set_is_static(g, b.is_static);
if (b.out_type != OutTypeUnknown)
codegen_set_out_type(g, b.out_type);
if (b.out_name)
codegen_set_out_name(g, buf_create_from_str(b.out_name));
codegen_set_verbose(g, b.verbose);
codegen_set_errmsg_color(g, b.color);
codegen_add_root_code(g, &root_source_name, &root_source_code);
codegen_link(g, b.out_file);
return 0;
}
// Implements the `parseh` command.
//
// arg0 is the program name (used for the usage text); argc/argv are the
// arguments that follow the command word. Arguments starting with -I or -B
// are forwarded to clang unchanged, `-isystem <dir>` is forwarded as two
// arguments, and the single remaining non-option argument is the header to
// parse. The generated declarations are written to stdout.
//
// Returns 0 on success, or the result of usage() when the arguments are
// invalid.
static int parseh(const char *arg0, int argc, char **argv) {
    char *in_file = nullptr;
    ZigList<const char *> clang_argv = {0};

    for (int i = 0; i < argc; i += 1) {
        char *arg = argv[i];
        if (arg[0] == '-') {
            if (arg[1] == 'I') {
                clang_argv.append(arg);
            } else if (strcmp(arg, "-isystem") == 0) {
                if (i + 1 >= argc) {
                    return usage(arg0);
                }
                clang_argv.append("-isystem");
                clang_argv.append(argv[i + 1]);
                // Skip the directory operand we just consumed; otherwise the
                // next iteration would mistake it for the target file.
                i += 1;
            } else if (arg[1] == 'B') {
                clang_argv.append(arg);
            } else {
                fprintf(stderr, "unrecognized argument: %s\n", arg);
                return usage(arg0);
            }
        } else if (!in_file) {
            in_file = arg;
        } else {
            // More than one target was given.
            return usage(arg0);
        }
    }

    if (!in_file) {
        fprintf(stderr, "missing target argument\n");
        return usage(arg0);
    }

    parse_h_file(in_file, &clang_argv, stdout);
    return 0;
}
int main(int argc, char **argv) {
char *arg0 = argv[0];
int (*cmd)(const char *, int, char **) = nullptr;
for (int i = 1; i < argc; i += 1) {
char *arg = argv[i];
if (arg[0] == '-' && arg[1] == '-') {
return usage(arg0);
} else {
if (strcmp(arg, "build") == 0) {
cmd = CmdBuild;
cmd = build;
} else if (strcmp(arg, "version") == 0) {
cmd = CmdVersion;
cmd = version;
} else if (strcmp(arg, "parseh") == 0) {
cmd = parseh;
} else {
fprintf(stderr, "Unrecognized command: %s\n", arg);
return usage(arg0);
}
} else {
switch (cmd) {
case CmdNone:
zig_unreachable();
case CmdBuild:
if (!b.in_file) {
b.in_file = arg;
} else {
return usage(arg0);
}
break;
case CmdVersion:
return usage(arg0);
}
return cmd(arg0, argc - i - 1, &argv[i + 1]);
}
}
switch (cmd) {
case CmdNone:
return usage(arg0);
case CmdBuild:
return build(arg0, &b);
case CmdVersion:
return version();
}
zig_unreachable();
return usage(arg0);
}

351
src/parseh.cpp Normal file
View File

@ -0,0 +1,351 @@
#include "parseh.hpp"
#include <clang-c/Index.h>
#include <string.h>
// One parameter of a collected C function.
struct Arg {
// Parameter name, filled in from the spelling of the ParmDecl cursor.
Buf name;
// Parameter type as produced by to_zig_type.
Buf *type;
};
// One C function declaration collected while visiting the translation unit.
struct Fn {
// Function name, taken from the spelling of the FunctionDecl cursor.
Buf name;
// Return type as produced by to_zig_type.
Buf *return_type;
// Array of arg_count parameters.
Arg *args;
// Number of entries in args (from clang_getNumArgTypes).
int arg_count;
};
// State shared between parse_h_file and the cursor visitor.
struct ParseH {
// Stream the generated declarations are written to.
FILE *f;
// Functions that have been completely collected (appended by end_fn).
ZigList<Fn *> fn_list;
// Function currently being collected, or null when there is none.
Fn *cur_fn;
// Index into cur_fn->args of the next parameter name to fill in.
int arg_index;
// Current indentation, in spaces, used by print_indent.
int cur_indent;
};
// Number of spaces added to cur_indent when entering the extern block.
static const int indent_size = 4;
// Returns true when `str` begins with every character of `prefix`.
// An empty prefix matches any string.
static bool str_has_prefix(const char *str, const char *prefix) {
    for (; *prefix != '\0'; str += 1, prefix += 1) {
        // Also covers `str` ending early: its terminator never equals a
        // non-terminator prefix character.
        if (*str != *prefix)
            return false;
    }
    return true;
}
// Returns the spelling of `type` with any leading run of "struct ", "enum "
// and "const " removed (in any order, repeated as often as they occur).
//
// The returned pointer points into the CXString obtained from
// clang_getTypeSpelling, which this function never disposes.
static const char *prefixes_stripped(CXType type) {
    static const char *prefixes[] = {
        "struct ",
        "enum ",
        "const ",
    };

    CXString name = clang_getTypeSpelling(type);
    const char *c_name = clang_getCString(name);

    // Keep scanning the prefix table from the top until a full pass strips
    // nothing.
    bool stripped_one;
    do {
        stripped_one = false;
        for (int i = 0; i < array_length(prefixes); i += 1) {
            const char *prefix = prefixes[i];
            if (str_has_prefix(c_name, prefix)) {
                c_name += strlen(prefix);
                stripped_one = true;
                break;
            }
        }
    } while (stripped_one);

    return c_name;
}
// Translates a clang type into the text of the corresponding zig type.
//
// The type is canonicalized first. Primitive kinds map to a fixed name,
// pointers recurse on the pointee and are prefixed with "*const " or "*mut ",
// and records/enums use their spelling with the struct/enum/const prefixes
// stripped. Kinds that are not handled yet abort via zig_panic.
static Buf *to_zig_type(CXType raw_type) {
    CXType canonical = clang_getCanonicalType(raw_type);

    // Set by the primitive cases below; converted to a Buf after the switch.
    const char *zig_name = nullptr;

    switch (canonical.kind) {
        case CXType_Invalid:
            zig_unreachable();
        case CXType_Unexposed:
            zig_panic("clang C api insufficient");

        case CXType_Void:       zig_name = "void"; break;
        case CXType_Bool:       zig_name = "bool"; break;
        case CXType_SChar:      zig_name = "i8"; break;
        case CXType_Char_U:
        case CXType_Char_S:
        case CXType_UChar:      zig_name = "u8"; break;
        case CXType_UShort:     zig_name = "c_ushort"; break;
        case CXType_UInt:       zig_name = "c_uint"; break;
        case CXType_ULong:      zig_name = "c_ulong"; break;
        case CXType_ULongLong:  zig_name = "c_ulonglong"; break;
        case CXType_Short:      zig_name = "c_short"; break;
        case CXType_Int:        zig_name = "c_int"; break;
        case CXType_Long:       zig_name = "c_long"; break;
        case CXType_LongLong:   zig_name = "c_longlong"; break;
        case CXType_Float:      zig_name = "f32"; break;
        case CXType_Double:     zig_name = "f64"; break;
        case CXType_LongDouble: zig_name = "f128"; break;

        case CXType_Pointer:
            {
                CXType pointee_type = clang_getPointeeType(canonical);
                Buf *pointee_buf = to_zig_type(pointee_type);
                if (clang_isConstQualifiedType(pointee_type))
                    return buf_sprintf("*const %s", buf_ptr(pointee_buf));
                return buf_sprintf("*mut %s", buf_ptr(pointee_buf));
            }

        case CXType_Record:
        case CXType_Enum:
            return buf_sprintf("%s", prefixes_stripped(canonical));

        // Everything below is not translated yet.
        case CXType_WChar:
        case CXType_Char16:
        case CXType_Char32:
        case CXType_UInt128:
        case CXType_Int128:
        case CXType_NullPtr:
        case CXType_Overload:
        case CXType_Dependent:
        case CXType_ObjCId:
        case CXType_ObjCClass:
        case CXType_ObjCSel:
        case CXType_Complex:
        case CXType_BlockPointer:
        case CXType_LValueReference:
        case CXType_RValueReference:
        case CXType_Typedef:
        case CXType_ObjCInterface:
        case CXType_ObjCObjectPointer:
        case CXType_FunctionNoProto:
        case CXType_FunctionProto:
        case CXType_ConstantArray:
        case CXType_Vector:
        case CXType_IncompleteArray:
        case CXType_VariableArray:
        case CXType_DependentSizedArray:
        case CXType_MemberPointer:
            zig_panic("TODO");
    }

    if (zig_name)
        return buf_create_from_str(zig_name);

    // A kind value that matched none of the cases above.
    zig_unreachable();
}
// Decides whether a declaration with the given storage class is emitted.
// None, extern and auto are exported; static, private-extern, OpenCL
// work-group-local and register are not. Any other value (including
// CX_SC_Invalid) is treated as unreachable.
static bool is_storage_class_export(CX_StorageClass storage_class) {
    if (storage_class == CX_SC_None ||
        storage_class == CX_SC_Extern ||
        storage_class == CX_SC_Auto)
    {
        return true;
    }

    if (storage_class == CX_SC_Static ||
        storage_class == CX_SC_PrivateExtern ||
        storage_class == CX_SC_OpenCLWorkGroupLocal ||
        storage_class == CX_SC_Register)
    {
        return false;
    }

    zig_unreachable();
}
// Starts collecting a new function. There must be no function in progress.
static void begin_fn(ParseH *p) {
    assert(p->cur_fn == nullptr);
    Fn *fresh_fn = allocate<Fn>(1);
    p->cur_fn = fresh_fn;
}
// Finishes the function in progress, if any, by moving it onto fn_list.
// Does nothing when no function is being collected.
static void end_fn(ParseH *p) {
    Fn *finished_fn = p->cur_fn;
    if (!finished_fn)
        return;

    p->fn_list.append(finished_fn);
    p->cur_fn = nullptr;
}
// clang_visitChildren callback that collects exported function declarations
// into the ParseH passed as client_data.
//
// - FunctionDecl: skipped unless its storage class is exported; otherwise the
//   previous function (if any) is finished, a new one is started, and its
//   return type, name and parameter types are recorded. Its children are then
//   visited so the parameter names can be picked up.
// - ParmDecl: when it is a direct child of a function declaration, its name is
//   stored in the next free argument slot of the current function.
// - Attributes, function bodies and struct fields are not descended into.
// - Every other cursor kind is descended into.
static enum CXChildVisitResult fn_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
    ParseH *p = (ParseH*)client_data;
    enum CXCursorKind kind = clang_getCursorKind(cursor);

    switch (kind) {
        case CXCursor_FunctionDecl:
            {
                CX_StorageClass storage_class = clang_Cursor_getStorageClass(cursor);
                if (!is_storage_class_export(storage_class))
                    return CXChildVisit_Continue;

                end_fn(p);
                begin_fn(p);

                CXType fn_type = clang_getCursorType(cursor);
                if (clang_isFunctionTypeVariadic(fn_type)) {
                    zig_panic("TODO support variadic function");
                }
                if (clang_getFunctionTypeCallingConv(fn_type) != CXCallingConv_C) {
                    zig_panic("TODO support non c calling convention");
                }

                CXType return_type = clang_getResultType(fn_type);
                p->cur_fn->return_type = to_zig_type(return_type);

                // The spelling is only fetched where it is needed and is
                // released right after use. This is safe as long as
                // buf_init_from_str copies the bytes, as buf_init_from_mem does.
                CXString name = clang_getCursorSpelling(cursor);
                buf_init_from_str(&p->cur_fn->name, clang_getCString(name));
                clang_disposeString(name);

                p->cur_fn->arg_count = clang_getNumArgTypes(fn_type);
                p->cur_fn->args = allocate<Arg>(p->cur_fn->arg_count);
                for (int i = 0; i < p->cur_fn->arg_count; i += 1) {
                    CXType param_type = clang_getArgType(fn_type, i);
                    p->cur_fn->args[i].type = to_zig_type(param_type);
                }

                p->arg_index = 0;
                return CXChildVisit_Recurse;
            }
        case CXCursor_ParmDecl:
            {
                // Parameters can also show up under other declarations that
                // are descended into by the default case (for example a
                // function-pointer typedef). Those do not belong to cur_fn, so
                // only direct children of a function declaration are accepted.
                if (clang_getCursorKind(parent) != CXCursor_FunctionDecl)
                    return CXChildVisit_Continue;

                assert(p->cur_fn);
                assert(p->arg_index < p->cur_fn->arg_count);

                CXString name = clang_getCursorSpelling(cursor);
                buf_init_from_str(&p->cur_fn->args[p->arg_index].name, clang_getCString(name));
                clang_disposeString(name);

                p->arg_index += 1;
                return CXChildVisit_Continue;
            }
        case CXCursor_UnexposedAttr:
        case CXCursor_CompoundStmt:
        case CXCursor_FieldDecl:
            return CXChildVisit_Continue;
        default:
            return CXChildVisit_Recurse;
    }
}
// Writes cur_indent spaces to the output stream.
static void print_indent(ParseH *p) {
    int remaining = p->cur_indent;
    while (remaining > 0) {
        fprintf(p->f, " ");
        remaining -= 1;
    }
}
void parse_h_file(const char *target_path, ZigList<const char *> *clang_argv, FILE *f) {
ParseH parse_h = {0};
ParseH *p = &parse_h;
p->f = f;
CXTranslationUnit tu;
CXIndex index = clang_createIndex(1, 0);
char *ZIG_PARSEH_CFLAGS = getenv("ZIG_PARSEH_CFLAGS");
if (ZIG_PARSEH_CFLAGS) {
Buf tmp_buf = {0};
char *start = ZIG_PARSEH_CFLAGS;
char *space = strstr(start, " ");
while (space) {
if (space - start > 0) {
buf_init_from_mem(&tmp_buf, start, space - start);
clang_argv->append(buf_ptr(buf_create_from_buf(&tmp_buf)));
}
start = space + 1;
space = strstr(start, " ");
}
buf_init_from_str(&tmp_buf, start);
clang_argv->append(buf_ptr(buf_create_from_buf(&tmp_buf)));
}
clang_argv->append(nullptr);
enum CXErrorCode err_code;
if ((err_code = clang_parseTranslationUnit2(index, target_path,
clang_argv->items, clang_argv->length - 1,
NULL, 0, CXTranslationUnit_None, &tu)))
{
zig_panic("parse translation unit failure");
}
unsigned diag_count = clang_getNumDiagnostics(tu);
if (diag_count > 0) {
for (unsigned i = 0; i < diag_count; i += 1) {
CXDiagnostic diagnostic = clang_getDiagnostic(tu, i);
CXSourceLocation location = clang_getDiagnosticLocation(diagnostic);
CXFile file;
unsigned line, column, offset;
clang_getSpellingLocation(location, &file, &line, &column, &offset);
CXString text = clang_getDiagnosticSpelling(diagnostic);
CXString file_name = clang_getFileName(file);
fprintf(stderr, "%s line %u, column %u: %s\n", clang_getCString(file_name),
line, column, clang_getCString(text));
}
exit(1);
}
CXCursor cursor = clang_getTranslationUnitCursor(tu);
clang_visitChildren(cursor, fn_visitor, p);
end_fn(p);
if (p->fn_list.length) {
fprintf(f, "extern {\n");
p->cur_indent += indent_size;
for (int fn_i = 0; fn_i < p->fn_list.length; fn_i += 1) {
Fn *fn = p->fn_list.at(fn_i);
print_indent(p);
fprintf(p->f, "fn %s(", buf_ptr(&fn->name));
for (int arg_i = 0; arg_i < fn->arg_count; arg_i += 1) {
Arg *arg = &fn->args[arg_i];
fprintf(p->f, "%s: %s", buf_ptr(&arg->name), buf_ptr(arg->type));
if (arg_i + 1 < fn->arg_count) {
fprintf(p->f, ", ");
}
}
fprintf(p->f, ")");
if (!buf_eql_str(fn->return_type, "void")) {
fprintf(p->f, " -> %s", buf_ptr(fn->return_type));
}
fprintf(p->f, ";\n");
}
fprintf(f, "}\n");
}
}

18
src/parseh.hpp Normal file
View File

@ -0,0 +1,18 @@
/*
* Copyright (c) 2015 Andrew Kelley
*
* This file is part of zig, which is MIT licensed.
* See http://opensource.org/licenses/MIT
*/
#ifndef ZIG_PARSEH_HPP
#define ZIG_PARSEH_HPP
#include "buffer.hpp"
#include <stdio.h>
/*
 * Parses the C header at target_path with libclang and writes extern
 * declarations for the functions it declares to f.
 *
 * clang_argv holds additional command line arguments for clang. It is
 * modified: arguments from the ZIG_PARSEH_CFLAGS environment variable and a
 * terminating null entry are appended to it.
 *
 * If clang reports any diagnostic, the diagnostics are printed to stderr and
 * the process exits with status 1.
 */
void parse_h_file(const char *target_path, ZigList<const char *> *clang_argv, FILE *f);
#endif