From 57b2176e28fd336309922cc9182cffcf084c1879 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 19 Jan 2021 23:42:48 -0800 Subject: [PATCH] translate-c: Improve array support 1. For incomplete arrays with initializer list (`int x[] = {1};`) use the initializer size as the array size. 2. For arrays initialized with a string literal translate it as an array of character literals instead of `[*c]const u8` 3. Don't crash if an empty initializer is used for an incomplete array. 4. Add a test for multi-character character constants Additionally lay some groundwork for supporting wide string literals. fixes #4831 #7832 #7842 --- src/clang.zig | 9 ++ src/translate_c.zig | 213 ++++++++++++++++++++++++++++++-------- src/zig_clang.cpp | 15 +++ src/zig_clang.h | 4 + test/run_translated_c.zig | 48 +++++++++ test/translate_c.zig | 36 ++++++- 6 files changed, 279 insertions(+), 46 deletions(-) diff --git a/src/clang.zig b/src/clang.zig index 350afb90ca..fc7a25fe12 100644 --- a/src/clang.zig +++ b/src/clang.zig @@ -735,6 +735,15 @@ pub const StringLiteral = opaque { pub const getKind = ZigClangStringLiteral_getKind; extern fn ZigClangStringLiteral_getKind(*const StringLiteral) StringLiteral_StringKind; + pub const getCodeUnit = ZigClangStringLiteral_getCodeUnit; + extern fn ZigClangStringLiteral_getCodeUnit(*const StringLiteral, usize) u32; + + pub const getLength = ZigClangStringLiteral_getLength; + extern fn ZigClangStringLiteral_getLength(*const StringLiteral) c_uint; + + pub const getCharByteWidth = ZigClangStringLiteral_getCharByteWidth; + extern fn ZigClangStringLiteral_getCharByteWidth(*const StringLiteral) c_uint; + pub const getString_bytes_begin_size = ZigClangStringLiteral_getString_bytes_begin_size; extern fn ZigClangStringLiteral_getString_bytes_begin_size(*const StringLiteral, *usize) [*]const u8; }; diff --git a/src/translate_c.zig b/src/translate_c.zig index 17078474e7..bca9ff3a20 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -710,6 +710,12 @@ fn visitFnDecl(c: *Context, fn_decl: *const clang.FunctionDecl) Error!void { return addTopLevelDecl(c, fn_name, &proto_node.base); } +fn transQualTypeMaybeInitialized(rp: RestorePoint, qt: clang.QualType, decl_init: ?*const clang.Expr, loc: clang.SourceLocation) TransError!*ast.Node { + return if (decl_init) |init_expr| + transQualTypeInitialized(rp, qt, init_expr, loc) + else + transQualType(rp, qt, loc); +} /// if mangled_name is not null, this var decl was declared in a block scope. fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]const u8) Error!void { const var_name = mangled_name orelse try c.str(@ptrCast(*const clang.NamedDecl, var_decl).getName_bytes_begin()); @@ -734,6 +740,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co const storage_class = var_decl.getStorageClass(); const is_const = qual_type.isConstQualified(); const has_init = var_decl.hasInit(); + const decl_init = var_decl.getInit(); // In C extern variables with initializers behave like Zig exports. // extern int foo = 2; @@ -755,8 +762,9 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co const name_tok = try appendIdentifier(c, checked_name); _ = try appendToken(c, .Colon, ":"); - const type_node = transQualType(rp, qual_type, var_decl_loc) catch |err| switch (err) { - error.UnsupportedType => { + + const type_node = transQualTypeMaybeInitialized(rp, qual_type, decl_init, var_decl_loc) catch |err| switch (err) { + error.UnsupportedTranslation, error.UnsupportedType => { return failDecl(c, var_decl_loc, checked_name, "unable to resolve variable type", .{}); }, error.OutOfMemory => |e| return e, @@ -770,17 +778,22 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co // with the variable type. if (has_init) { eq_tok = try appendToken(c, .Equal, "="); - init_node = if (var_decl.getInit()) |expr| - transExprCoercing(rp, &c.global_scope.base, expr, .used, .r_value) catch |err| switch (err) { + if (decl_init) |expr| { + const node_or_error = if (expr.getStmtClass() == .StringLiteralClass) + transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), type_node) + else + transExprCoercing(rp, scope, expr, .used, .r_value); + init_node = node_or_error catch |err| switch (err) { error.UnsupportedTranslation, error.UnsupportedType, => { return failDecl(c, var_decl_loc, checked_name, "unable to translate initializer", .{}); }, error.OutOfMemory => |e| return e, - } - else - try transCreateNodeUndefinedLiteral(c); + }; + } else { + init_node = try transCreateNodeUndefinedLiteral(c); + } } else if (storage_class != .Extern) { eq_tok = try appendToken(c, .Equal, "="); // The C language specification states that variables with static or threadlocal @@ -1620,6 +1633,7 @@ fn transDeclStmtOne( switch (decl.getKind()) { .Var => { const var_decl = @ptrCast(*const clang.VarDecl, decl); + const decl_init = var_decl.getInit(); const qual_type = var_decl.getTypeSourceInfo_getType(); const name = try c.str(@ptrCast(*const clang.NamedDecl, var_decl).getName_bytes_begin()); @@ -1643,11 +1657,14 @@ fn transDeclStmtOne( _ = try appendToken(c, .Colon, ":"); const loc = decl.getLocation(); - const type_node = try transQualType(rp, qual_type, loc); + const type_node = try transQualTypeMaybeInitialized(rp, qual_type, decl_init, loc); const eq_token = try appendToken(c, .Equal, "="); - var init_node = if (var_decl.getInit()) |expr| - try transExprCoercing(rp, scope, expr, .used, .r_value) + var init_node = if (decl_init) |expr| + if (expr.getStmtClass() == .StringLiteralClass) + try transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) + else + try transExprCoercing(rp, scope, expr, .used, .r_value) else try transCreateNodeUndefinedLiteral(c); if (!qualTypeIsBoolean(qual_type) and isBoolRes(init_node)) { @@ -1740,7 +1757,7 @@ fn transImplicitCastExpr( return maybeSuppressResult(rp, scope, result_used, sub_expr_node); }, .ArrayToPointerDecay => { - if (exprIsStringLiteral(sub_expr)) { + if (exprIsNarrowStringLiteral(sub_expr)) { const sub_expr_node = try transExpr(rp, scope, sub_expr, .used, .r_value); return maybeSuppressResult(rp, scope, result_used, sub_expr_node); } @@ -1841,17 +1858,20 @@ fn exprIsBooleanType(expr: *const clang.Expr) bool { return qualTypeIsBoolean(expr.getType()); } -fn exprIsStringLiteral(expr: *const clang.Expr) bool { +fn exprIsNarrowStringLiteral(expr: *const clang.Expr) bool { switch (expr.getStmtClass()) { - .StringLiteralClass => return true, + .StringLiteralClass => { + const string_lit = @ptrCast(*const clang.StringLiteral, expr); + return string_lit.getCharByteWidth() == 1; + }, .PredefinedExprClass => return true, .UnaryOperatorClass => { const op_expr = @ptrCast(*const clang.UnaryOperator, expr).getSubExpr(); - return exprIsStringLiteral(op_expr); + return exprIsNarrowStringLiteral(op_expr); }, .ParenExprClass => { const op_expr = @ptrCast(*const clang.ParenExpr, expr).getSubExpr(); - return exprIsStringLiteral(op_expr); + return exprIsNarrowStringLiteral(op_expr); }, else => return false, } @@ -2049,6 +2069,71 @@ fn transStringLiteral( } } +/// Parse the size of an array back out from an ast Node. +fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize { + if (node.castTag(.ArrayType)) |array| { + if (array.len_expr.castTag(.IntegerLiteral)) |int_lit| { + const tok = tokenSlice(c, int_lit.token); + return std.fmt.parseUnsigned(usize, tok, 10) catch error.UnsupportedTranslation; + } + } + return error.UnsupportedTranslation; +} + +/// Translate a string literal to an array of integers. Used when an +/// array is initialized from a string literal. `target_node` is the +/// array being initialized. If the string literal is larger than the +/// array, truncate the string. If the array is larger than the string +/// literal, pad the array with 0's +fn transStringLiteralAsArray( + rp: RestorePoint, + scope: *Scope, + stmt: *const clang.StringLiteral, + target_node: *ast.Node, +) TransError!*ast.Node { + const array_size = try zigArraySize(rp.c, target_node); + const str_length = stmt.getLength(); + + const expr_base = @ptrCast(*const clang.Expr, stmt); + const ty = expr_base.getType().getTypePtr(); + const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty); + + const ty_node = try rp.c.arena.create(ast.Node.ArrayType); + const op_token = try appendToken(rp.c, .LBracket, "["); + const len_expr = try transCreateNodeInt(rp.c, array_size); + _ = try appendToken(rp.c, .RBracket, "]"); + + ty_node.* = .{ + .op_token = op_token, + .rhs = try transQualType(rp, const_arr_ty.getElementType(), expr_base.getBeginLoc()), + .len_expr = len_expr, + }; + _ = try appendToken(rp.c, .LBrace, "{"); + var init_node = try ast.Node.ArrayInitializer.alloc(rp.c.arena, array_size); + init_node.* = .{ + .lhs = &ty_node.base, + .rtoken = undefined, + .list_len = array_size, + }; + const init_list = init_node.list(); + + var i: c_uint = 0; + const kind = stmt.getKind(); + const narrow = kind == .Ascii or kind == .UTF8; + while (i < str_length and i < array_size) : (i += 1) { + const code_unit = stmt.getCodeUnit(i); + init_list[i] = try transCreateCharLitNode(rp.c, narrow, code_unit); + _ = try appendToken(rp.c, .Comma, ","); + } + while (i < array_size) : (i += 1) { + init_list[i] = try transCreateNodeInt(rp.c, 0); + _ = try appendToken(rp.c, .Comma, ","); + } + init_node.rtoken = try appendToken(rp.c, .RBrace, "}"); + + return &init_node.base; +} + fn cIsEnum(qt: clang.QualType) bool { return qt.getCanonicalType().getTypeClass() == .Enum; } @@ -2343,6 +2428,18 @@ fn transCreateNodeArrayType( return &node.base; } +fn transCreateEmptyArray(rp: RestorePoint, loc: clang.SourceLocation, ty: *const clang.Type) TransError!*ast.Node { + const ty_node = try transCreateNodeArrayType(rp, loc, ty, 0); + _ = try appendToken(rp.c, .LBrace, "{"); + const filler_init_node = try ast.Node.ArrayInitializer.alloc(rp.c.arena, 0); + filler_init_node.* = .{ + .lhs = ty_node, + .rtoken = try appendToken(rp.c, .RBrace, "}"), + .list_len = 0, + }; + return &filler_init_node.base; +} + fn transInitListExprArray( rp: RestorePoint, scope: *Scope, @@ -2360,6 +2457,10 @@ fn transInitListExprArray( const all_count = size_ap_int.getLimitedValue(math.maxInt(usize)); const leftover_count = all_count - init_count; + if (all_count == 0) { + return transCreateEmptyArray(rp, loc, child_qt.getTypePtr()); + } + var init_node: *ast.Node.ArrayInitializer = undefined; var cat_tok: ast.TokenIndex = undefined; if (init_count != 0) { @@ -2934,6 +3035,21 @@ fn transPredefinedExpr(rp: RestorePoint, scope: *Scope, expr: *const clang.Prede return transStringLiteral(rp, scope, expr.getFunctionName(), used); } +fn transCreateCharLitNode(c: *Context, narrow: bool, val: u32) TransError!*ast.Node { + const node = try c.arena.create(ast.Node.OneToken); + node.* = .{ + .base = .{ .tag = .CharLiteral }, + .token = undefined, + }; + if (narrow) { + const val_array = [_]u8{@intCast(u8, val)}; + node.token = try appendTokenFmt(c, .CharLiteral, "'{}'", .{std.zig.fmtEscapes(&val_array)}); + } else { + node.token = try appendTokenFmt(c, .CharLiteral, "'\\u{{{x}}}'", .{val}); + } + return &node.base; +} + fn transCharLiteral( rp: RestorePoint, scope: *Scope, @@ -2943,33 +3059,14 @@ fn transCharLiteral( ) TransError!*ast.Node { const kind = stmt.getKind(); const val = stmt.getValue(); - const int_lit_node = switch (kind) { - .Ascii, .UTF8 => blk: { - if (kind == .Ascii) { - // C has a somewhat obscure feature called multi-character character - // constant - if (val > 255) - break :blk try transCreateNodeInt(rp.c, val); - } - const val_array = [_]u8 { @intCast(u8, val) }; - const token = try appendTokenFmt(rp.c, .CharLiteral, "'{}'", .{std.zig.fmtEscapes(&val_array)}); - const node = try rp.c.arena.create(ast.Node.OneToken); - node.* = .{ - .base = .{ .tag = .CharLiteral }, - .token = token, - }; - break :blk &node.base; - }, - .Wide, .UTF16, .UTF32 => blk: { - const token = try appendTokenFmt(rp.c, .CharLiteral, "'\\u{{{x}}}'", .{val}); - const node = try rp.c.arena.create(ast.Node.OneToken); - node.* = .{ - .base = .{ .tag = .CharLiteral }, - .token = token, - }; - break :blk &node.base; - }, - }; + const narrow = kind == .Ascii or kind == .UTF8; + // C has a somewhat obscure feature called multi-character character constant + // e.g. 'abcd' + const int_lit_node = if (kind == .Ascii and val > 255) + try transCreateNodeInt(rp.c, val) + else + try transCreateCharLitNode(rp.c, narrow, val); + if (suppress_as == .no_as) { return maybeSuppressResult(rp, scope, result_used, int_lit_node); } @@ -3891,6 +3988,38 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: *ast.Node) !void { _ = try c.global_scope.sym_table.put(name, decl_node); } +/// Translate a qual type for a variable with an initializer. The initializer +/// only matters for incomplete arrays, since the size of the array is determined +/// by the size of the initializer +fn transQualTypeInitialized( + rp: RestorePoint, + qt: clang.QualType, + decl_init: *const clang.Expr, + source_loc: clang.SourceLocation, +) TypeError!*ast.Node { + const ty = qt.getTypePtr(); + if (ty.getTypeClass() == .IncompleteArray) { + const incomplete_array_ty = @ptrCast(*const clang.IncompleteArrayType, ty); + const elem_ty = incomplete_array_ty.getElementType().getTypePtr(); + + switch (decl_init.getStmtClass()) { + .StringLiteralClass => { + const string_lit = @ptrCast(*const clang.StringLiteral, decl_init); + const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator + const array_size = @intCast(usize, string_lit_size); + return transCreateNodeArrayType(rp, source_loc, elem_ty, array_size); + }, + .InitListExprClass => { + const init_expr = @ptrCast(*const clang.InitListExpr, decl_init); + const size = init_expr.getNumInits(); + return transCreateNodeArrayType(rp, source_loc, elem_ty, size); + }, + else => {}, + } + } + return transQualType(rp, qt, source_loc); +} + fn transQualType(rp: RestorePoint, qt: clang.QualType, source_loc: clang.SourceLocation) TypeError!*ast.Node { return transType(rp, qt.getTypePtr(), source_loc); } diff --git a/src/zig_clang.cpp b/src/zig_clang.cpp index 1fa5bbe078..9bd68859e8 100644 --- a/src/zig_clang.cpp +++ b/src/zig_clang.cpp @@ -2504,6 +2504,21 @@ enum ZigClangStringLiteral_StringKind ZigClangStringLiteral_getKind(const struct return (ZigClangStringLiteral_StringKind)casted->getKind(); } +uint32_t ZigClangStringLiteral_getCodeUnit(const struct ZigClangStringLiteral *self, size_t i) { + auto casted = reinterpret_cast(self); + return casted->getCodeUnit(i); +} + +unsigned ZigClangStringLiteral_getLength(const struct ZigClangStringLiteral *self) { + auto casted = reinterpret_cast(self); + return casted->getLength(); +} + +unsigned ZigClangStringLiteral_getCharByteWidth(const struct ZigClangStringLiteral *self) { + auto casted = reinterpret_cast(self); + return casted->getCharByteWidth(); +} + const char *ZigClangStringLiteral_getString_bytes_begin_size(const struct ZigClangStringLiteral *self, size_t *len) { auto casted = reinterpret_cast(self); llvm::StringRef str_ref = casted->getString(); diff --git a/src/zig_clang.h b/src/zig_clang.h index 169fbcedfb..42587b1719 100644 --- a/src/zig_clang.h +++ b/src/zig_clang.h @@ -1126,6 +1126,10 @@ ZIG_EXTERN_C unsigned ZigClangAPFloat_convertToHexString(const struct ZigClangAP ZIG_EXTERN_C double ZigClangFloatingLiteral_getValueAsApproximateDouble(const ZigClangFloatingLiteral *self); ZIG_EXTERN_C enum ZigClangStringLiteral_StringKind ZigClangStringLiteral_getKind(const struct ZigClangStringLiteral *self); +ZIG_EXTERN_C uint32_t ZigClangStringLiteral_getCodeUnit(const struct ZigClangStringLiteral *self, size_t i); +ZIG_EXTERN_C unsigned ZigClangStringLiteral_getLength(const struct ZigClangStringLiteral *self); +ZIG_EXTERN_C unsigned ZigClangStringLiteral_getCharByteWidth(const struct ZigClangStringLiteral *self); + ZIG_EXTERN_C const char *ZigClangStringLiteral_getString_bytes_begin_size(const struct ZigClangStringLiteral *self, size_t *len); diff --git a/test/run_translated_c.zig b/test/run_translated_c.zig index 8e9ddf2d33..e3f9d6b132 100644 --- a/test/run_translated_c.zig +++ b/test/run_translated_c.zig @@ -746,4 +746,52 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void { \\ return 0; \\} , "1 2" ++ nl); + + cases.add("multi-character character constant", + \\#include + \\int main(void) { + \\ int foo = 'abcd'; + \\ switch (foo) { + \\ case 'abcd': break; + \\ default: abort(); + \\ } + \\ return 0; + \\} + , ""); + + cases.add("Array initializers (string literals, incomplete arrays)", + \\#include + \\#include + \\extern int foo[]; + \\int global_arr[] = {1, 2, 3}; + \\char global_string[] = "hello"; + \\int main(int argc, char *argv[]) { + \\ if (global_arr[2] != 3) abort(); + \\ if (strlen(global_string) != 5) abort(); + \\ const char *const_str = "hello"; + \\ if (strcmp(const_str, "hello") != 0) abort(); + \\ char empty_str[] = ""; + \\ if (strlen(empty_str) != 0) abort(); + \\ char hello[] = "hello"; + \\ if (strlen(hello) != 5 || sizeof(hello) != 6) abort(); + \\ int empty[] = {}; + \\ if (sizeof(empty) != 0) abort(); + \\ int bar[] = {42}; + \\ if (bar[0] != 42) abort(); + \\ bar[0] = 43; + \\ if (bar[0] != 43) abort(); + \\ int baz[] = {1, [42] = 123, 456}; + \\ if (baz[42] != 123 || baz[43] != 456) abort(); + \\ if (sizeof(baz) != sizeof(int) * 44) abort(); + \\ const char *const names[] = {"first", "second", "third"}; + \\ if (strcmp(names[2], "third") != 0) abort(); + \\ char catted_str[] = "abc" "def"; + \\ if (strlen(catted_str) != 6 || sizeof(catted_str) != 7) abort(); + \\ char catted_trunc_str[2] = "abc" "def"; + \\ if (sizeof(catted_trunc_str) != 2 || catted_trunc_str[0] != 'a' || catted_trunc_str[1] != 'b') abort(); + \\ char big_array_utf8lit[10] = "💯"; + \\ if (strcmp(big_array_utf8lit, "💯") != 0 || big_array_utf8lit[9] != 0) abort(); + \\ return 0; + \\} + , ""); } diff --git a/test/translate_c.zig b/test/translate_c.zig index 6ebe27c0a7..d6ed8da4bc 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -539,7 +539,14 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ static const char v2[] = "2.2.2"; \\} , &[_][]const u8{ - \\const v2: [*c]const u8 = "2.2.2"; + \\const v2: [6]u8 = [6]u8{ + \\ '2', + \\ '.', + \\ '2', + \\ '.', + \\ '2', + \\ 0, + \\}; \\pub export fn foo() void { \\ _ = v2; \\} @@ -1395,9 +1402,30 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\static char arr1[] = "hello"; \\char arr2[] = "hello"; , &[_][]const u8{ - \\pub export var arr0: [*c]u8 = "hello"; - \\pub var arr1: [*c]u8 = "hello"; - \\pub export var arr2: [*c]u8 = "hello"; + \\pub export var arr0: [6]u8 = [6]u8{ + \\ 'h', + \\ 'e', + \\ 'l', + \\ 'l', + \\ 'o', + \\ 0, + \\}; + \\pub var arr1: [6]u8 = [6]u8{ + \\ 'h', + \\ 'e', + \\ 'l', + \\ 'l', + \\ 'o', + \\ 0, + \\}; + \\pub export var arr2: [6]u8 = [6]u8{ + \\ 'h', + \\ 'e', + \\ 'l', + \\ 'l', + \\ 'o', + \\ 0, + \\}; }); cases.add("array initializer expr",