diff --git a/src-self-hosted/c_tokenizer.zig b/src-self-hosted/c_tokenizer.zig index 6899e3efba..704cd268f7 100644 --- a/src-self-hosted/c_tokenizer.zig +++ b/src-self-hosted/c_tokenizer.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const expect = std.testing.expect; pub const TokenList = std.SegmentedList(CToken, 32); @@ -28,6 +29,7 @@ pub const CToken = struct { pub const NumLitSuffix = enum { None, + F, L, U, LU, @@ -39,19 +41,18 @@ pub const CToken = struct { pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void { var index: usize = 0; while (true) { - const tok = try next(chars[index..], &index); - tl.push(tok); + const tok = try next(chars, &index); + try tl.push(tok); if (tok.id == .Eof) return; } } -fn next(chars: [*]const u8, index: *usize) !CToken { +fn next(chars: [*]const u8, i: *usize) !CToken { var state: enum { Start, GotLt, - ExpectChar, - ExpectEndQuot, + CharLit, OpenComment, Comment, CommentStar, @@ -62,6 +63,7 @@ fn next(chars: [*]const u8, index: *usize) !CToken { Octal, GotZero, Hex, + Bin, Float, ExpSign, FloatExp, @@ -70,7 +72,6 @@ fn next(chars: [*]const u8, index: *usize) !CToken { NumLitIntSuffixL, NumLitIntSuffixLL, NumLitIntSuffixUL, - GotLt, } = .Start; var result = CToken{ @@ -79,9 +80,10 @@ fn next(chars: [*]const u8, index: *usize) !CToken { }; var begin_index: usize = 0; var digits: u8 = 0; - var pre_escape = .Start; + var pre_escape = state; - for (chars[begin_index..]) |c, i| { + while (true) { + const c = chars[i.*]; if (c == 0) { switch (state) { .Start => { @@ -90,22 +92,25 @@ fn next(chars: [*]const u8, index: *usize) !CToken { .Identifier, .Decimal, .Hex, + .Bin, .Octal, .GotZero, + .Float, + .FloatExp, + => { + result.bytes = chars[begin_index..i.*]; + return result; + }, .NumLitIntSuffixU, .NumLitIntSuffixL, .NumLitIntSuffixUL, .NumLitIntSuffixLL, - .Float, - .FloatExp, .GotLt, => { return result; }, - .ExpectChar, - .ExpectEndQuot, + .CharLit, .OpenComment, - .LineComment, .Comment, .CommentStar, .Backslash, 
@@ -115,20 +120,20 @@ fn next(chars: [*]const u8, index: *usize) !CToken { => return error.TokenizingFailed, } } - index.* += 1; + i.* += 1; switch (state) { .Start => { switch (c) { ' ', '\t', '\x0B', '\x0C' => {}, '\'' => { - state = .ExpectChar; + state = .CharLit; result.id = .CharLit; - begin_index = i; + begin_index = i.* - 1; }, '\"' => { state = .String; result.id = .StrLit; - begin_index = i; + begin_index = i.* - 1; }, '/' => { state = .OpenComment; @@ -142,17 +147,17 @@ fn next(chars: [*]const u8, index: *usize) !CToken { 'a'...'z', 'A'...'Z', '_' => { state = .Identifier; result.id = .Identifier; - begin_index = i; + begin_index = i.* - 1; }, '1'...'9' => { state = .Decimal; result.id = .NumLitInt; - begin_index = i; + begin_index = i.* - 1; }, '0' => { state = .GotZero; result.id = .NumLitInt; - begin_index = i; + begin_index = i.* - 1; }, '.' => { result.id = .Dot; @@ -206,12 +211,23 @@ fn next(chars: [*]const u8, index: *usize) !CToken { 'e', 'E' => { state = .ExpSign; }, - 'f', 'F', 'l', 'L' => { - result.bytes = chars[begin_index..i]; + 'f', + 'F', + => { + // Consume the suffix (no rewind) and exclude it from the literal's bytes, as in .FloatExp. + result.num_lit_suffix = .F; + result.bytes = chars[begin_index .. i.* - 1]; + return result; + }, + 'l', 'L' => { + // Consume the suffix (no rewind) and exclude it from the literal's bytes, as in .FloatExp. + result.num_lit_suffix = .L; + result.bytes = chars[begin_index .. i.* - 1]; return result; }, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } @@ -238,12 +254,19 @@ fn next(chars: [*]const u8, index: *usize) !CToken { .FloatExp => { switch (c) { '0'...'9' => {}, - 'f', 'F', 'l', 'L' => { - result.bytes = chars[begin_index..i]; + 'f', 'F' => { + result.num_lit_suffix = .F; + result.bytes = chars[begin_index .. i.* - 1]; + return result; + }, + 'l', 'L' => { + result.num_lit_suffix = .L; + result.bytes = chars[begin_index ..
i.* - 1]; return result; }, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } @@ -255,17 +278,20 @@ fn next(chars: [*]const u8, index: *usize) !CToken { 'u', 'U' => { state = .NumLitIntSuffixU; result.num_lit_suffix = .U; + result.bytes = chars[begin_index .. i.* - 1]; }, 'l', 'L' => { state = .NumLitIntSuffixL; result.num_lit_suffix = .L; + result.bytes = chars[begin_index .. i.* - 1]; }, '.' => { result.id = .NumLitFloat; state = .Float; }, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } @@ -275,15 +301,25 @@ fn next(chars: [*]const u8, index: *usize) !CToken { 'x', 'X' => { state = .Hex; }, + 'b', 'B' => { + state = .Bin; + }, '.' => { state = .Float; result.id = .NumLitFloat; }, - 'l', 'L', 'u', 'U' => { - c -= 1; - state = .Decimal; + 'u', 'U' => { + state = .NumLitIntSuffixU; + result.num_lit_suffix = .U; + result.bytes = chars[begin_index .. i.* - 1]; + }, + 'l', 'L' => { + state = .NumLitIntSuffixL; + result.num_lit_suffix = .L; + result.bytes = chars[begin_index .. i.* - 1]; }, else => { + i.* -= 1; state = .Octal; }, } @@ -293,7 +329,8 @@ fn next(chars: [*]const u8, index: *usize) !CToken { '0'...'7' => {}, '8', '9' => return error.TokenizingFailed, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } @@ -301,23 +338,44 @@ fn next(chars: [*]const u8, index: *usize) !CToken { .Hex => { switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => {}, - - 'p', 'P' => { - result.id = .NumLitFloat; - state = .ExpSign; - }, 'u', 'U' => { // marks the number literal as unsigned state = .NumLitIntSuffixU; result.num_lit_suffix = .U; + result.bytes = chars[begin_index .. i.* - 1]; }, 'l', 'L' => { // marks the number literal as long state = .NumLitIntSuffixL; result.num_lit_suffix = .L; + result.bytes = chars[begin_index .. 
i.* - 1]; }, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; + return result; + }, + } + }, + .Bin => { + switch (c) { + '0'...'1' => {}, + '2'...'9' => return error.TokenizingFailed, + 'u', 'U' => { + // marks the number literal as unsigned + state = .NumLitIntSuffixU; + result.num_lit_suffix = .U; + result.bytes = chars[begin_index .. i.* - 1]; + }, + 'l', 'L' => { + // marks the number literal as long + state = .NumLitIntSuffixL; + result.num_lit_suffix = .L; + result.bytes = chars[begin_index .. i.* - 1]; + }, + else => { + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } @@ -329,7 +387,7 @@ fn next(chars: [*]const u8, index: *usize) !CToken { state = .NumLitIntSuffixUL; }, else => { - result.bytes = chars[begin_index..i - 1]; + i.* -= 1; return result; }, } @@ -342,11 +400,10 @@ fn next(chars: [*]const u8, index: *usize) !CToken { }, 'u', 'U' => { result.num_lit_suffix = .LU; - result.bytes = chars[begin_index..i - 2]; return result; }, else => { - result.bytes = chars[begin_index..i - 1]; + i.* -= 1; return result; }, } @@ -355,11 +412,10 @@ fn next(chars: [*]const u8, index: *usize) !CToken { switch (c) { 'u', 'U' => { result.num_lit_suffix = .LLU; - result.bytes = chars[begin_index..i - 3]; return result; }, else => { - result.bytes = chars[begin_index..i - 2]; + i.* -= 1; return result; }, } @@ -368,11 +424,10 @@ fn next(chars: [*]const u8, index: *usize) !CToken { switch (c) { 'l', 'L' => { result.num_lit_suffix = .LLU; - result.bytes = chars[begin_index..i - 3]; return result; }, else => { - result.bytes = chars[begin_index..i - 2]; + i.* -= 1; return result; }, } @@ -381,35 +436,28 @@ fn next(chars: [*]const u8, index: *usize) !CToken { switch (c) { '_', 'a'...'z', 'A'...'Z', '0'...'9' => {}, else => { - result.bytes = chars[begin_index..i]; + i.* -= 1; + result.bytes = chars[begin_index..i.*]; return result; }, } }, - .String => { + .String => { // TODO char escapes switch 
(c) { '\"' => { - result.bytes = chars[begin_index + 1 .. i]; + result.bytes = chars[begin_index + 1 .. i.* - 1]; return result; }, else => {}, } }, - .ExpectChar => { - switch (c) { - '\'' => return error.TokenizingFailed, - else => { - state = .ExpectEndQuot; - }, - } - }, - .ExpectEndQuot => { + .CharLit => { switch (c) { '\'' => { - result.bytes = chars[begin_index + 1 .. i]; + result.bytes = chars[begin_index + 1 .. i.* - 1]; return result; }, - else => return error.TokenizingFailed, + else => {}, } }, .OpenComment => { @@ -455,4 +503,56 @@ fn next(chars: [*]const u8, index: *usize) !CToken { }, } } + unreachable; +} + +test "tokenize macro" { + var tl = TokenList.init(std.heap.page_allocator); + defer tl.deinit(); + + const src = "TEST 0\n"; + try tokenizeCMacro(&tl, src); + var it = tl.iterator(0); + expect(it.next().?.id == .Identifier); + expect(std.mem.eql(u8, it.next().?.bytes, "0")); + expect(it.next().?.id == .Eof); + expect(it.next() == null); + tl.shrink(0); + + const src2 = "__FLT_MIN_10_EXP__ -37\n"; + try tokenizeCMacro(&tl, src2); + it = tl.iterator(0); + expect(std.mem.eql(u8, it.next().?.bytes, "__FLT_MIN_10_EXP__")); + expect(it.next().?.id == .Minus); + expect(std.mem.eql(u8, it.next().?.bytes, "37")); + expect(it.next().?.id == .Eof); + expect(it.next() == null); + tl.shrink(0); + + const src3 = "__llvm__ 1\n#define"; + try tokenizeCMacro(&tl, src3); + it = tl.iterator(0); + expect(std.mem.eql(u8, it.next().?.bytes, "__llvm__")); + expect(std.mem.eql(u8, it.next().?.bytes, "1")); + expect(it.next().?.id == .Eof); + expect(it.next() == null); + tl.shrink(0); + + const src4 = "TEST 2"; + try tokenizeCMacro(&tl, src4); + it = tl.iterator(0); + expect(it.next().?.id == .Identifier); + expect(std.mem.eql(u8, it.next().?.bytes, "2")); + expect(it.next().?.id == .Eof); + expect(it.next() == null); + tl.shrink(0); + + const src5 = "FOO 0l"; + try tokenizeCMacro(&tl, src5); + it = tl.iterator(0); + expect(it.next().?.id == .Identifier); + 
expect(std.mem.eql(u8, it.next().?.bytes, "0")); + expect(it.next().?.id == .Eof); + expect(it.next() == null); + tl.shrink(0); } diff --git a/src-self-hosted/clang.zig b/src-self-hosted/clang.zig index 4b3aa44fab..901660adce 100644 --- a/src-self-hosted/clang.zig +++ b/src-self-hosted/clang.zig @@ -75,6 +75,7 @@ pub const struct_ZigClangWhileStmt = @OpaqueType(); pub const struct_ZigClangFunctionType = @OpaqueType(); pub const struct_ZigClangPredefinedExpr = @OpaqueType(); pub const struct_ZigClangInitListExpr = @OpaqueType(); +pub const ZigClangPreprocessingRecord = @OpaqueType(); pub const ZigClangBO = extern enum { PtrMemD, @@ -717,6 +718,18 @@ pub const ZigClangEnumDecl_enumerator_iterator = extern struct { opaque: *c_void, }; +pub const ZigClangPreprocessingRecord_iterator = extern struct { + I: c_int, + Self: *ZigClangPreprocessingRecord, +}; + +pub const ZigClangPreprocessedEntity_EntityKind = extern enum { + InvalidKind, + MacroExpansionKind, + MacroDefinitionKind, + InclusionDirectiveKind, +}; + pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) struct_ZigClangSourceLocation; pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8; pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint; @@ -1014,3 +1027,12 @@ pub extern fn ZigClangFieldDecl_getLocation(*const struct_ZigClangFieldDecl) str pub extern fn ZigClangEnumConstantDecl_getInitExpr(*const ZigClangEnumConstantDecl) ?*const ZigClangExpr; pub extern fn ZigClangEnumConstantDecl_getInitVal(*const ZigClangEnumConstantDecl) *const ZigClangAPSInt; + +pub extern fn ZigClangASTUnit_getLocalPreprocessingEntities_begin(*ZigClangASTUnit) ZigClangPreprocessingRecord_iterator; +pub extern fn 
ZigClangASTUnit_getLocalPreprocessingEntities_end(*ZigClangASTUnit) ZigClangPreprocessingRecord_iterator; +pub extern fn ZigClangPreprocessingRecord_iterator_deref(ZigClangPreprocessingRecord_iterator) *ZigClangPreprocessedEntity; +pub extern fn ZigClangPreprocessedEntity_getKind(*const ZigClangPreprocessedEntity) ZigClangPreprocessedEntity_EntityKind; + +pub extern fn ZigClangMacroDefinitionRecord_getName_getNameStart(*const ZigClangMacroDefinitionRecord) [*:0]const u8; +pub extern fn ZigClangMacroDefinitionRecord_getSourceRange_getBegin(*const ZigClangMacroDefinitionRecord) ZigClangSourceLocation; +pub extern fn ZigClangMacroDefinitionRecord_getSourceRange_getEnd(*const ZigClangMacroDefinitionRecord) ZigClangSourceLocation; diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig index 5cd0198911..c36d0f3d5e 100644 --- a/src-self-hosted/translate_c.zig +++ b/src-self-hosted/translate_c.zig @@ -6,6 +6,8 @@ const assert = std.debug.assert; const ast = std.zig.ast; const Token = std.zig.Token; usingnamespace @import("clang.zig"); +const ctok = @import("c_tokenizer.zig"); +const CToken = ctok.CToken; const CallingConvention = std.builtin.TypeInfo.CallingConvention; @@ -31,6 +33,7 @@ fn addrEql(a: usize, b: usize) bool { return a == b; } +const MacroTable = std.StringHashMap(*ast.Node); const SymbolTable = std.StringHashMap(void); const AliasList = std.SegmentedList(struct { alias: []const u8, @@ -106,6 +109,7 @@ const Context = struct { decl_table: DeclTable, alias_list: AliasList, sym_table: SymbolTable, + macro_table: MacroTable, global_scope: *Scope.Root, ptr_params: std.BufSet, clang_context: *ZigClangASTContext, @@ -193,6 +197,7 @@ pub fn translate( .decl_table = DeclTable.init(arena), .alias_list = AliasList.init(arena), .sym_table = SymbolTable.init(arena), + .macro_table = MacroTable.init(arena), .global_scope = try arena.create(Scope.Root), .ptr_params = std.BufSet.init(arena), .clang_context = ZigClangASTUnit_getASTContext(ast_unit).?, 
@@ -207,6 +212,14 @@ pub fn translate( if (!ZigClangASTUnit_visitLocalTopLevelDecls(ast_unit, &context, declVisitorC)) { return context.err; } + + try transPreprocessorEntities(&context, ast_unit); + + var macro_it = context.macro_table.iterator(); + while (macro_it.next()) |kv| { + try addTopLevelDecl(&context, kv.key, kv.value); + } + var it = context.alias_list.iterator(0); while (it.next()) |alias| { if (!context.sym_table.contains(alias.alias)) { @@ -1931,18 +1944,18 @@ fn transCreateNodeInt(c: *Context, int: var) !*ast.Node { return &node.base; } -fn transCreateNodeOpaqueType(c: *Context) !*ast.Node { - const builtin_tok = try appendToken(c, .Builtin, "@OpaqueType"); - _ = try appendToken(c, .LParen, "("); - const rparen_tok = try appendToken(c, .RParen, ")"); - - const call_node = try c.a().create(ast.Node.BuiltinCall); - call_node.* = ast.Node.BuiltinCall{ - .base = ast.Node{ .id = ast.Node.Id.BuiltinCall }, - .builtin_token = builtin_tok, - .params = ast.Node.BuiltinCall.ParamList.init(c.a()), - .rparen_token = rparen_tok, +fn transCreateNodeFloat(c: *Context, int: var) !*ast.Node { + const token = try appendTokenFmt(c, .FloatLiteral, "{}", .{int}); + const node = try c.a().create(ast.Node.FloatLiteral); + node.* = .{ + .token = token, }; + return &node.base; +} + +fn transCreateNodeOpaqueType(c: *Context) !*ast.Node { + const call_node = try transCreateNodeBuiltinFnCall(c, "@OpaqueType"); + call_node.rparen_token = try appendToken(c, .RParen, ")"); return &call_node.base; } @@ -2441,3 +2454,273 @@ fn transCreateNodeIdentifier(c: *Context, name: []const u8) !*ast.Node { pub fn freeErrors(errors: []ClangErrMsg) void { ZigClangErrorMsg_delete(errors.ptr, errors.len); } + +fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void { + // TODO if we see #undef, delete it from the table + var it = ZigClangASTUnit_getLocalPreprocessingEntities_begin(unit); + const it_end = ZigClangASTUnit_getLocalPreprocessingEntities_end(unit); + var tok_list 
= ctok.TokenList.init(c.a()); + + while (it.I != it_end.I) : (it.I += 1) { + const entity = ZigClangPreprocessingRecord_iterator_deref(it); + tok_list.shrink(0); + + switch (ZigClangPreprocessedEntity_getKind(entity)) { + .MacroExpansionKind => { + // TODO + }, + .MacroDefinitionKind => { + const macro = @ptrCast(*ZigClangMacroDefinitionRecord, entity); + const raw_name = ZigClangMacroDefinitionRecord_getName_getNameStart(macro); + const begin_loc = ZigClangMacroDefinitionRecord_getSourceRange_getBegin(macro); + + const name = try c.str(raw_name); + // if (name_exists_global(c, name)) { // TODO + // continue; + // } + + const begin_c = ZigClangSourceManager_getCharacterData(c.source_manager, begin_loc); + try transMacroDefine(c, &tok_list, name, begin_c, begin_loc); + }, + else => {}, + } + } +} + +fn transMacroDefine(c: *Context, tok_list: *ctok.TokenList, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) Error!void { + ctok.tokenizeCMacro(tok_list, char_ptr) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return failDecl(c, source_loc, name, "unable to tokenize macro definition", .{}), + }; + const rp = makeRestorePoint(c); + + var it = tok_list.iterator(0); + const first_tok = it.next().?; + assert(first_tok.id == .Identifier and std.mem.eql(u8, first_tok.bytes, name)); + const next = it.peek().?; + switch (next.id) { + .Identifier => { + // if it equals itself, ignore. 
for example, from stdio.h: + // #define stdin stdin + if (std.mem.eql(u8, name, next.bytes)) { + return; + } + }, + .Eof => { + // this means it is a macro without a value + // we don't care about such things + return; + }, + else => {}, + } + + const visib_tok = try appendToken(c, .Keyword_pub, "pub"); + const mut_tok = try appendToken(c, .Keyword_const, "const"); + const name_tok = try appendIdentifier(c, name); + + const eq_tok = try appendToken(c, .Equal, "="); + + const init_node = parseCExpr(rp, &it, source_loc) catch |err| switch (err) { + error.UnsupportedTranslation, + error.ParseError, + => return failDecl(c, source_loc, name, "unable to translate macro", .{}), + error.OutOfMemory => |e| return e, + }; + + const node = try c.a().create(ast.Node.VarDecl); + node.* = ast.Node.VarDecl{ + .doc_comments = null, + .visib_token = visib_tok, + .thread_local_token = null, + .name_token = name_tok, + .eq_token = eq_tok, + .mut_token = mut_tok, + .comptime_token = null, + .extern_export_token = null, + .lib_name = null, + .type_node = null, + .align_node = null, + .section_node = null, + .init_node = init_node, + .semicolon_token = try appendToken(c, .Semicolon, ";"), + }; + _ = try c.macro_table.put(name, &node.base); +} + +const ParseError = Error || error{ + ParseError, + UnsupportedTranslation, +}; + +fn parseCExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + return parseCPrefixOpExpr(rp, it, source_loc); +} + +fn parseCNumLit(rp: RestorePoint, tok: *CToken, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + if (tok.id == .NumLitInt) { + if (tok.num_lit_suffix == .None) { + if (tok.bytes.len > 2 and tok.bytes[0] == '0') { + switch (tok.bytes[1]) { + '0'...'7' => { + // octal + return transCreateNodeInt(rp.c, try std.fmt.allocPrint(rp.c.a(), "0o{}", .{tok.bytes})); + }, + else => {}, + } + } + return transCreateNodeInt(rp.c, tok.bytes); + } + const cast_node = try 
transCreateNodeBuiltinFnCall(rp.c, "@as"); + try cast_node.params.push(try transCreateNodeIdentifier(rp.c, switch (tok.num_lit_suffix) { + .U => "c_uint", + .L => "c_long", + .LU => "c_ulong", + .LL => "c_longlong", + .LLU => "c_ulonglong", + else => unreachable, + })); + _ = try appendToken(rp.c, .Comma, ","); + try cast_node.params.push(try transCreateNodeInt(rp.c, tok.bytes)); + cast_node.rparen_token = try appendToken(rp.c, .RParen, ")"); + return &cast_node.base; + } else if (tok.id == .NumLitFloat) { + if (tok.num_lit_suffix == .None) { + return transCreateNodeFloat(rp.c, tok.bytes); + } + const cast_node = try transCreateNodeBuiltinFnCall(rp.c, "@as"); + try cast_node.params.push(try transCreateNodeIdentifier(rp.c, switch (tok.num_lit_suffix) { + .F => "f32", + .L => "f64", + else => unreachable, + })); + _ = try appendToken(rp.c, .Comma, ","); + try cast_node.params.push(try transCreateNodeFloat(rp.c, tok.bytes)); + cast_node.rparen_token = try appendToken(rp.c, .RParen, ")"); + return &cast_node.base; + } else + return revertAndWarn( + rp, + error.ParseError, + source_loc, + "expected number literal", + .{}, + ); +} + +fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + const tok = it.next().?; + switch (tok.id) { + .CharLit => { + const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len); + buf[0] = '\''; + writeEscapedString(buf[1..], tok.bytes); + buf[buf.len - 1] = '\''; + const token = try appendToken(rp.c, .CharLiteral, buf); + const node = try rp.c.a().create(ast.Node.CharLiteral); + node.* = ast.Node.CharLiteral{ + .token = token, + }; + return &node.base; + }, + .StrLit => { + const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len); + buf[0] = '"'; + writeEscapedString(buf[1..], tok.bytes); + buf[buf.len - 1] = '"'; + const token = try appendToken(rp.c, .StringLiteral, buf); + const node = try rp.c.a().create(ast.Node.StringLiteral); + node.* = 
ast.Node.StringLiteral{ + .token = token, + }; + return &node.base; + }, + .Minus => { + const node = try transCreateNodePrefixOp( + rp.c, + .Negation, + .Minus, + "-", + ); + node.rhs = try parseCNumLit(rp, it.next().?, source_loc); + return &node.base; + }, + .NumLitInt, .NumLitFloat => { + return parseCNumLit(rp, tok, source_loc); + }, + .Identifier => return transCreateNodeIdentifier(rp.c, tok.bytes), + .LParen => { + _ = try appendToken(rp.c, .LParen, "("); + const inner_node = try parseCExpr(rp, it, source_loc); + _ = try appendToken(rp.c, .RParen, ")"); + + return inner_node; // TODO + }, + else => return revertAndWarn( + rp, + error.UnsupportedTranslation, + source_loc, + "unable to translate C expr", + .{}, + ), + } +} + +fn parseCSuffixOpExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + var node = try parseCPrimaryExpr(rp, it, source_loc); + while (true) { + const tok = it.next().?; + switch (tok.id) { + .Dot => { + const name_tok = it.next().?; + if (name_tok.id != .Identifier) + return revertAndWarn( + rp, + error.ParseError, + source_loc, + "unable to translate C expr", + .{}, + ); + // The field name is the identifier after the dot, not the dot token itself. + const op_token = try appendToken(rp.c, .Period, "."); + const rhs = try transCreateNodeIdentifier(rp.c, name_tok.bytes); + const access_node = try rp.c.a().create(ast.Node.InfixOp); + access_node.* = .{ + .op_token = op_token, + .lhs = node, + .op = .Period, + .rhs = rhs, + }; + node = &access_node.base; + }, + .Shl => { + // Append the operator token first, then parse the right operand exactly once, + // so the operand's tokens are appended after the `<<` token. + const op_token = try appendToken(rp.c, .AngleBracketAngleBracketLeft, "<<"); + const rhs = try parseCPrimaryExpr(rp, it, source_loc); + const bitshift_node = try rp.c.a().create(ast.Node.InfixOp); + bitshift_node.* = .{ + .op_token = op_token, + .lhs = node, + .op = .BitShiftLeft, + .rhs = rhs, + }; + node = &bitshift_node.base; + }, + else => { + _ = it.prev(); + return node; + }, + } + } +} + +fn parseCPrefixOpExpr(rp:
RestorePoint, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + const op_tok = it.next().?; + + switch (op_tok.id) { + else => { + _ = it.prev(); + return try parseCSuffixOpExpr(rp, it, source_loc); + }, + } +} diff --git a/test/translate_c.zig b/test/translate_c.zig index 97cfc129f7..0f2a0e83e9 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -214,6 +214,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ Clear: c_int, \\ }, \\}; + , \\pub const OpenGLProcs = union_OpenGLProcs; }); @@ -280,9 +281,64 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ o, \\ p, \\}; + , \\pub const Baz = struct_Baz; }); + cases.add_2("#define a char literal", + \\#define A_CHAR 'a' + , &[_][]const u8{ + \\pub const A_CHAR = 'a'; + }); + + cases.add_2("comment after integer literal", + \\#define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = 0x00000020; + }); + + cases.add_2("u integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020u /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_uint, 0x00000020); + }); + + cases.add_2("l integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020l /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_long, 0x00000020); + }); + + cases.add_2("ul integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ul /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_ulong, 0x00000020); + }); + + cases.add_2("lu integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020lu /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_ulong, 0x00000020); + }); + + cases.add_2("ll integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ll 
/**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_longlong, 0x00000020); + }); + + cases.add_2("ull integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ull /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_ulonglong, 0x00000020); + }); + + cases.add_2("llu integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020llu /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , &[_][]const u8{ + \\pub const SDL_INIT_VIDEO = @as(c_ulonglong, 0x00000020); + }); + /////////////// Cases for only stage1 which are TODO items for stage2 //////////////// cases.add_both("typedef of function in struct field", @@ -314,7 +370,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\}; }); - cases.add("macro with left shift", + cases.add_both("macro with left shift", \\#define REDISMODULE_READ (1<<0) , &[_][]const u8{ \\pub const REDISMODULE_READ = 1 << 0; @@ -637,13 +693,13 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\pub const A_CHAR = 97; }); - cases.add("#define an unsigned integer literal", + cases.add_both("#define an unsigned integer literal", \\#define CHANNEL_COUNT 24 , &[_][]const u8{ \\pub const CHANNEL_COUNT = 24; }); - cases.add("#define referencing another #define", + cases.add_both("#define referencing another #define", \\#define THING2 THING1 \\#define THING1 1234 , &[_][]const u8{ @@ -692,7 +748,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\} }); - cases.add("#define string", + cases.add_both("#define string", \\#define foo "a string" , &[_][]const u8{ \\pub const foo = "a string"; @@ -788,7 +844,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\pub const FOO_CHAR = 63; }); - cases.add("macro with parens around negative number", + cases.add_both("macro with parens around negative number", \\#define LUA_GLOBALSINDEX (-10002) , &[_][]const u8{ \\pub const LUA_GLOBALSINDEX = -10002; 
@@ -1732,7 +1788,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\} }); - cases.addC( + cases.add_both( "u integer suffix after 0 (zero) in macro definition", "#define ZERO 0U", &[_][]const u8{ @@ -1740,7 +1796,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "l integer suffix after 0 (zero) in macro definition", "#define ZERO 0L", &[_][]const u8{ @@ -1748,7 +1804,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "ul integer suffix after 0 (zero) in macro definition", "#define ZERO 0UL", &[_][]const u8{ @@ -1756,7 +1812,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "lu integer suffix after 0 (zero) in macro definition", "#define ZERO 0LU", &[_][]const u8{ @@ -1764,7 +1820,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "ll integer suffix after 0 (zero) in macro definition", "#define ZERO 0LL", &[_][]const u8{ @@ -1772,7 +1828,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "ull integer suffix after 0 (zero) in macro definition", "#define ZERO 0ULL", &[_][]const u8{ @@ -1780,7 +1836,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.add_both( "llu integer suffix after 0 (zero) in macro definition", "#define ZERO 0LLU", &[_][]const u8{ @@ -1788,7 +1844,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { }, ); - cases.addC( + cases.addC(//todo "bitwise not on u-suffixed 0 (zero) in macro definition", "#define NOT_ZERO (~0U)", &[_][]const u8{