diff --git a/src-self-hosted/c_tokenizer.zig b/src-self-hosted/c_tokenizer.zig
index f14f26ae55..2aea0d57d3 100644
--- a/src-self-hosted/c_tokenizer.zig
+++ b/src-self-hosted/c_tokenizer.zig
@@ -40,12 +40,15 @@ pub const CToken = struct {
     };
 };
 
-pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
+pub fn tokenizeCMacro(tl: *TokenList, chars: [*:0]const u8) !void {
     var index: usize = 0;
     var first = true;
     while (true) {
         const tok = try next(chars, &index);
-        try tl.push(tok);
+        if (tok.id == .StrLit or tok.id == .CharLit)
+            try tl.push(try zigifyEscapeSequences(tl.allocator, tok))
+        else
+            try tl.push(tok);
         if (tok.id == .Eof)
             return;
         if (first) {
@@ -61,7 +64,94 @@ pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
     }
 }
 
-fn next(chars: [*]const u8, i: *usize) !CToken {
+/// Rewrites the C escape sequences in the text of `tok` so that they are
+/// spelled the way Zig spells them: `\a`, `\b`, `\f` and `\v` become `\x07`,
+/// `\x08`, `\x0C` and `\x0B`, `\?` becomes `?`, and `\n`, `\r`, `\t`, `\\`,
+/// `\'`, `\"` and `\x` are copied through unchanged.
+/// Returns `tok` itself when its bytes contain no backslash; otherwise the
+/// bytes of the returned token are allocated with `allocator`.
+/// Fails with `error.TokenizingFailed` on unicode, octal and unknown escapes.
+fn zigifyEscapeSequences(allocator: *std.mem.Allocator, tok: CToken) !CToken {
+    for (tok.bytes) |c| {
+        if (c == '\\') {
+            break;
+        }
+    } else return tok;
+    // Worst case is a two byte escape such as `\a` growing to four bytes (`\x07`).
+    var bytes = try allocator.alloc(u8, tok.bytes.len * 2);
+    // Do not leak the buffer when an unsupported escape aborts the rewrite.
+    errdefer allocator.free(bytes);
+    var escape = false;
+    var i: usize = 0;
+    for (tok.bytes) |c| {
+        if (escape) {
+            switch (c) {
+                'n', 'r', 't', '\\', '\'', '\"', 'x' => {
+                    bytes[i] = c;
+                },
+                'a' => {
+                    bytes[i] = 'x';
+                    i += 1;
+                    bytes[i] = '0';
+                    i += 1;
+                    bytes[i] = '7';
+                },
+                'b' => {
+                    bytes[i] = 'x';
+                    i += 1;
+                    bytes[i] = '0';
+                    i += 1;
+                    bytes[i] = '8';
+                },
+                'f' => {
+                    bytes[i] = 'x';
+                    i += 1;
+                    bytes[i] = '0';
+                    i += 1;
+                    bytes[i] = 'C';
+                },
+                'v' => {
+                    bytes[i] = 'x';
+                    i += 1;
+                    bytes[i] = '0';
+                    i += 1;
+                    bytes[i] = 'B';
+                },
+                '?' => {
+                    // Step back so that the '?' overwrites the backslash copied on the previous iteration.
+                    i -= 1;
+                    bytes[i] = '?';
+                },
+                'u', 'U' => {
+                    // TODO unicode escape sequences
+                    return error.TokenizingFailed;
+                },
+                '0'...'7' => {
+                    // TODO octal escape sequences
+                    return error.TokenizingFailed;
+                },
+                else => {
+                    // unknown escape sequence
+                    return error.TokenizingFailed;
+                },
+            }
+            i += 1;
+            escape = false;
+        } else {
+            if (c == '\\') {
+                escape = true;
+            }
+            bytes[i] = c;
+            i += 1;
+        }
+    }
+    return CToken{
+        .id = tok.id,
+        .bytes = bytes[0..i],
+    };
+}
+
+fn next(chars: [*:0]const u8, i: *usize) !CToken {
     var state: enum {
         Start,
         GotLt,
@@ -462,7 +552,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
             .String => { // TODO char escapes
                 switch (c) {
                     '\"' => {
-                        result.bytes = chars[begin_index + 1 .. i.* - 1];
+                        result.bytes = chars[begin_index..i.*];
                         return result;
                     },
                     else => {},
@@ -471,7 +561,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
             .CharLit => {
                 switch (c) {
                     '\'' => {
-                        result.bytes = chars[begin_index + 1 .. i.* - 1];
+                        result.bytes = chars[begin_index..i.*];
                         return result;
                     },
                     else => {},
diff --git a/src-self-hosted/clang.zig b/src-self-hosted/clang.zig
index 901660adce..ee76585f77 100644
--- a/src-self-hosted/clang.zig
+++ b/src-self-hosted/clang.zig
@@ -734,7 +734,7 @@ pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClang
 pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
 pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
 pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
-pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*c]const u8;
+pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*:0]const u8;
 pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
 pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
 pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig
index 4dcf03b5a3..43f0aa0853 100644
--- a/src-self-hosted/translate_c.zig
+++ b/src-self-hosted/translate_c.zig
@@ -2629,9 +2629,9 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
                 } else false;
 
                 (if (macro_fn)
-                    transMacroFnDefine(c, &tok_it, name, begin_c, begin_loc)
+                    transMacroFnDefine(c, &tok_it, name, begin_loc)
                 else
-                    transMacroDefine(c, &tok_it, name, begin_c, begin_loc)) catch |err| switch (err) {
+                    transMacroDefine(c, &tok_it, name, begin_loc)) catch |err| switch (err) {
                     error.UnsupportedTranslation,
                     error.ParseError,
                     => try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
@@ -2643,7 +2643,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
     }
 }
 
-fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const rp = makeRestorePoint(c);
 
     const visib_tok = try appendToken(c, .Keyword_pub, "pub");
@@ -2674,7 +2674,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
     _ = try c.macro_table.put(name, &node.base);
 }
 
-fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const rp = makeRestorePoint(c);
     const pub_tok = try appendToken(c, .Keyword_pub, "pub");
     const inline_tok = try appendToken(c, .Keyword_inline, "inline");
@@ -2829,11 +2829,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
     const tok = it.next().?;
     switch (tok.id) {
         .CharLit => {
-            const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len);
-            buf[0] = '\'';
-            writeEscapedString(buf[1..], tok.bytes);
-            buf[buf.len - 1] = '\'';
-            const token = try appendToken(rp.c, .CharLiteral, buf);
+            const token = try appendToken(rp.c, .CharLiteral, tok.bytes);
             const node = try rp.c.a().create(ast.Node.CharLiteral);
             node.* = ast.Node.CharLiteral{
                 .token = token,
@@ -2841,11 +2837,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
             return &node.base;
         },
         .StrLit => {
-            const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len);
-            buf[0] = '"';
-            writeEscapedString(buf[1..], tok.bytes);
-            buf[buf.len - 1] = '"';
-            const token = try appendToken(rp.c, .StringLiteral, buf);
+            const token = try appendToken(rp.c, .StringLiteral, tok.bytes);
             const node = try rp.c.a().create(ast.Node.StringLiteral);
             node.* = ast.Node.StringLiteral{
                 .token = token,
diff --git a/test/translate_c.zig b/test/translate_c.zig
index f603dd3837..7edbb0ea70 100644
--- a/test/translate_c.zig
+++ b/test/translate_c.zig
@@ -411,6 +411,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\}
     });
 
+    cases.add_2("macro escape sequences",
+        \\#define FOO "aoeu\xab derp"
+        \\#define FOO2 "aoeu\a derp"
+    , &[_][]const u8{
+        \\pub const FOO = "aoeu\xab derp";
+    ,
+        \\pub const FOO2 = "aoeu\x07 derp";
+    });
+
     /////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
 
     cases.add_both("typedef of function in struct field",