mirror of
https://github.com/ziglang/zig.git
synced 2026-02-12 20:37:54 +00:00
translate-c: Escape non-ASCII characters that appear in macros
Macro definitions are simply a slice of bytes, which may not be UTF-8 encoded. If they are not UTF-8 encoded, escape non-printable and non-ASCII characters as `\xNN`. Fixes #12784
This commit is contained in:
parent
9e070b653c
commit
8e631ee3e7
@ -5957,20 +5957,36 @@ fn zigifyEscapeSequences(ctx: *Context, m: *MacroCtx) ![]const u8 {
|
||||
return bytes[0..i];
|
||||
}
|
||||
|
||||
/// Returns the output of zigifyEscapeSequences unchanged when it is already valid UTF-8. Otherwise
|
||||
/// returns an arena-allocated copy formatted with fmtSliceEscapeLower, which also treats non-ASCII
|
||||
/// characters (c > 127) as non-printable, so that the Zig source we output will itself be UTF-8.
|
||||
fn escapeUnprintables(ctx: *Context, m: *MacroCtx) ![]const u8 {
|
||||
const zigified = try zigifyEscapeSequences(ctx, m);
|
||||
if (std.unicode.utf8ValidateSlice(zigified)) return zigified;
|
||||
|
||||
const formatter = std.fmt.fmtSliceEscapeLower(zigified);
|
||||
const encoded_size = @intCast(usize, std.fmt.count("{s}", .{formatter}));
|
||||
var output = try ctx.arena.alloc(u8, encoded_size);
|
||||
return std.fmt.bufPrint(output, "{s}", .{formatter}) catch |err| switch (err) {
|
||||
error.NoSpaceLeft => unreachable, // output was sized by std.fmt.count for this same format and argument
|
||||
else => |e| return e,
|
||||
};
|
||||
}
|
||||
|
||||
fn parseCPrimaryExprInner(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!Node {
|
||||
const tok = m.next().?;
|
||||
const slice = m.slice();
|
||||
switch (tok) {
|
||||
.CharLiteral => {
|
||||
if (slice[0] != '\'' or slice[1] == '\\' or slice.len == 3) {
|
||||
return Tag.char_literal.create(c.arena, try zigifyEscapeSequences(c, m));
|
||||
return Tag.char_literal.create(c.arena, try escapeUnprintables(c, m));
|
||||
} else {
|
||||
const str = try std.fmt.allocPrint(c.arena, "0x{s}", .{std.fmt.fmtSliceHexLower(slice[1 .. slice.len - 1])});
|
||||
return Tag.integer_literal.create(c.arena, str);
|
||||
}
|
||||
},
|
||||
.StringLiteral => {
|
||||
return Tag.string_literal.create(c.arena, try zigifyEscapeSequences(c, m));
|
||||
return Tag.string_literal.create(c.arena, try escapeUnprintables(c, m));
|
||||
},
|
||||
.IntegerLiteral, .FloatLiteral => {
|
||||
return parseCNumLit(c, m);
|
||||
|
||||
@ -5,6 +5,7 @@ const expectEqual = std.testing.expectEqual;
|
||||
const expectEqualStrings = std.testing.expectEqualStrings;
|
||||
|
||||
const h = @cImport(@cInclude("behavior/translate_c_macros.h"));
|
||||
const latin1 = @cImport(@cInclude("behavior/translate_c_macros_not_utf8.h"));
|
||||
|
||||
test "casting to void with a macro" {
|
||||
h.IGNORE_ME_1(42);
|
||||
@ -134,3 +135,14 @@ test "string literal macro with embedded tab character" {
|
||||
|
||||
try expectEqualStrings("hello\t", h.EMBEDDED_TAB);
|
||||
}
|
||||
|
||||
test "string and char literals that are not UTF-8 encoded. Issue #12784" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
|
||||
try expectEqual(@as(u8, '\xA9'), latin1.UNPRINTABLE_CHAR); // the header is latin1-encoded, so its non-ASCII character is the single byte 0xA9
|
||||
try expectEqualStrings("\xA9\xA9\xA9", latin1.UNPRINTABLE_STRING);
|
||||
}
|
||||
|
||||
5
test/behavior/translate_c_macros_not_utf8.h
Normal file
5
test/behavior/translate_c_macros_not_utf8.h
Normal file
@ -0,0 +1,5 @@
|
||||
// Note: This file is encoded with ISO/IEC 8859-1 (latin1), not UTF-8.
|
||||
// Do not change the encoding
|
||||
|
||||
#define UNPRINTABLE_STRING "©©©"
|
||||
#define UNPRINTABLE_CHAR '©'
|
||||
Loading…
x
Reference in New Issue
Block a user