mirror of
https://github.com/ziglang/zig.git
synced 2026-01-09 08:55:36 +00:00
std: add options to std.json.stringfy to control escaping
This commit is contained in:
parent
17f5d04bed
commit
edf487b126
@ -2357,10 +2357,38 @@ pub const StringifyOptions = struct {
|
||||
/// Controls the whitespace emitted
|
||||
whitespace: ?Whitespace = null,
|
||||
|
||||
// TODO: make escaping '/' in strings optional?
|
||||
/// Should '/' be escaped in strings?
|
||||
escape_solidus: bool = false,
|
||||
|
||||
/// Should unicode characters be escaped in strings?
|
||||
escape_unicode: bool = false,
|
||||
|
||||
// TODO: allow picking if []u8 is string or array?
|
||||
};
|
||||
|
||||
fn outputUnicodeEscape(
|
||||
codepoint: u21,
|
||||
out_stream: var,
|
||||
) !void {
|
||||
if (codepoint <= 0xFFFF) {
|
||||
// If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
|
||||
// then it may be represented as a six-character sequence: a reverse solidus, followed
|
||||
// by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
} else {
|
||||
assert(codepoint <= 0x10FFFF);
|
||||
// To escape an extended character that is not in the Basic Multilingual Plane,
|
||||
// the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
|
||||
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
|
||||
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stringify(
|
||||
value: var,
|
||||
options: StringifyOptions,
|
||||
@ -2467,12 +2495,21 @@ pub fn stringify(
|
||||
var i: usize = 0;
|
||||
while (i < value.len) : (i += 1) {
|
||||
switch (value[i]) {
|
||||
// normal ascii characters
|
||||
0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => try out_stream.writeAll(value[i .. i + 1]),
|
||||
// control characters with short escapes
|
||||
// normal ascii character
|
||||
0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try out_stream.writeByte(c),
|
||||
// only 2 characters that *must* be escaped
|
||||
'\\' => try out_stream.writeAll("\\\\"),
|
||||
'\"' => try out_stream.writeAll("\\\""),
|
||||
'/' => try out_stream.writeAll("\\/"),
|
||||
// solidus is optional to escape
|
||||
'/' => {
|
||||
if (options.escape_solidus) {
|
||||
try out_stream.writeAll("\\/");
|
||||
} else {
|
||||
try out_stream.writeByte('\\');
|
||||
}
|
||||
},
|
||||
// control characters with short escapes
|
||||
// TODO: option to switch between unicode and 'short' forms?
|
||||
0x8 => try out_stream.writeAll("\\b"),
|
||||
0xC => try out_stream.writeAll("\\f"),
|
||||
'\n' => try out_stream.writeAll("\\n"),
|
||||
@ -2480,22 +2517,12 @@ pub fn stringify(
|
||||
'\t' => try out_stream.writeAll("\\t"),
|
||||
else => {
|
||||
const ulen = std.unicode.utf8ByteSequenceLength(value[i]) catch unreachable;
|
||||
const codepoint = std.unicode.utf8Decode(value[i .. i + ulen]) catch unreachable;
|
||||
if (codepoint <= 0xFFFF) {
|
||||
// If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
|
||||
// then it may be represented as a six-character sequence: a reverse solidus, followed
|
||||
// by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
// control characters (only things left with 1 byte length) should always be printed as unicode escapes
|
||||
if (ulen == 1 or options.escape_unicode) {
|
||||
const codepoint = std.unicode.utf8Decode(value[i .. i + ulen]) catch unreachable;
|
||||
try outputUnicodeEscape(codepoint, out_stream);
|
||||
} else {
|
||||
// To escape an extended character that is not in the Basic Multilingual Plane,
|
||||
// the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
|
||||
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
|
||||
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
try out_stream.writeAll("\\u");
|
||||
try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
|
||||
try out_stream.writeAll(value[i .. i + ulen]);
|
||||
}
|
||||
i += ulen - 1;
|
||||
},
|
||||
@ -2609,15 +2636,25 @@ test "stringify basic types" {
|
||||
test "stringify string" {
|
||||
try teststringify("\"hello\"", "hello", StringifyOptions{});
|
||||
try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{});
|
||||
try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .escape_unicode = true });
|
||||
try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{});
|
||||
try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .escape_unicode = true });
|
||||
}
|
||||
|
||||
test "stringify tagged unions" {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user