mirror of
https://github.com/ziglang/zig.git
synced 2026-02-14 13:30:45 +00:00
Add std.unicode.replacement_character
This commit is contained in:
parent
618398b7d3
commit
62d717e2ff
@ -966,10 +966,10 @@ pub fn formatUnicodeCodepoint(
|
||||
writer: anytype,
|
||||
) !void {
|
||||
var buf: [4]u8 = undefined;
|
||||
const len = std.unicode.utf8Encode(c, &buf) catch |err| switch (err) {
|
||||
const len = unicode.utf8Encode(c, &buf) catch |err| switch (err) {
|
||||
error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => {
|
||||
// In case of error output the replacement char U+FFFD
|
||||
return formatBuf(&[_]u8{ 0xef, 0xbf, 0xbd }, options, writer);
|
||||
const len = unicode.utf8Encode(unicode.replacement_character, &buf) catch unreachable;
|
||||
return formatBuf(buf[0..len], options, writer);
|
||||
},
|
||||
};
|
||||
return formatBuf(buf[0..len], options, writer);
|
||||
|
||||
@ -3,6 +3,11 @@ const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
const mem = std.mem;
|
||||
|
||||
/// U+FFFD, the Unicode replacement character: use this to replace an unknown, unrecognized, or unrepresentable character.
|
||||
///
|
||||
/// See also: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
|
||||
pub const replacement_character: u21 = 0xFFFD;
|
||||
|
||||
/// Returns how many bytes the UTF-8 representation would require
|
||||
/// for the given codepoint.
|
||||
pub fn utf8CodepointSequenceLength(c: u21) !u3 {
|
||||
@ -777,15 +782,14 @@ fn formatUtf16le(
|
||||
options: std.fmt.FormatOptions,
|
||||
writer: anytype,
|
||||
) !void {
|
||||
const unknown_codepoint = 0xfffd;
|
||||
_ = fmt;
|
||||
_ = options;
|
||||
var buf: [300]u8 = undefined; // just a random size I chose
|
||||
var it = Utf16LeIterator.init(utf16le);
|
||||
var u8len: usize = 0;
|
||||
while (it.nextCodepoint() catch unknown_codepoint) |codepoint| {
|
||||
while (it.nextCodepoint() catch replacement_character) |codepoint| {
|
||||
u8len += utf8Encode(codepoint, buf[u8len..]) catch
|
||||
utf8Encode(unknown_codepoint, buf[u8len..]) catch unreachable;
|
||||
utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
|
||||
if (u8len + 3 >= buf.len) {
|
||||
try writer.writeAll(buf[0..u8len]);
|
||||
u8len = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user