From 62d717e2ffb1e9a1127652521de57c2e18cf7d3b Mon Sep 17 00:00:00 2001
From: r00ster91 <r00ster91@protonmail.com>
Date: Wed, 13 Apr 2022 18:28:49 +0200
Subject: [PATCH] Add `std.unicode.replacement_character`

---
 lib/std/fmt.zig     |  6 +++---
 lib/std/unicode.zig | 10 +++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig
index 3f230a445e..80a2e87c5c 100644
--- a/lib/std/fmt.zig
+++ b/lib/std/fmt.zig
@@ -966,10 +966,10 @@ pub fn formatUnicodeCodepoint(
     writer: anytype,
 ) !void {
     var buf: [4]u8 = undefined;
-    const len = std.unicode.utf8Encode(c, &buf) catch |err| switch (err) {
+    const len = unicode.utf8Encode(c, &buf) catch |err| switch (err) {
         error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => {
-            // In case of error output the replacement char U+FFFD
-            return formatBuf(&[_]u8{ 0xef, 0xbf, 0xbd }, options, writer);
+            const len = unicode.utf8Encode(unicode.replacement_character, &buf) catch unreachable;
+            return formatBuf(buf[0..len], options, writer);
         },
     };
     return formatBuf(buf[0..len], options, writer);
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 487f3defdf..81a7ed838f 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -3,6 +3,11 @@ const assert = std.debug.assert;
 const testing = std.testing;
 const mem = std.mem;
 
+/// The Unicode replacement character (U+FFFD). Use this to replace an unknown, unrecognized, or unrepresentable character.
+///
+/// See also: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
+pub const replacement_character: u21 = 0xFFFD;
+
 /// Returns how many bytes the UTF-8 representation would require
 /// for the given codepoint.
 pub fn utf8CodepointSequenceLength(c: u21) !u3 {
@@ -777,15 +782,14 @@ fn formatUtf16le(
     options: std.fmt.FormatOptions,
     writer: anytype,
 ) !void {
-    const unknown_codepoint = 0xfffd;
     _ = fmt;
     _ = options;
     var buf: [300]u8 = undefined; // just a random size I chose
     var it = Utf16LeIterator.init(utf16le);
     var u8len: usize = 0;
-    while (it.nextCodepoint() catch unknown_codepoint) |codepoint| {
+    while (it.nextCodepoint() catch replacement_character) |codepoint| {
         u8len += utf8Encode(codepoint, buf[u8len..]) catch
-            utf8Encode(unknown_codepoint, buf[u8len..]) catch unreachable;
+            utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
         if (u8len + 3 >= buf.len) {
             try writer.writeAll(buf[0..u8len]);
             u8len = 0;