From 44533f10fee130498fb811eabb72e2afdc3c0f56 Mon Sep 17 00:00:00 2001
From: LemonBoy
Date: Mon, 21 Sep 2020 15:50:43 +0200
Subject: [PATCH] std: Introduce std.unicode.utf8ValidCodepoint

---
 lib/std/unicode.zig | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 06dd78bd40..ecce1b7722 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -153,6 +153,15 @@ pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
     return value;
 }
 
+/// Returns true if `value` is a Unicode codepoint that can be encoded in UTF-8, i.e. it is neither a surrogate nor greater than 0x10FFFF.
+pub fn utf8ValidCodepoint(value: u21) bool {
+    return switch (value) {
+        0xD800...0xDFFF => false, // Surrogate range: rejected, not encodable in UTF-8
+        0x110000...0x1FFFFF => false, // Above the maximum codepoint value (0x10FFFF), up to the largest u21
+        else => true,
+    };
+}
+
 /// Returns the length of a supplied UTF-8 string literal in terms of unicode
 /// codepoints.
 /// Asserts that the data is valid UTF-8.
@@ -785,3 +794,19 @@ test "utf8 count codepoints" {
     try testUtf8CountCodepoints();
     comptime testUtf8CountCodepoints() catch unreachable;
 }
+
+fn testUtf8ValidCodepoint() !void {
+    testing.expect(utf8ValidCodepoint('e'));
+    testing.expect(utf8ValidCodepoint('ë'));
+    testing.expect(utf8ValidCodepoint('は'));
+    testing.expect(utf8ValidCodepoint(0xe000));
+    testing.expect(utf8ValidCodepoint(0x10ffff));
+    testing.expect(!utf8ValidCodepoint(0xd800));
+    testing.expect(!utf8ValidCodepoint(0xdfff));
+    testing.expect(!utf8ValidCodepoint(0x110000));
+}
+
+test "utf8 valid codepoint" {
+    try testUtf8ValidCodepoint();
+    comptime testUtf8ValidCodepoint() catch unreachable;
+}