From 6c4efab10611b9fe807f2517d2eec2ac60ae4f5c Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Mon, 21 Sep 2020 15:19:14 +0200
Subject: [PATCH] std: Introduce std.unicode.utf8CountCodepoints

---
 lib/std/unicode.zig | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 18bd5ab0e2..06dd78bd40 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -153,6 +153,23 @@ pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
     return value;
 }
 
+/// Returns the number of Unicode codepoints encoded in the UTF-8 slice `s`.
+/// Validity is not asserted: errors from `utf8ByteSequenceLength` and `utf8Decode`
+/// propagate, and a sequence cut off by the end of `s` gives `error.TruncatedInput`.
+pub fn utf8CountCodepoints(s: []const u8) !usize {
+    var len: usize = 0;
+
+    var i: usize = 0;
+    while (i < s.len) : (len += 1) { // each completed iteration counts one codepoint
+        const n = try utf8ByteSequenceLength(s[i]); // sequence length taken from its first byte
+        if (i + n > s.len) return error.TruncatedInput;
+        _ = try utf8Decode(s[i .. i + n]); // decoded only for validation; the value is discarded
+        i += n;
+    }
+
+    return len;
+}
+
 pub fn utf8ValidateSlice(s: []const u8) bool {
     var i: usize = 0;
     while (i < s.len) {
@@ -687,7 +704,6 @@ pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16Le
     }
 }
 
-/// Returns length of a supplied UTF-8 string literal. Asserts that the data is valid UTF-8.
 fn calcUtf16LeLen(utf8: []const u8) usize {
     var src_i: usize = 0;
     var dest_len: usize = 0;
@@ -757,3 +773,15 @@ test "utf8ToUtf16LeStringLiteral" {
         testing.expect(utf16[2] == 0);
     }
 }
+
+fn testUtf8CountCodepoints() !void { // helper so the same checks can be run more than once
+    testing.expectEqual(@as(usize, 10), try utf8CountCodepoints("abcdefghij")); // ASCII: 1 byte each
+    testing.expectEqual(@as(usize, 10), try utf8CountCodepoints("äåéëþüúíóö")); // 2 bytes each
+    testing.expectEqual(@as(usize, 5), try utf8CountCodepoints("こんにちは")); // 3 bytes each
+    testing.expectError(error.Utf8EncodesSurrogateHalf, utf8CountCodepoints("\xED\xA0\x80")); // encodes U+D800
+}
+
+test "utf8 count codepoints" {
+    try testUtf8CountCodepoints(); // runtime evaluation
+    comptime testUtf8CountCodepoints() catch unreachable; // same checks, evaluated at compile time
+}