std: optimise utf8ByteSequenceLength

Also tested (but not as fast):
```zig
pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
    /// Returns the total length in bytes (1-4) of the UTF-8 sequence that
    /// starts with `first_byte`, or error.Utf8InvalidStartByte if the byte
    /// is not a valid UTF-8 start byte.
    // @clz of the complement counts the leading 1 bits of first_byte.
    const len = @clz(u8, ~first_byte);
    // 0xxxxxxx: single-byte (ASCII) codepoint.
    if (len == 0) return 1;
    // 110xxxxx / 1110xxxx / 11110xxx: 2-, 3-, or 4-byte sequence.
    // len == 1 (10xxxxxx) is a continuation byte, not a start byte, and
    // len > 4 (11111xxx) is never valid UTF-8 — both fall through to the error.
    if (len >= 2 and len <= 4) return @intCast(u3, len);
    return error.Utf8InvalidStartByte;
}
```
This commit is contained in:
daurnimator 2019-12-28 14:34:00 +11:00
parent 8b72eedc76
commit 5843a6e3bc
No known key found for this signature in database
GPG Key ID: 45B429A8F9D9D22A

View File

@ -18,11 +18,13 @@ pub fn utf8CodepointSequenceLength(c: u32) !u3 {
/// returns a number 1-4 indicating the total length of the codepoint in bytes.
/// If this byte does not match the form of a UTF-8 start byte, returns Utf8InvalidStartByte.
pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
// NOTE(review): this span is a rendered diff with +/- markers stripped; the
// next five lines are the REMOVED (pre-change) implementation, which tests
// each UTF-8 start-byte prefix with a masked comparison.
if (first_byte < 0b10000000) return @as(u3, 1);
if (first_byte & 0b11100000 == 0b11000000) return @as(u3, 2);
if (first_byte & 0b11110000 == 0b11100000) return @as(u3, 3);
if (first_byte & 0b11111000 == 0b11110000) return @as(u3, 4);
return error.Utf8InvalidStartByte;
// NOTE(review): the ADDED (post-change) implementation begins here. @clz of
// the complement counts the leading 1 bits of first_byte: 0 leading ones is
// ASCII (1 byte), 2-4 give the multi-byte sequence length, and everything
// else (1 = continuation byte, >4 = invalid prefix) is rejected.
return switch (@clz(u8, ~first_byte)) {
0 => 1,
2 => 2,
3 => 3,
4 => 4,
else => error.Utf8InvalidStartByte,
};
}
/// Encodes the given codepoint into a UTF-8 byte sequence.