From 5843a6e3bc1a6353f76ebca57d6099337b90139a Mon Sep 17 00:00:00 2001
From: daurnimator
Date: Sat, 28 Dec 2019 14:34:00 +1100
Subject: [PATCH] std: optimise utf8ByteSequenceLength

Also tested (but not as fast):
```zig
pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
    const len = @clz(u8, ~first_byte);
    if (len == 0) return 1;
    if (len < 4) return @intCast(u3, len);
    return error.Utf8InvalidStartByte;
}
```
---
 lib/std/unicode.zig | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index c2639f9158..88b6b0bd2c 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -18,11 +18,13 @@ pub fn utf8CodepointSequenceLength(c: u32) !u3 {
 /// returns a number 1-4 indicating the total length of the codepoint in bytes.
 /// If this byte does not match the form of a UTF-8 start byte, returns Utf8InvalidStartByte.
 pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
-    if (first_byte < 0b10000000) return @as(u3, 1);
-    if (first_byte & 0b11100000 == 0b11000000) return @as(u3, 2);
-    if (first_byte & 0b11110000 == 0b11100000) return @as(u3, 3);
-    if (first_byte & 0b11111000 == 0b11110000) return @as(u3, 4);
-    return error.Utf8InvalidStartByte;
+    return switch (@clz(u8, ~first_byte)) {
+        0 => 1,
+        2 => 2,
+        3 => 3,
+        4 => 4,
+        else => error.Utf8InvalidStartByte,
+    };
 }
 
 /// Encodes the given codepoint into a UTF-8 byte sequence.