From ab6065407d74fc8d63d398c60f9fe653374d9d6d Mon Sep 17 00:00:00 2001 From: daurnimator Date: Sat, 28 Dec 2019 14:35:53 +1100 Subject: [PATCH] std: simplify utf8ToUtf16Le Also faster, on my machine unicode/throughput_test.zig now gives e.g. > original utf8ToUtf16Le: elapsed: 1048 ns (0 ms) > new utf8ToUtf16Le: elapsed: 971 ns (0 ms) --- lib/std/unicode.zig | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 88b6b0bd2c..52fd5889e4 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -576,33 +576,21 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize { var dest_i: usize = 0; var src_i: usize = 0; while (src_i < utf8.len) { - const byte = utf8[src_i]; - const n = @clz(u8, ~byte); - switch (n) { - 0 => { - utf16le[dest_i] = byte; - dest_i += 1; - src_i += 1; - continue; - }, - 2, 3, 4 => { - const next_src_i = src_i + n; - const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8; - if (codepoint < 0x10000) { - const short = @intCast(u16, codepoint); - utf16le[dest_i] = mem.nativeToLittle(u16, short); - dest_i += 1; - } else { - const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800; - const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00; - utf16le[dest_i] = mem.nativeToLittle(u16, high); - utf16le[dest_i + 1] = mem.nativeToLittle(u16, low); - dest_i += 2; - } - src_i = next_src_i; - }, - else => return error.InvalidUtf8, + const n = utf8ByteSequenceLength(utf8[src_i]) catch return error.InvalidUtf8; + const next_src_i = src_i + n; + const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8; + if (codepoint < 0x10000) { + const short = @intCast(u16, codepoint); + utf16le[dest_i] = mem.nativeToLittle(u16, short); + dest_i += 1; + } else { + const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800; + const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00; + utf16le[dest_i] = mem.nativeToLittle(u16, high); + utf16le[dest_i + 1] = mem.nativeToLittle(u16, low); + dest_i += 2; } + src_i = next_src_i; } return dest_i; }