std: simplify utf8ToUtf16Le

It is also faster: on my machine, unicode/throughput_test.zig now gives, e.g.:
> original utf8ToUtf16Le: elapsed: 1048 ns (0 ms)
> new utf8ToUtf16Le: elapsed: 971 ns (0 ms)
This commit is contained in:
daurnimator 2019-12-28 14:35:53 +11:00
parent 5843a6e3bc
commit ab6065407d
No known key found for this signature in database
GPG Key ID: 45B429A8F9D9D22A

View File

// Diff hunk for utf8ToUtf16Le: walks the bytes of `utf8`, decodes one codepoint
// per iteration and stores it into `utf16le` as one or two u16 code units.
// Returns `dest_i`, the number of u16 values written. Returns
// error.InvalidUtf8 when the lead byte or the sequence is rejected.
// NOTE(review): the +/- diff markers and the indentation are missing from this
// view. Judging by the hunk header (33 old lines, 21 new lines) and the commit
// title, the `@clz` switch is presumably the REMOVED code and the
// `utf8ByteSequenceLength` section the ADDED code — confirm against the commit.
// The closing brace of the old switch also appears to be absent here.
@ -576,33 +576,21 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
// dest_i indexes the next free slot in utf16le; src_i the next unread byte.
var dest_i: usize = 0;
var src_i: usize = 0;
while (src_i < utf8.len) {
// --- presumably removed (old version) starts here ---
// n is the number of leading 1 bits in the lead byte (leading zeros of ~byte).
const byte = utf8[src_i];
const n = @clz(u8, ~byte);
switch (n) {
// Top bit clear: the byte is stored directly as a single code unit.
// NOTE(review): this store does not go through mem.nativeToLittle, unlike
// the stores in the other branch.
0 => {
utf16le[dest_i] = byte;
dest_i += 1;
src_i += 1;
continue;
},
// Here n doubles as the sequence length in bytes passed to utf8Decode.
2, 3, 4 => {
const next_src_i = src_i + n;
const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8;
if (codepoint < 0x10000) {
const short = @intCast(u16, codepoint);
utf16le[dest_i] = mem.nativeToLittle(u16, short);
dest_i += 1;
} else {
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
utf16le[dest_i] = mem.nativeToLittle(u16, high);
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
dest_i += 2;
}
src_i = next_src_i;
},
// Any other leading-ones count (1, or 5 and above) is rejected.
else => return error.InvalidUtf8,
// --- presumably added (new version) starts here ---
// The sequence length now comes from utf8ByteSequenceLength; any error it
// reports is mapped to error.InvalidUtf8. One-byte sequences take the same
// path as multi-byte ones, so every store goes through mem.nativeToLittle.
const n = utf8ByteSequenceLength(utf8[src_i]) catch return error.InvalidUtf8;
// NOTE(review): no check is visible here that next_src_i <= utf8.len before
// slicing — confirm how input that ends mid-sequence is handled.
const next_src_i = src_i + n;
const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8;
// NOTE(review): no check is visible that dest_i (or dest_i + 1) is within
// utf16le.len — presumably the caller must size the buffer; verify.
if (codepoint < 0x10000) {
// Codepoints below 0x10000 are written as a single u16.
const short = @intCast(u16, codepoint);
utf16le[dest_i] = mem.nativeToLittle(u16, short);
dest_i += 1;
} else {
// Otherwise emit a surrogate pair: the bits of (codepoint - 0x10000) above
// the low ten are added to 0xD800, and the low ten bits to 0xDC00.
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
utf16le[dest_i] = mem.nativeToLittle(u16, high);
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
dest_i += 2;
}
// Advance past the bytes consumed by this codepoint.
src_i = next_src_i;
}
return dest_i;
}