std: slightly improve codegen of std.unicode.utf8ValidateSlice

This commit is contained in:
Jacob Young 2023-10-21 13:43:06 -04:00 committed by Andrew Kelley
parent b4d4d19958
commit ccc9ebf0bd

View File

@@ -201,21 +201,18 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
pub fn utf8ValidateSlice(input: []const u8) bool { pub fn utf8ValidateSlice(input: []const u8) bool {
var remaining = input; var remaining = input;
const V_len = std.simd.suggestVectorSize(usize) orelse 1; const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
const V = @Vector(V_len, usize); const Chunk = @Vector(chunk_len, u8);
const u8s_in_vector = @sizeOf(usize) * V_len;
// Fast path. Check for and skip ASCII characters at the start of the input. // Fast path. Check for and skip ASCII characters at the start of the input.
while (remaining.len >= u8s_in_vector) { while (remaining.len >= chunk_len) {
const chunk: V = @bitCast(remaining[0..u8s_in_vector].*); const chunk: Chunk = remaining[0..chunk_len].*;
const swapped = mem.littleToNative(V, chunk); const mask: Chunk = @splat(0x80);
const reduced = @reduce(.Or, swapped); if (@reduce(.Or, chunk & mask == mask)) {
const mask: usize = @bitCast([1]u8{0x80} ** @sizeOf(usize)); // found a non ASCII byte
if (reduced & mask != 0) {
// Found a non ASCII byte
break; break;
} }
remaining = remaining[u8s_in_vector..]; remaining = remaining[chunk_len..];
} }
// default lowest and highest continuation byte // default lowest and highest continuation byte