From 61e9e82bdc10110b74bdeb973cc542c7b73a4ae2 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 13 Sep 2020 21:12:21 +0200 Subject: [PATCH] std: Make the CRC32 calculation slightly faster Slightly speed up the slicing-by-8 code path by replacing the (load+shift+xor)*4 sequence with a single u32 load plus an xor. Before: ``` iterative: 1018 MiB/s [000000006c3b110d] small keys: 1075 MiB/s [0035bf3dcac00000] ``` After: ``` iterative: 1114 MiB/s [000000006c3b110d] small keys: 1324 MiB/s [0035bf3dcac00000] ``` --- lib/std/hash/crc.zig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/std/hash/crc.zig b/lib/std/hash/crc.zig index 37695df8b3..6290369fca 100644 --- a/lib/std/hash/crc.zig +++ b/lib/std/hash/crc.zig @@ -71,10 +71,7 @@ pub fn Crc32WithPoly(comptime poly: Polynomial) type { const p = input[i .. i + 8]; // Unrolling this way gives ~50Mb/s increase - self.crc ^= (@as(u32, p[0]) << 0); - self.crc ^= (@as(u32, p[1]) << 8); - self.crc ^= (@as(u32, p[2]) << 16); - self.crc ^= (@as(u32, p[3]) << 24); + self.crc ^= std.mem.readIntLittle(u32, p[0..4]); self.crc = lookup_tables[0][p[7]] ^