From 6150da3df99b41f89ea01a72e6c1b76fe4c36f89 Mon Sep 17 00:00:00 2001
From: Sahnvour
Date: Thu, 27 Jun 2019 23:21:35 +0200
Subject: [PATCH] direct port of wyhash v2

also inspired by https://github.com/ManDeJan/zig-wyhash
---
 std/hash.zig        |  4 ++
 std/hash/wyhash.zig | 99 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 std/hash/wyhash.zig

diff --git a/std/hash.zig b/std/hash.zig
index 148504aa39..723860da3b 100644
--- a/std/hash.zig
+++ b/std/hash.zig
@@ -16,6 +16,7 @@ pub const SipHash128 = siphash.SipHash128;
 
 pub const murmur = @import("hash/murmur.zig");
 pub const Murmur2_32 = murmur.Murmur2_32;
+
 pub const Murmur2_64 = murmur.Murmur2_64;
 pub const Murmur3_32 = murmur.Murmur3_32;
 
@@ -23,6 +24,8 @@ pub const cityhash = @import("hash/cityhash.zig");
 pub const CityHash32 = cityhash.CityHash32;
 pub const CityHash64 = cityhash.CityHash64;
 
+pub const wyhash = @import("hash/wyhash.zig").hash;
+
 test "hash" {
     _ = @import("hash/adler.zig");
     _ = @import("hash/crc.zig");
@@ -30,4 +33,5 @@ test "hash" {
     _ = @import("hash/siphash.zig");
     _ = @import("hash/murmur.zig");
     _ = @import("hash/cityhash.zig");
+    _ = @import("hash/wyhash.zig");
 }
diff --git a/std/hash/wyhash.zig b/std/hash/wyhash.zig
new file mode 100644
index 0000000000..57efe8fd63
--- /dev/null
+++ b/std/hash/wyhash.zig
@@ -0,0 +1,99 @@
+const std = @import("std");
+const mem = std.mem;
+
+const primes = [_]u64{
+    0xa0761d6478bd642f,
+    0xe7037ed1a0b428db,
+    0x8ebc6af09c88c6e3,
+    0x589965cc75374cc3,
+    0x1d8e4e27c47d124f,
+};
+
+fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
+    return mem.readVarInt(u64, data[0..bytes], @import("builtin").endian);
+}
+
+fn read_8bytes_swapped(data: []const u8) u64 {
+    return (read_bytes(4, data) << 32 | read_bytes(4, data[4..]));
+}
+
+fn mum(a: u64, b: u64) u64 {
+    var r: u128 = @intCast(u128, a) * @intCast(u128, b);
+    r = (r >> 64) ^ r;
+    return @truncate(u64, r);
+}
+
+fn mix0(a: u64, b: u64, seed: u64) u64 {
+    return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]);
+}
+
+fn mix1(a: u64, b: u64, seed: u64) u64 {
+    return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
+}
+
+pub fn hash(key: []const u8, initial_seed: u64) u64 {
+    var seed = initial_seed;
+
+    var i: usize = 0;
+    while (i + 32 <= key.len) : (i += 32) {
+        seed = mix0(
+            read_bytes(8, key[i..]),
+            read_bytes(8, key[i + 8 ..]),
+            seed,
+        ) ^ mix1(
+            read_bytes(8, key[i + 16 ..]),
+            read_bytes(8, key[i + 24 ..]),
+            seed,
+        );
+    }
+
+    const rem_len = @truncate(u5, key.len);
+    const rem_key = key[i..];
+    seed = switch (rem_len) {
+        0 => seed,
+        1 => mix0(read_bytes(1, rem_key), primes[4], seed),
+        2 => mix0(read_bytes(2, rem_key), primes[4], seed),
+        3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
+        4 => mix0(read_bytes(4, rem_key), primes[4], seed),
+        5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
+        6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
+        7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
+        8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
+        9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
+        10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
+        11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
+        12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
+        13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
+        14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
+        15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
+        16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
+        17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
+        18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
+        19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
+        20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
+        21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
+        22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
+        23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
+        24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
+        25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
+        26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
+        27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
+        28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
+        29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
+        30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
+        31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
+    };
+
+    return mum(seed ^ key.len, primes[4]);
+}
+
+test "test vectors" {
+    const expectEqual = std.testing.expectEqual;
+    expectEqual(hash("", 0), 0x0);
+    expectEqual(hash("a", 1), 0xbed235177f41d328);
+    expectEqual(hash("abc", 2), 0xbe348debe59b27c3);
+    expectEqual(hash("message digest", 3), 0x37320f657213a290);
+    expectEqual(hash("abcdefghijklmnopqrstuvwxyz", 4), 0xd0b270e1d8a7019c);
+    expectEqual(hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 5), 0x602a1894d3bbfe7f);
+    expectEqual(hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890", 6), 0x829e9c148b75970e);
+}