mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 04:48:20 +00:00
wyhash: re-add the stateful streaming version so that both are available
and rename the stateless one so that it is not the default option
This commit is contained in:
parent
4c882e731f
commit
90e921f7a7
@ -29,6 +29,7 @@ pub const CityHash64 = cityhash.CityHash64;
|
||||
|
||||
const wyhash = @import("hash/wyhash.zig");
|
||||
pub const Wyhash = wyhash.Wyhash;
|
||||
pub const WyhashStateless = wyhash.WyhashStateless;
|
||||
|
||||
test "hash" {
|
||||
_ = @import("hash/adler.zig");
|
||||
|
||||
@ -28,6 +28,11 @@ const hashes = [_]Hash{
|
||||
.name = "wyhash",
|
||||
.init_u64 = 0,
|
||||
},
|
||||
Hash{
|
||||
.ty = hash.WyhashStateless,
|
||||
.name = "wyhash-stateless",
|
||||
.init_u64 = 0,
|
||||
},
|
||||
Hash{
|
||||
.ty = hash.SipHash64(1, 3),
|
||||
.name = "siphash(1,3)",
|
||||
|
||||
@ -31,13 +31,20 @@ fn mix1(a: u64, b: u64, seed: u64) u64 {
|
||||
return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
|
||||
}
|
||||
|
||||
/// Fast non-cryptographic 64bit hash function.
|
||||
/// See https://github.com/wangyi-fudan/wyhash
|
||||
pub const Wyhash = struct {
|
||||
seed: u64,
|
||||
|
||||
buf: [32]u8,
|
||||
buf_len: usize,
|
||||
msg_len: usize,
|
||||
|
||||
/// Creates a streaming hasher keyed with `seed`.
/// The block buffer is left uninitialised but marked empty, and the
/// running message length starts at zero.
pub fn init(seed: u64) Wyhash {
    var hasher: Wyhash = undefined;
    hasher.seed = seed;
    hasher.buf_len = 0;
    hasher.msg_len = 0;
    return hasher;
}
|
||||
@ -56,7 +63,110 @@ pub const Wyhash = struct {
|
||||
);
|
||||
}
|
||||
|
||||
fn partial(self: *Wyhash, b: []const u8) void {
|
||||
/// Absorbs `b` into the hash state.
/// Complete 32-byte blocks are hashed immediately; bytes that do not
/// complete a block are kept in `buf` until a later call or `final`.
pub fn update(self: *Wyhash, b: []const u8) void {
    var off: usize = 0;

    // Top up and flush a partially filled buffer as soon as it can reach a
    // full 32-byte block. The comparison must be `>=`: with `>` the buffer
    // could be left holding exactly 32 bytes, and `final` narrows `buf_len`
    // to a `u5`, which cannot represent 32.
    if (self.buf_len != 0 and self.buf_len + b.len >= 32) {
        off += 32 - self.buf_len;
        mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
        self.round(self.buf[0..]);
        self.buf_len = 0;
    }

    // Full middle blocks are hashed straight from the input slice.
    while (off + 32 <= b.len) : (off += 32) {
        @inlineCall(self.round, b[off .. off + 32]);
    }

    // Stash the remaining bytes (always fewer than 32) for the next pass.
    const rest = b[off..];
    mem.copy(u8, self.buf[self.buf_len..], rest);
    self.buf_len += rest.len;
    self.msg_len += b.len;
}
|
||||
|
||||
/// Finishes the hash and returns the 64-bit digest.
/// The bytes still held in `buf` (0 to 31 of them) are mixed into the seed,
/// the mixed value is written back to `self.seed`, and the result is
/// `mum(seed ^ msg_len, primes[4])`.
pub fn final(self: *Wyhash) u64 {
    const state = self.seed;
    const tail_len = @intCast(u5, self.buf_len);
    const tail = self.buf[0..self.buf_len];

    // One arm per possible tail length; `u5` makes the switch exhaustive.
    const mixed = switch (tail_len) {
        0 => state,
        1 => mix0(read_bytes(1, tail), primes[4], state),
        2 => mix0(read_bytes(2, tail), primes[4], state),
        3 => mix0((read_bytes(2, tail) << 8) | read_bytes(1, tail[2..]), primes[4], state),
        4 => mix0(read_bytes(4, tail), primes[4], state),
        5 => mix0((read_bytes(4, tail) << 8) | read_bytes(1, tail[4..]), primes[4], state),
        6 => mix0((read_bytes(4, tail) << 16) | read_bytes(2, tail[4..]), primes[4], state),
        7 => mix0((read_bytes(4, tail) << 24) | (read_bytes(2, tail[4..]) << 8) | read_bytes(1, tail[6..]), primes[4], state),
        8 => mix0(read_8bytes_swapped(tail), primes[4], state),
        9 => mix0(read_8bytes_swapped(tail), read_bytes(1, tail[8..]), state),
        10 => mix0(read_8bytes_swapped(tail), read_bytes(2, tail[8..]), state),
        11 => mix0(read_8bytes_swapped(tail), (read_bytes(2, tail[8..]) << 8) | read_bytes(1, tail[10..]), state),
        12 => mix0(read_8bytes_swapped(tail), read_bytes(4, tail[8..]), state),
        13 => mix0(read_8bytes_swapped(tail), (read_bytes(4, tail[8..]) << 8) | read_bytes(1, tail[12..]), state),
        14 => mix0(read_8bytes_swapped(tail), (read_bytes(4, tail[8..]) << 16) | read_bytes(2, tail[12..]), state),
        15 => mix0(read_8bytes_swapped(tail), (read_bytes(4, tail[8..]) << 24) | (read_bytes(2, tail[12..]) << 8) | read_bytes(1, tail[14..]), state),
        16 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state),
        17 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_bytes(1, tail[16..]), primes[4], state),
        18 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_bytes(2, tail[16..]), primes[4], state),
        19 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1((read_bytes(2, tail[16..]) << 8) | read_bytes(1, tail[18..]), primes[4], state),
        20 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_bytes(4, tail[16..]), primes[4], state),
        21 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1((read_bytes(4, tail[16..]) << 8) | read_bytes(1, tail[20..]), primes[4], state),
        22 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1((read_bytes(4, tail[16..]) << 16) | read_bytes(2, tail[20..]), primes[4], state),
        23 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1((read_bytes(4, tail[16..]) << 24) | (read_bytes(2, tail[20..]) << 8) | read_bytes(1, tail[22..]), primes[4], state),
        24 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), primes[4], state),
        25 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), read_bytes(1, tail[24..]), state),
        26 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), read_bytes(2, tail[24..]), state),
        27 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), (read_bytes(2, tail[24..]) << 8) | read_bytes(1, tail[26..]), state),
        28 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), read_bytes(4, tail[24..]), state),
        29 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), (read_bytes(4, tail[24..]) << 8) | read_bytes(1, tail[28..]), state),
        30 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), (read_bytes(4, tail[24..]) << 16) | read_bytes(2, tail[28..]), state),
        31 => mix0(read_8bytes_swapped(tail), read_8bytes_swapped(tail[8..]), state) ^ mix1(read_8bytes_swapped(tail[16..]), (read_bytes(4, tail[24..]) << 24) | (read_bytes(2, tail[28..]) << 8) | read_bytes(1, tail[30..]), state),
    };

    // The mixed value is stored back, so the hasher state changes here.
    self.seed = mixed;
    return mum(mixed ^ self.msg_len, primes[4]);
}
|
||||
|
||||
/// One-shot convenience wrapper: hashes `input` with `seed` through a
/// temporary streaming hasher and returns its digest.
pub fn hash(seed: u64, input: []const u8) u64 {
    var hasher = Wyhash.init(seed);
    @inlineCall(hasher.update, input);
    const digest = @inlineCall(hasher.final);
    return digest;
}
|
||||
};
|
||||
|
||||
/// Wyhash version where state is not preserved between successive `update`
|
||||
/// calls, i.e. it will have different results between hashing the data in
|
||||
/// one or several steps.
|
||||
/// This allows it to be faster.
|
||||
pub const WyhashStateless = struct {
|
||||
seed: u64,
|
||||
msg_len: usize,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Creates a stateless hasher keyed with `seed` and a zero message length.
pub fn init(seed: u64) Self {
    var hasher: Self = undefined;
    hasher.seed = seed;
    hasher.msg_len = 0;
    return hasher;
}
|
||||
|
||||
/// Folds one 32-byte block into the seed.
/// The first 16 bytes go through `mix0`, the last 16 through `mix1`, both
/// keyed with the seed as it was on entry; the two results are XORed.
fn round(self: *Self, b: []const u8) void {
    std.debug.assert(b.len == 32);

    const prev = self.seed;
    const front = mix0(read_bytes(8, b[0..]), read_bytes(8, b[8..]), prev);
    const back = mix1(read_bytes(8, b[16..]), read_bytes(8, b[24..]), prev);
    self.seed = front ^ back;
}
|
||||
|
||||
fn partial(self: *Self, b: []const u8) void {
|
||||
const rem_key = b;
|
||||
const rem_len = b.len;
|
||||
|
||||
@ -98,7 +208,7 @@ pub const Wyhash = struct {
|
||||
self.seed = seed;
|
||||
}
|
||||
|
||||
pub fn update(self: *Wyhash, b: []const u8) void {
|
||||
pub fn update(self: *Self, b: []const u8) void {
|
||||
var off: usize = 0;
|
||||
|
||||
// Full middle blocks.
|
||||
@ -110,19 +220,20 @@ pub const Wyhash = struct {
|
||||
self.msg_len += b.len;
|
||||
}
|
||||
|
||||
pub fn final(self: *Wyhash) u64 {
|
||||
/// Returns the digest: the seed XORed with the total message length,
/// passed through `mum` together with `primes[4]`.
pub fn final(self: *Self) u64 {
    const keyed = self.seed ^ self.msg_len;
    return mum(keyed, primes[4]);
}
|
||||
|
||||
/// One-shot convenience wrapper: hashes `input` with `seed` in a single
/// `update` call and returns the digest.
pub fn hash(seed: u64, input: []const u8) u64 {
    // Only one hasher is declared, and it is built from `Self`: a second
    // `var c` would not compile, and `Wyhash.init` would construct the
    // streaming hasher instead of this stateless one.
    var c = Self.init(seed);
    @inlineCall(c.update, input);
    return @inlineCall(c.final);
}
|
||||
};
|
||||
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
|
||||
test "test vectors" {
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
const hash = Wyhash.hash;
|
||||
|
||||
expectEqual(hash(0, ""), 0x0);
|
||||
@ -133,3 +244,38 @@ test "test vectors" {
|
||||
expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
|
||||
expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
|
||||
}
|
||||
|
||||
test "test vectors streaming" {
    // Feed the input one byte at a time; the expected digest is the same
    // value the one-shot test vector uses for this seed and string.
    var wh = Wyhash.init(5);
    for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| {
        wh.update(mem.asBytes(&e));
    }
    expectEqual(wh.final(), 0x602a1894d3bbfe7f);

    const pattern = "1234567890";
    const count = 8;
    const result = 0x829e9c148b75970e;
    // Build the one-shot input from `count` so it cannot drift away from
    // the number of chunks fed by the loop below.
    expectEqual(Wyhash.hash(6, pattern ** count), result);

    // Same data, delivered as `count` separate updates.
    wh = Wyhash.init(6);
    var i: u32 = 0;
    while (i < count) : (i += 1) {
        wh.update(pattern);
    }
    expectEqual(wh.final(), result);
}
|
||||
|
||||
test "test vectors stateless" {
    const Vector = struct {
        seed: u64,
        input: []const u8,
        expected: u64,
    };

    const vectors = [_]Vector{
        Vector{ .seed = 0, .input = "", .expected = 0x0 },
        Vector{ .seed = 1, .input = "a", .expected = 0xbed235177f41d328 },
        Vector{ .seed = 2, .input = "abc", .expected = 0xbe348debe59b27c3 },
        Vector{ .seed = 3, .input = "message digest", .expected = 0x37320f657213a290 },
        Vector{ .seed = 4, .input = "abcdefghijklmnopqrstuvwxyz", .expected = 0xd0b270e1d8a7019c },
        Vector{ .seed = 5, .input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", .expected = 0x602a1894d3bbfe7f },
        Vector{ .seed = 6, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", .expected = 0x829e9c148b75970e },
    };

    for (vectors) |v| {
        expectEqual(WyhashStateless.hash(v.seed, v.input), v.expected);
    }

    // Only the one-shot entry point is checked here: the streaming API of
    // the stateless variant is not required to produce the same results.
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user