Mirror of https://github.com/ziglang/zig.git
Merge pull request #2797 from Sahnvour/hashing
hash algorithm improvements
Commit 8c99a51993
std/hash.zig (10 lines changed)
@@ -1,6 +1,9 @@
const adler = @import("hash/adler.zig");
pub const Adler32 = adler.Adler32;

const auto_hash = @import("hash/auto_hash.zig");
pub const autoHash = auto_hash.autoHash;

// pub for polynomials + generic crc32 construction
pub const crc = @import("hash/crc.zig");
pub const Crc32 = crc.Crc32;
@@ -16,6 +19,8 @@ pub const SipHash128 = siphash.SipHash128;

pub const murmur = @import("hash/murmur.zig");
pub const Murmur2_32 = murmur.Murmur2_32;
pub const Murmur2_64 = murmur.Murmur2_64;
pub const Murmur3_32 = murmur.Murmur3_32;

@@ -23,11 +28,16 @@ pub const cityhash = @import("hash/cityhash.zig");
pub const CityHash32 = cityhash.CityHash32;
pub const CityHash64 = cityhash.CityHash64;

const wyhash = @import("hash/wyhash.zig");
pub const Wyhash = wyhash.Wyhash;

test "hash" {
    _ = @import("hash/adler.zig");
    _ = @import("hash/auto_hash.zig");
    _ = @import("hash/crc.zig");
    _ = @import("hash/fnv.zig");
    _ = @import("hash/siphash.zig");
    _ = @import("hash/murmur.zig");
    _ = @import("hash/cityhash.zig");
    _ = @import("hash/wyhash.zig");
}
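The new re-exports above are meant to be combined: autoHash feeds any streaming hasher, and Wyhash is the default choice elsewhere in this commit. A minimal sketch of that pattern, modelled on the testAutoHash helper added in std/hash/auto_hash.zig below (hashKey and Point are illustrative names, not part of the commit; syntax follows the Zig version of this diff):

const std = @import("std");

// Hash any eligible value by feeding it into a streaming hasher.
fn hashKey(key: var) u64 {
    var hasher = std.hash.Wyhash.init(0);
    std.hash.autoHash(&hasher, key);
    return hasher.final();
}

test "hashKey sketch" {
    const Point = struct {
        x: u32 = 1,
        y: u32 = 2,
    };
    // Equal values hash equally; the seed is fixed at 0 here.
    std.testing.expect(hashKey(Point{}) == hashKey(Point{}));
    std.testing.expect(hashKey(u32(1)) != hashKey(u32(2)));
}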
std/hash/auto_hash.zig (new file, 210 lines)
@@ -0,0 +1,210 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const meta = std.meta;

/// Provides generic hashing for any eligible type.
/// Only hashes `key` itself, pointers are not followed.
pub fn autoHash(hasher: var, key: var) void {
    const Key = @typeOf(key);
    switch (@typeInfo(Key)) {
        builtin.TypeId.NoReturn,
        builtin.TypeId.Opaque,
        builtin.TypeId.Undefined,
        builtin.TypeId.ArgTuple,
        builtin.TypeId.Void,
        builtin.TypeId.Null,
        builtin.TypeId.BoundFn,
        builtin.TypeId.ComptimeFloat,
        builtin.TypeId.ComptimeInt,
        builtin.TypeId.Type,
        builtin.TypeId.EnumLiteral,
        => @compileError("cannot hash this type"),

        // Help the optimizer see that hashing an int is easy by inlining!
        // TODO Check if the situation is better after #561 is resolved.
        builtin.TypeId.Int => @inlineCall(hasher.update, std.mem.asBytes(&key)),

        builtin.TypeId.Float => |info| autoHash(hasher, @bitCast(@IntType(false, info.bits), key)),

        builtin.TypeId.Bool => autoHash(hasher, @boolToInt(key)),
        builtin.TypeId.Enum => autoHash(hasher, @enumToInt(key)),
        builtin.TypeId.ErrorSet => autoHash(hasher, @errorToInt(key)),
        builtin.TypeId.Promise, builtin.TypeId.Fn => autoHash(hasher, @ptrToInt(key)),

        builtin.TypeId.Pointer => |info| switch (info.size) {
            builtin.TypeInfo.Pointer.Size.One,
            builtin.TypeInfo.Pointer.Size.Many,
            builtin.TypeInfo.Pointer.Size.C,
            => autoHash(hasher, @ptrToInt(key)),

            builtin.TypeInfo.Pointer.Size.Slice => {
                autoHash(hasher, key.ptr);
                autoHash(hasher, key.len);
            },
        },

        builtin.TypeId.Optional => if (key) |k| autoHash(hasher, k),

        builtin.TypeId.Array => {
            // TODO detect via a trait when Key has no padding bits to
            // hash it as an array of bytes.
            // Otherwise, hash every element.
            for (key) |element| {
                autoHash(hasher, element);
            }
        },

        builtin.TypeId.Vector => |info| {
            if (info.child.bit_count % 8 == 0) {
                // If there's no unused bits in the child type, we can just hash
                // this as an array of bytes.
                hasher.update(mem.asBytes(&key));
            } else {
                // Otherwise, hash every element.
                // TODO remove the copy to an array once field access is done.
                const array: [info.len]info.child = key;
                comptime var i: u32 = 0;
                inline while (i < info.len) : (i += 1) {
                    autoHash(hasher, array[i]);
                }
            }
        },

        builtin.TypeId.Struct => |info| {
            // TODO detect via a trait when Key has no padding bits to
            // hash it as an array of bytes.
            // Otherwise, hash every field.
            inline for (info.fields) |field| {
                // We reuse the hash of the previous field as the seed for the
                // next one so that they're dependant.
                autoHash(hasher, @field(key, field.name));
            }
        },

        builtin.TypeId.Union => |info| blk: {
            if (info.tag_type) |tag_type| {
                const tag = meta.activeTag(key);
                const s = autoHash(hasher, tag);
                inline for (info.fields) |field| {
                    const enum_field = field.enum_field.?;
                    if (enum_field.value == @enumToInt(tag)) {
                        autoHash(hasher, @field(key, enum_field.name));
                        // TODO use a labelled break when it does not crash the compiler.
                        // break :blk;
                        return;
                    }
                }
                unreachable;
            } else @compileError("cannot hash untagged union type: " ++ @typeName(Key) ++ ", provide your own hash function");
        },

        builtin.TypeId.ErrorUnion => blk: {
            const payload = key catch |err| {
                autoHash(hasher, err);
                break :blk;
            };
            autoHash(hasher, payload);
        },
    }
}

const testing = std.testing;
const Wyhash = std.hash.Wyhash;

fn testAutoHash(key: var) u64 {
    // Any hash could be used here, for testing autoHash.
    var hasher = Wyhash.init(0);
    autoHash(&hasher, key);
    return hasher.final();
}

test "autoHash slice" {
    // Allocate one array dynamically so that we're assured it is not merged
    // with the other by the optimization passes.
    const array1 = try std.heap.direct_allocator.create([6]u32);
    defer std.heap.direct_allocator.destroy(array1);
    array1.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
    const array2 = [_]u32{ 1, 2, 3, 4, 5, 6 };
    const a = array1[0..];
    const b = array2[0..];
    const c = array1[0..3];
    testing.expect(testAutoHash(a) == testAutoHash(a));
    testing.expect(testAutoHash(a) != testAutoHash(array1));
    testing.expect(testAutoHash(a) != testAutoHash(b));
    testing.expect(testAutoHash(a) != testAutoHash(c));
}

test "testAutoHash optional" {
    const a: ?u32 = 123;
    const b: ?u32 = null;
    testing.expectEqual(testAutoHash(a), testAutoHash(u32(123)));
    testing.expect(testAutoHash(a) != testAutoHash(b));
    testing.expectEqual(testAutoHash(b), 0);
}

test "testAutoHash array" {
    const a = [_]u32{ 1, 2, 3 };
    const h = testAutoHash(a);
    var hasher = Wyhash.init(0);
    autoHash(&hasher, u32(1));
    autoHash(&hasher, u32(2));
    autoHash(&hasher, u32(3));
    testing.expectEqual(h, hasher.final());
}

test "testAutoHash struct" {
    const Foo = struct {
        a: u32 = 1,
        b: u32 = 2,
        c: u32 = 3,
    };
    const f = Foo{};
    const h = testAutoHash(f);
    var hasher = Wyhash.init(0);
    autoHash(&hasher, u32(1));
    autoHash(&hasher, u32(2));
    autoHash(&hasher, u32(3));
    testing.expectEqual(h, hasher.final());
}

test "testAutoHash union" {
    const Foo = union(enum) {
        A: u32,
        B: f32,
        C: u32,
    };

    const a = Foo{ .A = 18 };
    var b = Foo{ .B = 12.34 };
    const c = Foo{ .C = 18 };
    testing.expect(testAutoHash(a) == testAutoHash(a));
    testing.expect(testAutoHash(a) != testAutoHash(b));
    testing.expect(testAutoHash(a) != testAutoHash(c));

    b = Foo{ .A = 18 };
    testing.expect(testAutoHash(a) == testAutoHash(b));
}

test "testAutoHash vector" {
    const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
    const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
    const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
    testing.expect(testAutoHash(a) == testAutoHash(a));
    testing.expect(testAutoHash(a) != testAutoHash(b));
    testing.expect(testAutoHash(a) != testAutoHash(c));
}

test "testAutoHash error union" {
    const Errors = error{Test};
    const Foo = struct {
        a: u32 = 1,
        b: u32 = 2,
        c: u32 = 3,
    };
    const f = Foo{};
    const g: Errors!Foo = Errors.Test;
    testing.expect(testAutoHash(f) != testAutoHash(g));
    testing.expect(testAutoHash(f) == testAutoHash(Foo{}));
    testing.expect(testAutoHash(g) == testAutoHash(Errors.Test));
}
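One consequence of the "pointers are not followed" rule above, which the "autoHash slice" test relies on: for a slice, autoHash mixes in the ptr and len fields, not the pointed-to bytes. When content hashing is what is wanted, the bytes have to be fed to the hasher directly. A minimal sketch, assuming Wyhash as the hasher (hashSliceContents is a hypothetical helper, not part of this commit):

const std = @import("std");

// Hash the bytes a slice refers to, rather than its ptr/len pair.
fn hashSliceContents(bytes: []const u8) u64 {
    var hasher = std.hash.Wyhash.init(0);
    hasher.update(bytes);
    return hasher.final();
}

test "content hashing vs autoHash on a slice" {
    var buf1 = [_]u8{ 'a', 'b', 'c' };
    var buf2 = [_]u8{ 'a', 'b', 'c' };
    // Same contents at different addresses hash equally by content,
    // while autoHash of the two slices differs, as in the test above.
    std.testing.expect(hashSliceContents(buf1[0..]) == hashSliceContents(buf2[0..]));
}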
std/hash/throughput_test.zig (new file, 148 lines)
@@ -0,0 +1,148 @@
const builtin = @import("builtin");
const std = @import("std");
const time = std.time;
const Timer = time.Timer;
const hash = std.hash;

const KiB = 1024;
const MiB = 1024 * KiB;
const GiB = 1024 * MiB;

var prng = std.rand.DefaultPrng.init(0);

const Hash = struct {
    ty: type,
    name: []const u8,
    init_u8s: ?[]const u8 = null,
    init_u64: ?u64 = null,
};

const siphash_key = "0123456789abcdef";

const hashes = [_]Hash{
    Hash{ .ty = hash.Wyhash, .name = "wyhash", .init_u64 = 0 },
    Hash{ .ty = hash.SipHash64(1, 3), .name = "siphash(1,3)", .init_u8s = siphash_key },
    Hash{ .ty = hash.SipHash64(2, 4), .name = "siphash(2,4)", .init_u8s = siphash_key },
    Hash{ .ty = hash.Fnv1a_64, .name = "fnv1a" },
    Hash{ .ty = hash.Crc32, .name = "crc32" },
};

const Result = struct {
    hash: u64,
    throughput: u64,
};

pub fn benchmarkHash(comptime H: var, bytes: usize) !Result {
    var h = blk: {
        if (H.init_u8s) |init| {
            break :blk H.ty.init(init);
        }
        if (H.init_u64) |init| {
            break :blk H.ty.init(init);
        }
        break :blk H.ty.init();
    };

    var block: [8192]u8 = undefined;
    prng.random.bytes(block[0..]);

    var offset: usize = 0;
    var timer = try Timer.start();
    const start = timer.lap();
    while (offset < bytes) : (offset += block.len) {
        h.update(block[0..]);
    }
    const end = timer.read();

    const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
    const throughput = @floatToInt(u64, @intToFloat(f64, bytes) / elapsed_s);

    return Result{
        .hash = h.final(),
        .throughput = throughput,
    };
}

fn usage() void {
    std.debug.warn(
        \\throughput_test [options]
        \\
        \\Options:
        \\  --filter [test-name]
        \\  --seed   [int]
        \\  --count  [int]
        \\  --help
        \\
    );
}

fn mode(comptime x: comptime_int) comptime_int {
    return if (builtin.mode == builtin.Mode.Debug) x / 64 else x;
}

// TODO(#1358): Replace with builtin formatted padding when available.
fn printPad(stdout: var, s: []const u8) !void {
    var i: usize = 0;
    while (i < 12 - s.len) : (i += 1) {
        try stdout.print(" ");
    }
    try stdout.print("{}", s);
}

pub fn main() !void {
    var stdout_file = try std.io.getStdOut();
    var stdout_out_stream = stdout_file.outStream();
    const stdout = &stdout_out_stream.stream;

    var buffer: [1024]u8 = undefined;
    var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]);
    const args = try std.process.argsAlloc(&fixed.allocator);

    var filter: ?[]u8 = "";
    var count: usize = mode(128 * MiB);

    var i: usize = 1;
    while (i < args.len) : (i += 1) {
        if (std.mem.eql(u8, args[i], "--seed")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            const seed = try std.fmt.parseUnsigned(u32, args[i], 10);
            prng.seed(seed);
        } else if (std.mem.eql(u8, args[i], "--filter")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            filter = args[i];
        } else if (std.mem.eql(u8, args[i], "--count")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            const c = try std.fmt.parseUnsigned(u32, args[i], 10);
            count = c * MiB;
        } else if (std.mem.eql(u8, args[i], "--help")) {
            usage();
            return;
        } else {
            usage();
            std.os.exit(1);
        }
    }

    inline for (hashes) |H| {
        if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) {
            const result = try benchmarkHash(H, count);
            try printPad(stdout, H.name);
            try stdout.print(": {:4} MiB/s [{:16}]\n", result.throughput / (1 * MiB), result.hash);
        }
    }
}
std/hash/wyhash.zig (new file, 135 lines)
@@ -0,0 +1,135 @@
const std = @import("std");
const mem = std.mem;

const primes = [_]u64{
    0xa0761d6478bd642f,
    0xe7037ed1a0b428db,
    0x8ebc6af09c88c6e3,
    0x589965cc75374cc3,
    0x1d8e4e27c47d124f,
};

fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
    return mem.readVarInt(u64, data[0..bytes], .Little);
}

fn read_8bytes_swapped(data: []const u8) u64 {
    return (read_bytes(4, data) << 32 | read_bytes(4, data[4..]));
}

fn mum(a: u64, b: u64) u64 {
    var r = std.math.mulWide(u64, a, b);
    r = (r >> 64) ^ r;
    return @truncate(u64, r);
}

fn mix0(a: u64, b: u64, seed: u64) u64 {
    return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]);
}

fn mix1(a: u64, b: u64, seed: u64) u64 {
    return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
}

pub const Wyhash = struct {
    seed: u64,
    msg_len: usize,

    pub fn init(seed: u64) Wyhash {
        return Wyhash{
            .seed = seed,
            .msg_len = 0,
        };
    }

    fn round(self: *Wyhash, b: []const u8) void {
        std.debug.assert(b.len == 32);

        self.seed = mix0(
            read_bytes(8, b[0..]),
            read_bytes(8, b[8..]),
            self.seed,
        ) ^ mix1(
            read_bytes(8, b[16..]),
            read_bytes(8, b[24..]),
            self.seed,
        );
    }

    fn partial(self: *Wyhash, b: []const u8) void {
        const rem_key = b;
        const rem_len = b.len;

        var seed = self.seed;
        seed = switch (@intCast(u5, rem_len)) {
            0 => seed,
            1 => mix0(read_bytes(1, rem_key), primes[4], seed),
            2 => mix0(read_bytes(2, rem_key), primes[4], seed),
            3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
            4 => mix0(read_bytes(4, rem_key), primes[4], seed),
            5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
            6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
            7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
            8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
            9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
            10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
            11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
            12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
            13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
            14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
            15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
            16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
            17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
            18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
            19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
            20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
            21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
            22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
            23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
            24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
            25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
            26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
            27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
            28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
            29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
            30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
            31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
        };
        self.seed = seed;
    }

    pub fn update(self: *Wyhash, b: []const u8) void {
        var off: usize = 0;

        // Full middle blocks.
        while (off + 32 <= b.len) : (off += 32) {
            @inlineCall(self.round, b[off .. off + 32]);
        }

        self.partial(b[off..]);
        self.msg_len += b.len;
    }

    pub fn final(self: *Wyhash) u64 {
        return mum(self.seed ^ self.msg_len, primes[4]);
    }

    pub fn hash(seed: u64, input: []const u8) u64 {
        var c = Wyhash.init(seed);
        c.update(input);
        return c.final();
    }
};

test "test vectors" {
    const expectEqual = std.testing.expectEqual;
    const hash = Wyhash.hash;

    expectEqual(hash(0, ""), 0x0);
    expectEqual(hash(1, "a"), 0xbed235177f41d328);
    expectEqual(hash(2, "abc"), 0xbe348debe59b27c3);
    expectEqual(hash(3, "message digest"), 0x37320f657213a290);
    expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c);
    expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
    expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
}
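For reference, a sketch of the two ways to drive this API: the one-shot Wyhash.hash wrapper and the init/update/final sequence it is built on. The expected value is the seed-3 "message digest" vector from the test block above. Note that update() applies partial() to whatever remains after the 32-byte rounds on every call, so with this version splitting a message across multiple update() calls is not, in general, equivalent to hashing it in one call; the sketch therefore feeds the whole buffer at once.

const std = @import("std");
const Wyhash = std.hash.Wyhash;
const expectEqual = std.testing.expectEqual;

test "Wyhash usage sketch" {
    const input = "message digest";

    // One-shot wrapper: seed plus the full input.
    expectEqual(Wyhash.hash(3, input), 0x37320f657213a290);

    // Equivalent streaming form: init with the seed, feed the bytes, finalize.
    var hasher = Wyhash.init(3);
    hasher.update(input);
    expectEqual(hasher.final(), 0x37320f657213a290);
}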
std/hash_map.zig (123 lines changed)
@@ -4,6 +4,9 @@ const assert = debug.assert;
const testing = std.testing;
const math = std.math;
const mem = std.mem;
const meta = std.meta;
const autoHash = std.hash.autoHash;
const Wyhash = std.hash.Wyhash;
const Allocator = mem.Allocator;
const builtin = @import("builtin");

@@ -448,15 +451,17 @@ test "iterator hash map" {
    try reset_map.putNoClobber(2, 22);
    try reset_map.putNoClobber(3, 33);

    // TODO this test depends on the hashing algorithm, because it assumes the
    // order of the elements in the hashmap. This should not be the case.
    var keys = [_]i32{
        1,
        3,
        2,
        1,
    };
    var values = [_]i32{
        11,
        33,
        22,
        11,
    };

    var it = reset_map.iterator();
@@ -518,8 +523,9 @@ pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) {
pub fn getAutoHashFn(comptime K: type) (fn (K) u32) {
    return struct {
        fn hash(key: K) u32 {
            comptime var rng = comptime std.rand.DefaultPrng.init(0);
            return autoHash(key, &rng.random, u32);
            var hasher = Wyhash.init(0);
            autoHash(&hasher, key);
            return @truncate(u32, hasher.final());
        }
    }.hash;
}
@@ -527,114 +533,7 @@ pub fn getAutoHashFn(comptime K: type) (fn (K) u32) {
pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) {
    return struct {
        fn eql(a: K, b: K) bool {
            return autoEql(a, b);
            return meta.eql(a, b);
        }
    }.eql;
}

// TODO improve these hash functions
pub fn autoHash(key: var, comptime rng: *std.rand.Random, comptime HashInt: type) HashInt {
    switch (@typeInfo(@typeOf(key))) {
        builtin.TypeId.NoReturn,
        builtin.TypeId.Opaque,
        builtin.TypeId.Undefined,
        builtin.TypeId.ArgTuple,
        => @compileError("cannot hash this type"),

        builtin.TypeId.Void,
        builtin.TypeId.Null,
        => return 0,

        builtin.TypeId.Int => |info| {
            const unsigned_x = @bitCast(@IntType(false, info.bits), key);
            if (info.bits <= HashInt.bit_count) {
                return HashInt(unsigned_x) ^ comptime rng.scalar(HashInt);
            } else {
                return @truncate(HashInt, unsigned_x ^ comptime rng.scalar(@typeOf(unsigned_x)));
            }
        },

        builtin.TypeId.Float => |info| {
            return autoHash(@bitCast(@IntType(false, info.bits), key), rng, HashInt);
        },
        builtin.TypeId.Bool => return autoHash(@boolToInt(key), rng, HashInt),
        builtin.TypeId.Enum => return autoHash(@enumToInt(key), rng, HashInt),
        builtin.TypeId.ErrorSet => return autoHash(@errorToInt(key), rng, HashInt),
        builtin.TypeId.Promise, builtin.TypeId.Fn => return autoHash(@ptrToInt(key), rng, HashInt),

        builtin.TypeId.BoundFn,
        builtin.TypeId.ComptimeFloat,
        builtin.TypeId.ComptimeInt,
        builtin.TypeId.Type,
        builtin.TypeId.EnumLiteral,
        => return 0,

        builtin.TypeId.Pointer => |info| switch (info.size) {
            builtin.TypeInfo.Pointer.Size.One => @compileError("TODO auto hash for single item pointers"),
            builtin.TypeInfo.Pointer.Size.Many => @compileError("TODO auto hash for many item pointers"),
            builtin.TypeInfo.Pointer.Size.C => @compileError("TODO auto hash C pointers"),
            builtin.TypeInfo.Pointer.Size.Slice => {
                const interval = std.math.max(1, key.len / 256);
                var i: usize = 0;
                var h = comptime rng.scalar(HashInt);
                while (i < key.len) : (i += interval) {
                    h ^= autoHash(key[i], rng, HashInt);
                }
                return h;
            },
        },

        builtin.TypeId.Optional => @compileError("TODO auto hash for optionals"),
        builtin.TypeId.Array => @compileError("TODO auto hash for arrays"),
        builtin.TypeId.Vector => @compileError("TODO auto hash for vectors"),
        builtin.TypeId.Struct => @compileError("TODO auto hash for structs"),
        builtin.TypeId.Union => @compileError("TODO auto hash for unions"),
        builtin.TypeId.ErrorUnion => @compileError("TODO auto hash for unions"),
    }
}

pub fn autoEql(a: var, b: @typeOf(a)) bool {
    switch (@typeInfo(@typeOf(a))) {
        builtin.TypeId.NoReturn,
        builtin.TypeId.Opaque,
        builtin.TypeId.Undefined,
        builtin.TypeId.ArgTuple,
        => @compileError("cannot test equality of this type"),
        builtin.TypeId.Void,
        builtin.TypeId.Null,
        => return true,
        builtin.TypeId.Bool,
        builtin.TypeId.Int,
        builtin.TypeId.Float,
        builtin.TypeId.ComptimeFloat,
        builtin.TypeId.ComptimeInt,
        builtin.TypeId.EnumLiteral,
        builtin.TypeId.Promise,
        builtin.TypeId.Enum,
        builtin.TypeId.BoundFn,
        builtin.TypeId.Fn,
        builtin.TypeId.ErrorSet,
        builtin.TypeId.Type,
        => return a == b,

        builtin.TypeId.Pointer => |info| switch (info.size) {
            builtin.TypeInfo.Pointer.Size.One => @compileError("TODO auto eql for single item pointers"),
            builtin.TypeInfo.Pointer.Size.Many => @compileError("TODO auto eql for many item pointers"),
            builtin.TypeInfo.Pointer.Size.C => @compileError("TODO auto eql for C pointers"),
            builtin.TypeInfo.Pointer.Size.Slice => {
                if (a.len != b.len) return false;
                for (a) |a_item, i| {
                    if (!autoEql(a_item, b[i])) return false;
                }
                return true;
            },
        },

        builtin.TypeId.Optional => @compileError("TODO auto eql for optionals"),
        builtin.TypeId.Array => @compileError("TODO auto eql for arrays"),
        builtin.TypeId.Struct => @compileError("TODO auto eql for structs"),
        builtin.TypeId.Union => @compileError("TODO auto eql for unions"),
        builtin.TypeId.ErrorUnion => @compileError("TODO auto eql for unions"),
        builtin.TypeId.Vector => @compileError("TODO auto eql for vectors"),
    }
}
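With getAutoHashFn rewritten as above, std.AutoHashMap keys are now hashed by running autoHash into a Wyhash hasher and truncating the result to u32. A minimal sketch of a map exercising that path (illustrative only; it sticks to calls that appear elsewhere in this diff, such as direct_allocator, putNoClobber and iterator, and assumes the era's KV field names key/value):

const std = @import("std");

test "AutoHashMap keys via autoHash + Wyhash (sketch)" {
    var map = std.AutoHashMap(i32, i32).init(std.heap.direct_allocator);
    defer map.deinit();

    try map.putNoClobber(1, 11);
    try map.putNoClobber(2, 22);

    var count: usize = 0;
    var it = map.iterator();
    while (it.next()) |entry| {
        std.testing.expect(entry.value == 11 or entry.value == 22);
        count += 1;
    }
    std.testing.expect(count == 2);
}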
std/http/headers.zig (file name inferred from context; the scraped header for this file is missing)
@@ -102,9 +102,19 @@ test "HeaderEntry" {
    testing.expectEqualSlices(u8, "x", e.value);
}

fn stringEql(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;
    if (a.ptr == b.ptr) return true;
    return mem.compare(u8, a, b) == .Equal;
}

fn stringHash(s: []const u8) u32 {
    return @truncate(u32, std.hash.Wyhash.hash(0, s));
}

const HeaderList = std.ArrayList(HeaderEntry);
const HeaderIndexList = std.ArrayList(usize);
const HeaderIndex = std.AutoHashMap([]const u8, HeaderIndexList);
const HeaderIndex = std.HashMap([]const u8, HeaderIndexList, stringHash, stringEql);

pub const Headers = struct {
    // the owned header field name is stored in the index as part of the key
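The HeaderIndex change above is the general recipe for string keys after this commit: std.HashMap with an explicit hash/eql pair, since autoHash deliberately does not follow pointers. A standalone sketch of that recipe (strHash and strEql mirror stringHash and stringEql above; std.mem.eql stands in for the mem.compare form used there):

const std = @import("std");

fn strHash(s: []const u8) u32 {
    return @truncate(u32, std.hash.Wyhash.hash(0, s));
}

fn strEql(a: []const u8, b: []const u8) bool {
    return std.mem.eql(u8, a, b);
}

test "string-keyed HashMap with an explicit hash/eql pair (sketch)" {
    var map = std.HashMap([]const u8, usize, strHash, strEql).init(std.heap.direct_allocator);
    defer map.deinit();

    try map.putNoClobber("content-length", 0);
    try map.putNoClobber("host", 1);

    var it = map.iterator();
    while (it.next()) |entry| {
        std.testing.expect(entry.value == 0 or entry.value == 1);
    }
}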