mirror of
https://github.com/ziglang/zig.git
synced 2026-01-03 12:03:19 +00:00
Merge pull request #3060 from Sahnvour/hashing
auto_hash with deep/shallow hashing
This commit is contained in:
commit
ac477f3c9a
@ -719,6 +719,16 @@ pub const WatchEventId = enum {
|
||||
Delete,
|
||||
};
|
||||
|
||||
/// Returns true when `a` and `b` contain the same sequence of `u16` elements.
fn eqlString(a: []const u16, b: []const u16) bool {
    // Equal lengths starting at the same address are necessarily equal;
    // otherwise fall back to an element-wise comparison.
    return a.len == b.len and (a.ptr == b.ptr or mem.compare(u16, a, b) == .Equal);
}
|
||||
|
||||
/// Hashes the bytes backing `s` with Wyhash (seed 0) and keeps the low 32 bits.
fn hashString(s: []const u16) u32 {
    const bytes = @sliceToBytes(s);
    const full_hash = std.hash.Wyhash.hash(0, bytes);
    return @truncate(u32, full_hash);
}
|
||||
|
||||
//pub const WatchEventError = error{
|
||||
// UserResourceLimitReached,
|
||||
// SystemResources,
|
||||
@ -736,7 +746,7 @@ pub const WatchEventId = enum {
|
||||
// file_table: FileTable,
|
||||
// table_lock: event.Lock,
|
||||
//
|
||||
// const FileTable = std.AutoHashMap([]const u8, *Put);
|
||||
// const FileTable = std.StringHashMap(*Put);
|
||||
// const Put = struct {
|
||||
// putter: anyframe,
|
||||
// value_ptr: *V,
|
||||
@ -755,8 +765,8 @@ pub const WatchEventId = enum {
|
||||
// all_putters: std.atomic.Queue(anyframe),
|
||||
// ref_count: std.atomic.Int(usize),
|
||||
//
|
||||
// const DirTable = std.AutoHashMap([]const u8, *Dir);
|
||||
// const FileTable = std.AutoHashMap([]const u16, V);
|
||||
// const DirTable = std.StringHashMap(*Dir);
|
||||
// const FileTable = std.HashMap([]const u16, V, hashString, eqlString);
|
||||
//
|
||||
// const Dir = struct {
|
||||
// putter: anyframe,
|
||||
@ -772,7 +782,7 @@ pub const WatchEventId = enum {
|
||||
// table_lock: event.Lock,
|
||||
//
|
||||
// const WdTable = std.AutoHashMap(i32, Dir);
|
||||
// const FileTable = std.AutoHashMap([]const u8, V);
|
||||
// const FileTable = std.StringHashMap(V);
|
||||
//
|
||||
// const Dir = struct {
|
||||
// dirname: []const u8,
|
||||
@ -780,7 +790,7 @@ pub const WatchEventId = enum {
|
||||
// };
|
||||
// };
|
||||
//
|
||||
// const FileToHandle = std.AutoHashMap([]const u8, anyframe);
|
||||
// const FileToHandle = std.StringHashMap(anyframe);
|
||||
//
|
||||
// const Self = @This();
|
||||
//
|
||||
|
||||
@ -3,9 +3,76 @@ const builtin = @import("builtin");
|
||||
const mem = std.mem;
|
||||
const meta = std.meta;
|
||||
|
||||
/// Selects how pointers found in a key are treated while hashing.
pub const HashStrategy = enum {
    /// Pointers are hashed by value (their address); pointees are never read.
    Shallow,

    /// Pointers are dereferenced and the pointee content is hashed.
    /// Only one level is followed, i.e. the strategy turns into `.Shallow`
    /// as soon as a pointer type is encountered.
    Deep,

    /// Pointers are dereferenced and the pointee content is hashed.
    /// Every pointer encountered is followed.
    /// Assumes the data contains no cycle.
    DeepRecursive,
};
|
||||
|
||||
/// Hashes the pointer `key` according to `strat`, adjusting the strategy that
/// is forwarded to the pointee when the pointer is followed.
pub fn hashPointer(hasher: var, key: var, comptime strat: HashStrategy) void {
    const ptr_info = @typeInfo(@typeOf(key)).Pointer;

    switch (ptr_info.size) {
        .One => switch (strat) {
            // Address only.
            .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
            // One level of indirection: the pointee itself is hashed shallowly.
            .Deep => hash(hasher, key.*, .Shallow),
            .DeepRecursive => hash(hasher, key.*, .DeepRecursive),
        },

        .Slice => switch (strat) {
            // Address and length, without reading the elements.
            .Shallow => {
                hashPointer(hasher, key.ptr, .Shallow);
                hash(hasher, key.len, .Shallow);
            },
            .Deep => hashArray(hasher, key, .Shallow),
            .DeepRecursive => hashArray(hasher, key, .DeepRecursive),
        },

        // No length is available, so only the address can be hashed.
        .Many, .C => switch (strat) {
            .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
            else => @compileError(
                \\ unknown-length pointers and C pointers cannot be hashed deeply.
                \\ Consider providing your own hash function.
            ),
        },
    }
}
|
||||
|
||||
/// Feeds every element of `key` (an array or a slice) to `hash`, in order,
/// using the strategy `strat`.
pub fn hashArray(hasher: var, key: var, comptime strat: HashStrategy) void {
    // TODO detect via a trait when Key has no padding bits to
    // hash it as an array of bytes when the strategy is .Shallow.
    // Until then every element is hashed individually, whatever the strategy.
    for (key) |element| {
        hash(hasher, element, strat);
    }
}
|
||||
|
||||
/// Provides generic hashing for any eligible type.
|
||||
/// Only hashes `key` itself, pointers are not followed.
|
||||
pub fn autoHash(hasher: var, key: var) void {
|
||||
/// Strategy is provided to determine if pointers should be followed or not.
|
||||
pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
|
||||
const Key = @typeOf(key);
|
||||
switch (@typeInfo(Key)) {
|
||||
.NoReturn,
|
||||
@ -26,35 +93,18 @@ pub fn autoHash(hasher: var, key: var) void {
|
||||
// TODO Check if the situation is better after #561 is resolved.
|
||||
.Int => @inlineCall(hasher.update, std.mem.asBytes(&key)),
|
||||
|
||||
.Float => |info| autoHash(hasher, @bitCast(@IntType(false, info.bits), key)),
|
||||
.Float => |info| hash(hasher, @bitCast(@IntType(false, info.bits), key), strat),
|
||||
|
||||
.Bool => autoHash(hasher, @boolToInt(key)),
|
||||
.Enum => autoHash(hasher, @enumToInt(key)),
|
||||
.ErrorSet => autoHash(hasher, @errorToInt(key)),
|
||||
.AnyFrame, .Fn => autoHash(hasher, @ptrToInt(key)),
|
||||
.Bool => hash(hasher, @boolToInt(key), strat),
|
||||
.Enum => hash(hasher, @enumToInt(key), strat),
|
||||
.ErrorSet => hash(hasher, @errorToInt(key), strat),
|
||||
.AnyFrame, .Fn => hash(hasher, @ptrToInt(key), strat),
|
||||
|
||||
.Pointer => |info| switch (info.size) {
|
||||
builtin.TypeInfo.Pointer.Size.One,
|
||||
builtin.TypeInfo.Pointer.Size.Many,
|
||||
builtin.TypeInfo.Pointer.Size.C,
|
||||
=> autoHash(hasher, @ptrToInt(key)),
|
||||
.Pointer => @inlineCall(hashPointer, hasher, key, strat),
|
||||
|
||||
builtin.TypeInfo.Pointer.Size.Slice => {
|
||||
autoHash(hasher, key.ptr);
|
||||
autoHash(hasher, key.len);
|
||||
},
|
||||
},
|
||||
.Optional => if (key) |k| hash(hasher, k, strat),
|
||||
|
||||
.Optional => if (key) |k| autoHash(hasher, k),
|
||||
|
||||
.Array => {
|
||||
// TODO detect via a trait when Key has no padding bits to
|
||||
// hash it as an array of bytes.
|
||||
// Otherwise, hash every element.
|
||||
for (key) |element| {
|
||||
autoHash(hasher, element);
|
||||
}
|
||||
},
|
||||
.Array => hashArray(hasher, key, strat),
|
||||
|
||||
.Vector => |info| {
|
||||
if (info.child.bit_count % 8 == 0) {
|
||||
@ -67,7 +117,7 @@ pub fn autoHash(hasher: var, key: var) void {
|
||||
const array: [info.len]info.child = key;
|
||||
comptime var i: u32 = 0;
|
||||
inline while (i < info.len) : (i += 1) {
|
||||
autoHash(hasher, array[i]);
|
||||
hash(hasher, array[i], strat);
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -79,19 +129,19 @@ pub fn autoHash(hasher: var, key: var) void {
|
||||
inline for (info.fields) |field| {
|
||||
// We reuse the hash of the previous field as the seed for the
|
||||
// next one so that they're dependent.
|
||||
autoHash(hasher, @field(key, field.name));
|
||||
hash(hasher, @field(key, field.name), strat);
|
||||
}
|
||||
},
|
||||
|
||||
.Union => |info| blk: {
|
||||
if (info.tag_type) |tag_type| {
|
||||
const tag = meta.activeTag(key);
|
||||
const s = autoHash(hasher, tag);
|
||||
const s = hash(hasher, tag, strat);
|
||||
inline for (info.fields) |field| {
|
||||
const enum_field = field.enum_field.?;
|
||||
if (enum_field.value == @enumToInt(tag)) {
|
||||
autoHash(hasher, @field(key, enum_field.name));
|
||||
// TODO use a labelled break when it does not crash the compiler.
|
||||
hash(hasher, @field(key, enum_field.name), strat);
|
||||
// TODO use a labelled break when it does not crash the compiler. cf #2908
|
||||
// break :blk;
|
||||
return;
|
||||
}
|
||||
@ -102,25 +152,77 @@ pub fn autoHash(hasher: var, key: var) void {
|
||||
|
||||
.ErrorUnion => blk: {
|
||||
const payload = key catch |err| {
|
||||
autoHash(hasher, err);
|
||||
hash(hasher, err, strat);
|
||||
break :blk;
|
||||
};
|
||||
autoHash(hasher, payload);
|
||||
hash(hasher, payload, strat);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides generic hashing for any eligible type.
/// Only hashes `key` itself, pointers are not followed (`.Shallow` strategy).
/// Slices are rejected at compile time to avoid ambiguity on the user's intention.
pub fn autoHash(hasher: var, key: var) void {
    const Key = @typeOf(key);
    if (comptime meta.trait.isSlice(Key))
        @compileError("std.auto_hash.autoHash does not allow slices (here " ++ @typeName(Key) ++ ") because the intent is unclear. Consider using std.auto_hash.hash or providing your own hash function instead.");

    hash(hasher, key, .Shallow);
}
|
||||
|
||||
const testing = std.testing;
|
||||
const Wyhash = std.hash.Wyhash;
|
||||
|
||||
fn testAutoHash(key: var) u64 {
|
||||
fn testHash(key: var) u64 {
|
||||
// Any hash could be used here, for testing autoHash.
|
||||
var hasher = Wyhash.init(0);
|
||||
autoHash(&hasher, key);
|
||||
hash(&hasher, key, .Shallow);
|
||||
return hasher.final();
|
||||
}
|
||||
|
||||
test "autoHash slice" {
|
||||
/// Test helper: Wyhash (seed 0) digest of `key` hashed with the `.Shallow` strategy.
fn testHashShallow(key: var) u64 {
    // Any hasher would do here; Wyhash is only a convenient choice.
    var state = Wyhash.init(0);
    hash(&state, key, .Shallow);
    return state.final();
}
|
||||
|
||||
/// Test helper: Wyhash (seed 0) digest of `key` hashed with the `.Deep` strategy.
fn testHashDeep(key: var) u64 {
    // Any hasher would do here; Wyhash is only a convenient choice.
    var state = Wyhash.init(0);
    hash(&state, key, .Deep);
    return state.final();
}
|
||||
|
||||
/// Test helper: Wyhash (seed 0) digest of `key` hashed with the `.DeepRecursive` strategy.
fn testHashDeepRecursive(key: var) u64 {
    // Any hasher would do here; Wyhash is only a convenient choice.
    var state = Wyhash.init(0);
    hash(&state, key, .DeepRecursive);
    return state.final();
}
|
||||
|
||||
test "hash pointer" {
    const values = [_]u32{ 123, 123, 123 };
    const first = &values[0];
    const second = &values[1];
    const third = &values[2];
    const alias = first;

    // Shallow: only the address is hashed, so distinct elements differ
    // even though they hold the same value.
    testing.expect(testHashShallow(first) == testHashShallow(alias));
    testing.expect(testHashShallow(first) != testHashShallow(third));
    testing.expect(testHashShallow(first) != testHashShallow(second));

    // Deep: the pointee is hashed, and every element holds 123.
    testing.expect(testHashDeep(first) == testHashDeep(first));
    testing.expect(testHashDeep(first) == testHashDeep(third));
    testing.expect(testHashDeep(first) == testHashDeep(second));

    // DeepRecursive: same expectation, a u32 contains no further pointer.
    testing.expect(testHashDeepRecursive(first) == testHashDeepRecursive(first));
    testing.expect(testHashDeepRecursive(first) == testHashDeepRecursive(third));
    testing.expect(testHashDeepRecursive(first) == testHashDeepRecursive(second));
}
|
||||
|
||||
test "hash slice shallow" {
|
||||
// Allocate one array dynamically so that we're assured it is not merged
|
||||
// with the other by the optimization passes.
|
||||
const array1 = try std.heap.direct_allocator.create([6]u32);
|
||||
@ -130,23 +232,78 @@ test "autoHash slice" {
|
||||
const a = array1[0..];
|
||||
const b = array2[0..];
|
||||
const c = array1[0..3];
|
||||
testing.expect(testAutoHash(a) == testAutoHash(a));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(array1));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(b));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(c));
|
||||
testing.expect(testHashShallow(a) == testHashShallow(a));
|
||||
testing.expect(testHashShallow(a) != testHashShallow(array1));
|
||||
testing.expect(testHashShallow(a) != testHashShallow(b));
|
||||
testing.expect(testHashShallow(a) != testHashShallow(c));
|
||||
}
|
||||
|
||||
test "testAutoHash optional" {
|
||||
test "hash slice deep" {
    // One array lives on the heap so that the optimization passes cannot
    // merge it with the other one.
    const allocator = std.heap.direct_allocator;
    const heap_array = try allocator.create([6]u32);
    defer allocator.destroy(heap_array);
    heap_array.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
    const local_array = [_]u32{ 1, 2, 3, 4, 5, 6 };

    const whole = heap_array[0..];
    const other = local_array[0..];
    const prefix = heap_array[0..3];

    // Deep hashing looks at the content only: same elements, same hash,
    // wherever they are stored; a shorter prefix hashes differently.
    testing.expect(testHashDeep(whole) == testHashDeep(whole));
    testing.expect(testHashDeep(whole) == testHashDeep(heap_array));
    testing.expect(testHashDeep(whole) == testHashDeep(other));
    testing.expect(testHashDeep(whole) != testHashDeep(prefix));
}
|
||||
|
||||
test "hash struct deep" {
    const Foo = struct {
        a: u32,
        b: f64,
        c: *bool,

        const Self = @This();

        /// Allocates the `bool` that `c` points to; the caller destroys it.
        pub fn init(allocator: *mem.Allocator, a_: u32, b_: f64, c_: bool) !Self {
            const ptr = try allocator.create(bool);
            ptr.* = c_;
            return Self{ .a = a_, .b = b_, .c = ptr };
        }
    };

    const allocator = std.heap.direct_allocator;

    // Each allocation is paired with its release right away, so a failure in
    // a later `init` does not leak the earlier ones.
    const foo = try Foo.init(allocator, 123, 1.0, true);
    defer allocator.destroy(foo.c);
    const bar = try Foo.init(allocator, 123, 1.0, true);
    defer allocator.destroy(bar.c);
    const baz = try Foo.init(allocator, 123, 1.0, false);
    defer allocator.destroy(baz.c);

    // `c` is followed: equal pointees hash equally, different ones do not.
    testing.expect(testHashDeep(foo) == testHashDeep(bar));
    testing.expect(testHashDeep(foo) != testHashDeep(baz));
    testing.expect(testHashDeep(bar) != testHashDeep(baz));

    // The deep hash of the struct is the hash of its fields in declaration
    // order, with `c` replaced by the value it points to.
    var hasher = Wyhash.init(0);
    const h = testHashDeep(foo);
    autoHash(&hasher, foo.a);
    autoHash(&hasher, foo.b);
    autoHash(&hasher, foo.c.*);
    testing.expectEqual(h, hasher.final());

    // `.Deep` on `&foo` spends its single dereference on the outer pointer,
    // so `c` is hashed as an address; `.DeepRecursive` follows `c` as well.
    const h2 = testHashDeepRecursive(&foo);
    testing.expect(h2 != testHashDeep(&foo));
    testing.expect(h2 == testHashDeep(foo));
}
|
||||
|
||||
test "testHash optional" {
|
||||
const a: ?u32 = 123;
|
||||
const b: ?u32 = null;
|
||||
testing.expectEqual(testAutoHash(a), testAutoHash(u32(123)));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(b));
|
||||
testing.expectEqual(testAutoHash(b), 0);
|
||||
testing.expectEqual(testHash(a), testHash(u32(123)));
|
||||
testing.expect(testHash(a) != testHash(b));
|
||||
testing.expectEqual(testHash(b), 0);
|
||||
}
|
||||
|
||||
test "testAutoHash array" {
|
||||
test "testHash array" {
|
||||
const a = [_]u32{ 1, 2, 3 };
|
||||
const h = testAutoHash(a);
|
||||
const h = testHash(a);
|
||||
var hasher = Wyhash.init(0);
|
||||
autoHash(&hasher, u32(1));
|
||||
autoHash(&hasher, u32(2));
|
||||
@ -154,14 +311,14 @@ test "testAutoHash array" {
|
||||
testing.expectEqual(h, hasher.final());
|
||||
}
|
||||
|
||||
test "testAutoHash struct" {
|
||||
test "testHash struct" {
|
||||
const Foo = struct {
|
||||
a: u32 = 1,
|
||||
b: u32 = 2,
|
||||
c: u32 = 3,
|
||||
};
|
||||
const f = Foo{};
|
||||
const h = testAutoHash(f);
|
||||
const h = testHash(f);
|
||||
var hasher = Wyhash.init(0);
|
||||
autoHash(&hasher, u32(1));
|
||||
autoHash(&hasher, u32(2));
|
||||
@ -169,7 +326,7 @@ test "testAutoHash struct" {
|
||||
testing.expectEqual(h, hasher.final());
|
||||
}
|
||||
|
||||
test "testAutoHash union" {
|
||||
test "testHash union" {
|
||||
const Foo = union(enum) {
|
||||
A: u32,
|
||||
B: f32,
|
||||
@ -179,24 +336,24 @@ test "testAutoHash union" {
|
||||
const a = Foo{ .A = 18 };
|
||||
var b = Foo{ .B = 12.34 };
|
||||
const c = Foo{ .C = 18 };
|
||||
testing.expect(testAutoHash(a) == testAutoHash(a));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(b));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(c));
|
||||
testing.expect(testHash(a) == testHash(a));
|
||||
testing.expect(testHash(a) != testHash(b));
|
||||
testing.expect(testHash(a) != testHash(c));
|
||||
|
||||
b = Foo{ .A = 18 };
|
||||
testing.expect(testAutoHash(a) == testAutoHash(b));
|
||||
testing.expect(testHash(a) == testHash(b));
|
||||
}
|
||||
|
||||
test "testAutoHash vector" {
|
||||
test "testHash vector" {
|
||||
const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
|
||||
const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
|
||||
const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
|
||||
testing.expect(testAutoHash(a) == testAutoHash(a));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(b));
|
||||
testing.expect(testAutoHash(a) != testAutoHash(c));
|
||||
testing.expect(testHash(a) == testHash(a));
|
||||
testing.expect(testHash(a) != testHash(b));
|
||||
testing.expect(testHash(a) != testHash(c));
|
||||
}
|
||||
|
||||
test "testAutoHash error union" {
|
||||
test "testHash error union" {
|
||||
const Errors = error{Test};
|
||||
const Foo = struct {
|
||||
a: u32 = 1,
|
||||
@ -205,7 +362,7 @@ test "testAutoHash error union" {
|
||||
};
|
||||
const f = Foo{};
|
||||
const g: Errors!Foo = Errors.Test;
|
||||
testing.expect(testAutoHash(f) != testAutoHash(g));
|
||||
testing.expect(testAutoHash(f) == testAutoHash(Foo{}));
|
||||
testing.expect(testAutoHash(g) == testAutoHash(Errors.Test));
|
||||
testing.expect(testHash(f) != testHash(g));
|
||||
testing.expect(testHash(f) == testHash(Foo{}));
|
||||
testing.expect(testHash(g) == testHash(Errors.Test));
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ const Result = struct {
|
||||
throughput: u64,
|
||||
};
|
||||
|
||||
const block_size: usize = 8192;
|
||||
const block_size: usize = 8 * 8192;
|
||||
|
||||
pub fn benchmarkHash(comptime H: var, bytes: usize) !Result {
|
||||
var h = blk: {
|
||||
|
||||
@ -10,7 +10,8 @@ const primes = [_]u64{
|
||||
};
|
||||
|
||||
fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
|
||||
return mem.readVarInt(u64, data[0..bytes], .Little);
|
||||
const T = @IntType(false, 8 * bytes);
|
||||
return mem.readIntSliceLittle(T, data[0..bytes]);
|
||||
}
|
||||
|
||||
fn read_8bytes_swapped(data: []const u8) u64 {
|
||||
@ -31,18 +32,21 @@ fn mix1(a: u64, b: u64, seed: u64) u64 {
|
||||
return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
|
||||
}
|
||||
|
||||
pub const Wyhash = struct {
|
||||
// Wyhash version which does not store internal state for handling partial buffers.
|
||||
// This is needed so that we can maximize the speed for the short key case, which will
|
||||
// use the non-iterative api which the public Wyhash exposes.
|
||||
const WyhashStateless = struct {
|
||||
seed: u64,
|
||||
msg_len: usize,
|
||||
|
||||
pub fn init(seed: u64) Wyhash {
|
||||
return Wyhash{
|
||||
pub fn init(seed: u64) WyhashStateless {
|
||||
return WyhashStateless{
|
||||
.seed = seed,
|
||||
.msg_len = 0,
|
||||
};
|
||||
}
|
||||
|
||||
fn round(self: *Wyhash, b: []const u8) void {
|
||||
fn round(self: *WyhashStateless, b: []const u8) void {
|
||||
std.debug.assert(b.len == 32);
|
||||
|
||||
self.seed = mix0(
|
||||
@ -56,12 +60,25 @@ pub const Wyhash = struct {
|
||||
);
|
||||
}
|
||||
|
||||
fn partial(self: *Wyhash, b: []const u8) void {
|
||||
const rem_key = b;
|
||||
const rem_len = b.len;
|
||||
pub fn update(self: *WyhashStateless, b: []const u8) void {
|
||||
std.debug.assert(b.len % 32 == 0);
|
||||
|
||||
var seed = self.seed;
|
||||
seed = switch (@intCast(u5, rem_len)) {
|
||||
var off: usize = 0;
|
||||
while (off < b.len) : (off += 32) {
|
||||
@inlineCall(self.round, b[off .. off + 32]);
|
||||
}
|
||||
|
||||
self.msg_len += b.len;
|
||||
}
|
||||
|
||||
pub fn final(self: *WyhashStateless, b: []const u8) u64 {
|
||||
std.debug.assert(b.len < 32);
|
||||
|
||||
const seed = self.seed;
|
||||
const rem_len = @intCast(u5, b.len);
|
||||
const rem_key = b[0..rem_len];
|
||||
|
||||
self.seed = switch (rem_len) {
|
||||
0 => seed,
|
||||
1 => mix0(read_bytes(1, rem_key), primes[4], seed),
|
||||
2 => mix0(read_bytes(2, rem_key), primes[4], seed),
|
||||
@ -95,34 +112,70 @@ pub const Wyhash = struct {
|
||||
30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
|
||||
31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
|
||||
};
|
||||
self.seed = seed;
|
||||
|
||||
self.msg_len += b.len;
|
||||
return mum(self.seed ^ self.msg_len, primes[4]);
|
||||
}
|
||||
|
||||
/// One-shot hash of `input` with `seed`: the leading part whose length is a
/// multiple of 32 goes through `update`, the remaining tail (fewer than
/// 32 bytes) through `final`.
pub fn hash(seed: u64, input: []const u8) u64 {
    const tail_len = input.len % 32;
    const body_len = input.len - tail_len;

    var state = WyhashStateless.init(seed);
    @inlineCall(state.update, input[0..body_len]);
    return @inlineCall(state.final, input[body_len..]);
}
|
||||
};
|
||||
|
||||
/// Fast non-cryptographic 64bit hash function.
|
||||
/// See https://github.com/wangyi-fudan/wyhash
|
||||
pub const Wyhash = struct {
|
||||
state: WyhashStateless,
|
||||
|
||||
buf: [32]u8,
|
||||
buf_len: usize,
|
||||
|
||||
pub fn init(seed: u64) Wyhash {
|
||||
return Wyhash{
|
||||
.state = WyhashStateless.init(seed),
|
||||
.buf = undefined,
|
||||
.buf_len = 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn update(self: *Wyhash, b: []const u8) void {
|
||||
var off: usize = 0;
|
||||
|
||||
// Full middle blocks.
|
||||
while (off + 32 <= b.len) : (off += 32) {
|
||||
@inlineCall(self.round, b[off .. off + 32]);
|
||||
if (self.buf_len != 0 and self.buf_len + b.len >= 32) {
|
||||
off += 32 - self.buf_len;
|
||||
mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
|
||||
self.state.update(self.buf[0..]);
|
||||
self.buf_len = 0;
|
||||
}
|
||||
|
||||
self.partial(b[off..]);
|
||||
self.msg_len += b.len;
|
||||
const remain_len = b.len - off;
|
||||
const aligned_len = remain_len - (remain_len % 32);
|
||||
self.state.update(b[off .. off + aligned_len]);
|
||||
|
||||
mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]);
|
||||
self.buf_len += @intCast(u8, b[off + aligned_len ..].len);
|
||||
}
|
||||
|
||||
pub fn final(self: *Wyhash) u64 {
|
||||
return mum(self.seed ^ self.msg_len, primes[4]);
|
||||
const seed = self.state.seed;
|
||||
const rem_len = @intCast(u5, self.buf_len);
|
||||
const rem_key = self.buf[0..self.buf_len];
|
||||
|
||||
return self.state.final(rem_key);
|
||||
}
|
||||
|
||||
pub fn hash(seed: u64, input: []const u8) u64 {
|
||||
var c = Wyhash.init(seed);
|
||||
@inlineCall(c.update, input);
|
||||
return @inlineCall(c.final);
|
||||
return WyhashStateless.hash(seed, input);
|
||||
}
|
||||
};
|
||||
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
|
||||
test "test vectors" {
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
const hash = Wyhash.hash;
|
||||
|
||||
expectEqual(hash(0, ""), 0x0);
|
||||
@ -133,3 +186,46 @@ test "test vectors" {
|
||||
expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
|
||||
expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
|
||||
}
|
||||
|
||||
test "test vectors streaming" {
    // Feeding the input one byte at a time must match the known test vector.
    var streaming = Wyhash.init(5);
    for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |byte| {
        streaming.update(mem.asBytes(&byte));
    }
    expectEqual(streaming.final(), 0x602a1894d3bbfe7f);

    // The same 80-byte message, hashed in one shot and then in `count`
    // updates of one `pattern` each.
    const pattern = "1234567890";
    const count = 8;
    const result = 0x829e9c148b75970e;
    expectEqual(Wyhash.hash(6, pattern ** count), result);

    streaming = Wyhash.init(6);
    var fed: u32 = 0;
    while (fed < count) : (fed += 1) {
        streaming.update(pattern);
    }
    expectEqual(streaming.final(), result);
}
|
||||
|
||||
test "iterative non-divisible update" {
    var data: [8192]u8 = undefined;
    for (data) |*byte, idx| {
        byte.* = @truncate(u8, idx);
    }

    const seed = 0x128dad08f;
    // Chunk size that is deliberately not a multiple of the 32-byte block.
    const chunk_len = 33;

    var end: usize = 32;
    while (end < data.len) : (end += 32) {
        const non_iterative_hash = Wyhash.hash(seed, data[0..end]);

        // Hash the same prefix again, `chunk_len` bytes at a time.
        var wy = Wyhash.init(seed);
        var start: usize = 0;
        while (start < end) : (start += chunk_len) {
            wy.update(data[start..std.math.min(start + chunk_len, end)]);
        }
        const iterative_hash = wy.final();

        std.testing.expectEqual(iterative_hash, non_iterative_hash);
    }
}
|
||||
|
||||
@ -17,6 +17,21 @@ pub fn AutoHashMap(comptime K: type, comptime V: type) type {
|
||||
return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K));
|
||||
}
|
||||
|
||||
/// Hash map with `[]const u8` keys and values of type `V`; keys are hashed
/// with `hashString` and compared with `eqlString`.
pub fn StringHashMap(comptime V: type) type {
    const Map = HashMap([]const u8, V, hashString, eqlString);
    return Map;
}
|
||||
|
||||
/// Returns true when `a` and `b` contain the same bytes.
pub fn eqlString(a: []const u8, b: []const u8) bool {
    // Equal lengths starting at the same address are necessarily equal;
    // otherwise fall back to a byte-wise comparison.
    return a.len == b.len and (a.ptr == b.ptr or mem.compare(u8, a, b) == .Equal);
}
|
||||
|
||||
/// Hashes the bytes of `s` with Wyhash (seed 0) and keeps the low 32 bits.
pub fn hashString(s: []const u8) u32 {
    const full_hash = std.hash.Wyhash.hash(0, s);
    return @truncate(u32, full_hash);
}
|
||||
|
||||
pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u32, comptime eql: fn (a: K, b: K) bool) type {
|
||||
return struct {
|
||||
entries: []Entry,
|
||||
|
||||
@ -102,19 +102,9 @@ test "HeaderEntry" {
|
||||
testing.expectEqualSlices(u8, "x", e.value);
|
||||
}
|
||||
|
||||
fn stringEql(a: []const u8, b: []const u8) bool {
|
||||
if (a.len != b.len) return false;
|
||||
if (a.ptr == b.ptr) return true;
|
||||
return mem.compare(u8, a, b) == .Equal;
|
||||
}
|
||||
|
||||
fn stringHash(s: []const u8) u32 {
|
||||
return @truncate(u32, std.hash.Wyhash.hash(0, s));
|
||||
}
|
||||
|
||||
const HeaderList = std.ArrayList(HeaderEntry);
|
||||
const HeaderIndexList = std.ArrayList(usize);
|
||||
const HeaderIndex = std.HashMap([]const u8, HeaderIndexList, stringHash, stringEql);
|
||||
const HeaderIndex = std.StringHashMap(HeaderIndexList);
|
||||
|
||||
pub const Headers = struct {
|
||||
// the owned header field name is stored in the index as part of the key
|
||||
|
||||
@ -17,6 +17,7 @@ pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList;
|
||||
pub const StaticallyInitializedMutex = @import("statically_initialized_mutex.zig").StaticallyInitializedMutex;
|
||||
pub const SegmentedList = @import("segmented_list.zig").SegmentedList;
|
||||
pub const SpinLock = @import("spinlock.zig").SpinLock;
|
||||
pub const StringHashMap = @import("hash_map.zig").StringHashMap;
|
||||
pub const ChildProcess = @import("child_process.zig").ChildProcess;
|
||||
pub const TailQueue = @import("linked_list.zig").TailQueue;
|
||||
pub const Thread = @import("thread.zig").Thread;
|
||||
|
||||
@ -504,12 +504,9 @@ const Contents = struct {
|
||||
}
|
||||
};
|
||||
|
||||
comptime {
|
||||
@compileError("the behavior of std.AutoHashMap changed and []const u8 will be treated as a pointer. will need to update the hash maps to actually do some kind of hashing on the slices.");
|
||||
}
|
||||
const HashToContents = std.AutoHashMap([]const u8, Contents);
|
||||
const HashToContents = std.StringHashMap(Contents);
|
||||
const TargetToHash = std.HashMap(DestTarget, []const u8, DestTarget.hash, DestTarget.eql);
|
||||
const PathTable = std.AutoHashMap([]const u8, *TargetToHash);
|
||||
const PathTable = std.StringHashMap(*TargetToHash);
|
||||
|
||||
const LibCVendor = enum {
|
||||
musl,
|
||||
|
||||
@ -118,7 +118,7 @@ const FunctionSet = struct {
|
||||
list: std.ArrayList(VersionedFn),
|
||||
fn_vers_list: FnVersionList,
|
||||
};
|
||||
const FnVersionList = std.AutoHashMap([]const u8, std.ArrayList(usize));
|
||||
const FnVersionList = std.StringHashMap(std.ArrayList(usize));
|
||||
|
||||
const VersionedFn = struct {
|
||||
ver: []const u8, // example: "GLIBC_2.15"
|
||||
@ -140,8 +140,8 @@ pub fn main() !void {
|
||||
const prefix = try fs.path.join(allocator, [_][]const u8{ in_glibc_dir, "sysdeps", "unix", "sysv", "linux" });
|
||||
const glibc_out_dir = try fs.path.join(allocator, [_][]const u8{ zig_src_dir, "libc", "glibc" });
|
||||
|
||||
var global_fn_set = std.AutoHashMap([]const u8, Function).init(allocator);
|
||||
var global_ver_set = std.AutoHashMap([]const u8, usize).init(allocator);
|
||||
var global_fn_set = std.StringHashMap(Function).init(allocator);
|
||||
var global_ver_set = std.StringHashMap(usize).init(allocator);
|
||||
var target_functions = std.AutoHashMap(usize, FunctionSet).init(allocator);
|
||||
|
||||
for (abi_lists) |*abi_list| {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user