Merge pull request #17046 from tiehuis/improve-hash-tests

improve std/hash test coverage
This commit is contained in:
Andrew Kelley 2023-09-13 18:22:56 -04:00 committed by GitHub
commit 0e2f002a7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 202 additions and 112 deletions

View File

@ -3,7 +3,7 @@
// https://tools.ietf.org/html/rfc1950#section-9
// https://github.com/madler/zlib/blob/master/adler32.c
const std = @import("../std.zig");
const std = @import("std");
const testing = std.testing;
pub const Adler32 = struct {
@ -126,3 +126,9 @@ test "adler32 very long with variation" {
try testing.expectEqual(@as(u32, 0x5af38d6e), std.hash.Adler32.hash(long[0..]));
}
const verify = @import("verify.zig");
test "adler32 iterative" {
try verify.iterativeApi(Adler32);
}

View File

@ -342,64 +342,35 @@ pub const CityHash64 = struct {
}
};
fn SMHasherTest(comptime hash_fn: anytype) u32 {
const HashResult = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
var key: [256]u8 = undefined;
var hashes_bytes: [256 * @sizeOf(HashResult)]u8 = undefined;
@memset(&key, 0);
@memset(&hashes_bytes, 0);
var i: u32 = 0;
while (i < 256) : (i += 1) {
key[i] = @as(u8, @intCast(i));
var h: HashResult = hash_fn(key[0..i], 256 - i);
// comptime can't really do reinterpret casting yet,
// so we need to write the bytes manually.
for (hashes_bytes[i * @sizeOf(HashResult) ..][0..@sizeOf(HashResult)]) |*byte| {
byte.* = @as(u8, @truncate(h));
h = h >> 8;
}
}
return @as(u32, @truncate(hash_fn(&hashes_bytes, 0)));
}
fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
_ = seed;
return CityHash32.hash(str);
}
const verify = @import("verify.zig");
test "cityhash32" {
const Test = struct {
fn doTest() !void {
// Note: SMHasher doesn't provide a 32bit version of the algorithm.
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
fn do() !void {
// SMHasher doesn't provide a 32bit version of the algorithm.
// The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
}
};
try Test.doTest();
// TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
// case once we ship stage2.
//@setEvalBranchQuota(50000);
//comptime Test.doTest();
try Test.do();
@setEvalBranchQuota(75000);
try comptime Test.do();
}
test "cityhash64" {
const Test = struct {
fn doTest() !void {
// Note: This is not compliant with the SMHasher implementation of CityHash64!
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(SMHasherTest(CityHash64.hashWithSeed), 0x5FABC5C5);
fn do() !void {
// This is not compliant with the SMHasher implementation of CityHash64!
// The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
}
};
try Test.doTest();
// TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
// case once we ship stage2.
//@setEvalBranchQuota(50000);
//comptime Test.doTest();
try Test.do();
@setEvalBranchQuota(75000);
try comptime Test.do();
}

View File

@ -5,7 +5,7 @@
// - Crc32SmallWithPoly uses only 64 bytes of memory but is slower. Be aware that this is
// still moderately fast just slow relative to the slicing approach.
const std = @import("../std.zig");
const std = @import("std");
const builtin = @import("builtin");
const debug = std.debug;
const testing = std.testing;
@ -194,6 +194,8 @@ pub fn Crc32WithPoly(comptime poly: Polynomial) type {
};
}
const verify = @import("verify.zig");
test "crc32 ieee" {
const Crc32Ieee = Crc32WithPoly(.IEEE);
@ -210,6 +212,10 @@ test "crc32 castagnoli" {
try testing.expect(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
}
test "crc32 iterative" {
try verify.iterativeApi(Crc32WithPoly(.IEEE));
}
// half-byte lookup table implementation.
pub fn Crc32SmallWithPoly(comptime poly: Polynomial) type {
return struct {
@ -258,6 +264,10 @@ pub fn Crc32SmallWithPoly(comptime poly: Polynomial) type {
};
}
test "small crc32 iterative" {
try verify.iterativeApi(Crc32SmallWithPoly(.IEEE));
}
test "small crc32 ieee" {
const Crc32Ieee = Crc32SmallWithPoly(.IEEE);

View File

@ -1,6 +1,6 @@
//! This file is auto-generated by tools/update_crc_catalog.zig.
const std = @import("../../std.zig");
const std = @import("std");
const testing = std.testing;
const catalog = @import("catalog.zig");

View File

@ -4,7 +4,7 @@
//
// https://tools.ietf.org/html/draft-eastlake-fnv-14
const std = @import("../std.zig");
const std = @import("std");
const testing = std.testing;
pub const Fnv1a_32 = Fnv1a(u32, 0x01000193, 0x811c9dc5);
@ -40,19 +40,24 @@ fn Fnv1a(comptime T: type, comptime prime: T, comptime offset: T) type {
};
}
const verify = @import("verify.zig");
test "fnv1a-32" {
try testing.expect(Fnv1a_32.hash("") == 0x811c9dc5);
try testing.expect(Fnv1a_32.hash("a") == 0xe40c292c);
try testing.expect(Fnv1a_32.hash("foobar") == 0xbf9cf968);
try verify.iterativeApi(Fnv1a_32);
}
test "fnv1a-64" {
try testing.expect(Fnv1a_64.hash("") == 0xcbf29ce484222325);
try testing.expect(Fnv1a_64.hash("a") == 0xaf63dc4c8601ec8c);
try testing.expect(Fnv1a_64.hash("foobar") == 0x85944171f73967e8);
try verify.iterativeApi(Fnv1a_64);
}
test "fnv1a-128" {
try testing.expect(Fnv1a_128.hash("") == 0x6c62272e07bb014262b821756295c58d);
try testing.expect(Fnv1a_128.hash("a") == 0xd228cb696f1a8caf78912b704e4a8964);
try verify.iterativeApi(Fnv1a_128);
}

View File

@ -279,26 +279,9 @@ pub const Murmur3_32 = struct {
}
};
fn SMHasherTest(comptime hash_fn: anytype, comptime hashbits: u32) u32 {
const hashbytes = hashbits / 8;
var key: [256]u8 = [1]u8{0} ** 256;
var hashes: [hashbytes * 256]u8 = [1]u8{0} ** (hashbytes * 256);
var i: u32 = 0;
while (i < 256) : (i += 1) {
key[i] = @as(u8, @truncate(i));
var h = hash_fn(key[0..i], 256 - i);
if (native_endian == .Big)
h = @byteSwap(h);
@memcpy(hashes[i * hashbytes ..][0..hashbytes], @as([*]u8, @ptrCast(&h)));
}
return @as(u32, @truncate(hash_fn(&hashes, 0)));
}
const verify = @import("verify.zig");
test "murmur2_32" {
try testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@ -311,8 +294,18 @@ test "murmur2_32" {
try testing.expectEqual(Murmur2_32.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur2_32.hashUint64(v1));
}
// Checks Murmur2_32 against its SMHasher verification code, once at runtime
// and once at compile time.
test "murmur2_32 smhasher" {
    const Test = struct {
        fn do() !void {
            // Expected value goes first so a failure reports "expected ..., found ..."
            // the right way round; the explicit u32 keeps the runtime call well-typed.
            try testing.expectEqual(@as(u32, 0x27864C1E), verify.smhasher(Murmur2_32.hashWithSeed));
        }
    };
    try Test.do();
    // The comptime run needs more than the default evaluation budget.
    @setEvalBranchQuota(30000);
    try comptime Test.do();
}
test "murmur2_64" {
try std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@ -325,8 +318,18 @@ test "murmur2_64" {
try testing.expectEqual(Murmur2_64.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur2_64.hashUint64(v1));
}
// Checks Murmur2_64 against its SMHasher verification code, once at runtime
// and once at compile time.
test "murmur2_64 smhasher" {
    const Test = struct {
        fn do() !void {
            // Expected value goes first so a failure reports "expected ..., found ..."
            // the right way round; the explicit u32 keeps the runtime call well-typed.
            try std.testing.expectEqual(@as(u32, 0x1F0D3804), verify.smhasher(Murmur2_64.hashWithSeed));
        }
    };
    try Test.do();
    // The comptime run needs more than the default evaluation budget.
    @setEvalBranchQuota(30000);
    try comptime Test.do();
}
test "murmur3_32" {
try std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@ -338,3 +341,14 @@ test "murmur3_32" {
try testing.expectEqual(Murmur3_32.hash(@as([*]u8, @ptrCast(&v0le))[0..4]), Murmur3_32.hashUint32(v0));
try testing.expectEqual(Murmur3_32.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur3_32.hashUint64(v1));
}
// Checks Murmur3_32 against its SMHasher verification code, once at runtime
// and once at compile time.
test "murmur3_32 smhasher" {
    const Test = struct {
        fn do() !void {
            // Expected value goes first so a failure reports "expected ..., found ..."
            // the right way round; the explicit u32 keeps the runtime call well-typed.
            try std.testing.expectEqual(@as(u32, 0xB0F57EE3), verify.smhasher(Murmur3_32.hashWithSeed));
        }
    };
    try Test.do();
    // The comptime run needs more than the default evaluation budget.
    @setEvalBranchQuota(30000);
    try comptime Test.do();
}

62
lib/std/hash/verify.zig Normal file
View File

@ -0,0 +1,62 @@
const std = @import("std");
/// Calls `hash_fn` on `buf`, passing `seed` only when the function accepts one.
///
/// The call shape is chosen at compile time from the function's parameter list:
/// - a single parameter: `hash_fn(buf)`, and `seed` is ignored;
/// - more than one parameter, the first being an integer: `hash_fn(seed, buf)`;
/// - otherwise: `hash_fn(buf, seed)`.
///
/// `seed` is converted with `@intCast` to the parameter type the function declares.
/// Returns whatever `hash_fn` returns.
fn hashMaybeSeed(comptime hash_fn: anytype, seed: anytype, buf: []const u8) @typeInfo(@TypeOf(hash_fn)).Fn.return_type.? {
    const params = @typeInfo(@TypeOf(hash_fn)).Fn.params;
    if (params.len <= 1) {
        // Unseeded hash: only the input buffer is passed.
        return hash_fn(buf);
    } else if (@typeInfo(params[0].type.?) == .Int) {
        // Seed-first signature.
        return hash_fn(@intCast(seed), buf);
    } else {
        // Buffer-first signature.
        return hash_fn(buf, @intCast(seed));
    }
}
/// Constructs a `Hash` value through `Hash.init`, passing `seed` only when
/// `init` takes exactly one parameter; otherwise `init` is called with no
/// arguments and `seed` is ignored.
///
/// `seed` is converted with `@intCast` to the parameter type `init` declares.
fn initMaybeSeed(comptime Hash: anytype, seed: anytype) Hash {
    const init_arity = @typeInfo(@TypeOf(Hash.init)).Fn.params.len;
    // The arity is known at compile time, so only the matching call is analyzed.
    return if (init_arity == 1) Hash.init(@intCast(seed)) else Hash.init();
}
/// Computes a verification code for `hash_fn`, following the SMHasher scheme.
///
/// For every N in 0..255 the key {0, 1, ..., N-1} is hashed with seed 256-N
/// (the seed is dropped if `hash_fn` takes none), and each result is stored
/// little-endian, back to back, in one buffer. That buffer is then hashed
/// with seed 0 and the result truncated to 32 bits, which is the returned code.
pub fn smhasher(comptime hash_fn: anytype) u32 {
    const Digest = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
    const digest_len = @sizeOf(Digest);

    var key: [256]u8 = undefined;
    var digests: [256 * digest_len]u8 = undefined;

    for (0..256) |n| {
        // key[n] is written before hashing but the hashed prefix key[0..n]
        // excludes it; it only becomes part of the key on the next iteration.
        key[n] = @intCast(n);
        const digest = hashMaybeSeed(hash_fn, 256 - n, key[0..n]);

        const slot = digests[n * digest_len ..][0..digest_len];
        std.mem.writeIntLittle(Digest, slot, digest);
    }

    return @truncate(hashMaybeSeed(hash_fn, 0, digests[0..]));
}
/// Checks that the streaming interface of `Hash` (`init` / `update` / `final`)
/// agrees with its one-shot `hash` function.
///
/// One hasher is fed chunks of 1, 2, ..., 31 bytes. After each chunk:
/// - `final()` is called twice and both results must match, otherwise
///   `error.IterativeHashWasNotIdempotent` is returned;
/// - the result must equal `Hash.hash` over all bytes fed so far, otherwise
///   `error.IterativeHashDidNotMatchDirect` is returned.
///
/// Seed 0 is used wherever `Hash` accepts a seed.
pub fn iterativeApi(comptime Hash: anytype) !void {
    const seed = 0;

    // 528 = 1 + 2 + ... + 32. The loop below stops at 31, so only the first
    // 496 bytes are ever hashed.
    // NOTE(review): the buffer is all zeros, so this cannot notice chunks
    // being mixed in the wrong order — consider varied contents.
    var buf: [528]u8 = [_]u8{0} ** 528;

    var streaming = initMaybeSeed(Hash, seed);
    var consumed: usize = 0;
    for (1..32) |chunk_len| {
        const end = consumed + chunk_len;

        // One-shot hash of everything the streaming hasher will have seen.
        const direct = hashMaybeSeed(Hash.hash, seed, buf[0..end]);

        streaming.update(buf[consumed..end]);
        const first = streaming.final();
        const second = streaming.final();

        if (first != second) return error.IterativeHashWasNotIdempotent;
        if (first != direct) return error.IterativeHashDidNotMatchDirect;

        consumed = end;
    }
}

View File

@ -66,7 +66,7 @@ pub const Wyhash = struct {
}
pub fn final(self: *Wyhash) u64 {
var input = self.buf[0..self.buf_len];
var input: []const u8 = self.buf[0..self.buf_len];
var newSelf = self.shallowCopy(); // ensure idempotency
if (self.total_len <= 16) {
@ -196,6 +196,7 @@ pub const Wyhash = struct {
}
};
const verify = @import("verify.zig");
const expectEqual = std.testing.expectEqual;
const TestVector = struct {
@ -229,51 +230,26 @@ test "test vectors at comptime" {
}
}
test "test vectors streaming" {
const step = 5;
for (vectors) |e| {
var wh = Wyhash.init(e.seed);
var i: usize = 0;
while (i < e.input.len) : (i += step) {
const len = if (i + step > e.input.len) e.input.len - i else step;
wh.update(e.input[i..][0..len]);
test "smhasher" {
const Test = struct {
fn do() !void {
try expectEqual(verify.smhasher(Wyhash.hash), 0xBD5E840C);
}
try expectEqual(e.expected, wh.final());
}
};
try Test.do();
@setEvalBranchQuota(50000);
try comptime Test.do();
}
test "test ensure idempotent final call" {
const e: TestVector = .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" };
var wh = Wyhash.init(e.seed);
wh.update(e.input);
for (0..10) |_| {
try expectEqual(e.expected, wh.final());
}
}
test "iterative non-divisible update" {
var buf: [8192]u8 = undefined;
for (&buf, 0..) |*e, i| {
e.* = @as(u8, @truncate(i));
}
const seed = 0x128dad08f;
var end: usize = 32;
while (end < buf.len) : (end += 32) {
const non_iterative_hash = Wyhash.hash(seed, buf[0..end]);
var wy = Wyhash.init(seed);
var i: usize = 0;
while (i < end) : (i += 33) {
wy.update(buf[i..@min(i + 33, end)]);
test "iterative api" {
const Test = struct {
fn do() !void {
try verify.iterativeApi(Wyhash);
}
const iterative_hash = wy.final();
try std.testing.expectEqual(iterative_hash, non_iterative_hash);
}
};
try Test.do();
@setEvalBranchQuota(50000);
try comptime Test.do();
}
test "iterative maintains last sixteen" {

View File

@ -438,6 +438,8 @@ fn validateType(comptime T: type) void {
}
}
const verify = @import("verify.zig");
fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64) !void {
try expectEqual(expected, H.hash(0, input));
@ -457,6 +459,28 @@ test "xxhash64" {
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0xe04a477f19ee145d);
}
test "xxhash64 smhasher" {
const Test = struct {
fn do() !void {
try expectEqual(verify.smhasher(XxHash64.hash), 0x024B7CF4);
}
};
try Test.do();
@setEvalBranchQuota(75000);
comptime try Test.do();
}
test "xxhash64 iterative api" {
const Test = struct {
fn do() !void {
try verify.iterativeApi(XxHash64);
}
};
try Test.do();
@setEvalBranchQuota(30000);
comptime try Test.do();
}
test "xxhash32" {
const H = XxHash32;
@ -468,3 +492,25 @@ test "xxhash32" {
try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0x9c285e64);
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0x9c05f475);
}
test "xxhash32 smhasher" {
const Test = struct {
fn do() !void {
try expectEqual(verify.smhasher(XxHash32.hash), 0xBA88B743);
}
};
try Test.do();
@setEvalBranchQuota(75000);
comptime try Test.do();
}
test "xxhash32 iterative api" {
const Test = struct {
fn do() !void {
try verify.iterativeApi(XxHash32);
}
};
try Test.do();
@setEvalBranchQuota(30000);
comptime try Test.do();
}

View File

@ -55,7 +55,7 @@ pub fn main() anyerror!void {
try test_writer.writeAll(
\\//! This file is auto-generated by tools/update_crc_catalog.zig.
\\
\\const std = @import("../../std.zig");
\\const std = @import("std");
\\const testing = std.testing;
\\const catalog = @import("catalog.zig");
\\