From 7ae1b3a6b348a24f07bacb6e19f1aa6d7b3ba32d Mon Sep 17 00:00:00 2001
From: Sahnvour
Date: Sun, 26 Jul 2020 22:01:33 +0200
Subject: [PATCH 1/3] add trait hasUniqueRepresentation

---
 lib/std/meta/trait.zig | 84 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/lib/std/meta/trait.zig b/lib/std/meta/trait.zig
index 5cea0ecb9a..7414edac4f 100644
--- a/lib/std/meta/trait.zig
+++ b/lib/std/meta/trait.zig
@@ -416,3 +416,87 @@ test "std.meta.trait.hasFunctions" {
     testing.expect(!hasFunctions(TestStruct2, .{ "a", "b", "c" }));
     testing.expect(!hasFunctions(TestStruct2, tuple));
 }
+
+/// True if every value of the type `T` has a unique bit pattern representing it.
+/// In other words, `T` has no unused bits and no padding.
+/// The result is conservative: `false` is returned whenever uniqueness is not known.
+pub fn hasUniqueRepresentation(comptime T: type) bool {
+    switch (@typeInfo(T)) {
+        else => return false, // TODO can we know if it's true for some of these types ?
+
+        .AnyFrame,
+        .BoundFn,
+        .Enum,
+        .ErrorSet,
+        .Fn,
+        => return true,
+
+        // An integer narrower than its storage has padding bits (same reason `bool` is not listed above).
+        .Int => |info| return @sizeOf(T) * 8 == info.bits,
+
+        // A slice is a pointer plus a length; leave it to the caller to decide how to treat it.
+        .Pointer => |info| return info.size != .Slice,
+
+        .Array => |info| return comptime hasUniqueRepresentation(info.child),
+
+        .Struct => |info| {
+            var sum_size = @as(usize, 0);
+
+            inline for (info.fields) |field| {
+                const FieldType = field.field_type;
+                if (comptime !hasUniqueRepresentation(FieldType)) return false;
+                sum_size += @sizeOf(FieldType);
+            }
+
+            // Any difference between the two sizes is padding.
+            return @sizeOf(T) == sum_size;
+        },
+
+        // The elements must fill the vector exactly, otherwise it is padded.
+        .Vector => |info| return comptime hasUniqueRepresentation(info.child) and
+            @sizeOf(T) == @sizeOf(info.child) * info.len,
+    }
+}
+
+test "std.meta.trait.hasUniqueRepresentation" {
+    const TestStruct1 = struct {
+        a: u32,
+        b: u32,
+    };
+
+    testing.expect(hasUniqueRepresentation(TestStruct1));
+
+    const TestStruct2 = struct {
+        a: u32,
+        b: u16,
+    };
+
+    testing.expect(!hasUniqueRepresentation(TestStruct2));
+
+    const TestStruct3 = struct {
+        a: u32,
+        b: u32,
+    };
+
+    testing.expect(hasUniqueRepresentation(TestStruct3));
+
+    testing.expect(!hasUniqueRepresentation(i1));
+    testing.expect(!hasUniqueRepresentation(u2));
+    testing.expect(!hasUniqueRepresentation(i3));
+    testing.expect(!hasUniqueRepresentation(u4));
+    testing.expect(!hasUniqueRepresentation(i5));
+    testing.expect(!hasUniqueRepresentation(u6));
+    testing.expect(!hasUniqueRepresentation(i7));
+    testing.expect(hasUniqueRepresentation(u8));
+    testing.expect(!hasUniqueRepresentation(i9));
+    testing.expect(!hasUniqueRepresentation(u10));
+
+    testing.expect(hasUniqueRepresentation(i16));
+    testing.expect(hasUniqueRepresentation(u32));
+    testing.expect(hasUniqueRepresentation(i64));
+
+    testing.expect(!hasUniqueRepresentation(bool));
+    testing.expect(!hasUniqueRepresentation([]const u8));
+    testing.expect(hasUniqueRepresentation(*const u8));
+    testing.expect(hasUniqueRepresentation([4]u8));
+}

From 345cb3200c353d6fb7aeb0e058986d8ca59ced1e Mon Sep 17 00:00:00 2001
From: Sahnvour
Date: Sun, 26 Jul 2020 22:04:10 +0200
Subject: [PATCH 2/3] improve autoHash type switch

floats shouldn't be autoHash'd as they have multiple representations for some values, preventing it by default is safer
---
 lib/std/hash/auto_hash.zig | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig
index a3e1a390c2..aaa68ca8fd 100644
--- a/lib/std/hash/auto_hash.zig
+++ b/lib/std/hash/auto_hash.zig
@@ -81,24 +81,22 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
         .Undefined,
         .Void,
         .Null,
-        .BoundFn,
         .ComptimeFloat,
         .ComptimeInt,
         .Type,
         .EnumLiteral,
         .Frame,
+        .Float,
         => @compileError("cannot hash this type"),

         // Help the optimizer see that hashing an int is easy by inlining!
         // TODO Check if the situation is better after #561 is resolved.
         .Int => @call(.{ .modifier = .always_inline }, hasher.update, .{std.mem.asBytes(&key)}),

-        .Float => |info| hash(hasher, @bitCast(std.meta.Int(false, info.bits), key), strat),
-
         .Bool => hash(hasher, @boolToInt(key), strat),
         .Enum => hash(hasher, @enumToInt(key), strat),
         .ErrorSet => hash(hasher, @errorToInt(key), strat),
-        .AnyFrame, .Fn => hash(hasher, @ptrToInt(key), strat),
+        .AnyFrame, .BoundFn, .Fn => hash(hasher, @ptrToInt(key), strat),

         .Pointer => @call(.{ .modifier = .always_inline }, hashPointer, .{ hasher, key, strat }),

@@ -266,12 +264,12 @@ test "hash slice deep" {
 test "hash struct deep" {
     const Foo = struct {
         a: u32,
-        b: f64,
+        b: u16,
         c: *bool,

         const Self = @This();

-        pub fn init(allocator: *mem.Allocator, a_: u32, b_: f64, c_: bool) !Self {
+        pub fn init(allocator: *mem.Allocator, a_: u32, b_: u16, c_: bool) !Self {
             const ptr = try allocator.create(bool);
             ptr.* = c_;
             return Self{ .a = a_, .b = b_, .c = ptr };
@@ -279,9 +277,9 @@ test "hash struct deep" {
     };

     const allocator = std.testing.allocator;
-    const foo = try Foo.init(allocator, 123, 1.0, true);
-    const bar = try Foo.init(allocator, 123, 1.0, true);
-    const baz = try Foo.init(allocator, 123, 1.0, false);
+    const foo = try Foo.init(allocator, 123, 10, true);
+    const bar = try Foo.init(allocator, 123, 10, true);
+    const baz = try Foo.init(allocator, 123, 10, false);
     defer allocator.destroy(foo.c);
     defer allocator.destroy(bar.c);
     defer allocator.destroy(baz.c);
@@ -338,12 +336,12 @@ test "testHash struct" {
 test "testHash union" {
     const Foo = union(enum) {
         A: u32,
-        B: f32,
+        B: bool,
         C: u32,
     };

     const a = Foo{ .A = 18 };
-    var b = Foo{ .B = 12.34 };
+    var b = Foo{ .B = true };
     const c = Foo{ .C = 18 };
     testing.expect(testHash(a) == testHash(a));
     testing.expect(testHash(a) != testHash(b));

From f67ce1e35fe3ecf19b50f64b9fe2d85747f7934d Mon Sep 17 00:00:00 2001
From: Sahnvour
Date: Sun, 26 Jul 2020 22:08:48 +0200
Subject: [PATCH 3/3] make use of hasUniqueRepresentation to speed up hashing
 facilities, fastpath in getAutoHashFn is particularly important for hashmap
 performance

gives a 1.18x speedup on gotta-go-fast hashmap bench
---
 lib/std/hash/auto_hash.zig | 12 ++++++------
 lib/std/hash_map.zig       | 11 ++++++++---
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig
index aaa68ca8fd..85f8e4b0d2 100644
--- a/lib/std/hash/auto_hash.zig
+++ b/lib/std/hash/auto_hash.zig
@@ -56,9 +56,6 @@ pub fn hashPointer(hasher: anytype, key: anytype, comptime strat: HashStrategy)
 pub fn hashArray(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
     switch (strat) {
         .Shallow => {
-            // TODO detect via a trait when Key has no padding bits to
-            // hash it as an array of bytes.
-            // Otherwise, hash every element.
             for (key) |element| {
                 hash(hasher, element, .Shallow);
             }
@@ -75,6 +72,12 @@ pub fn hashArray(hasher: anytype, key: anytype, comptime strat: HashStrategy) vo
 /// Strategy is provided to determine if pointers should be followed or not.
 pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
     const Key = @TypeOf(key);
+
+    if (strat == .Shallow and comptime meta.trait.hasUniqueRepresentation(Key)) {
+        @call(.{ .modifier = .always_inline }, hasher.update, .{mem.asBytes(&key)});
+        return;
+    }
+
     switch (@typeInfo(Key)) {
         .NoReturn,
         .Opaque,
@@ -119,9 +122,6 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
         },

         .Struct => |info| {
-            // TODO detect via a trait when Key has no padding bits to
-            // hash it as an array of bytes.
-            // Otherwise, hash every field.
             inline for (info.fields) |field| {
                 // We reuse the hash of the previous field as the seed for the
                 // next one so that they're dependant.
diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig
index 3952ecb4b2..c81def3a00 100644
--- a/lib/std/hash_map.zig
+++ b/lib/std/hash_map.zig
@@ -5,6 +5,7 @@ const testing = std.testing;
 const math = std.math;
 const mem = std.mem;
 const meta = std.meta;
+const trait = meta.trait;
 const autoHash = std.hash.autoHash;
 const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
@@ -1023,9 +1024,13 @@ pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) {
 pub fn getAutoHashFn(comptime K: type) (fn (K) u32) {
     return struct {
         fn hash(key: K) u32 {
-            var hasher = Wyhash.init(0);
-            autoHash(&hasher, key);
-            return @truncate(u32, hasher.final());
+            if (comptime trait.hasUniqueRepresentation(K)) {
+                return @truncate(u32, Wyhash.hash(0, mem.asBytes(&key)));
+            } else {
+                var hasher = Wyhash.init(0);
+                autoHash(&hasher, key);
+                return @truncate(u32, hasher.final());
+            }
         }
     }.hash;
 }