From 8b7c59a41419b8802af843ac023ba0c6fbfbb83b Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Wed, 1 May 2019 23:38:52 -0700 Subject: [PATCH 1/6] std.HashMap: add public ensureCapacity fn --- std/hash_map.zig | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index 6ea128c9ad..aae31063e5 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -118,7 +118,7 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 }; } self.incrementModificationCount(); - try self.ensureCapacity(); + try self.autoCapacity(); const put_result = self.internalPut(key); assert(put_result.old_kv == null); return GetOrPutResult{ @@ -135,15 +135,15 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 return res.kv; } - fn ensureCapacity(self: *Self) !void { - if (self.entries.len == 0) { - return self.initCapacity(16); + /// Sets the capacity to the new capacity if the new + /// capacity is greater than the current capacity. + pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { + if (new_capacity <= self.entries.len) { + return; } - - // if we get too full (60%), double the capacity - if (self.size * 5 >= self.entries.len * 3) { - const old_entries = self.entries; - try self.initCapacity(self.entries.len * 2); + const old_entries = self.entries; + try self.initCapacity(new_capacity); + if (old_entries.len > 0) { // dump all of the old elements into the new table for (old_entries) |*old_entry| { if (old_entry.used) { @@ -157,7 +157,7 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 /// Returns the kv pair that was already there. pub fn put(self: *Self, key: K, value: V) !?KV { self.incrementModificationCount(); - try self.ensureCapacity(); + try self.autoCapacity(); const put_result = self.internalPut(key); put_result.new_entry.kv.value = value; @@ -227,6 +227,16 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 return other; } + fn autoCapacity(self: *Self) !void { + if (self.entries.len == 0) { + return self.ensureCapacity(16); + } + // if we get too full (60%), double the capacity + if (self.size * 5 >= self.entries.len * 3) { + return self.ensureCapacity(self.entries.len * 2); + } + } + fn initCapacity(hm: *Self, capacity: usize) !void { hm.entries = try hm.allocator.alloc(Entry, capacity); hm.size = 0; From 0afa2d040a6f51b5423269cb588f4fa483e8cfba Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 2 May 2019 00:58:26 -0700 Subject: [PATCH 2/6] make std.HashMap.ensureCapacity round up to the nearest power of two --- std/hash_map.zig | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index aae31063e5..c8fbd58065 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -141,8 +141,15 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 if (new_capacity <= self.entries.len) { return; } + // make sure capacity is a power of two + var capacity = new_capacity; + const is_power_of_two = capacity & (capacity-1) == 0; + if (!is_power_of_two) { + const pow = math.log2_int_ceil(usize, capacity); + capacity = math.pow(usize, 2, pow); + } const old_entries = self.entries; - try self.initCapacity(new_capacity); + try self.initCapacity(capacity); if (old_entries.len > 0) { // dump all of the old elements into the new table for (old_entries) |*old_entry| { From 4d42275d03a3f24beca2d93e02ff12e52a48a91b Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 2 May 2019 17:01:30 -0700 Subject: [PATCH 3/6] std.HashMap: make ensureCapacity optimize for the expected count, add ensureCapacityExact --- std/hash_map.zig | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index c8fbd58065..9d4c5318db 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -135,21 +135,39 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 return res.kv; } + fn optimizedCapacity(expected_count: usize) usize { + // ensure that the hash map will be at most 60% full if + // new_capacity items are put into the hash map + var optimized_capacity = expected_count * 5 / 3; + // round capacity to the next power of two + const is_power_of_two = optimized_capacity & (optimized_capacity-1) == 0; + if (!is_power_of_two) { + const pow = math.log2_int_ceil(usize, optimized_capacity); + optimized_capacity = math.pow(usize, 2, pow); + } + return optimized_capacity; + } + + /// Increase capacity so that the hash map will be at most + /// 60% full when expected_count items are put into it + pub fn ensureCapacity(self: *Self, expected_count: usize) !void { + const optimized_capacity = optimizedCapacity(expected_count); + return self.ensureCapacityExact(optimized_capacity); + } + /// Sets the capacity to the new capacity if the new /// capacity is greater than the current capacity. - pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { + /// New capacity must be a power of two. + pub fn ensureCapacityExact(self: *Self, new_capacity: usize) !void { + const is_power_of_two = new_capacity & (new_capacity-1) == 0; + assert(is_power_of_two); + if (new_capacity <= self.entries.len) { return; } - // make sure capacity is a power of two - var capacity = new_capacity; - const is_power_of_two = capacity & (capacity-1) == 0; - if (!is_power_of_two) { - const pow = math.log2_int_ceil(usize, capacity); - capacity = math.pow(usize, 2, pow); - } + const old_entries = self.entries; - try self.initCapacity(capacity); + try self.initCapacity(new_capacity); if (old_entries.len > 0) { // dump all of the old elements into the new table for (old_entries) |*old_entry| { @@ -236,11 +254,11 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 fn autoCapacity(self: *Self) !void { if (self.entries.len == 0) { - return self.ensureCapacity(16); + return self.ensureCapacityExact(16); } // if we get too full (60%), double the capacity if (self.size * 5 >= self.entries.len * 3) { - return self.ensureCapacity(self.entries.len * 2); + return self.ensureCapacityExact(self.entries.len * 2); } } From 26591d4f2264c3f6636598dc86d4e4bed528658e Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 2 May 2019 17:42:38 -0700 Subject: [PATCH 4/6] std.HashMap: add putAssumeCapacity fn --- std/hash_map.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index 9d4c5318db..31750d1106 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -181,8 +181,13 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 /// Returns the kv pair that was already there. pub fn put(self: *Self, key: K, value: V) !?KV { - self.incrementModificationCount(); try self.autoCapacity(); + return putAssumeCapacity(self, key, value); + } + + pub fn putAssumeCapacity(self: *Self, key: K, value: V) ?KV { + assert(self.count() < self.entries.len); + self.incrementModificationCount(); const put_result = self.internalPut(key); put_result.new_entry.kv.value = value; From 13a7b8586a600c117cc2d090a283e5a6344d42c4 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 2 May 2019 17:43:51 -0700 Subject: [PATCH 5/6] std.HashMap: make ensureCapacityExact private --- std/hash_map.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index 31750d1106..427c3860f6 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -158,7 +158,7 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 /// Sets the capacity to the new capacity if the new /// capacity is greater than the current capacity. /// New capacity must be a power of two. - pub fn ensureCapacityExact(self: *Self, new_capacity: usize) !void { + fn ensureCapacityExact(self: *Self, new_capacity: usize) !void { const is_power_of_two = new_capacity & (new_capacity-1) == 0; assert(is_power_of_two); From cf8dde2d686199474847c6c4e342dc1ac46a435b Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 3 May 2019 21:15:00 -0700 Subject: [PATCH 6/6] std.HashMap: cleanup ensureCapacity + add test - Cleaned up some comments - Removed the "is power of two" check from optimizedCapacity since the * 5 / 3 is unlikely to end up with a power of two, so it's a wasted check the majority of the time - Made ensureCapacity/ensureCapacityExact increment the modification count if they resize the hash map so that we can catch resizes while iterating, which would likely break the iterator state --- std/hash_map.zig | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/std/hash_map.zig b/std/hash_map.zig index 427c3860f6..9cd1ea052c 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -137,18 +137,14 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 fn optimizedCapacity(expected_count: usize) usize { // ensure that the hash map will be at most 60% full if - // new_capacity items are put into the hash map + // expected_count items are put into it var optimized_capacity = expected_count * 5 / 3; // round capacity to the next power of two - const is_power_of_two = optimized_capacity & (optimized_capacity-1) == 0; - if (!is_power_of_two) { - const pow = math.log2_int_ceil(usize, optimized_capacity); - optimized_capacity = math.pow(usize, 2, pow); - } - return optimized_capacity; + const pow = math.log2_int_ceil(usize, optimized_capacity); + return math.pow(usize, 2, pow); } - /// Increase capacity so that the hash map will be at most + /// Increases capacity so that the hash map will be at most /// 60% full when expected_count items are put into it pub fn ensureCapacity(self: *Self, expected_count: usize) !void { const optimized_capacity = optimizedCapacity(expected_count); @@ -168,6 +164,7 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3 const old_entries = self.entries; try self.initCapacity(new_capacity); + self.incrementModificationCount(); if (old_entries.len > 0) { // dump all of the old elements into the new table for (old_entries) |*old_entry| { @@ -467,6 +464,24 @@ test "iterator hash map" { testing.expect(entry.value == values[0]); } +test "ensure capacity" { + var direct_allocator = std.heap.DirectAllocator.init(); + defer direct_allocator.deinit(); + + var map = AutoHashMap(i32, i32).init(&direct_allocator.allocator); + defer map.deinit(); + + try map.ensureCapacity(20); + const initialCapacity = map.entries.len; + testing.expect(initialCapacity >= 20); + var i : i32 = 0; + while (i < 20) : (i += 1) { + testing.expect(map.putAssumeCapacity(i, i+10) == null); + } + // shouldn't resize from putAssumeCapacity + testing.expect(initialCapacity == map.entries.len); +} + pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { return struct { fn hash(key: K) u32 {