// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("std.zig");
const builtin = @import("builtin");
const assert = debug.assert;
const autoHash = std.hash.autoHash;
const debug = std.debug;
const warn = debug.warn;
const math = std.math;
const mem = std.mem;
const meta = std.meta;
const trait = meta.trait;
const Allocator = mem.Allocator;
const Wyhash = std.hash.Wyhash;

pub fn getAutoHashFn(comptime K: type) (fn (K) u64) {
    comptime {
        assert(@hasDecl(std, "StringHashMap")); // detect when the following message needs to be updated
        if (K == []const u8) {
            @compileError("std.auto_hash.autoHash does not allow slices here (" ++
                @typeName(K) ++
                ") because the intent is unclear. " ++
                "Consider using std.StringHashMap for hashing the contents of []const u8. " ++
                "Alternatively, consider using std.auto_hash.hash or providing your own hash function instead.");
        }
    }

    return struct {
        fn hash(key: K) u64 {
            if (comptime trait.hasUniqueRepresentation(K)) {
                return Wyhash.hash(0, std.mem.asBytes(&key));
            } else {
                var hasher = Wyhash.init(0);
                autoHash(&hasher, key);
                return hasher.final();
            }
        }
    }.hash;
}
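
// A minimal sketch of how the returned function can be used (the test name is
// illustrative, not upstream): the hash function is deterministic, so equal
// keys always hash identically, which is what the hash map relies on.
test "getAutoHashFn sample" {
    const hashU32 = getAutoHashFn(u32);
    try std.testing.expect(hashU32(42) == hashU32(42));
}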

pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) {
    return struct {
        fn eql(a: K, b: K) bool {
            return meta.eql(a, b);
        }
    }.eql;
}
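
// A minimal sketch of the fallback equality (illustrative test, not upstream):
// getAutoEqlFn delegates to std.meta.eql, so struct keys compare field by field.
test "getAutoEqlFn sample" {
    const Point = struct { x: i32, y: i32 };
    const eqlPoint = getAutoEqlFn(Point);
    try std.testing.expect(eqlPoint(.{ .x = 1, .y = 2 }, .{ .x = 1, .y = 2 }));
    try std.testing.expect(!eqlPoint(.{ .x = 1, .y = 2 }, .{ .x = 3, .y = 4 }));
}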

pub fn AutoHashMap(comptime K: type, comptime V: type) type {
    return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), default_max_load_percentage);
}

pub fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type {
    return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), default_max_load_percentage);
}

/// Built-in hash map for strings as keys.
pub fn StringHashMap(comptime V: type) type {
    return HashMap([]const u8, V, hashString, eqlString, default_max_load_percentage);
}

pub fn StringHashMapUnmanaged(comptime V: type) type {
    return HashMapUnmanaged([]const u8, V, hashString, eqlString, default_max_load_percentage);
}

pub fn eqlString(a: []const u8, b: []const u8) bool {
    return mem.eql(u8, a, b);
}

pub fn hashString(s: []const u8) u64 {
    return std.hash.Wyhash.hash(0, s);
}
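
// A short sketch of the string variant (illustrative test, not upstream):
// keys are hashed and compared by contents, so any slice with the same bytes
// finds the same entry.
test "StringHashMap sample" {
    var map = StringHashMap(u32).init(std.testing.allocator);
    defer map.deinit();
    try map.put("hello", 1);

    // A slice with the same bytes but a different address still finds the
    // entry, because hashString and eqlString work on contents.
    var other = [_]u8{ 'h', 'e', 'l', 'l', 'o' };
    try std.testing.expect(map.get(&other).? == 1);
}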

/// Deprecated: use `default_max_load_percentage`.
pub const DefaultMaxLoadPercentage = default_max_load_percentage;

pub const default_max_load_percentage = 80;
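
// For example, with the default 80% and a capacity of 64 slots, growth is
// triggered once more than (64 * 80) / 100 = 51 slots are used or tombstoned.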

/// General purpose hash table.
/// No order is guaranteed and any modification invalidates live iterators.
/// It provides fast operations (lookup, insertion, deletion) with quite high
/// load factors (up to 80% by default) for low memory usage.
/// For a hash map that can be initialized directly and that does not store an
/// Allocator field, see `HashMapUnmanaged`.
/// If iterating over the table entries is a strong use case and needs to be fast,
/// prefer the alternative `std.ArrayHashMap`.
pub fn HashMap(
    comptime K: type,
    comptime V: type,
    comptime hashFn: fn (key: K) u64,
    comptime eqlFn: fn (a: K, b: K) bool,
    comptime max_load_percentage: u64,
) type {
    return struct {
        unmanaged: Unmanaged,
        allocator: *Allocator,

        pub const Unmanaged = HashMapUnmanaged(K, V, hashFn, eqlFn, max_load_percentage);
        pub const Entry = Unmanaged.Entry;
        pub const Hash = Unmanaged.Hash;
        pub const Iterator = Unmanaged.Iterator;
        pub const Size = Unmanaged.Size;
        pub const GetOrPutResult = Unmanaged.GetOrPutResult;

        const Self = @This();

        pub fn init(allocator: *Allocator) Self {
            return .{
                .unmanaged = .{},
                .allocator = allocator,
            };
        }

        pub fn deinit(self: *Self) void {
            self.unmanaged.deinit(self.allocator);
            self.* = undefined;
        }

        pub fn clearRetainingCapacity(self: *Self) void {
            return self.unmanaged.clearRetainingCapacity();
        }

        pub fn clearAndFree(self: *Self) void {
            return self.unmanaged.clearAndFree(self.allocator);
        }

        pub fn count(self: Self) Size {
            return self.unmanaged.count();
        }

        pub fn iterator(self: *const Self) Iterator {
            return self.unmanaged.iterator();
        }

        /// If `key` exists, this function cannot fail.
        /// If there is an existing item with `key`, then the result
        /// `Entry` pointer points to it, and found_existing is true.
        /// Otherwise, puts a new item with undefined value, and
        /// the `Entry` pointer points to it. Caller should then initialize
        /// the value (but not the key).
        pub fn getOrPut(self: *Self, key: K) !GetOrPutResult {
            return self.unmanaged.getOrPut(self.allocator, key);
        }

        /// If there is an existing item with `key`, then the result
        /// `Entry` pointer points to it, and found_existing is true.
        /// Otherwise, puts a new item with undefined value, and
        /// the `Entry` pointer points to it. Caller should then initialize
        /// the value (but not the key).
        /// If a new entry needs to be stored, this function asserts there
        /// is enough capacity to store it.
        pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
            return self.unmanaged.getOrPutAssumeCapacity(key);
        }

        pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry {
            return self.unmanaged.getOrPutValue(self.allocator, key, value);
        }

        /// Increases capacity, guaranteeing that insertions up until the
        /// `expected_count` will not cause an allocation, and therefore cannot fail.
        pub fn ensureCapacity(self: *Self, expected_count: Size) !void {
            return self.unmanaged.ensureCapacity(self.allocator, expected_count);
        }

        /// Returns the number of total elements which may be present before it is
        /// no longer guaranteed that no allocations will be performed.
        pub fn capacity(self: *Self) Size {
            return self.unmanaged.capacity();
        }

        /// Clobbers any existing data. To detect if a put would clobber
        /// existing data, see `getOrPut`.
        pub fn put(self: *Self, key: K, value: V) !void {
            return self.unmanaged.put(self.allocator, key, value);
        }

        /// Inserts a key-value pair into the hash map, asserting that no previous
        /// entry with the same key is already present.
        pub fn putNoClobber(self: *Self, key: K, value: V) !void {
            return self.unmanaged.putNoClobber(self.allocator, key, value);
        }

        /// Asserts there is enough capacity to store the new key-value pair.
        /// Clobbers any existing data. To detect if a put would clobber
        /// existing data, see `getOrPutAssumeCapacity`.
        pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
            return self.unmanaged.putAssumeCapacity(key, value);
        }

        /// Asserts there is enough capacity to store the new key-value pair.
        /// Asserts that it does not clobber any existing data.
        /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
        pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
            return self.unmanaged.putAssumeCapacityNoClobber(key, value);
        }

        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
        pub fn fetchPut(self: *Self, key: K, value: V) !?Entry {
            return self.unmanaged.fetchPut(self.allocator, key, value);
        }

        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
        /// If insertion happens, asserts there is enough capacity without allocating.
        pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
            return self.unmanaged.fetchPutAssumeCapacity(key, value);
        }

        pub fn get(self: Self, key: K) ?V {
            return self.unmanaged.get(key);
        }

        pub fn getEntry(self: Self, key: K) ?*Entry {
            return self.unmanaged.getEntry(key);
        }

        pub fn contains(self: Self, key: K) bool {
            return self.unmanaged.contains(key);
        }

        /// If there is an `Entry` with a matching key, it is deleted from
        /// the hash map, and then returned from this function.
        pub fn remove(self: *Self, key: K) ?Entry {
            return self.unmanaged.remove(key);
        }

        /// Asserts there is an `Entry` with matching key, deletes it from the hash map,
        /// and discards it.
        pub fn removeAssertDiscard(self: *Self, key: K) void {
            return self.unmanaged.removeAssertDiscard(key);
        }

        pub fn clone(self: Self) !Self {
            var other = try self.unmanaged.clone(self.allocator);
            return other.promote(self.allocator);
        }
    };
}
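
// A short sketch of the getOrPut idiom the doc comments above describe
// (illustrative test, not upstream): one lookup either finds the existing
// entry or reserves a new one whose value the caller must initialize.
test "HashMap getOrPut idiom sample" {
    var map = AutoHashMap(u8, u32).init(std.testing.allocator);
    defer map.deinit();
    const gop = try map.getOrPut('a');
    if (!gop.found_existing) gop.entry.value = 0;
    gop.entry.value += 1;
    try std.testing.expect(map.get('a').? == 1);
}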

/// A HashMap based on open addressing and linear probing.
/// A lookup or modification typically incurs only 2 cache misses.
/// No order is guaranteed and any modification invalidates live iterators.
/// It achieves good performance with quite high load factors (by default,
/// grow is triggered at 80% full) and only one byte of overhead per element.
/// The struct itself is only 16 bytes for a small footprint. This comes at
/// the price of handling sizes with a u32, which should be reasonable enough
/// for almost all uses.
/// Deletions are achieved with tombstones.
pub fn HashMapUnmanaged(
    comptime K: type,
    comptime V: type,
    hashFn: fn (key: K) u64,
    eqlFn: fn (a: K, b: K) bool,
    comptime max_load_percentage: u64,
) type {
    comptime assert(max_load_percentage > 0 and max_load_percentage < 100);

    return struct {
        const Self = @This();

        // This is actually a midway pointer to the single buffer containing
        // a `Header` field, the `Metadata`s and `Entry`s.
        // At `-@sizeOf(Header)` is the Header field.
        // At `sizeOf(Metadata) * capacity + offset`, which is pointed to by
        // self.header().entries, is the array of entries.
        // This means that the hashmap only holds one live allocation, to
        // reduce memory fragmentation and struct size.
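        //
        // A sketch of that single allocation, in memory order:
        //   [ Header ][ Metadata * capacity ][ padding ][ Entry * capacity ]
        //               ^ self.metadata points here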
        /// Pointer to the metadata.
        metadata: ?[*]Metadata = null,

        /// Current number of elements in the hashmap.
        size: Size = 0,

        // Having a countdown to grow reduces the number of instructions to
        // execute when determining if the hashmap has enough capacity already.
        /// Number of available slots before a grow is needed to satisfy the
        /// `max_load_percentage`.
        available: Size = 0,

        // This is purely empirical and not a /very smart magic constant™/.
        /// Capacity of the first grow when bootstrapping the hashmap.
        const minimal_capacity = 8;

        // This hashmap is specially designed for sizes that fit in a u32.
        const Size = u32;

        // u64 hashes guarantee us that the fingerprint bits will never be used
        // to compute the index of a slot, maximizing the use of entropy.
        const Hash = u64;

        pub const Entry = struct {
            key: K,
            value: V,
        };

        const Header = packed struct {
            entries: [*]Entry,
            capacity: Size,
        };

        /// Metadata for a slot. It can be in three states: empty, used or
        /// tombstone. Tombstones indicate that an entry was previously used;
        /// they are a simple way to handle removal.
        /// To this state, we add 7 bits from the slot's key hash. These are
        /// used as a fast way to disambiguate between entries without
        /// having to use the equality function. If two fingerprints are
        /// different, we know that we don't have to compare the keys at all.
        /// The 7 bits are the highest ones from a 64 bit hash. This way, not
        /// only do we use the `log2(capacity)` lowest bits from the hash to
        /// determine a slot index, but we also use 7 more bits to quickly
        /// resolve collisions when multiple elements with different hashes
        /// end up wanting to be in the same slot.
        /// Not using the equality function means we don't have to read into
        /// the entries array, likely avoiding a cache miss and a potentially
        /// costly function call.
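        /// For example, with a 64-slot table and a 64-bit hash `h`, the slot
        /// index is `h & (64 - 1)` (the lowest 6 bits) and the fingerprint is
        /// `h >> (64 - 7)` (the highest 7 bits).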
        const Metadata = packed struct {
            const FingerPrint = u7;

            const free: FingerPrint = 0;
            const tombstone: FingerPrint = 1;

            fingerprint: FingerPrint = free,
            used: u1 = 0,

            pub fn isUsed(self: Metadata) bool {
                return self.used == 1;
            }

            pub fn isTombstone(self: Metadata) bool {
                return !self.isUsed() and self.fingerprint == tombstone;
            }

            pub fn takeFingerprint(hash: Hash) FingerPrint {
                const hash_bits = @typeInfo(Hash).Int.bits;
                const fp_bits = @typeInfo(FingerPrint).Int.bits;
                return @truncate(FingerPrint, hash >> (hash_bits - fp_bits));
            }

            pub fn fill(self: *Metadata, fp: FingerPrint) void {
                self.used = 1;
                self.fingerprint = fp;
            }

            pub fn remove(self: *Metadata) void {
                self.used = 0;
                self.fingerprint = tombstone;
            }
        };

        comptime {
            assert(@sizeOf(Metadata) == 1);
            assert(@alignOf(Metadata) == 1);
        }

        const Iterator = struct {
            hm: *const Self,
            index: Size = 0,

            pub fn next(it: *Iterator) ?*Entry {
                assert(it.index <= it.hm.capacity());
                if (it.hm.size == 0) return null;

                const cap = it.hm.capacity();
                const end = it.hm.metadata.? + cap;
                var metadata = it.hm.metadata.? + it.index;

                while (metadata != end) : ({
                    metadata += 1;
                    it.index += 1;
                }) {
                    if (metadata[0].isUsed()) {
                        const entry = &it.hm.entries()[it.index];
                        it.index += 1;
                        return entry;
                    }
                }

                return null;
            }
        };

        pub const GetOrPutResult = struct {
            entry: *Entry,
            found_existing: bool,
        };

        pub const Managed = HashMap(K, V, hashFn, eqlFn, max_load_percentage);

        pub fn promote(self: Self, allocator: *Allocator) Managed {
            return .{
                .unmanaged = self,
                .allocator = allocator,
            };
        }

        fn isUnderMaxLoadPercentage(size: Size, cap: Size) bool {
            return size * 100 < max_load_percentage * cap;
        }

        pub fn deinit(self: *Self, allocator: *Allocator) void {
            self.deallocate(allocator);
            self.* = undefined;
        }

        fn deallocate(self: *Self, allocator: *Allocator) void {
            if (self.metadata == null) return;

            const cap = self.capacity();
            const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata);

            const alignment = @alignOf(Entry) - 1;
            const entries_size = @as(usize, cap) * @sizeOf(Entry) + alignment;

            const total_size = meta_size + entries_size;

            var slice: []u8 = undefined;
            slice.ptr = @intToPtr([*]u8, @ptrToInt(self.header()));
            slice.len = total_size;
            allocator.free(slice);

            self.metadata = null;
            self.available = 0;
        }
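
        // The smallest power-of-two capacity that can hold `size` elements
        // under `max_load_percentage`. For example, with the default 80%,
        // capacityForSize(100) computes (100 * 100) / 80 + 1 = 126 and rounds
        // up to 128.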
        fn capacityForSize(size: Size) Size {
            var new_cap = @truncate(u32, (@as(u64, size) * 100) / max_load_percentage + 1);
            new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable;
            return new_cap;
        }

        pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_size: Size) !void {
            if (new_size > self.size)
                try self.growIfNeeded(allocator, new_size - self.size);
        }

        pub fn clearRetainingCapacity(self: *Self) void {
            if (self.metadata) |_| {
                self.initMetadatas();
                self.size = 0;
                self.available = @truncate(u32, (self.capacity() * max_load_percentage) / 100);
            }
        }

        pub fn clearAndFree(self: *Self, allocator: *Allocator) void {
            self.deallocate(allocator);
            self.size = 0;
            self.available = 0;
        }

        pub fn count(self: *const Self) Size {
            return self.size;
        }

        fn header(self: *const Self) *Header {
            return @ptrCast(*Header, @ptrCast([*]Header, self.metadata.?) - 1);
        }

        fn entries(self: *const Self) [*]Entry {
            return self.header().entries;
        }

        pub fn capacity(self: *const Self) Size {
            if (self.metadata == null) return 0;

            return self.header().capacity;
        }

        pub fn iterator(self: *const Self) Iterator {
            return .{ .hm = self };
        }

        /// Insert an entry in the map. Assumes it is not already present.
        pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void {
            assert(!self.contains(key));
            try self.growIfNeeded(allocator, 1);

            self.putAssumeCapacityNoClobber(key, value);
        }

        /// Asserts there is enough capacity to store the new key-value pair.
        /// Clobbers any existing data. To detect if a put would clobber
        /// existing data, see `getOrPutAssumeCapacity`.
        pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
            const gop = self.getOrPutAssumeCapacity(key);
            gop.entry.value = value;
        }

        /// Insert an entry in the map. Assumes it is not already present,
        /// and that no allocation is needed.
        pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
            assert(!self.contains(key));

            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            var idx = @truncate(usize, hash & mask);

            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed()) {
                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            if (!metadata[0].isTombstone()) {
                assert(self.available > 0);
                self.available -= 1;
            }

            const fingerprint = Metadata.takeFingerprint(hash);
            metadata[0].fill(fingerprint);
            self.entries()[idx] = Entry{ .key = key, .value = value };

            self.size += 1;
        }

        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
        pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry {
            const gop = try self.getOrPut(allocator, key);
            var result: ?Entry = null;
            if (gop.found_existing) {
                result = gop.entry.*;
            }
            gop.entry.value = value;
            return result;
        }

        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
        /// If insertion happens, asserts there is enough capacity without allocating.
        pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
            const gop = self.getOrPutAssumeCapacity(key);
            var result: ?Entry = null;
            if (gop.found_existing) {
                result = gop.entry.*;
            }
            gop.entry.value = value;
            return result;
        }

        pub fn getEntry(self: Self, key: K) ?*Entry {
            if (self.size == 0) {
                return null;
            }

            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            const fingerprint = Metadata.takeFingerprint(hash);
            var idx = @truncate(usize, hash & mask);

            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed() or metadata[0].isTombstone()) {
                if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
                    const entry = &self.entries()[idx];
                    if (eqlFn(entry.key, key)) {
                        return entry;
                    }
                }
                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            return null;
        }

        /// Insert an entry if the associated key is not already present, otherwise update preexisting value.
        pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void {
            const result = try self.getOrPut(allocator, key);
            result.entry.value = value;
        }

        /// Get a copy of the value associated with key, if present.
        pub fn get(self: Self, key: K) ?V {
            if (self.size == 0) {
                return null;
            }

            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            const fingerprint = Metadata.takeFingerprint(hash);
            var idx = @truncate(usize, hash & mask);

            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed() or metadata[0].isTombstone()) {
                if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
                    const entry = &self.entries()[idx];
                    if (eqlFn(entry.key, key)) {
                        return entry.value;
                    }
                }
                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            return null;
        }

        pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult {
            try self.growIfNeeded(allocator, 1);

            return self.getOrPutAssumeCapacity(key);
        }

        pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            const fingerprint = Metadata.takeFingerprint(hash);
            var idx = @truncate(usize, hash & mask);

            var first_tombstone_idx: usize = self.capacity(); // invalid index
            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed() or metadata[0].isTombstone()) {
                if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
                    const entry = &self.entries()[idx];
                    if (eqlFn(entry.key, key)) {
                        return GetOrPutResult{ .entry = entry, .found_existing = true };
                    }
                } else if (first_tombstone_idx == self.capacity() and metadata[0].isTombstone()) {
                    first_tombstone_idx = idx;
                }

                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            if (first_tombstone_idx < self.capacity()) {
                // A cheap attempt to lower probe lengths after deletions: recycle a tombstone.
                idx = first_tombstone_idx;
                metadata = self.metadata.? + idx;
            } else {
                // We're using a previously free slot.
                self.available -= 1;
            }

            metadata[0].fill(fingerprint);
            const entry = &self.entries()[idx];
            entry.* = .{ .key = key, .value = undefined };
            self.size += 1;

            return GetOrPutResult{ .entry = entry, .found_existing = false };
        }

        pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry {
            const res = try self.getOrPut(allocator, key);
            if (!res.found_existing) res.entry.value = value;
            return res.entry;
        }

        /// Return true if there is a value associated with key in the map.
        pub fn contains(self: *const Self, key: K) bool {
            return self.get(key) != null;
        }

        /// If there is an `Entry` with a matching key, it is deleted from
        /// the hash map, and then returned from this function.
        pub fn remove(self: *Self, key: K) ?Entry {
            if (self.size == 0) return null;

            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            const fingerprint = Metadata.takeFingerprint(hash);
            var idx = @truncate(usize, hash & mask);

            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed() or metadata[0].isTombstone()) {
                if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
                    const entry = &self.entries()[idx];
                    if (eqlFn(entry.key, key)) {
                        const removed_entry = entry.*;
                        metadata[0].remove();
                        entry.* = undefined;
                        self.size -= 1;
                        return removed_entry;
                    }
                }
                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            return null;
        }

        /// Asserts there is an `Entry` with matching key, deletes it from the hash map,
        /// and discards it.
        pub fn removeAssertDiscard(self: *Self, key: K) void {
            assert(self.contains(key));

            const hash = hashFn(key);
            const mask = self.capacity() - 1;
            const fingerprint = Metadata.takeFingerprint(hash);
            var idx = @truncate(usize, hash & mask);

            var metadata = self.metadata.? + idx;
            while (metadata[0].isUsed() or metadata[0].isTombstone()) {
                if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
                    const entry = &self.entries()[idx];
                    if (eqlFn(entry.key, key)) {
                        metadata[0].remove();
                        entry.* = undefined;
                        self.size -= 1;
                        return;
                    }
                }
                idx = (idx + 1) & mask;
                metadata = self.metadata.? + idx;
            }

            unreachable;
        }

        fn initMetadatas(self: *Self) void {
            @memset(@ptrCast([*]u8, self.metadata.?), 0, @sizeOf(Metadata) * self.capacity());
        }

        // This counts the number of occupied slots (used + tombstones), which is
        // what has to stay under max_load_percentage of the capacity.
        fn load(self: *const Self) Size {
            const max_load = (self.capacity() * max_load_percentage) / 100;
            assert(max_load >= self.available);
            return @truncate(Size, max_load - self.available);
        }

        fn growIfNeeded(self: *Self, allocator: *Allocator, new_count: Size) !void {
            if (new_count > self.available) {
                try self.grow(allocator, capacityForSize(self.load() + new_count));
            }
        }

        pub fn clone(self: Self, allocator: *Allocator) !Self {
            var other = Self{};
            if (self.size == 0)
                return other;

            const new_cap = capacityForSize(self.size);
            try other.allocate(allocator, new_cap);
            other.initMetadatas();
            other.available = @truncate(u32, (new_cap * max_load_percentage) / 100);

            var i: Size = 0;
            var metadata = self.metadata.?;
            var entr = self.entries();
            while (i < self.capacity()) : (i += 1) {
                if (metadata[i].isUsed()) {
                    const entry = &entr[i];
                    other.putAssumeCapacityNoClobber(entry.key, entry.value);
                    if (other.size == self.size)
                        break;
                }
            }

            return other;
        }

        fn grow(self: *Self, allocator: *Allocator, new_capacity: Size) !void {
            const new_cap = std.math.max(new_capacity, minimal_capacity);
            assert(new_cap > self.capacity());
            assert(std.math.isPowerOfTwo(new_cap));

            var map = Self{};
            defer map.deinit(allocator);
            try map.allocate(allocator, new_cap);
            map.initMetadatas();
            map.available = @truncate(u32, (new_cap * max_load_percentage) / 100);

            if (self.size != 0) {
                const old_capacity = self.capacity();
                var i: Size = 0;
                var metadata = self.metadata.?;
                var entr = self.entries();
                while (i < old_capacity) : (i += 1) {
                    if (metadata[i].isUsed()) {
                        const entry = &entr[i];
                        map.putAssumeCapacityNoClobber(entry.key, entry.value);
                        if (map.size == self.size)
                            break;
                    }
                }
            }

            self.size = 0;
            std.mem.swap(Self, self, &map);
        }

        fn allocate(self: *Self, allocator: *Allocator, new_capacity: Size) !void {
            const meta_size = @sizeOf(Header) + new_capacity * @sizeOf(Metadata);

            const alignment = @alignOf(Entry) - 1;
            const entries_size = @as(usize, new_capacity) * @sizeOf(Entry) + alignment;

            const total_size = meta_size + entries_size;

            const slice = try allocator.alignedAlloc(u8, @alignOf(Header), total_size);
            const ptr = @ptrToInt(slice.ptr);

            const metadata = ptr + @sizeOf(Header);
            var entry_ptr = ptr + meta_size;
            entry_ptr = (entry_ptr + alignment) & ~@as(usize, alignment);
            assert(entry_ptr + @as(usize, new_capacity) * @sizeOf(Entry) <= ptr + total_size);

            const hdr = @intToPtr(*Header, ptr);
            hdr.entries = @intToPtr([*]Entry, entry_ptr);
            hdr.capacity = new_capacity;
            self.metadata = @intToPtr([*]Metadata, metadata);
        }
    };
}
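
// A short sketch of direct unmanaged usage (illustrative test, not upstream):
// the struct is zero-initialized and every allocating call takes the
// allocator explicitly, mirroring the managed wrapper above.
test "HashMapUnmanaged sample" {
    var map = AutoHashMapUnmanaged(u32, u32){};
    defer map.deinit(std.testing.allocator);
    try map.put(std.testing.allocator, 1, 10);
    try std.testing.expect(map.get(1).? == 10);
}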

const testing = std.testing;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;

test "std.hash_map basic usage" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    const count = 5;
    var i: u32 = 0;
    var total: u32 = 0;
    while (i < count) : (i += 1) {
        try map.put(i, i);
        total += i;
    }

    var sum: u32 = 0;
    var it = map.iterator();
    while (it.next()) |kv| {
        sum += kv.key;
    }
    try expect(sum == total);

    i = 0;
    sum = 0;
    while (i < count) : (i += 1) {
        try expectEqual(map.get(i).?, i);
        sum += map.get(i).?;
    }
    try expectEqual(total, sum);
}

test "std.hash_map ensureCapacity" {
    var map = AutoHashMap(i32, i32).init(std.testing.allocator);
    defer map.deinit();

    try map.ensureCapacity(20);
    const initial_capacity = map.capacity();
    try testing.expect(initial_capacity >= 20);
    var i: i32 = 0;
    while (i < 20) : (i += 1) {
        try testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null);
    }
    // shouldn't resize from putAssumeCapacity
    try testing.expect(initial_capacity == map.capacity());
}

test "std.hash_map ensureCapacity with tombstones" {
    var map = AutoHashMap(i32, i32).init(std.testing.allocator);
    defer map.deinit();

    var i: i32 = 0;
    while (i < 100) : (i += 1) {
        try map.ensureCapacity(@intCast(u32, map.count() + 1));
        map.putAssumeCapacity(i, i);
        // Remove to create tombstones that still count as load in the hashmap.
        _ = map.remove(i);
    }
}

test "std.hash_map clearRetainingCapacity" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    map.clearRetainingCapacity();

    try map.put(1, 1);
    try expectEqual(map.get(1).?, 1);
    try expectEqual(map.count(), 1);

    map.clearRetainingCapacity();
    map.putAssumeCapacity(1, 1);
    try expectEqual(map.get(1).?, 1);
    try expectEqual(map.count(), 1);

    const cap = map.capacity();
    try expect(cap > 0);

    map.clearRetainingCapacity();
    map.clearRetainingCapacity();
    try expectEqual(map.count(), 0);
    try expectEqual(map.capacity(), cap);
    try expect(!map.contains(1));
}

test "std.hash_map grow" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    const growTo = 12456;

    var i: u32 = 0;
    while (i < growTo) : (i += 1) {
        try map.put(i, i);
    }
    try expectEqual(map.count(), growTo);

    i = 0;
    var it = map.iterator();
    while (it.next()) |kv| {
        try expectEqual(kv.key, kv.value);
        i += 1;
    }
    try expectEqual(i, growTo);

    i = 0;
    while (i < growTo) : (i += 1) {
        try expectEqual(map.get(i).?, i);
    }
}

test "std.hash_map clone" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var a = try map.clone();
    defer a.deinit();

    try expectEqual(a.count(), 0);

    try a.put(1, 1);
    try a.put(2, 2);
    try a.put(3, 3);

    var b = try a.clone();
    defer b.deinit();

    try expectEqual(b.count(), 3);
    try expectEqual(b.get(1), 1);
    try expectEqual(b.get(2), 2);
    try expectEqual(b.get(3), 3);
}

test "std.hash_map ensureCapacity with existing elements" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    try map.put(0, 0);
    try expectEqual(map.count(), 1);
    try expectEqual(map.capacity(), @TypeOf(map).Unmanaged.minimal_capacity);

    try map.ensureCapacity(65);
    try expectEqual(map.count(), 1);
    try expectEqual(map.capacity(), 128);
}

test "std.hash_map ensureCapacity satisfies max load factor" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    try map.ensureCapacity(127);
    try expectEqual(map.capacity(), 256);
}

test "std.hash_map remove" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var i: u32 = 0;
    while (i < 16) : (i += 1) {
        try map.put(i, i);
    }

    i = 0;
    while (i < 16) : (i += 1) {
        if (i % 3 == 0) {
            _ = map.remove(i);
        }
    }
    try expectEqual(map.count(), 10);
    var it = map.iterator();
    while (it.next()) |kv| {
        try expectEqual(kv.key, kv.value);
        try expect(kv.key % 3 != 0);
    }

    i = 0;
    while (i < 16) : (i += 1) {
        if (i % 3 == 0) {
            try expect(!map.contains(i));
        } else {
            try expectEqual(map.get(i).?, i);
        }
    }
}

test "std.hash_map reverse removes" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var i: u32 = 0;
    while (i < 16) : (i += 1) {
        try map.putNoClobber(i, i);
    }

    i = 16;
    while (i > 0) : (i -= 1) {
        _ = map.remove(i - 1);
        try expect(!map.contains(i - 1));
        var j: u32 = 0;
        while (j < i - 1) : (j += 1) {
            try expectEqual(map.get(j).?, j);
        }
    }

    try expectEqual(map.count(), 0);
}

test "std.hash_map multiple removes on same metadata" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var i: u32 = 0;
    while (i < 16) : (i += 1) {
        try map.put(i, i);
    }

    _ = map.remove(7);
    _ = map.remove(15);
    _ = map.remove(14);
    _ = map.remove(13);
    try expect(!map.contains(7));
    try expect(!map.contains(15));
    try expect(!map.contains(14));
    try expect(!map.contains(13));

    i = 0;
    while (i < 13) : (i += 1) {
        if (i == 7) {
            try expect(!map.contains(i));
        } else {
            try expectEqual(map.get(i).?, i);
        }
    }

    try map.put(15, 15);
    try map.put(13, 13);
    try map.put(14, 14);
    try map.put(7, 7);
    i = 0;
    while (i < 16) : (i += 1) {
        try expectEqual(map.get(i).?, i);
    }
}

test "std.hash_map put and remove loop in random order" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var keys = std.ArrayList(u32).init(std.testing.allocator);
    defer keys.deinit();

    const size = 32;
    const iterations = 100;

    var i: u32 = 0;
    while (i < size) : (i += 1) {
        try keys.append(i);
    }
    var rng = std.rand.DefaultPrng.init(0);

    i = 0;
    while (i < iterations) : (i += 1) {
        std.rand.Random.shuffle(&rng.random, u32, keys.items);

        for (keys.items) |key| {
            try map.put(key, key);
        }
        try expectEqual(map.count(), size);

        for (keys.items) |key| {
            _ = map.remove(key);
        }
        try expectEqual(map.count(), 0);
    }
}

test "std.hash_map remove one million elements in random order" {
    const Map = AutoHashMap(u32, u32);
    const n = 1000 * 1000;
    var map = Map.init(std.heap.page_allocator);
    defer map.deinit();

    var keys = std.ArrayList(u32).init(std.heap.page_allocator);
    defer keys.deinit();

    var i: u32 = 0;
    while (i < n) : (i += 1) {
        keys.append(i) catch unreachable;
    }

    var rng = std.rand.DefaultPrng.init(0);
    std.rand.Random.shuffle(&rng.random, u32, keys.items);

    for (keys.items) |key| {
        map.put(key, key) catch unreachable;
    }

    std.rand.Random.shuffle(&rng.random, u32, keys.items);
    i = 0;
    while (i < n) : (i += 1) {
        const key = keys.items[i];
        _ = map.remove(key);
    }
}

test "std.hash_map put" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var i: u32 = 0;
    while (i < 16) : (i += 1) {
        try map.put(i, i);
    }

    i = 0;
    while (i < 16) : (i += 1) {
        try expectEqual(map.get(i).?, i);
    }

    i = 0;
    while (i < 16) : (i += 1) {
        try map.put(i, i * 16 + 1);
    }

    i = 0;
    while (i < 16) : (i += 1) {
        try expectEqual(map.get(i).?, i * 16 + 1);
    }
}

test "std.hash_map putAssumeCapacity" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    try map.ensureCapacity(20);
    var i: u32 = 0;
    while (i < 20) : (i += 1) {
        map.putAssumeCapacityNoClobber(i, i);
    }

    i = 0;
    var sum = i;
    while (i < 20) : (i += 1) {
        sum += map.get(i).?;
    }
    try expectEqual(sum, 190);

    i = 0;
    while (i < 20) : (i += 1) {
        map.putAssumeCapacity(i, 1);
    }

    i = 0;
    sum = i;
    while (i < 20) : (i += 1) {
        sum += map.get(i).?;
    }
    try expectEqual(sum, 20);
}

test "std.hash_map getOrPut" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    var i: u32 = 0;
    while (i < 10) : (i += 1) {
        try map.put(i * 2, 2);
    }

    i = 0;
    while (i < 20) : (i += 1) {
        _ = try map.getOrPutValue(i, 1);
    }

    i = 0;
    var sum = i;
    while (i < 20) : (i += 1) {
        sum += map.get(i).?;
    }

    try expectEqual(sum, 30);
}

test "std.hash_map basic hash map usage" {
    var map = AutoHashMap(i32, i32).init(std.testing.allocator);
    defer map.deinit();

    try testing.expect((try map.fetchPut(1, 11)) == null);
    try testing.expect((try map.fetchPut(2, 22)) == null);
    try testing.expect((try map.fetchPut(3, 33)) == null);
    try testing.expect((try map.fetchPut(4, 44)) == null);

    try map.putNoClobber(5, 55);
    try testing.expect((try map.fetchPut(5, 66)).?.value == 55);
    try testing.expect((try map.fetchPut(5, 55)).?.value == 66);

    const gop1 = try map.getOrPut(5);
    try testing.expect(gop1.found_existing == true);
    try testing.expect(gop1.entry.value == 55);
    gop1.entry.value = 77;
    try testing.expect(map.getEntry(5).?.value == 77);

    const gop2 = try map.getOrPut(99);
    try testing.expect(gop2.found_existing == false);
    gop2.entry.value = 42;
    try testing.expect(map.getEntry(99).?.value == 42);

    const gop3 = try map.getOrPutValue(5, 5);
    try testing.expect(gop3.value == 77);

    const gop4 = try map.getOrPutValue(100, 41);
    try testing.expect(gop4.value == 41);

    try testing.expect(map.contains(2));
    try testing.expect(map.getEntry(2).?.value == 22);
    try testing.expect(map.get(2).? == 22);

    const rmv1 = map.remove(2);
    try testing.expect(rmv1.?.key == 2);
    try testing.expect(rmv1.?.value == 22);
    try testing.expect(map.remove(2) == null);
    try testing.expect(map.getEntry(2) == null);
    try testing.expect(map.get(2) == null);

    map.removeAssertDiscard(3);
}

test "std.hash_map clone" {
    var original = AutoHashMap(i32, i32).init(std.testing.allocator);
    defer original.deinit();

    var i: u8 = 0;
    while (i < 10) : (i += 1) {
        try original.putNoClobber(i, i * 10);
    }

    var copy = try original.clone();
    defer copy.deinit();

    i = 0;
    while (i < 10) : (i += 1) {
        try testing.expect(copy.get(i).? == i * 10);
    }
}