// lib/std/array_hash_map.zig

// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("std.zig");
const debug = std.debug;
const assert = debug.assert;
const testing = std.testing;
const math = std.math;
const mem = std.mem;
const meta = std.meta;
const trait = meta.trait;
const autoHash = std.hash.autoHash;
const Wyhash = std.hash.Wyhash;
const Allocator = mem.Allocator;
const builtin = std.builtin;
const hash_map = @This();
pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type {
return ArrayHashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), !autoEqlIsCheap(K));
}
pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type {
return ArrayHashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), !autoEqlIsCheap(K));
}
/// Built-in hash map keyed by strings.
pub fn StringArrayHashMap(comptime V: type) type {
return ArrayHashMap([]const u8, V, hashString, eqlString, true);
}
pub fn StringArrayHashMapUnmanaged(comptime V: type) type {
return ArrayHashMapUnmanaged([]const u8, V, hashString, eqlString, true);
}
pub fn eqlString(a: []const u8, b: []const u8) bool {
return mem.eql(u8, a, b);
}
pub fn hashString(s: []const u8) u32 {
return @truncate(u32, std.hash.Wyhash.hash(0, s));
}
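// Illustrative sketch: a minimal use of the string-keyed map declared above.
// `hashString`/`eqlString` are wired in by `StringArrayHashMap`; nothing
// beyond this file and `std.testing` is assumed.
test "illustrative: string keys" {
    var map = StringArrayHashMap(u32).init(std.testing.allocator);
    defer map.deinit();
    try map.put("hello", 1);
    try map.put("world", 2);
    try testing.expect(map.get("hello").? == 1);
    try testing.expect(map.contains("world"));
}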
/// Insertion order is preserved.
/// Deletions perform a "swap removal" on the entries list.
/// Modifying the hash map while iterating is allowed; however, one must understand
/// the (well-defined) behavior when mixing insertions and deletions with iteration.
/// For a hash map that can be initialized directly that does not store an Allocator
/// field, see `ArrayHashMapUnmanaged`.
/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
/// functions. It does not store each item's hash in the table. Setting `store_hash`
/// to `true` incurs slightly more memory cost by storing each key's hash in the table
/// but only has to call `eql` for hash collisions.
/// If typical operations (except iteration over entries) need to be faster, prefer
/// the alternative `std.HashMap`.
pub fn ArrayHashMap(
comptime K: type,
comptime V: type,
comptime hash: fn (key: K) u32,
comptime eql: fn (a: K, b: K) bool,
comptime store_hash: bool,
) type {
return struct {
unmanaged: Unmanaged,
allocator: *Allocator,
pub const Unmanaged = ArrayHashMapUnmanaged(K, V, hash, eql, store_hash);
pub const Entry = Unmanaged.Entry;
pub const Hash = Unmanaged.Hash;
pub const GetOrPutResult = Unmanaged.GetOrPutResult;
/// Deprecated. Iterate using `items`.
pub const Iterator = struct {
hm: *const Self,
/// Iterator through the entry array.
index: usize,
pub fn next(it: *Iterator) ?*Entry {
if (it.index >= it.hm.unmanaged.entries.items.len) return null;
const result = &it.hm.unmanaged.entries.items[it.index];
it.index += 1;
return result;
}
/// Reset the iterator to the initial index
pub fn reset(it: *Iterator) void {
it.index = 0;
}
};
const Self = @This();
const Index = Unmanaged.Index;
pub fn init(allocator: *Allocator) Self {
return .{
.unmanaged = .{},
.allocator = allocator,
};
}
/// `ArrayHashMap` takes ownership of the passed in array list. The array list must have
/// been allocated with `allocator`.
/// Deinitialize with `deinit`.
pub fn fromOwnedArrayList(allocator: *Allocator, entries: std.ArrayListUnmanaged(Entry)) !Self {
return Self{
.unmanaged = try Unmanaged.fromOwnedArrayList(allocator, entries),
.allocator = allocator,
};
}
pub fn deinit(self: *Self) void {
self.unmanaged.deinit(self.allocator);
self.* = undefined;
}
pub fn clearRetainingCapacity(self: *Self) void {
return self.unmanaged.clearRetainingCapacity();
}
pub fn clearAndFree(self: *Self) void {
return self.unmanaged.clearAndFree(self.allocator);
}
pub fn count(self: Self) usize {
return self.unmanaged.count();
}
pub fn iterator(self: *const Self) Iterator {
return Iterator{
.hm = self,
.index = 0,
};
}
/// If the key exists, this function cannot fail.
/// If there is an existing item with `key`, then the result
/// `Entry` pointer points to it, and found_existing is true.
/// Otherwise, puts a new item with undefined value, and
/// the `Entry` pointer points to it. Caller should then initialize
/// the value (but not the key).
pub fn getOrPut(self: *Self, key: K) !GetOrPutResult {
return self.unmanaged.getOrPut(self.allocator, key);
}
/// If there is an existing item with `key`, then the result
/// `Entry` pointer points to it, and found_existing is true.
/// Otherwise, puts a new item with undefined value, and
/// the `Entry` pointer points to it. Caller should then initialize
/// the value (but not the key).
/// If a new entry needs to be stored, this function asserts there
/// is enough capacity to store it.
pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
return self.unmanaged.getOrPutAssumeCapacity(key);
}
pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry {
return self.unmanaged.getOrPutValue(self.allocator, key, value);
}
/// Deprecated: call `ensureUnusedCapacity` or `ensureTotalCapacity`.
pub const ensureCapacity = ensureTotalCapacity;
/// Increases capacity, guaranteeing that insertions up until
/// `new_capacity` total items will not cause an allocation, and therefore cannot fail.
pub fn ensureTotalCapacity(self: *Self, new_capacity: usize) !void {
return self.unmanaged.ensureTotalCapacity(self.allocator, new_capacity);
}
/// Increases capacity, guaranteeing that insertions up until
/// `additional_count` **more** items will not cause an allocation, and
/// therefore cannot fail.
pub fn ensureUnusedCapacity(self: *Self, additional_count: usize) !void {
return self.unmanaged.ensureUnusedCapacity(self.allocator, additional_count);
}
/// Returns the total number of elements which may be present before it is
/// no longer guaranteed that no allocations will be performed.
pub fn capacity(self: *Self) usize {
return self.unmanaged.capacity();
}
/// Clobbers any existing data. To detect if a put would clobber
/// existing data, see `getOrPut`.
pub fn put(self: *Self, key: K, value: V) !void {
return self.unmanaged.put(self.allocator, key, value);
}
/// Inserts a key-value pair into the hash map, asserting that no previous
/// entry with the same key is already present.
pub fn putNoClobber(self: *Self, key: K, value: V) !void {
return self.unmanaged.putNoClobber(self.allocator, key, value);
}
/// Asserts there is enough capacity to store the new key-value pair.
/// Clobbers any existing data. To detect if a put would clobber
/// existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
return self.unmanaged.putAssumeCapacity(key, value);
}
/// Asserts there is enough capacity to store the new key-value pair.
/// Asserts that it does not clobber any existing data.
/// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
return self.unmanaged.putAssumeCapacityNoClobber(key, value);
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
pub fn fetchPut(self: *Self, key: K, value: V) !?Entry {
return self.unmanaged.fetchPut(self.allocator, key, value);
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
/// If insertion happens, asserts there is enough capacity without allocating.
pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
return self.unmanaged.fetchPutAssumeCapacity(key, value);
}
pub fn getEntry(self: Self, key: K) ?*Entry {
return self.unmanaged.getEntry(key);
}
pub fn getIndex(self: Self, key: K) ?usize {
return self.unmanaged.getIndex(key);
}
pub fn get(self: Self, key: K) ?V {
return self.unmanaged.get(key);
}
pub fn contains(self: Self, key: K) bool {
return self.unmanaged.contains(key);
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and then returned from this function. The entry is
/// removed from the underlying array by swapping it with the last
/// element.
pub fn swapRemove(self: *Self, key: K) ?Entry {
return self.unmanaged.swapRemove(key);
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and then returned from this function. The entry is
/// removed from the underlying array by shifting all elements forward
/// thereby maintaining the current ordering.
pub fn orderedRemove(self: *Self, key: K) ?Entry {
return self.unmanaged.orderedRemove(key);
}
/// Deprecated: call `swapRemoveAssertDiscard` instead.
pub fn removeAssertDiscard(self: *Self, key: K) void {
return self.unmanaged.removeAssertDiscard(key);
}
/// Asserts there is an `Entry` with matching key, deletes it from the hash map
/// by swapping it with the last element, and discards it.
pub fn swapRemoveAssertDiscard(self: *Self, key: K) void {
return self.unmanaged.swapRemoveAssertDiscard(key);
}
/// Asserts there is an `Entry` with matching key, deletes it from the hash map
/// by shifting all elements forward, thereby maintaining the current ordering.
pub fn orderedRemoveAssertDiscard(self: *Self, key: K) void {
return self.unmanaged.orderedRemoveAssertDiscard(key);
}
pub fn items(self: Self) []Entry {
return self.unmanaged.items();
}
pub fn clone(self: Self) !Self {
var other = try self.unmanaged.clone(self.allocator);
return other.promote(self.allocator);
}
/// Rebuilds the key indexes. If the underlying entries list has been modified
/// directly, users can call `reIndex` to update the indexes to account for these
/// new entries.
pub fn reIndex(self: *Self) !void {
return self.unmanaged.reIndex(self.allocator);
}
/// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated
/// index entries. Keeps capacity the same.
pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
return self.unmanaged.shrinkRetainingCapacity(new_len);
}
/// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated
/// index entries. Reduces allocated capacity.
pub fn shrinkAndFree(self: *Self, new_len: usize) void {
return self.unmanaged.shrinkAndFree(self.allocator, new_len);
}
/// Removes the last inserted `Entry` in the hash map and returns it.
pub fn pop(self: *Self) Entry {
return self.unmanaged.pop();
}
};
}
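// Illustrative sketch of the `getOrPut` contract documented above: on a miss,
// `found_existing` is false, the key is stored, and the caller initializes
// the (undefined) value.
test "illustrative: getOrPut initializes value on miss" {
    var map = AutoArrayHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();
    const gop = try map.getOrPut(42);
    if (!gop.found_existing) gop.entry.value = 1; // caller-side initialization
    try testing.expect(map.get(42).? == 1);
}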
/// General purpose hash table.
/// Insertion order is preserved.
/// Deletions perform a "swap removal" on the entries list.
/// Modifying the hash map while iterating is allowed; however, one must understand
/// the (well-defined) behavior when mixing insertions and deletions with iteration.
/// This type does not store an Allocator field - the Allocator must be passed in
/// with each function call that requires it. See `ArrayHashMap` for a type that stores
/// an Allocator field for convenience.
/// Can be initialized directly using the default field values.
/// This type is designed to have low overhead for small numbers of entries. When
/// `store_hash` is `false` and the number of entries in the map is less than 9,
/// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is
/// only a single pointer-sized integer.
/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
/// functions. It does not store each item's hash in the table. Setting `store_hash`
/// to `true` incurs slightly more memory cost by storing each key's hash in the table
/// but guarantees only one call to `eql` per insertion/deletion.
pub fn ArrayHashMapUnmanaged(
comptime K: type,
comptime V: type,
comptime hash: fn (key: K) u32,
comptime eql: fn (a: K, b: K) bool,
comptime store_hash: bool,
) type {
return struct {
/// It is permitted to access this field directly.
entries: std.ArrayListUnmanaged(Entry) = .{},
/// When entries length is less than `linear_scan_max`, this remains `null`.
/// Once entries length grows big enough, this field is allocated. There is
/// an IndexHeader followed by an array of Index(I) structs, where I is defined
/// by how many total indexes there are.
index_header: ?*IndexHeader = null,
/// Modifying the key is illegal behavior.
/// Modifying the value is allowed.
/// Entry pointers become invalid whenever this ArrayHashMap is modified,
/// unless `ensureCapacity` was previously used.
pub const Entry = struct {
/// This field is `void` if `store_hash` is `false`.
hash: Hash,
key: K,
value: V,
};
pub const Hash = if (store_hash) u32 else void;
pub const GetOrPutResult = struct {
entry: *Entry,
found_existing: bool,
index: usize,
};
pub const Managed = ArrayHashMap(K, V, hash, eql, store_hash);
const Self = @This();
const linear_scan_max = 8;
const RemovalType = enum {
swap,
ordered,
index_only,
};
pub fn promote(self: Self, allocator: *Allocator) Managed {
return .{
.unmanaged = self,
.allocator = allocator,
};
}
/// `ArrayHashMapUnmanaged` takes ownership of the passed in array list. The array list must
/// have been allocated with `allocator`.
/// Deinitialize with `deinit`.
pub fn fromOwnedArrayList(allocator: *Allocator, entries: std.ArrayListUnmanaged(Entry)) !Self {
var array_hash_map = Self{ .entries = entries };
try array_hash_map.reIndex(allocator);
return array_hash_map;
}
pub fn deinit(self: *Self, allocator: *Allocator) void {
self.entries.deinit(allocator);
if (self.index_header) |header| {
header.free(allocator);
}
self.* = undefined;
}
pub fn clearRetainingCapacity(self: *Self) void {
self.entries.items.len = 0;
if (self.index_header) |header| {
header.max_distance_from_start_index = 0;
switch (header.capacityIndexType()) {
.u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty),
.u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty),
.u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty),
.usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty),
}
}
}
pub fn clearAndFree(self: *Self, allocator: *Allocator) void {
self.entries.shrinkAndFree(allocator, 0);
if (self.index_header) |header| {
header.free(allocator);
self.index_header = null;
}
}
pub fn count(self: Self) usize {
return self.entries.items.len;
}
/// If the key exists, this function cannot fail.
/// If there is an existing item with `key`, then the result
/// `Entry` pointer points to it, and found_existing is true.
/// Otherwise, puts a new item with undefined value, and
/// the `Entry` pointer points to it. Caller should then initialize
/// the value (but not the key).
pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult {
self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| {
// "If key exists this function cannot fail."
const index = self.getIndex(key) orelse return err;
return GetOrPutResult{
.entry = &self.entries.items[index],
.found_existing = true,
.index = index,
};
};
return self.getOrPutAssumeCapacity(key);
}
/// If there is an existing item with `key`, then the result
/// `Entry` pointer points to it, and found_existing is true.
/// Otherwise, puts a new item with undefined value, and
/// the `Entry` pointer points to it. Caller should then initialize
/// the value (but not the key).
/// If a new entry needs to be stored, this function asserts there
/// is enough capacity to store it.
pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
const header = self.index_header orelse {
// Linear scan.
const h = if (store_hash) hash(key) else {};
for (self.entries.items) |*item, i| {
if (item.hash == h and eql(key, item.key)) {
return GetOrPutResult{
.entry = item,
.found_existing = true,
.index = i,
};
}
}
const new_entry = self.entries.addOneAssumeCapacity();
new_entry.* = .{
.hash = if (store_hash) h else {},
.key = key,
.value = undefined,
};
return GetOrPutResult{
.entry = new_entry,
.found_existing = false,
.index = self.entries.items.len - 1,
};
};
switch (header.capacityIndexType()) {
.u8 => return self.getOrPutInternal(key, header, u8),
.u16 => return self.getOrPutInternal(key, header, u16),
.u32 => return self.getOrPutInternal(key, header, u32),
.usize => return self.getOrPutInternal(key, header, usize),
}
}
pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry {
const res = try self.getOrPut(allocator, key);
if (!res.found_existing)
res.entry.value = value;
return res.entry;
}
/// Deprecated: call `ensureUnusedCapacity` or `ensureTotalCapacity`.
pub const ensureCapacity = ensureTotalCapacity;
/// Increases capacity, guaranteeing that insertions up until
/// `new_capacity` total items will not cause an allocation, and therefore cannot fail.
pub fn ensureTotalCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
try self.entries.ensureTotalCapacity(allocator, new_capacity);
if (new_capacity <= linear_scan_max) return;
// Ensure that the indexes will be at most 60% full if
// `new_capacity` items are put into it.
const needed_len = new_capacity * 5 / 3;
if (self.index_header) |header| {
if (needed_len > header.indexes_len) {
// An overflow here would mean the amount of memory required would not
// be representable in the address space.
const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable;
const new_header = try IndexHeader.alloc(allocator, new_indexes_len);
self.insertAllEntriesIntoNewHeader(new_header);
header.free(allocator);
self.index_header = new_header;
}
} else {
// An overflow here would mean the amount of memory required would not
// be representable in the address space.
const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable;
const header = try IndexHeader.alloc(allocator, new_indexes_len);
self.insertAllEntriesIntoNewHeader(header);
self.index_header = header;
}
}
/// Increases capacity, guaranteeing that insertions up until
/// `additional_capacity` **more** items will not cause an allocation, and
/// therefore cannot fail.
pub fn ensureUnusedCapacity(
self: *Self,
allocator: *Allocator,
additional_capacity: usize,
) !void {
return self.ensureTotalCapacity(allocator, self.count() + additional_capacity);
}
/// Returns the total number of elements which may be present before it is
/// no longer guaranteed that no allocations will be performed.
pub fn capacity(self: Self) usize {
const entry_cap = self.entries.capacity;
const header = self.index_header orelse return math.min(linear_scan_max, entry_cap);
const indexes_cap = (header.indexes_len + 1) * 3 / 4;
return math.min(entry_cap, indexes_cap);
}
/// Clobbers any existing data. To detect if a put would clobber
/// existing data, see `getOrPut`.
pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void {
const result = try self.getOrPut(allocator, key);
result.entry.value = value;
}
/// Inserts a key-value pair into the hash map, asserting that no previous
/// entry with the same key is already present.
pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void {
const result = try self.getOrPut(allocator, key);
assert(!result.found_existing);
result.entry.value = value;
}
/// Asserts there is enough capacity to store the new key-value pair.
/// Clobbers any existing data. To detect if a put would clobber
/// existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
const result = self.getOrPutAssumeCapacity(key);
result.entry.value = value;
}
/// Asserts there is enough capacity to store the new key-value pair.
/// Asserts that it does not clobber any existing data.
/// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
const result = self.getOrPutAssumeCapacity(key);
assert(!result.found_existing);
result.entry.value = value;
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry {
const gop = try self.getOrPut(allocator, key);
var result: ?Entry = null;
if (gop.found_existing) {
result = gop.entry.*;
}
gop.entry.value = value;
return result;
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
/// If insertion happens, asserts there is enough capacity without allocating.
pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
const gop = self.getOrPutAssumeCapacity(key);
var result: ?Entry = null;
if (gop.found_existing) {
result = gop.entry.*;
}
gop.entry.value = value;
return result;
}
pub fn getEntry(self: Self, key: K) ?*Entry {
const index = self.getIndex(key) orelse return null;
return &self.entries.items[index];
}
pub fn getIndex(self: Self, key: K) ?usize {
const header = self.index_header orelse {
// Linear scan.
const h = if (store_hash) hash(key) else {};
for (self.entries.items) |*item, i| {
if (item.hash == h and eql(key, item.key)) {
return i;
}
}
return null;
};
switch (header.capacityIndexType()) {
.u8 => return self.getInternal(key, header, u8),
.u16 => return self.getInternal(key, header, u16),
.u32 => return self.getInternal(key, header, u32),
.usize => return self.getInternal(key, header, usize),
}
}
pub fn get(self: Self, key: K) ?V {
return if (self.getEntry(key)) |entry| entry.value else null;
}
pub fn contains(self: Self, key: K) bool {
return self.getEntry(key) != null;
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and then returned from this function. The entry is
/// removed from the underlying array by swapping it with the last
/// element.
pub fn swapRemove(self: *Self, key: K) ?Entry {
return self.removeInternal(key, .swap);
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and then returned from this function. The entry is
/// removed from the underlying array by shifting all elements forward
/// thereby maintaining the current ordering.
pub fn orderedRemove(self: *Self, key: K) ?Entry {
return self.removeInternal(key, .ordered);
}
/// Deprecated: call `swapRemoveAssertDiscard` instead.
pub fn removeAssertDiscard(self: *Self, key: K) void {
return self.swapRemoveAssertDiscard(key);
}
/// Asserts there is an `Entry` with matching key, deletes it from the hash map
/// by swapping it with the last element, and discards it.
pub fn swapRemoveAssertDiscard(self: *Self, key: K) void {
assert(self.swapRemove(key) != null);
}
/// Asserts there is an `Entry` with matching key, deletes it from the hash map
/// by shifting all elements forward, thereby maintaining the current ordering.
pub fn orderedRemoveAssertDiscard(self: *Self, key: K) void {
assert(self.orderedRemove(key) != null);
}
pub fn items(self: Self) []Entry {
return self.entries.items;
}
pub fn clone(self: Self, allocator: *Allocator) !Self {
var other: Self = .{};
try other.entries.appendSlice(allocator, self.entries.items);
if (self.index_header) |header| {
const new_header = try IndexHeader.alloc(allocator, header.indexes_len);
other.insertAllEntriesIntoNewHeader(new_header);
other.index_header = new_header;
}
return other;
}
/// Rebuilds the key indexes. If the underlying entries list has been modified
/// directly, users can call `reIndex` to update the indexes to account for these
/// new entries.
pub fn reIndex(self: *Self, allocator: *Allocator) !void {
if (self.entries.capacity <= linear_scan_max) return;
// We're going to rebuild the index header and replace the existing one (if any).
// The indexes should be sized such that they will be at most 60% full.
const needed_len = self.entries.capacity * 5 / 3;
const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable;
const new_header = try IndexHeader.alloc(allocator, new_indexes_len);
self.insertAllEntriesIntoNewHeader(new_header);
if (self.index_header) |header|
header.free(allocator);
self.index_header = new_header;
}
/// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated
/// index entries. Keeps capacity the same.
pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
// Remove index entries from the new length onwards.
// Explicitly choose to ONLY remove index entries and not the underlying array list
// entries as we're going to remove them in the subsequent shrink call.
var i: usize = new_len;
while (i < self.entries.items.len) : (i += 1)
_ = self.removeWithHash(self.entries.items[i].key, self.entries.items[i].hash, .index_only);
self.entries.shrinkRetainingCapacity(new_len);
}
/// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated
/// index entries. Reduces allocated capacity.
pub fn shrinkAndFree(self: *Self, allocator: *Allocator, new_len: usize) void {
// Remove index entries from the new length onwards.
// Explicitly choose to ONLY remove index entries and not the underlying array list
// entries as we're going to remove them in the subsequent shrink call.
var i: usize = new_len;
while (i < self.entries.items.len) : (i += 1)
_ = self.removeWithHash(self.entries.items[i].key, self.entries.items[i].hash, .index_only);
self.entries.shrinkAndFree(allocator, new_len);
}
/// Removes the last inserted `Entry` in the hash map and returns it.
pub fn pop(self: *Self) Entry {
const top = self.entries.items[self.entries.items.len - 1];
_ = self.removeWithHash(top.key, top.hash, .index_only);
self.entries.items.len -= 1;
return top;
}
fn removeInternal(self: *Self, key: K, comptime removal_type: RemovalType) ?Entry {
const key_hash = if (store_hash) hash(key) else {};
return self.removeWithHash(key, key_hash, removal_type);
}
fn removeWithHash(self: *Self, key: K, key_hash: Hash, comptime removal_type: RemovalType) ?Entry {
const header = self.index_header orelse {
// If we're only removing index entries and we have no index header, there's no need
// to continue.
if (removal_type == .index_only) return null;
// Linear scan.
for (self.entries.items) |item, i| {
if (item.hash == key_hash and eql(key, item.key)) {
switch (removal_type) {
.swap => return self.entries.swapRemove(i),
.ordered => return self.entries.orderedRemove(i),
.index_only => unreachable,
}
}
}
return null;
};
switch (header.capacityIndexType()) {
.u8 => return self.removeWithIndex(key, key_hash, header, u8, removal_type),
.u16 => return self.removeWithIndex(key, key_hash, header, u16, removal_type),
.u32 => return self.removeWithIndex(key, key_hash, header, u32, removal_type),
.usize => return self.removeWithIndex(key, key_hash, header, usize, removal_type),
}
}
fn removeWithIndex(self: *Self, key: K, key_hash: Hash, header: *IndexHeader, comptime I: type, comptime removal_type: RemovalType) ?Entry {
const indexes = header.indexes(I);
const h = if (store_hash) key_hash else hash(key);
const start_index = header.constrainIndex(h);
var roll_over: usize = 0;
while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
const index_index = header.constrainIndex(start_index + roll_over);
var index = &indexes[index_index];
if (index.isEmpty())
return null;
const entry = &self.entries.items[index.entry_index];
const hash_match = if (store_hash) h == entry.hash else true;
if (!hash_match or !eql(key, entry.key))
continue;
var removed_entry: ?Entry = undefined;
switch (removal_type) {
.swap => {
removed_entry = self.entries.swapRemove(index.entry_index);
if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) {
// Because of the swap remove, now we need to update the index that was
// pointing to the last entry and is now pointing to this removed item slot.
self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes);
}
},
.ordered => {
removed_entry = self.entries.orderedRemove(index.entry_index);
var i: usize = index.entry_index;
while (i < self.entries.items.len) : (i += 1) {
// Because of the ordered remove, everything from the entry index onwards has
// been shifted forward so we'll need to update the index entries.
self.updateEntryIndex(header, i + 1, i, I, indexes);
}
},
.index_only => removed_entry = null,
}
// Now we have to shift over the following indexes.
roll_over += 1;
while (roll_over < header.indexes_len) : (roll_over += 1) {
const next_index_index = header.constrainIndex(start_index + roll_over);
const next_index = &indexes[next_index_index];
if (next_index.isEmpty() or next_index.distance_from_start_index == 0) {
index.setEmpty();
return removed_entry;
}
index.* = next_index.*;
index.distance_from_start_index -= 1;
index = next_index;
}
unreachable;
}
return null;
}
fn updateEntryIndex(
self: *Self,
header: *IndexHeader,
old_entry_index: usize,
new_entry_index: usize,
comptime I: type,
indexes: []Index(I),
) void {
const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key);
const start_index = header.constrainIndex(h);
var roll_over: usize = 0;
while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
const index_index = header.constrainIndex(start_index + roll_over);
const index = &indexes[index_index];
if (index.entry_index == old_entry_index) {
index.entry_index = @intCast(I, new_entry_index);
return;
}
}
unreachable;
}
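// Note: the probe loop below is Robin Hood-style displacement. Each stored
// index records its distance from its ideal slot; an inserted key that is
// farther from home than a slot's current resident takes that slot, and the
// displaced index is re-inserted further along. This keeps
// `max_distance_from_start_index` (and thus lookup cost) small.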
/// Must ensureCapacity before calling this.
fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult {
const indexes = header.indexes(I);
const h = hash(key);
const start_index = header.constrainIndex(h);
var roll_over: usize = 0;
var distance_from_start_index: usize = 0;
while (roll_over <= header.indexes_len) : ({
roll_over += 1;
distance_from_start_index += 1;
}) {
const index_index = header.constrainIndex(start_index + roll_over);
const index = indexes[index_index];
if (index.isEmpty()) {
indexes[index_index] = .{
.distance_from_start_index = @intCast(I, distance_from_start_index),
.entry_index = @intCast(I, self.entries.items.len),
};
header.maybeBumpMax(distance_from_start_index);
const new_entry = self.entries.addOneAssumeCapacity();
new_entry.* = .{
.hash = if (store_hash) h else {},
.key = key,
.value = undefined,
};
return .{
.found_existing = false,
.entry = new_entry,
.index = self.entries.items.len - 1,
};
}
// This pointer survives the following append because we call
// entries.ensureCapacity before getOrPutInternal.
const entry = &self.entries.items[index.entry_index];
const hash_match = if (store_hash) h == entry.hash else true;
if (hash_match and eql(key, entry.key)) {
return .{
.found_existing = true,
.entry = entry,
.index = index.entry_index,
};
}
if (index.distance_from_start_index < distance_from_start_index) {
// In this case, we did not find the item. We will put a new entry.
// However, we will use this index for the new entry, and move
// the previous index down the line, to keep the max_distance_from_start_index
// as small as possible.
indexes[index_index] = .{
.distance_from_start_index = @intCast(I, distance_from_start_index),
.entry_index = @intCast(I, self.entries.items.len),
};
header.maybeBumpMax(distance_from_start_index);
const new_entry = self.entries.addOneAssumeCapacity();
new_entry.* = .{
.hash = if (store_hash) h else {},
.key = key,
.value = undefined,
};
distance_from_start_index = index.distance_from_start_index;
var prev_entry_index = index.entry_index;
// Find somewhere to put the index we replaced by shifting
// following indexes backwards.
roll_over += 1;
distance_from_start_index += 1;
while (roll_over < header.indexes_len) : ({
roll_over += 1;
distance_from_start_index += 1;
}) {
const next_index_index = header.constrainIndex(start_index + roll_over);
const next_index = indexes[next_index_index];
if (next_index.isEmpty()) {
header.maybeBumpMax(distance_from_start_index);
indexes[next_index_index] = .{
.entry_index = prev_entry_index,
.distance_from_start_index = @intCast(I, distance_from_start_index),
};
return .{
.found_existing = false,
.entry = new_entry,
.index = self.entries.items.len - 1,
};
}
if (next_index.distance_from_start_index < distance_from_start_index) {
header.maybeBumpMax(distance_from_start_index);
indexes[next_index_index] = .{
.entry_index = prev_entry_index,
.distance_from_start_index = @intCast(I, distance_from_start_index),
};
distance_from_start_index = next_index.distance_from_start_index;
prev_entry_index = next_index.entry_index;
}
}
unreachable;
}
}
unreachable;
}
fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?usize {
const indexes = header.indexes(I);
const h = hash(key);
const start_index = header.constrainIndex(h);
var roll_over: usize = 0;
while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
const index_index = header.constrainIndex(start_index + roll_over);
const index = indexes[index_index];
if (index.isEmpty())
return null;
const entry = &self.entries.items[index.entry_index];
const hash_match = if (store_hash) h == entry.hash else true;
if (hash_match and eql(key, entry.key))
return index.entry_index;
}
return null;
}
fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void {
switch (header.capacityIndexType()) {
.u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8),
.u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16),
.u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32),
.usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize),
}
}
fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void {
const indexes = header.indexes(I);
entry_loop: for (self.entries.items) |entry, i| {
const h = if (store_hash) entry.hash else hash(entry.key);
const start_index = header.constrainIndex(h);
var entry_index = i;
var roll_over: usize = 0;
var distance_from_start_index: usize = 0;
while (roll_over < header.indexes_len) : ({
roll_over += 1;
distance_from_start_index += 1;
}) {
const index_index = header.constrainIndex(start_index + roll_over);
const next_index = indexes[index_index];
if (next_index.isEmpty()) {
header.maybeBumpMax(distance_from_start_index);
indexes[index_index] = .{
.distance_from_start_index = @intCast(I, distance_from_start_index),
.entry_index = @intCast(I, entry_index),
};
continue :entry_loop;
}
if (next_index.distance_from_start_index < distance_from_start_index) {
header.maybeBumpMax(distance_from_start_index);
indexes[index_index] = .{
.distance_from_start_index = @intCast(I, distance_from_start_index),
.entry_index = @intCast(I, entry_index),
};
distance_from_start_index = next_index.distance_from_start_index;
entry_index = next_index.entry_index;
}
}
unreachable;
}
}
};
}
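// Illustrative sketch: the unmanaged variant takes the allocator at each call
// site, and `promote` wraps it into a managed `ArrayHashMap` that remembers
// the allocator.
test "illustrative: unmanaged usage and promote" {
    const gpa = std.testing.allocator;
    var map = AutoArrayHashMapUnmanaged(u32, u32){};
    try map.put(gpa, 1, 10);
    try map.put(gpa, 2, 20);
    try testing.expect(map.get(1).? == 10);
    var managed = map.promote(gpa); // takes ownership; deinit via `managed`
    defer managed.deinit();
    try testing.expect(managed.get(2).? == 20);
}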
const CapacityIndexType = enum { u8, u16, u32, usize };
fn capacityIndexType(indexes_len: usize) CapacityIndexType {
if (indexes_len < math.maxInt(u8))
return .u8;
if (indexes_len < math.maxInt(u16))
return .u16;
if (indexes_len < math.maxInt(u32))
return .u32;
return .usize;
}
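// Illustrative sketch: `capacityIndexType` widens the index type one step
// before `maxInt` becomes a reachable index, because `maxInt` is reserved as
// the empty sentinel in `Index` below.
test "illustrative: capacityIndexType widens before maxInt" {
    try testing.expect(capacityIndexType(100) == .u8);
    try testing.expect(capacityIndexType(math.maxInt(u8)) == .u16);
    try testing.expect(capacityIndexType(math.maxInt(u16)) == .u32);
}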
fn capacityIndexSize(indexes_len: usize) usize {
switch (capacityIndexType(indexes_len)) {
.u8 => return @sizeOf(Index(u8)),
.u16 => return @sizeOf(Index(u16)),
.u32 => return @sizeOf(Index(u32)),
.usize => return @sizeOf(Index(usize)),
}
}
fn Index(comptime I: type) type {
return extern struct {
entry_index: I,
distance_from_start_index: I,
const Self = @This();
const empty = Self{
.entry_index = math.maxInt(I),
.distance_from_start_index = undefined,
};
fn isEmpty(idx: Self) bool {
return idx.entry_index == math.maxInt(I);
}
fn setEmpty(idx: *Self) void {
idx.entry_index = math.maxInt(I);
}
};
}
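// Illustrative sketch of the sentinel encoding above: a slot is "empty" when
// `entry_index` holds the maximum value of its integer type.
test "illustrative: Index empty sentinel" {
    var idx = Index(u8).empty;
    try testing.expect(idx.isEmpty());
    idx = .{ .entry_index = 3, .distance_from_start_index = 0 };
    try testing.expect(!idx.isEmpty());
    idx.setEmpty();
    try testing.expect(idx.isEmpty());
}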
/// This struct is trailed by an array of `Index(I)`, where `I`
/// and the array length are determined by `indexes_len`.
const IndexHeader = struct {
max_distance_from_start_index: usize,
indexes_len: usize,
fn constrainIndex(header: IndexHeader, i: usize) usize {
// This is an optimization for modulo of power of two integers;
// it requires `indexes_len` to always be a power of two.
return i & (header.indexes_len - 1);
}
fn indexes(header: *IndexHeader, comptime I: type) []Index(I) {
const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader));
return start[0..header.indexes_len];
}
fn capacityIndexType(header: IndexHeader) CapacityIndexType {
return hash_map.capacityIndexType(header.indexes_len);
}
fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void {
if (distance_from_start_index > header.max_distance_from_start_index) {
header.max_distance_from_start_index = distance_from_start_index;
}
}
fn alloc(allocator: *Allocator, len: usize) !*IndexHeader {
const index_size = hash_map.capacityIndexSize(len);
const nbytes = @sizeOf(IndexHeader) + index_size * len;
const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact);
@memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader));
const result = @ptrCast(*IndexHeader, bytes.ptr);
result.* = .{
.max_distance_from_start_index = 0,
.indexes_len = len,
};
return result;
}
fn free(header: *IndexHeader, allocator: *Allocator) void {
const index_size = hash_map.capacityIndexSize(header.indexes_len);
const ptr = @ptrCast([*]u8, header);
const slice = ptr[0 .. @sizeOf(IndexHeader) + header.indexes_len * index_size];
allocator.free(slice);
}
};
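// Illustrative sketch: `constrainIndex` is a masked modulo, which is why
// `indexes_len` must always be a power of two (`IndexHeader.alloc` is only
// called with power-of-two lengths in this file).
test "illustrative: constrainIndex wraps power-of-two lengths" {
    const header = IndexHeader{
        .max_distance_from_start_index = 0,
        .indexes_len = 16,
    };
    try testing.expect(header.constrainIndex(5) == 5);
    try testing.expect(header.constrainIndex(16) == 0);
    try testing.expect(header.constrainIndex(21) == 5);
}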
test "basic hash map usage" {
var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
try testing.expect((try map.fetchPut(1, 11)) == null);
try testing.expect((try map.fetchPut(2, 22)) == null);
try testing.expect((try map.fetchPut(3, 33)) == null);
try testing.expect((try map.fetchPut(4, 44)) == null);
try map.putNoClobber(5, 55);
try testing.expect((try map.fetchPut(5, 66)).?.value == 55);
try testing.expect((try map.fetchPut(5, 55)).?.value == 66);
const gop1 = try map.getOrPut(5);
try testing.expect(gop1.found_existing == true);
try testing.expect(gop1.entry.value == 55);
try testing.expect(gop1.index == 4);
gop1.entry.value = 77;
try testing.expect(map.getEntry(5).?.value == 77);
const gop2 = try map.getOrPut(99);
try testing.expect(gop2.found_existing == false);
try testing.expect(gop2.index == 5);
gop2.entry.value = 42;
try testing.expect(map.getEntry(99).?.value == 42);
const gop3 = try map.getOrPutValue(5, 5);
try testing.expect(gop3.value == 77);
const gop4 = try map.getOrPutValue(100, 41);
try testing.expect(gop4.value == 41);
try testing.expect(map.contains(2));
try testing.expect(map.getEntry(2).?.value == 22);
try testing.expect(map.get(2).? == 22);
const rmv1 = map.swapRemove(2);
try testing.expect(rmv1.?.key == 2);
try testing.expect(rmv1.?.value == 22);
try testing.expect(map.swapRemove(2) == null);
try testing.expect(map.getEntry(2) == null);
try testing.expect(map.get(2) == null);
// Since we've used `swapRemove` above, the index of this entry should remain unchanged.
try testing.expect(map.getIndex(100).? == 1);
const gop5 = try map.getOrPut(5);
try testing.expect(gop5.found_existing == true);
try testing.expect(gop5.entry.value == 77);
try testing.expect(gop5.index == 4);
// Whereas, if we do an `orderedRemove`, it should move the index forward one spot.
const rmv2 = map.orderedRemove(100);
try testing.expect(rmv2.?.key == 100);
try testing.expect(rmv2.?.value == 41);
try testing.expect(map.orderedRemove(100) == null);
try testing.expect(map.getEntry(100) == null);
try testing.expect(map.get(100) == null);
const gop6 = try map.getOrPut(5);
try testing.expect(gop6.found_existing == true);
try testing.expect(gop6.entry.value == 77);
try testing.expect(gop6.index == 3);
map.removeAssertDiscard(3);
}
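// Illustrative companion to the doc comments on `ArrayHashMap`: `items()`
// keeps insertion order, and `swapRemove` moves the last entry into the
// vacated slot.
test "illustrative: insertion order and swap removal" {
    var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
    defer map.deinit();
    try map.put(1, 10);
    try map.put(2, 20);
    try map.put(3, 30);
    _ = map.swapRemove(1); // last entry (key 3) is swapped into index 0
    try testing.expect(map.items()[0].key == 3);
    try testing.expect(map.items()[1].key == 2);
}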
test "iterator hash map" {
var reset_map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer reset_map.deinit();
// test ensureCapacity with a 0 parameter
try reset_map.ensureCapacity(0);
try reset_map.putNoClobber(0, 11);
try reset_map.putNoClobber(1, 22);
try reset_map.putNoClobber(2, 33);
var keys = [_]i32{
0, 2, 1,
};
var values = [_]i32{
11, 33, 22,
};
var buffer = [_]i32{
0, 0, 0,
};
var it = reset_map.iterator();
const first_entry = it.next().?;
it.reset();
var count: usize = 0;
while (it.next()) |entry| : (count += 1) {
buffer[@intCast(usize, entry.key)] = entry.value;
}
try testing.expect(count == 3);
try testing.expect(it.next() == null);
for (buffer) |v, i| {
try testing.expect(buffer[@intCast(usize, keys[i])] == values[i]);
}
it.reset();
count = 0;
while (it.next()) |entry| {
buffer[@intCast(usize, entry.key)] = entry.value;
count += 1;
if (count >= 2) break;
}
for (buffer[0..2]) |v, i| {
try testing.expect(buffer[@intCast(usize, keys[i])] == values[i]);
}
it.reset();
var entry = it.next().?;
try testing.expect(entry.key == first_entry.key);
try testing.expect(entry.value == first_entry.value);
}
test "ensure capacity" {
var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
try map.ensureCapacity(20);
const initial_capacity = map.capacity();
try testing.expect(initial_capacity >= 20);
var i: i32 = 0;
while (i < 20) : (i += 1) {
try testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null);
}
// shouldn't resize from putAssumeCapacity
try testing.expect(initial_capacity == map.capacity());
}
test "clone" {
var original = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer original.deinit();
// put more than `linear_scan_max` so we can test that the index header is properly cloned
var i: u8 = 0;
while (i < 10) : (i += 1) {
try original.putNoClobber(i, i * 10);
}
var copy = try original.clone();
defer copy.deinit();
i = 0;
while (i < 10) : (i += 1) {
try testing.expect(copy.get(i).? == i * 10);
}
}
test "shrink" {
var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
// This test is more interesting if we insert enough entries to allocate the index header.
const num_entries = 20;
var i: i32 = 0;
while (i < num_entries) : (i += 1)
try testing.expect((try map.fetchPut(i, i * 10)) == null);
try testing.expect(map.unmanaged.index_header != null);
try testing.expect(map.count() == num_entries);
// Test `shrinkRetainingCapacity`.
map.shrinkRetainingCapacity(17);
try testing.expect(map.count() == 17);
try testing.expect(map.capacity() == 20);
i = 0;
while (i < num_entries) : (i += 1) {
const gop = try map.getOrPut(i);
if (i < 17) {
try testing.expect(gop.found_existing == true);
try testing.expect(gop.entry.value == i * 10);
} else try testing.expect(gop.found_existing == false);
}
// Test `shrinkAndFree`.
map.shrinkAndFree(15);
try testing.expect(map.count() == 15);
try testing.expect(map.capacity() == 15);
i = 0;
while (i < num_entries) : (i += 1) {
const gop = try map.getOrPut(i);
if (i < 15) {
try testing.expect(gop.found_existing == true);
try testing.expect(gop.entry.value == i * 10);
} else try testing.expect(gop.found_existing == false);
}
}
test "pop" {
var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
// Insert just enough entries so that the map expands. Afterwards,
// pop all entries out of the map.
var i: i32 = 0;
while (i < 9) : (i += 1) {
try testing.expect((try map.fetchPut(i, i)) == null);
}
while (i > 0) : (i -= 1) {
const pop = map.pop();
try testing.expect(pop.key == i - 1 and pop.value == i - 1);
}
}
test "reIndex" {
var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
// Populate via the API.
const num_indexed_entries = 20;
var i: i32 = 0;
while (i < num_indexed_entries) : (i += 1)
try testing.expect((try map.fetchPut(i, i * 10)) == null);
// Make sure we allocated an index header.
try testing.expect(map.unmanaged.index_header != null);
// Now write to the underlying array list directly.
const num_unindexed_entries = 20;
const hash = getAutoHashFn(i32);
var al = &map.unmanaged.entries;
while (i < num_indexed_entries + num_unindexed_entries) : (i += 1) {
try al.append(std.testing.allocator, .{
.key = i,
.value = i * 10,
.hash = {},
});
}
// After reindexing, we should see everything.
try map.reIndex();
i = 0;
while (i < num_indexed_entries + num_unindexed_entries) : (i += 1) {
const gop = try map.getOrPut(i);
try testing.expect(gop.found_existing == true);
try testing.expect(gop.entry.value == i * 10);
try testing.expect(gop.index == i);
}
}
test "fromOwnedArrayList" {
const array_hash_map_type = AutoArrayHashMap(i32, i32);
var al = std.ArrayListUnmanaged(array_hash_map_type.Entry){};
const hash = getAutoHashFn(i32);
// Populate array list.
const num_entries = 20;
var i: i32 = 0;
while (i < num_entries) : (i += 1) {
try al.append(std.testing.allocator, .{
.key = i,
.value = i * 10,
.hash = {},
});
}
// Now instantiate using `fromOwnedArrayList`.
var map = try array_hash_map_type.fromOwnedArrayList(std.testing.allocator, al);
defer map.deinit();
i = 0;
while (i < num_entries) : (i += 1) {
const gop = try map.getOrPut(i);
try testing.expect(gop.found_existing == true);
try testing.expect(gop.entry.value == i * 10);
try testing.expect(gop.index == i);
}
}
test "auto store_hash" {
const HasCheapEql = AutoArrayHashMap(i32, i32);
const HasExpensiveEql = AutoArrayHashMap([32]i32, i32);
try testing.expect(meta.fieldInfo(HasCheapEql.Entry, .hash).field_type == void);
try testing.expect(meta.fieldInfo(HasExpensiveEql.Entry, .hash).field_type != void);
const HasCheapEqlUn = AutoArrayHashMapUnmanaged(i32, i32);
const HasExpensiveEqlUn = AutoArrayHashMapUnmanaged([32]i32, i32);
try testing.expect(meta.fieldInfo(HasCheapEqlUn.Entry, .hash).field_type == void);
try testing.expect(meta.fieldInfo(HasExpensiveEqlUn.Entry, .hash).field_type != void);
}
pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) {
return struct {
fn hash(key: K) u32 {
return getAutoHashFn(usize)(@ptrToInt(key));
}
}.hash;
}
pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) {
return struct {
fn eql(a: K, b: K) bool {
return a == b;
}
}.eql;
}
pub fn getAutoHashFn(comptime K: type) (fn (K) u32) {
return struct {
fn hash(key: K) u32 {
if (comptime trait.hasUniqueRepresentation(K)) {
return @truncate(u32, Wyhash.hash(0, std.mem.asBytes(&key)));
} else {
var hasher = Wyhash.init(0);
autoHash(&hasher, key);
return @truncate(u32, hasher.final());
}
}
}.hash;
}
pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) {
return struct {
fn eql(a: K, b: K) bool {
return meta.eql(a, b);
}
}.eql;
}
pub fn autoEqlIsCheap(comptime K: type) bool {
return switch (@typeInfo(K)) {
.Bool,
.Int,
.Float,
.Pointer,
.ComptimeFloat,
.ComptimeInt,
.Enum,
.Fn,
.ErrorSet,
.AnyFrame,
.EnumLiteral,
=> true,
else => false,
};
}
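// Illustrative sketch tying `autoEqlIsCheap` to the `store_hash` choice made
// by `AutoArrayHashMap` at the top of this file: scalar-like keys skip
// storing hashes, aggregate keys store them.
test "illustrative: autoEqlIsCheap drives store_hash" {
    try testing.expect(autoEqlIsCheap(i32));
    try testing.expect(!autoEqlIsCheap([32]i32));
}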
pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) {
return struct {
fn hash(key: K) u32 {
var hasher = Wyhash.init(0);
std.hash.autoHashStrat(&hasher, key, strategy);
return @truncate(u32, hasher.final());
}
}.hash;
}
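// Illustrative sketch, assuming the documented semantics of
// `std.hash.autoHashStrat`: with `.Deep`, slice keys are hashed by their
// pointed-to contents rather than by address.
test "illustrative: deep hashing for slice keys" {
    const hashFn = getAutoHashStratFn([]const u8, .Deep);
    const a = [_]u8{ 'a', 'b', 'c' };
    const b = [_]u8{ 'a', 'b', 'c' };
    // Distinct buffers with equal contents hash the same under `.Deep`.
    try testing.expect(hashFn(&a) == hashFn(&b));
}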