/*
 * Copyright (c) 2015 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */

#ifndef ZIG_HASH_MAP_HPP
#define ZIG_HASH_MAP_HPP

#include "util.hpp"

#include <stdint.h>

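// Usage sketch (illustrative only): `example_hash` and `example_eql` below are
// hypothetical helpers, not part of this header. Any functions matching the
// HashFunction/EqualFn signatures in the template parameter list work.
//
//     uint32_t example_hash(int k) { return (uint32_t)k * 2654435761u; }
//     bool example_eql(int a, int b) { return a == b; }
//
//     HashMap<int, const char *, example_hash, example_eql> map;
//     map.init(16);
//     map.put(1, "one");
//     if (auto *e = map.maybe_get(1)) {
//         // e->value == "one"
//     }
//     auto it = map.entry_iterator();
//     for (auto *entry = it.next(); entry != nullptr; entry = it.next()) {
//         // entries visited in insertion order
//     }
//     map.deinit();
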
template<typename K, typename V, uint32_t (*HashFunction)(K key), bool (*EqualFn)(K a, K b)>
class HashMap {
public:
    void init(int capacity) {
        init_capacity(capacity);
    }
    void deinit(void) {
        _entries.deinit();
        heap::c_allocator.deallocate(_index_bytes,
                _indexes_len * capacity_index_size(_indexes_len));
    }

    struct Entry {
        uint32_t hash;
        uint32_t distance_from_start_index;
        K key;
        V value;
    };

    void clear() {
        _entries.clear();
        memset(_index_bytes, 0, _indexes_len * capacity_index_size(_indexes_len));
        _max_distance_from_start_index = 0;
        _modification_count += 1;
    }

    size_t size() const {
        return _entries.length;
    }

    void put(const K &key, const V &value) {
        _modification_count += 1;

        // This allows us to take a pointer to an entry in `internal_put` which
        // will not become a dead pointer when the array list is appended to.
        _entries.ensure_capacity(_entries.length + 1);

        if (_index_bytes == nullptr) {
            if (_entries.length < 16) {
                // Small maps skip the index table entirely; lookups are a linear
                // scan over the insertion-ordered entry array. Overwrite an
                // existing key in place here to match internal_put's behavior.
                uint32_t hash = HashFunction(key);
                for (size_t i = 0; i < _entries.length; i += 1) {
                    Entry *entry = &_entries.items[i];
                    if (entry->hash == hash && EqualFn(entry->key, key)) {
                        *entry = {hash, 0, key, value};
                        return;
                    }
                }
                _entries.append({hash, 0, key, value});
                return;
            } else {
                // The map just crossed the small-map threshold: build the index
                // table for the existing entries, then insert through it.
                _indexes_len = 32;
                _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len);
                _max_distance_from_start_index = 0;
                for (size_t i = 0; i < _entries.length; i += 1) {
                    Entry *entry = &_entries.items[i];
                    put_index(entry, i, _index_bytes);
                }
                return internal_put(key, value, _index_bytes);
            }
        }

        // if we would get too full (60%), double the indexes size
        if ((_entries.length + 1) * 5 >= _indexes_len * 3) {
            heap::c_allocator.deallocate(_index_bytes,
                    _indexes_len * capacity_index_size(_indexes_len));
            _indexes_len *= 2;
            size_t sz = capacity_index_size(_indexes_len);
            // This zero-initializes the bytes, marking them all empty.
            _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
            _max_distance_from_start_index = 0;
            for (size_t i = 0; i < _entries.length; i += 1) {
                Entry *entry = &_entries.items[i];
                switch (sz) {
                    case 1:
                        put_index(entry, i, (uint8_t*)_index_bytes);
                        continue;
                    case 2:
                        put_index(entry, i, (uint16_t*)_index_bytes);
                        continue;
                    case 4:
                        put_index(entry, i, (uint32_t*)_index_bytes);
                        continue;
                    default:
                        put_index(entry, i, (size_t*)_index_bytes);
                        continue;
                }
            }
        }

        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_put(key, value, (uint8_t*)_index_bytes);
            case 2: return internal_put(key, value, (uint16_t*)_index_bytes);
            case 4: return internal_put(key, value, (uint32_t*)_index_bytes);
            default: return internal_put(key, value, (size_t*)_index_bytes);
        }
    }

    Entry *put_unique(const K &key, const V &value) {
        // TODO make this more efficient
        Entry *entry = internal_get(key);
        if (entry)
            return entry;
        put(key, value);
        return nullptr;
    }

    const V &get(const K &key) const {
        Entry *entry = internal_get(key);
        if (!entry)
            zig_panic("key not found");
        return entry->value;
    }

    Entry *maybe_get(const K &key) const {
        return internal_get(key);
    }

    bool remove(const K &key) {
        bool deleted_something = maybe_remove(key);
        if (!deleted_something)
            zig_panic("key not found");
        return deleted_something;
    }

    bool maybe_remove(const K &key) {
        _modification_count += 1;
        if (_index_bytes == nullptr) {
            uint32_t hash = HashFunction(key);
            for (size_t i = 0; i < _entries.length; i += 1) {
                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
                    _entries.swap_remove(i);
                    return true;
                }
            }
            return false;
        }
        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_remove(key, (uint8_t*)_index_bytes);
            case 2: return internal_remove(key, (uint16_t*)_index_bytes);
            case 4: return internal_remove(key, (uint32_t*)_index_bytes);
            default: return internal_remove(key, (size_t*)_index_bytes);
        }
    }

    class Iterator {
    public:
        Entry *next() {
            if (_initial_modification_count != _table->_modification_count)
                zig_panic("concurrent modification");
            if (_index >= _table->_entries.length)
                return nullptr;
            Entry *entry = &_table->_entries.items[_index];
            _index += 1;
            return entry;
        }
    private:
        const HashMap * _table;
        // index into the entry array
        size_t _index = 0;
        // used to detect concurrent modification
        uint32_t _initial_modification_count;
        Iterator(const HashMap * table) :
            _table(table), _initial_modification_count(table->_modification_count) {
        }
        friend HashMap;
    };

    // you must not modify the underlying HashMap while this iterator is still in use
    Iterator entry_iterator() const {
        return Iterator(this);
    }

private:
    // Maintains insertion order.
    ZigList<Entry> _entries;
    // If _indexes_len is less than 2**8, this is an array of uint8_t.
    // If _indexes_len is less than 2**16, it is an array of uint16_t.
    // If _indexes_len is less than 2**32, it is an array of uint32_t.
    // Otherwise it is size_t.
    // It's off by 1. 0 means empty slot, 1 means index 0, etc.
    uint8_t *_index_bytes;
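    // Worked example (illustrative): with _indexes_len == 32,
    // capacity_index_size() returns 1, so _index_bytes is 32 one-byte slots.
    // If slot 7 holds the value 3, then _entries.items[2] is the entry
    // currently assigned to slot 7 (it may have probed there from an earlier
    // start slot); a value of 0 means no entry claims that slot.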
    // This is the number of indexes. When indexes are bytes, it equals the number of bytes.
    // When indexes are uint16_t, _indexes_len is half the number of bytes.
    size_t _indexes_len;

    size_t _max_distance_from_start_index;
    // This is used to detect bugs where a hashtable is edited while an iterator is running.
    uint32_t _modification_count;

    void init_capacity(size_t capacity) {
        _entries = {};
        _entries.ensure_capacity(capacity);
        _indexes_len = 0;
        if (capacity >= 16) {
            // So that at capacity it will only be 60% full.
            _indexes_len = capacity * 5 / 3;
            size_t sz = capacity_index_size(_indexes_len);
            // This zero-initializes _index_bytes, which marks all slots empty.
            _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
        } else {
            _index_bytes = nullptr;
        }

        _max_distance_from_start_index = 0;
        _modification_count = 0;
    }

    static size_t capacity_index_size(size_t len) {
        if (len < UINT8_MAX)
            return 1;
        if (len < UINT16_MAX)
            return 2;
        if (len < UINT32_MAX)
            return 4;
        return sizeof(size_t);
    }

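    // internal_put below is linear probing with Robin Hood-style displacement:
    // each stored index remembers how far its entry sits from its ideal slot
    // (distance_from_start_index). When an incoming key has probed farther than
    // the entry resident at a slot, the incoming index takes that slot and the
    // displaced index continues down the probe sequence. This keeps
    // _max_distance_from_start_index small, which bounds the probe length used
    // by internal_get2 and internal_remove.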
    template <typename I>
    void internal_put(const K &key, const V &value, I *indexes) {
        uint32_t hash = HashFunction(key);
        uint32_t distance_from_start_index = 0;
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over < _indexes_len;
                roll_over += 1, distance_from_start_index += 1)
        {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            I index_data = indexes[index_index];
            if (index_data == 0) {
                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
                indexes[index_index] = _entries.length;
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                return;
            }
            // This pointer survives the following append because we call
            // _entries.ensure_capacity before internal_put.
            Entry *entry = &_entries.items[index_data - 1];
            if (entry->hash == hash && EqualFn(entry->key, key)) {
                *entry = {hash, distance_from_start_index, key, value};
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                return;
            }
            if (entry->distance_from_start_index < distance_from_start_index) {
                // In this case, we did not find the item. We will put a new entry.
                // However, we will use this index for the new entry, and move
                // the previous index down the line, to keep the _max_distance_from_start_index
                // as small as possible.
                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
                indexes[index_index] = _entries.length;
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;

                distance_from_start_index = entry->distance_from_start_index;

                // Find somewhere to put the index we replaced by shifting
                // following indexes backwards.
                roll_over += 1;
                distance_from_start_index += 1;
                for (; roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) {
                    size_t index_index = (start_index + roll_over) % _indexes_len;
                    I next_index_data = indexes[index_index];
                    if (next_index_data == 0) {
                        if (distance_from_start_index > _max_distance_from_start_index)
                            _max_distance_from_start_index = distance_from_start_index;
                        entry->distance_from_start_index = distance_from_start_index;
                        indexes[index_index] = index_data;
                        return;
                    }
                    Entry *next_entry = &_entries.items[next_index_data - 1];
                    if (next_entry->distance_from_start_index < distance_from_start_index) {
                        if (distance_from_start_index > _max_distance_from_start_index)
                            _max_distance_from_start_index = distance_from_start_index;
                        entry->distance_from_start_index = distance_from_start_index;
                        indexes[index_index] = index_data;
                        distance_from_start_index = next_entry->distance_from_start_index;
                        entry = next_entry;
                        index_data = next_index_data;
                    }
                }
                zig_unreachable();
            }
        }
        zig_unreachable();
    }

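    // put_index re-inserts the index of an entry that already lives in
    // _entries, using the same displacement loop as the tail of internal_put.
    // It is used whenever the index table is (re)built: on the small-map
    // upgrade and whenever put() grows _indexes_len.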
    template <typename I>
    void put_index(Entry *entry, size_t entry_index, I *indexes) {
        size_t start_index = hash_to_index(entry->hash);
        size_t index_data = entry_index + 1;
        for (size_t roll_over = 0, distance_from_start_index = 0;
                roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
        {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t next_index_data = indexes[index_index];
            if (next_index_data == 0) {
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                entry->distance_from_start_index = distance_from_start_index;
                indexes[index_index] = index_data;
                return;
            }
            Entry *next_entry = &_entries.items[next_index_data - 1];
            if (next_entry->distance_from_start_index < distance_from_start_index) {
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                entry->distance_from_start_index = distance_from_start_index;
                indexes[index_index] = index_data;
                distance_from_start_index = next_entry->distance_from_start_index;
                entry = next_entry;
                index_data = next_index_data;
            }
        }
        zig_unreachable();
    }

    Entry *internal_get(const K &key) const {
        if (_index_bytes == nullptr) {
            uint32_t hash = HashFunction(key);
            for (size_t i = 0; i < _entries.length; i += 1) {
                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
                    return &_entries.items[i];
                }
            }
            return nullptr;
        }
        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_get2(key, (uint8_t*)_index_bytes);
            case 2: return internal_get2(key, (uint16_t*)_index_bytes);
            case 4: return internal_get2(key, (uint32_t*)_index_bytes);
            default: return internal_get2(key, (size_t*)_index_bytes);
        }
    }

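    // Lookups probe at most _max_distance_from_start_index + 1 slots: no entry
    // is ever stored farther than that from its ideal slot, so hitting an empty
    // slot or exhausting that bound means the key is absent.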
    template <typename I>
    Entry *internal_get2(const K &key, I *indexes) const {
        uint32_t hash = HashFunction(key);
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t index_data = indexes[index_index];
            if (index_data == 0)
                return nullptr;

            Entry *entry = &_entries.items[index_data - 1];
            if (entry->hash == hash && EqualFn(entry->key, key))
                return entry;
        }
        return nullptr;
    }

    size_t hash_to_index(uint32_t hash) const {
        return ((size_t)hash) % _indexes_len;
    }

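    // Removal uses backward-shift deletion rather than tombstones: after the
    // matching slot is cleared, following indexes whose entries are not already
    // at their ideal slot (distance_from_start_index != 0) are shifted one slot
    // back and their recorded distance decremented, so probe sequences stay
    // unbroken for future lookups.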
    template <typename I>
    bool internal_remove(const K &key, I *indexes) {
        uint32_t hash = HashFunction(key);
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t index_data = indexes[index_index];
            if (index_data == 0)
                return false;

            size_t index = index_data - 1;
            Entry *entry = &_entries.items[index];
            if (entry->hash != hash || !EqualFn(entry->key, key))
                continue;

            size_t prev_index = index_index;
            _entries.swap_remove(index);
            if (_entries.length > 0 && _entries.length != index) {
                // Because of the swap remove, now we need to update the index that was
                // pointing to the last entry and is now pointing to this removed item slot.
                update_entry_index(_entries.length, index, indexes);
            }

            // Now we have to shift over the following indexes.
            roll_over += 1;
            for (; roll_over < _indexes_len; roll_over += 1) {
                size_t next_index = (start_index + roll_over) % _indexes_len;
                if (indexes[next_index] == 0) {
                    indexes[prev_index] = 0;
                    return true;
                }
                Entry *next_entry = &_entries.items[indexes[next_index] - 1];
                if (next_entry->distance_from_start_index == 0) {
                    indexes[prev_index] = 0;
                    return true;
                }
                indexes[prev_index] = indexes[next_index];
                prev_index = next_index;
                next_entry->distance_from_start_index -= 1;
            }
            zig_unreachable();
        }
        return false;
    }

    template <typename I>
    void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) {
        size_t start_index = hash_to_index(_entries.items[new_entry_index].hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            if (indexes[index_index] == old_entry_index + 1) {
                indexes[index_index] = new_entry_index + 1;
                return;
            }
        }
        zig_unreachable();
    }
};

#endif