diff --git a/src/hash_map.hpp b/src/hash_map.hpp
index 45440d61e7..8681e5b761 100644
--- a/src/hash_map.hpp
+++ b/src/hash_map.hpp
@@ -25,6 +25,8 @@ public:
     }
 
     struct Entry {
+        uint32_t hash;
+        uint32_t distance_from_start_index;
         K key;
         V value;
     };
@@ -43,6 +45,26 @@ public:
     void put(const K &key, const V &value) {
         _modification_count += 1;
 
+        // This allows us to take a pointer to an entry in `internal_put` which
+        // will not become a dead pointer when the array list is appended.
+        _entries.ensure_capacity(_entries.length + 1);
+
+        if (_index_bytes == nullptr) {
+            if (_entries.length < 16) {
+                _entries.append({HashFunction(key), 0, key, value});
+                return;
+            } else {
+                _indexes_len = 32;
+                _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len);
+                _max_distance_from_start_index = 0;
+                for (size_t i = 0; i < _entries.length; i += 1) {
+                    Entry *entry = &_entries.items[i];
+                    put_index(entry, i, _index_bytes);
+                }
+                return internal_put(key, value, _index_bytes);
+            }
+        }
+
         // if we would get too full (60%), double the indexes size
         if ((_entries.length + 1) * 5 >= _indexes_len * 3) {
             heap::c_allocator.deallocate(_index_bytes,
@@ -56,22 +78,21 @@ public:
             for (size_t i = 0; i < _entries.length; i += 1) {
                 Entry *entry = &_entries.items[i];
                 switch (sz) {
                     case 1:
-                        put_index(key_to_index(entry->key), i, (uint8_t*)_index_bytes);
+                        put_index(entry, i, (uint8_t*)_index_bytes);
                         continue;
                     case 2:
-                        put_index(key_to_index(entry->key), i, (uint16_t*)_index_bytes);
+                        put_index(entry, i, (uint16_t*)_index_bytes);
                         continue;
                     case 4:
-                        put_index(key_to_index(entry->key), i, (uint32_t*)_index_bytes);
+                        put_index(entry, i, (uint32_t*)_index_bytes);
                         continue;
                     default:
-                        put_index(key_to_index(entry->key), i, (size_t*)_index_bytes);
+                        put_index(entry, i, (size_t*)_index_bytes);
                         continue;
                 }
             }
         }
-
         switch (capacity_index_size(_indexes_len)) {
             case 1: return internal_put(key, value, (uint8_t*)_index_bytes);
             case 2: return internal_put(key, value, (uint16_t*)_index_bytes);
@@ -109,6 +130,16 @@ public:
     bool maybe_remove(const K &key) {
         _modification_count += 1;
+        if (_index_bytes == nullptr) {
+            uint32_t hash = HashFunction(key);
+            for (size_t i = 0; i < _entries.length; i += 1) {
+                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
+                    _entries.swap_remove(i);
+                    return true;
+                }
+            }
+            return false;
+        }
 
         switch (capacity_index_size(_indexes_len)) {
             case 1: return internal_remove(key, (uint8_t*)_index_bytes);
             case 2: return internal_remove(key, (uint16_t*)_index_bytes);
@@ -165,11 +196,16 @@ private:
     void init_capacity(size_t capacity) {
         _entries = {};
         _entries.ensure_capacity(capacity);
-        // So that at capacity it will only be 60% full.
-        _indexes_len = capacity * 5 / 3;
-        size_t sz = capacity_index_size(_indexes_len);
-        // This zero initializes _index_bytes which sets them all to empty.
-        _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
+        _indexes_len = 0;
+        if (capacity >= 16) {
+            // So that at capacity it will only be 60% full.
+            _indexes_len = capacity * 5 / 3;
+            size_t sz = capacity_index_size(_indexes_len);
+            // This zero initializes _index_bytes which sets them all to empty.
+            _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
+        } else {
+            _index_bytes = nullptr;
+        }
 
         _max_distance_from_start_index = 0;
         _modification_count = 0;
@@ -187,47 +223,113 @@ private:
 
     template <typename I>
     void internal_put(const K &key, const V &value, I *indexes) {
-        size_t start_index = key_to_index(key);
-        for (size_t roll_over = 0, distance_from_start_index = 0;
-            roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
+        uint32_t hash = HashFunction(key);
+        uint32_t distance_from_start_index = 0;
+        size_t start_index = hash_to_index(hash);
+        for (size_t roll_over = 0; roll_over < _indexes_len;
+                roll_over += 1, distance_from_start_index += 1)
         {
             size_t index_index = (start_index + roll_over) % _indexes_len;
             I index_data = indexes[index_index];
             if (index_data == 0) {
-                _entries.append({key, value});
+                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
                 indexes[index_index] = _entries.length;
                 if (distance_from_start_index > _max_distance_from_start_index)
                     _max_distance_from_start_index = distance_from_start_index;
                 return;
             }
+            // This pointer survives the following append because we call
+            // _entries.ensure_capacity before internal_put.
             Entry *entry = &_entries.items[index_data - 1];
-            if (EqualFn(entry->key, key)) {
-                *entry = {key, value};
+            if (entry->hash == hash && EqualFn(entry->key, key)) {
+                *entry = {hash, distance_from_start_index, key, value};
                 if (distance_from_start_index > _max_distance_from_start_index)
                     _max_distance_from_start_index = distance_from_start_index;
                 return;
             }
+            if (entry->distance_from_start_index < distance_from_start_index) {
+                // In this case, we did not find the item. We will put a new entry.
+                // However, we will use this index for the new entry, and move
+                // the previous index down the line, to keep the _max_distance_from_start_index
+                // as small as possible.
+                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
+                indexes[index_index] = _entries.length;
+                if (distance_from_start_index > _max_distance_from_start_index)
+                    _max_distance_from_start_index = distance_from_start_index;
+
+                distance_from_start_index = entry->distance_from_start_index;
+
+                // Find somewhere to put the index we replaced by shifting
+                // following indexes backwards.
+                roll_over += 1;
+                distance_from_start_index += 1;
+                for (; roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) {
+                    size_t index_index = (start_index + roll_over) % _indexes_len;
+                    I next_index_data = indexes[index_index];
+                    if (next_index_data == 0) {
+                        if (distance_from_start_index > _max_distance_from_start_index)
+                            _max_distance_from_start_index = distance_from_start_index;
+                        entry->distance_from_start_index = distance_from_start_index;
+                        indexes[index_index] = index_data;
+                        return;
+                    }
+                    Entry *next_entry = &_entries.items[next_index_data - 1];
+                    if (next_entry->distance_from_start_index < distance_from_start_index) {
+                        if (distance_from_start_index > _max_distance_from_start_index)
+                            _max_distance_from_start_index = distance_from_start_index;
+                        entry->distance_from_start_index = distance_from_start_index;
+                        indexes[index_index] = index_data;
+                        distance_from_start_index = next_entry->distance_from_start_index;
+                        entry = next_entry;
+                        index_data = next_index_data;
+                    }
+                }
+                zig_unreachable();
+            }
         }
         zig_unreachable();
     }
 
     template <typename I>
-    void put_index(size_t start_index, size_t entry_index, I *indexes) {
+    void put_index(Entry *entry, size_t entry_index, I *indexes) {
+        size_t start_index = hash_to_index(entry->hash);
+        size_t index_data = entry_index + 1;
         for (size_t roll_over = 0, distance_from_start_index = 0;
             roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
         {
             size_t index_index = (start_index + roll_over) % _indexes_len;
-            if (indexes[index_index] == 0) {
-                indexes[index_index] = entry_index + 1;
+            size_t next_index_data = indexes[index_index];
+            if (next_index_data == 0) {
                 if (distance_from_start_index > _max_distance_from_start_index)
                     _max_distance_from_start_index = distance_from_start_index;
+                entry->distance_from_start_index = distance_from_start_index;
+                indexes[index_index] = index_data;
                 return;
             }
+            Entry *next_entry = &_entries.items[next_index_data - 1];
+            if (next_entry->distance_from_start_index < distance_from_start_index) {
+                if (distance_from_start_index > _max_distance_from_start_index)
+                    _max_distance_from_start_index = distance_from_start_index;
+                entry->distance_from_start_index = distance_from_start_index;
+                indexes[index_index] = index_data;
+                distance_from_start_index = next_entry->distance_from_start_index;
+                entry = next_entry;
+                index_data = next_index_data;
+            }
         }
         zig_unreachable();
     }
 
     Entry *internal_get(const K &key) const {
+        if (_index_bytes == nullptr) {
+            uint32_t hash = HashFunction(key);
+            for (size_t i = 0; i < _entries.length; i += 1) {
+                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
+                    return &_entries.items[i];
+                }
+            }
+            return nullptr;
+        }
         switch (capacity_index_size(_indexes_len)) {
             case 1: return internal_get2(key, (uint8_t*)_index_bytes);
             case 2: return internal_get2(key, (uint16_t*)_index_bytes);
@@ -238,7 +340,8 @@ private:
 
     template <typename I>
     Entry *internal_get2(const K &key, I *indexes) const {
-        size_t start_index = key_to_index(key);
+        uint32_t hash = HashFunction(key);
+        size_t start_index = hash_to_index(hash);
         for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
             size_t index_index = (start_index + roll_over) % _indexes_len;
             size_t index_data = indexes[index_index];
@@ -246,19 +349,20 @@ private:
                 return nullptr;
 
             Entry *entry = &_entries.items[index_data - 1];
-            if (EqualFn(entry->key, key))
+            if (entry->hash == hash && EqualFn(entry->key, key))
                 return entry;
         }
         return nullptr;
     }
 
-    size_t key_to_index(const K &key) const {
-        return ((size_t)HashFunction(key)) % _indexes_len;
+    size_t hash_to_index(uint32_t hash) const {
+        return ((size_t)hash) % _indexes_len;
     }
 
     template <typename I>
     bool internal_remove(const K &key, I *indexes) {
-        size_t start_index = key_to_index(key);
+        uint32_t hash = HashFunction(key);
+        size_t start_index = hash_to_index(hash);
         for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
             size_t index_index = (start_index + roll_over) % _indexes_len;
             size_t index_data = indexes[index_index];
@@ -267,10 +371,10 @@ private:
 
             size_t index = index_data - 1;
             Entry *entry = &_entries.items[index];
-            if (!EqualFn(entry->key, key))
+            if (entry->hash != hash || !EqualFn(entry->key, key))
                 continue;
 
-            indexes[index_index] = 0;
+            size_t prev_index = index_index;
             _entries.swap_remove(index);
             if (_entries.length > 0 && _entries.length != index) {
                 // Because of the swap remove, now we need to update the index that was
@@ -280,24 +384,29 @@ private:
 
             // Now we have to shift over the following indexes.
             roll_over += 1;
-            for (; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            for (; roll_over < _indexes_len; roll_over += 1) {
                 size_t next_index = (start_index + roll_over) % _indexes_len;
-                if (indexes[next_index] == 0)
-                    break;
-                size_t next_start_index = key_to_index(_entries.items[indexes[next_index]].key);
-                if (next_start_index != start_index)
-                    break;
-                indexes[next_index - 1] = indexes[next_index];
+                if (indexes[next_index] == 0) {
+                    indexes[prev_index] = 0;
+                    return true;
+                }
+                Entry *next_entry = &_entries.items[indexes[next_index] - 1];
+                if (next_entry->distance_from_start_index == 0) {
+                    indexes[prev_index] = 0;
+                    return true;
+                }
+                indexes[prev_index] = indexes[next_index];
+                prev_index = next_index;
+                next_entry->distance_from_start_index -= 1;
            }
-
-            return true;
+            zig_unreachable();
         }
         return false;
     }
 
     template <typename I>
     void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) {
-        size_t start_index = key_to_index(_entries.items[new_entry_index].key);
+        size_t start_index = hash_to_index(_entries.items[new_entry_index].hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
             size_t index_index = (start_index + roll_over) % _indexes_len;
             if (indexes[index_index] == old_entry_index + 1) {
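
The hash_map.hpp changes above replace plain linear probing with Robin Hood hashing: each entry now caches its hash and its probe distance from its home slot, maps with fewer than 16 entries skip the index table and linearly scan the entry array, inserts steal slots from residents that sit closer to home than the incoming index, and removals shift the following indexes backward instead of leaving tombstones. Below is a minimal, self-contained sketch of that insert/steal loop and the bounded lookup. It is an illustration, not the zig source: the names (Sketch, SketchEntry, hash_of, carried), the FNV-1a stand-in hash, and the fixed table size are all assumptions made for the example. As in the diff, the index table stores entry_index + 1 so that 0 can mean empty.

// robin_hood_sketch.cpp: illustrative only; every name here is hypothetical.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct SketchEntry {
    uint32_t hash;
    uint32_t distance_from_start_index; // probe distance from the home slot
    std::string key;
    int value;
};

struct Sketch {
    std::vector<SketchEntry> entries;
    std::vector<uint32_t> indexes; // entry_index + 1; 0 means empty
    uint32_t max_distance = 0;

    explicit Sketch(size_t index_count) : indexes(index_count, 0) {}

    // FNV-1a, standing in for the map's HashFunction template parameter.
    static uint32_t hash_of(const std::string &s) {
        uint32_t h = 2166136261u;
        for (unsigned char c : s) { h ^= c; h *= 16777619u; }
        return h;
    }

    void put(const std::string &key, int value) {
        uint32_t hash = hash_of(key);
        size_t start = hash % indexes.size();

        // Overwrite pass first, so the displacement loop below never meets a
        // duplicate key. (The diff folds both passes into one.)
        for (uint32_t roll = 0; roll <= max_distance; roll += 1) {
            uint32_t slot = indexes[(start + roll) % indexes.size()];
            if (slot == 0) break;
            SketchEntry &e = entries[slot - 1];
            if (e.hash == hash && e.key == key) { e.value = value; return; }
        }

        // Robin Hood insert: carry an index forward until it lands in an
        // empty slot, evicting any resident that sits closer to its home
        // slot ("richer") than the carried index does ("poorer").
        entries.push_back({hash, 0, key, value});
        uint32_t carried = (uint32_t)entries.size();
        uint32_t dist = 0;
        for (size_t roll = 0; roll < indexes.size(); roll += 1, dist += 1) {
            size_t i = (start + roll) % indexes.size();
            uint32_t slot = indexes[i];
            if (slot == 0) {
                entries[carried - 1].distance_from_start_index = dist;
                if (dist > max_distance) max_distance = dist;
                indexes[i] = carried;
                return;
            }
            SketchEntry &resident = entries[slot - 1];
            if (resident.distance_from_start_index < dist) {
                entries[carried - 1].distance_from_start_index = dist;
                if (dist > max_distance) max_distance = dist;
                indexes[i] = carried;
                carried = slot; // keep walking with the evicted index,
                dist = resident.distance_from_start_index; // resuming its distance
            }
        }
        // Unreachable while the table stays under-full; the diff grows the
        // index table at 60% occupancy for exactly this reason.
    }

    const SketchEntry *get(const std::string &key) const {
        uint32_t hash = hash_of(key);
        size_t start = hash % indexes.size();
        // max_distance bounds the probe: past it, the key cannot exist.
        for (uint32_t roll = 0; roll <= max_distance; roll += 1) {
            uint32_t slot = indexes[(start + roll) % indexes.size()];
            if (slot == 0) return nullptr;
            const SketchEntry &e = entries[slot - 1];
            if (e.hash == hash && e.key == key) return &e;
        }
        return nullptr;
    }
};

int main() {
    Sketch map(32); // fixed size for the sketch; the real map grows
    map.put("one", 1);
    map.put("two", 2);
    map.put("two", 22); // overwrite
    const SketchEntry *e = map.get("two");
    if (e != nullptr)
        printf("%s = %d (probe distance %u)\n",
                e->key.c_str(), e->value, e->distance_from_start_index);
    return 0;
}

The displacement work pays off in get(): max_distance (the diff's _max_distance_from_start_index) bounds every probe, so keeping worst-case displacement small directly caps lookup cost.
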
diff --git a/src/list.hpp b/src/list.hpp
index 2c6d90c855..803a251437 100644
--- a/src/list.hpp
+++ b/src/list.hpp
@@ -19,6 +19,9 @@ struct ZigList {
         ensure_capacity(length + 1);
         items[length++] = item;
     }
+    void append_assuming_capacity(const T& item) {
+        items[length++] = item;
+    }
     // remember that the pointer to this item is invalid after you
     // modify the length of the list
     const T & at(size_t index) const {
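
The list.hpp hunk is the other half of the pointer-stability story: put() calls ensure_capacity once up front, so internal_put can hold an Entry pointer across the append, because append_assuming_capacity can never reallocate. Here is a small sketch of that pattern, using a hypothetical List that grows via realloc (so it is only valid for trivially copyable element types; ZigList's real growth policy and allocation-failure handling are not reproduced here):

// append_assuming_capacity_sketch.cpp: hypothetical List, not ZigList itself.
#include <cassert>
#include <cstddef>
#include <cstdlib>

template <typename T>
struct List {
    T *items = nullptr;
    size_t length = 0;
    size_t capacity = 0;

    void ensure_capacity(size_t new_capacity) {
        if (capacity >= new_capacity)
            return;
        size_t better = capacity ? capacity : 8;
        while (better < new_capacity)
            better *= 2;
        items = (T *)realloc(items, better * sizeof(T)); // may move the buffer
        capacity = better;
    }
    void append(const T &item) { // may reallocate: pointers into items die
        ensure_capacity(length + 1);
        items[length++] = item;
    }
    void append_assuming_capacity(const T &item) { // never reallocates
        items[length++] = item;
    }
};

int main() {
    List<int> list;
    list.append(1);

    // Mirror put(): reserve room for one more element *before* taking a
    // pointer into the array...
    list.ensure_capacity(list.length + 1);
    int *first = &list.items[0];

    // ...so this append cannot move the buffer, and `first` stays valid.
    list.append_assuming_capacity(2);
    assert(*first == 1 && list.items[1] == 2);

    free(list.items);
    return 0;
}
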