diff --git a/src/hash_map.hpp b/src/hash_map.hpp index 45440d61e7..ce5369eac8 100644 --- a/src/hash_map.hpp +++ b/src/hash_map.hpp @@ -25,6 +25,8 @@ public: } struct Entry { + uint32_t hash; + uint32_t distance_from_start_index; K key; V value; }; @@ -56,21 +58,24 @@ public: Entry *entry = &_entries.items[i]; switch (sz) { case 1: - put_index(key_to_index(entry->key), i, (uint8_t*)_index_bytes); + put_index(entry, i, (uint8_t*)_index_bytes); continue; case 2: - put_index(key_to_index(entry->key), i, (uint16_t*)_index_bytes); + put_index(entry, i, (uint16_t*)_index_bytes); continue; case 4: - put_index(key_to_index(entry->key), i, (uint32_t*)_index_bytes); + put_index(entry, i, (uint32_t*)_index_bytes); continue; default: - put_index(key_to_index(entry->key), i, (size_t*)_index_bytes); + put_index(entry, i, (size_t*)_index_bytes); continue; } } } + // This allows us to take a pointer to an entry in `internal_put` which + // will not become a dead pointer when the array list is appended. + _entries.ensure_capacity(_entries.length + 1); switch (capacity_index_size(_indexes_len)) { case 1: return internal_put(key, value, (uint8_t*)_index_bytes); @@ -187,42 +192,99 @@ private: template void internal_put(const K &key, const V &value, I *indexes) { - size_t start_index = key_to_index(key); - for (size_t roll_over = 0, distance_from_start_index = 0; - roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) + uint32_t hash = HashFunction(key); + uint32_t distance_from_start_index = 0; + size_t start_index = hash_to_index(hash); + for (size_t roll_over = 0; roll_over < _indexes_len; + roll_over += 1, distance_from_start_index += 1) { size_t index_index = (start_index + roll_over) % _indexes_len; I index_data = indexes[index_index]; if (index_data == 0) { - _entries.append({key, value}); + _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value }); indexes[index_index] = _entries.length; if (distance_from_start_index > _max_distance_from_start_index) _max_distance_from_start_index = distance_from_start_index; return; } + // This pointer survives the following append because we call + // _entries.ensure_capacity before internal_put. Entry *entry = &_entries.items[index_data - 1]; - if (EqualFn(entry->key, key)) { - *entry = {key, value}; + if (entry->hash == hash && EqualFn(entry->key, key)) { + *entry = {hash, distance_from_start_index, key, value}; if (distance_from_start_index > _max_distance_from_start_index) _max_distance_from_start_index = distance_from_start_index; return; } + if (entry->distance_from_start_index < distance_from_start_index) { + // In this case, we did not find the item. We will put a new entry. + // However, we will use this index for the new entry, and move + // the previous index down the line, to keep the _max_distance_from_start_index + // as small as possible. + _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value }); + indexes[index_index] = _entries.length; + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + + distance_from_start_index = entry->distance_from_start_index; + + // Find somewhere to put the index we replaced by shifting + // following indexes backwards. + roll_over += 1; + distance_from_start_index += 1; + for (; roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) { + size_t index_index = (start_index + roll_over) % _indexes_len; + I next_index_data = indexes[index_index]; + if (next_index_data == 0) { + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + entry->distance_from_start_index = distance_from_start_index; + indexes[index_index] = index_data; + return; + } + Entry *next_entry = &_entries.items[next_index_data - 1]; + if (next_entry->distance_from_start_index < distance_from_start_index) { + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + entry->distance_from_start_index = distance_from_start_index; + indexes[index_index] = index_data; + distance_from_start_index = next_entry->distance_from_start_index; + entry = next_entry; + index_data = next_index_data; + } + } + zig_unreachable(); + } } zig_unreachable(); } template - void put_index(size_t start_index, size_t entry_index, I *indexes) { + void put_index(Entry *entry, size_t entry_index, I *indexes) { + size_t start_index = hash_to_index(entry->hash); + size_t index_data = entry_index + 1; for (size_t roll_over = 0, distance_from_start_index = 0; roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) { size_t index_index = (start_index + roll_over) % _indexes_len; - if (indexes[index_index] == 0) { - indexes[index_index] = entry_index + 1; + size_t next_index_data = indexes[index_index]; + if (next_index_data == 0) { if (distance_from_start_index > _max_distance_from_start_index) _max_distance_from_start_index = distance_from_start_index; + entry->distance_from_start_index = distance_from_start_index; + indexes[index_index] = index_data; return; } + Entry *next_entry = &_entries.items[next_index_data - 1]; + if (next_entry->distance_from_start_index < distance_from_start_index) { + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + entry->distance_from_start_index = distance_from_start_index; + indexes[index_index] = index_data; + distance_from_start_index = next_entry->distance_from_start_index; + entry = next_entry; + index_data = next_index_data; + } } zig_unreachable(); } @@ -238,7 +300,8 @@ private: template Entry *internal_get2(const K &key, I *indexes) const { - size_t start_index = key_to_index(key); + uint32_t hash = HashFunction(key); + size_t start_index = hash_to_index(hash); for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { size_t index_index = (start_index + roll_over) % _indexes_len; size_t index_data = indexes[index_index]; @@ -246,19 +309,20 @@ private: return nullptr; Entry *entry = &_entries.items[index_data - 1]; - if (EqualFn(entry->key, key)) + if (entry->hash == hash && EqualFn(entry->key, key)) return entry; } return nullptr; } - size_t key_to_index(const K &key) const { - return ((size_t)HashFunction(key)) % _indexes_len; + size_t hash_to_index(uint32_t hash) const { + return ((size_t)hash) % _indexes_len; } template bool internal_remove(const K &key, I *indexes) { - size_t start_index = key_to_index(key); + uint32_t hash = HashFunction(key); + size_t start_index = hash_to_index(hash); for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { size_t index_index = (start_index + roll_over) % _indexes_len; size_t index_data = indexes[index_index]; @@ -267,10 +331,10 @@ private: size_t index = index_data - 1; Entry *entry = &_entries.items[index]; - if (!EqualFn(entry->key, key)) + if (entry->hash != hash || !EqualFn(entry->key, key)) continue; - indexes[index_index] = 0; + size_t prev_index = index_index; _entries.swap_remove(index); if (_entries.length > 0 && _entries.length != index) { // Because of the swap remove, now we need to update the index that was @@ -280,24 +344,29 @@ private: // Now we have to shift over the following indexes. roll_over += 1; - for (; roll_over <= _max_distance_from_start_index; roll_over += 1) { + for (; roll_over < _indexes_len; roll_over += 1) { size_t next_index = (start_index + roll_over) % _indexes_len; - if (indexes[next_index] == 0) - break; - size_t next_start_index = key_to_index(_entries.items[indexes[next_index]].key); - if (next_start_index != start_index) - break; - indexes[next_index - 1] = indexes[next_index]; + if (indexes[next_index] == 0) { + indexes[prev_index] = 0; + return true; + } + Entry *next_entry = &_entries.items[indexes[next_index] - 1]; + if (next_entry->distance_from_start_index == 0) { + indexes[prev_index] = 0; + return true; + } + indexes[prev_index] = indexes[next_index]; + prev_index = next_index; + next_entry->distance_from_start_index -= 1; } - - return true; + zig_unreachable(); } return false; } template void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) { - size_t start_index = key_to_index(_entries.items[new_entry_index].key); + size_t start_index = hash_to_index(_entries.items[new_entry_index].hash); for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { size_t index_index = (start_index + roll_over) % _indexes_len; if (indexes[index_index] == old_entry_index + 1) { diff --git a/src/list.hpp b/src/list.hpp index 2c6d90c855..803a251437 100644 --- a/src/list.hpp +++ b/src/list.hpp @@ -19,6 +19,9 @@ struct ZigList { ensure_capacity(length + 1); items[length++] = item; } + void append_assuming_capacity(const T& item) { + items[length++] = item; + } // remember that the pointer to this item is invalid after you // modify the length of the list const T & at(size_t index) const {