From b3b6ccba50ef7a683ad05546cba2b71e7d10489f Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 3 Jul 2020 23:57:24 +0000
Subject: [PATCH 1/4] reimplement std.HashMap

* breaking changes to the API. Some of the weird decisions from before
  are changed to what would be more expected.
  - `get` returns `?V`, use `getEntry` for the old API.
  - `put` returns `!void`, use `fetchPut` for the old API.
* HashMap now has a comptime parameter of whether to store hashes with
  entries. AutoHashMap has heuristics on whether to set this parameter.
  For example, for integers, it is false, since equality checking is
  cheap, but for strings, it is true, since equality checking is
  probably expensive.
* The implementation has a separate array for entry_index /
  distance_from_start_index. The entries array no longer has holes; it
  is an ArrayList, and iteration is simpler and more cache-coherent.
  This is inspired by Python's new dictionaries.
* HashMap is separated into an "unmanaged" and a "managed" API. The
  unmanaged API is where the actual implementation is; the managed API
  wraps it and provides a more convenient API, storing the allocator.
* Memory usage: when there are 8 or fewer entries, HashMap now incurs
  only a single pointer-sized integer of overhead compared to using a
  plain ArrayList.
* Since the entries array is separate from the indexes array, the holes
  in the indexes array take up less room than the holes in the entries
  array otherwise would. However, the entries array also allocates
  additional capacity for appending into the array.
* HashMap now maintains insertion order. Deletion performs a "swap
  remove". It's now possible to modify the HashMap while iterating.
---
 lib/std/array_list.zig |   16 +
 lib/std/debug.zig      |    2 +-
 lib/std/hash_map.zig   | 1112 ++++++++++++++++++++++++++++------------
 3 files changed, 793 insertions(+), 337 deletions(-)
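As a quick illustration of the API renames in the first bullet, a caller-side
sketch (illustration only, not part of the patch; `gpa` stands in for any
allocator):

    var map = std.AutoHashMap(i32, []const u8).init(gpa);
    defer map.deinit();
    try map.put(1, "one");                    // `put` now returns !void
    const val = map.get(1);                   // `get` now returns ?V
    const entry = map.getEntry(1);            // old `get` behavior: ?*Entry into the map
    const prev = try map.fetchPut(1, "uno");  // old `put` behavior: previous entry, if any

diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig
index b57d051d2b..a68c1fa9d6 100644
--- a/lib/std/array_list.zig
+++ b/lib/std/array_list.zig
@@ -210,6 +210,14 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
             self.capacity = new_len;
         }
 
+        /// Reduce length to `new_len`.
+        /// Invalidates element pointers.
+        /// Keeps capacity the same.
+        pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
+            assert(new_len <= self.items.len);
+            self.items.len = new_len;
+        }
+
         pub fn ensureCapacity(self: *Self, new_capacity: usize) !void {
             var better_capacity = self.capacity;
             if (better_capacity >= new_capacity) return;
@@ -432,6 +440,14 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
             self.capacity = new_len;
         }
 
+        /// Reduce length to `new_len`.
+        /// Invalidates element pointers.
+        /// Keeps capacity the same.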
+ pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void { + assert(new_len <= self.items.len); + self.items.len = new_len; + } + pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void { var better_capacity = self.capacity; if (better_capacity >= new_capacity) return; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 92b79be35c..e9bafec94c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1278,7 +1278,7 @@ pub const DebugInfo = struct { else => return error.MissingDebugInfo, } - if (self.address_map.getValue(ctx.base_address)) |obj_di| { + if (self.address_map.get(ctx.base_address)) |obj_di| { return obj_di; } diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index bcd4280153..4b91a83ba2 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -9,17 +9,15 @@ const autoHash = std.hash.autoHash; const Wyhash = std.hash.Wyhash; const Allocator = mem.Allocator; const builtin = @import("builtin"); - -const want_modification_safety = std.debug.runtime_safety; -const debug_u32 = if (want_modification_safety) u32 else void; +const hash_map = @This(); pub fn AutoHashMap(comptime K: type, comptime V: type) type { - return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K)); + return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); } /// Builtin hashmap for strings as keys. pub fn StringHashMap(comptime V: type) type { - return HashMap([]const u8, V, hashString, eqlString); + return HashMap([]const u8, V, hashString, eqlString, true); } pub fn eqlString(a: []const u8, b: []const u8) bool { @@ -30,422 +28,846 @@ pub fn hashString(s: []const u8) u32 { return @truncate(u32, std.hash.Wyhash.hash(0, s)); } -pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u32, comptime eql: fn (a: K, b: K) bool) type { +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// For a hash map that can be initialized directly that does not store an Allocator +/// field, see `HashMapUnmanaged`. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but only has to call `eql` for hash collisions. +pub fn HashMap( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { return struct { - entries: []Entry, - size: usize, - max_distance_from_start_index: usize, + unmanaged: Unmanaged, allocator: *Allocator, - /// This is used to detect bugs where a hashtable is edited while an iterator is running. - modification_count: debug_u32, - - const Self = @This(); - - /// A *KV is a mutable pointer into this HashMap's internal storage. - /// Modifying the key is undefined behavior. - /// Modifying the value is harmless. - /// *KV pointers become invalid whenever this HashMap is modified, - /// and then any access to the *KV is undefined behavior. 
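+        // Example: choosing `store_hash` by hand with the five-parameter
+        // form described above (a sketch; the declared names are
+        // placeholders, not part of this patch):
+        //
+        //     // Strings hash and compare slowly, so store each hash and
+        //     // call eql only on hash collisions (what StringHashMap does).
+        //     const MyStringMap = HashMap([]const u8, u32, hashString, eqlString, true);
+        //
+        //     // Integer eql is a single comparison, so skip storing hashes
+        //     // (what AutoHashMap chooses via autoEqlIsCheap).
+        //     const MyIntMap = HashMap(u64, u32, getAutoHashFn(u64), getAutoEqlFn(u64), false);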
-        pub const KV = struct {
-            key: K,
-            value: V,
-        };
-
-        const Entry = struct {
-            used: bool,
-            distance_from_start_index: usize,
-            kv: KV,
-        };
-
-        pub const GetOrPutResult = struct {
-            kv: *KV,
-            found_existing: bool,
-        };
+        pub const Unmanaged = HashMapUnmanaged(K, V, hash, eql, store_hash);
+        pub const Entry = Unmanaged.Entry;
+        pub const Hash = Unmanaged.Hash;
+        pub const GetOrPutResult = Unmanaged.GetOrPutResult;
 
+        /// Deprecated. Iterate using `items`.
         pub const Iterator = struct {
             hm: *const Self,
-            // how many items have we returned
-            count: usize,
-            // iterator through the entry array
+            /// Iterator through the entry array.
             index: usize,
-            // used to detect concurrent modification
-            initial_modification_count: debug_u32,
 
-            pub fn next(it: *Iterator) ?*KV {
-                if (want_modification_safety) {
-                    assert(it.initial_modification_count == it.hm.modification_count); // concurrent modification
-                }
-                if (it.count >= it.hm.size) return null;
-                while (it.index < it.hm.entries.len) : (it.index += 1) {
-                    const entry = &it.hm.entries[it.index];
-                    if (entry.used) {
-                        it.index += 1;
-                        it.count += 1;
-                        return &entry.kv;
-                    }
-                }
-                unreachable; // no next item
+            pub fn next(it: *Iterator) ?*Entry {
+                if (it.index >= it.hm.unmanaged.entries.items.len) return null;
+                const result = &it.hm.unmanaged.entries.items[it.index];
+                it.index += 1;
+                return result;
             }
 
-            // Reset the iterator to the initial index
+            /// Reset the iterator to the initial index
             pub fn reset(it: *Iterator) void {
-                it.count = 0;
                 it.index = 0;
-                // Resetting the modification count too
-                it.initial_modification_count = it.hm.modification_count;
             }
         };
 
+        const Self = @This();
+        const Index = Unmanaged.Index;
+
         pub fn init(allocator: *Allocator) Self {
-            return Self{
-                .entries = &[_]Entry{},
+            return .{
+                .unmanaged = .{},
                 .allocator = allocator,
-                .size = 0,
-                .max_distance_from_start_index = 0,
-                .modification_count = if (want_modification_safety) 0 else {},
             };
         }
 
-        pub fn deinit(hm: Self) void {
-            hm.allocator.free(hm.entries);
+        pub fn deinit(self: *Self) void {
+            self.unmanaged.deinit(self.allocator);
+            self.* = undefined;
         }
 
-        pub fn clear(hm: *Self) void {
-            for (hm.entries) |*entry| {
-                entry.used = false;
-            }
-            hm.size = 0;
-            hm.max_distance_from_start_index = 0;
-            hm.incrementModificationCount();
+        pub fn clearRetainingCapacity(self: *Self) void {
+            return self.unmanaged.clearRetainingCapacity();
         }
 
+        pub fn clearAndFree(self: *Self) void {
+            return self.unmanaged.clearAndFree(self.allocator);
+        }
+
+        /// Deprecated. Use `items().len`.
         pub fn count(self: Self) usize {
-            return self.size;
+            return self.items().len;
+        }
+
+        /// Deprecated. Iterate using `items`.
+        pub fn iterator(self: *const Self) Iterator {
+            return Iterator{
+                .hm = self,
+                .index = 0,
+            };
         }
 
         /// If key exists this function cannot fail.
         /// If there is an existing item with `key`, then the result
-        /// kv pointer points to it, and found_existing is true.
+        /// `Entry` pointer points to it, and found_existing is true.
         /// Otherwise, puts a new item with undefined value, and
-        /// the kv pointer points to it. Caller should then initialize
-        /// the data.
+        /// the `Entry` pointer points to it. Caller should then initialize
+        /// the value (but not the key).
         pub fn getOrPut(self: *Self, key: K) !GetOrPutResult {
-            // TODO this implementation can be improved - we should only
-            // have to hash once and find the entry once.
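+        // Example: with the new implementation one getOrPut call hashes and
+        // probes once for both lookup and insert. A counting sketch (`gpa`
+        // and `words` are placeholders):
+        //
+        //     var counts = StringHashMap(usize).init(gpa);
+        //     defer counts.deinit();
+        //     for (words) |word| {
+        //         const gop = try counts.getOrPut(word);
+        //         if (!gop.found_existing) gop.entry.value = 0;
+        //         gop.entry.value += 1;
+        //     }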
- if (self.get(key)) |kv| { - return GetOrPutResult{ - .kv = kv, - .found_existing = true, - }; - } - self.incrementModificationCount(); - try self.autoCapacity(); - const put_result = self.internalPut(key); - assert(put_result.old_kv == null); - return GetOrPutResult{ - .kv = &put_result.new_entry.kv, - .found_existing = false, - }; + return self.unmanaged.getOrPut(self.allocator, key); } - pub fn getOrPutValue(self: *Self, key: K, value: V) !*KV { - const res = try self.getOrPut(key); - if (!res.found_existing) - res.kv.value = value; - - return res.kv; + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + return self.unmanaged.getOrPutAssumeCapacity(key); } - fn optimizedCapacity(expected_count: usize) usize { - // ensure that the hash map will be at most 60% full if - // expected_count items are put into it - var optimized_capacity = expected_count * 5 / 3; - // an overflow here would mean the amount of memory required would not - // be representable in the address space - return math.ceilPowerOfTwo(usize, optimized_capacity) catch unreachable; + pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { + return self.unmanaged.getOrPutValue(self.allocator, key, value); } - /// Increases capacity so that the hash map will be at most - /// 60% full when expected_count items are put into it - pub fn ensureCapacity(self: *Self, expected_count: usize) !void { - if (expected_count == 0) return; - const optimized_capacity = optimizedCapacity(expected_count); - return self.ensureCapacityExact(optimized_capacity); + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. + pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { + return self.unmanaged.ensureCapacity(self.allocator, new_capacity); } - /// Sets the capacity to the new capacity if the new - /// capacity is greater than the current capacity. - /// New capacity must be a power of two. - fn ensureCapacityExact(self: *Self, new_capacity: usize) !void { - // capacity must always be a power of two to allow for modulo - // optimization in the constrainIndex fn - assert(math.isPowerOfTwo(new_capacity)); - - if (new_capacity <= self.entries.len) { - return; - } - - const old_entries = self.entries; - try self.initCapacity(new_capacity); - self.incrementModificationCount(); - if (old_entries.len > 0) { - // dump all of the old elements into the new table - for (old_entries) |*old_entry| { - if (old_entry.used) { - self.internalPut(old_entry.kv.key).new_entry.kv.value = old_entry.kv.value; - } - } - self.allocator.free(old_entries); - } + /// Returns the number of total elements which may be present before it is + /// no longer guaranteed that no allocations will be performed. + pub fn capacity(self: *Self) usize { + return self.unmanaged.capacity(); } - /// Returns the kv pair that was already there. - pub fn put(self: *Self, key: K, value: V) !?KV { - try self.autoCapacity(); - return putAssumeCapacity(self, key, value); + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPut`. 
+        pub fn put(self: *Self, key: K, value: V) !void {
+            return self.unmanaged.put(self.allocator, key, value);
+        }
 
-        /// Calls put() and asserts that no kv pair is clobbered.
+        /// Inserts a key-value pair into the hash map, asserting that no previous
+        /// entry with the same key is already present.
         pub fn putNoClobber(self: *Self, key: K, value: V) !void {
-            assert((try self.put(key, value)) == null);
+            return self.unmanaged.putNoClobber(self.allocator, key, value);
         }
 
-        pub fn putAssumeCapacity(self: *Self, key: K, value: V) ?KV {
-            assert(self.count() < self.entries.len);
-            self.incrementModificationCount();
-
-            const put_result = self.internalPut(key);
-            put_result.new_entry.kv.value = value;
-            return put_result.old_kv;
+        /// Asserts there is enough capacity to store the new key-value pair.
+        /// Clobbers any existing data. To detect if a put would clobber
+        /// existing data, see `getOrPutAssumeCapacity`.
+        pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
+            return self.unmanaged.putAssumeCapacity(key, value);
         }
 
+        /// Asserts there is enough capacity to store the new key-value pair.
+        /// Asserts that it does not clobber any existing data.
+        /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
         pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
-            assert(self.putAssumeCapacity(key, value) == null);
+            return self.unmanaged.putAssumeCapacityNoClobber(key, value);
         }
 
-        pub fn get(hm: *const Self, key: K) ?*KV {
-            if (hm.entries.len == 0) {
-                return null;
-            }
-            return hm.internalGet(key);
+        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+        pub fn fetchPut(self: *Self, key: K, value: V) !?Entry {
+            return self.unmanaged.fetchPut(self.allocator, key, value);
         }
 
-        pub fn getValue(hm: *const Self, key: K) ?V {
-            return if (hm.get(key)) |kv| kv.value else null;
+        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+        /// If insertion happens, asserts there is enough capacity without allocating.
+        pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
+            return self.unmanaged.fetchPutAssumeCapacity(key, value);
         }
 
-        pub fn contains(hm: *const Self, key: K) bool {
-            return hm.get(key) != null;
+        pub fn getEntry(self: Self, key: K) ?*Entry {
+            return self.unmanaged.getEntry(key);
         }
 
-        /// Returns any kv pair that was removed.
-        pub fn remove(hm: *Self, key: K) ?KV {
-            if (hm.entries.len == 0) return null;
-            hm.incrementModificationCount();
-            const start_index = hm.keyToIndex(key);
-            {
-                var roll_over: usize = 0;
-                while (roll_over <= hm.max_distance_from_start_index) : (roll_over += 1) {
-                    const index = hm.constrainIndex(start_index + roll_over);
-                    var entry = &hm.entries[index];
-
-                    if (!entry.used) return null;
-
-                    if (!eql(entry.kv.key, key)) continue;
-
-                    const removed_kv = entry.kv;
-                    while (roll_over < hm.entries.len) : (roll_over += 1) {
-                        const next_index = hm.constrainIndex(start_index + roll_over + 1);
-                        const next_entry = &hm.entries[next_index];
-                        if (!next_entry.used or next_entry.distance_from_start_index == 0) {
-                            entry.used = false;
-                            hm.size -= 1;
-                            return removed_kv;
-                        }
-                        entry.* = next_entry.*;
-                        entry.distance_from_start_index -= 1;
-                        entry = next_entry;
-                    }
-                    unreachable; // shifting everything in the table
-                }
-            }
-            return null;
+        pub fn get(self: Self, key: K) ?V {
+            return self.unmanaged.get(key);
         }
 
-        /// Calls remove(), asserts that a kv pair is removed, and discards it.
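+        // Example of the `fetchPut` contract above (a sketch; `gpa` is a
+        // placeholder allocator):
+        //
+        //     var map = AutoHashMap(i32, i32).init(gpa);
+        //     defer map.deinit();
+        //     assert((try map.fetchPut(5, 55)) == null); // nothing replaced
+        //     const prev = (try map.fetchPut(5, 66)).?;  // replaces 55
+        //     assert(prev.value == 55);
+        //     assert(map.get(5).? == 66);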
- pub fn removeAssertDiscard(hm: *Self, key: K) void { - assert(hm.remove(key) != null); + pub fn contains(self: Self, key: K) bool { + return self.unmanaged.contains(key); } - pub fn iterator(hm: *const Self) Iterator { - return Iterator{ - .hm = hm, - .count = 0, - .index = 0, - .initial_modification_count = hm.modification_count, - }; + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + return self.unmanaged.remove(key); + } + + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. + pub fn removeAssertDiscard(self: *Self, key: K) void { + return self.unmanaged.removeAssertDiscard(key); + } + + pub fn items(self: Self) []Entry { + return self.unmanaged.items(); } pub fn clone(self: Self) !Self { - var other = Self.init(self.allocator); - try other.initCapacity(self.entries.len); - var it = self.iterator(); - while (it.next()) |entry| { - try other.putNoClobber(entry.key, entry.value); + var other = try self.unmanaged.clone(self.allocator); + return other.promote(self.allocator); + } + }; +} + +/// General purpose hash table. +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// This type does not store an Allocator field - the Allocator must be passed in +/// with each function call that requires it. See `HashMap` for a type that stores +/// an Allocator field for convenience. +/// Can be initialized directly using the default field values. +/// This type is designed to have low overhead for small numbers of entries. When +/// `store_hash` is `false` and the number of entries in the map is less than 9, +/// the overhead cost of using `HashMapUnmanaged` rather than `std.ArrayList` is +/// only a single pointer-sized integer. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but guarantees only one call to `eql` per insertion/deletion. +pub fn HashMapUnmanaged( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { + return struct { + /// It is permitted to access this field directly. + entries: std.ArrayListUnmanaged(Entry) = .{}, + + /// When entries length is less than `linear_scan_max`, this remains `null`. + /// Once entries length grows big enough, this field is allocated. There is + /// an IndexHeader followed by an array of Index(I) structs, where I is defined + /// by how many total indexes there are. + index_header: ?*IndexHeader = null, + + /// Modifying the key is illegal behavior. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this HashMap is modified, + /// unless `ensureCapacity` was previously used. + pub const Entry = struct { + /// This field is `void` if `store_hash` is `false`. 
+ hash: Hash, + key: K, + value: V, + }; + + pub const Hash = if (store_hash) u32 else void; + + pub const GetOrPutResult = struct { + entry: *Entry, + found_existing: bool, + }; + + pub const Managed = HashMap(K, V, hash, eql, store_hash); + + const Self = @This(); + + const linear_scan_max = 8; + + pub fn promote(self: Self, allocator: *Allocator) Managed { + return .{ + .unmanaged = self, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self, allocator: *Allocator) void { + self.entries.deinit(allocator); + if (self.index_header) |header| { + header.free(allocator); + } + self.* = undefined; + } + + pub fn clearRetainingCapacity(self: *Self) void { + self.entries.items.len = 0; + if (self.header) |header| { + header.max_distance_from_start_index = 0; + const indexes = header.indexes(u8); + @memset(indexes.ptr, 0xff, indexes.len); + } + } + + pub fn clearAndFree(self: *Self, allocator: *Allocator) void { + self.entries.shrink(allocator, 0); + if (self.header) |header| { + header.free(allocator); + self.header = null; + } + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { + self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| { + // "If key exists this function cannot fail." + return GetOrPutResult{ + .entry = self.getEntry(key) orelse return err, + .found_existing = true, + }; + }; + return self.getOrPutAssumeCapacity(key); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + const header = self.index_header orelse { + // Linear scan. + const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |*item| { + if (item.hash == h and eql(key, item.key)) { + return GetOrPutResult{ + .entry = item, + .found_existing = true, + }; + } + } + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return GetOrPutResult{ + .entry = new_entry, + .found_existing = false, + }; + }; + + switch (header.capacityIndexType()) { + .u8 => return self.getOrPutInternal(key, header, u8), + .u16 => return self.getOrPutInternal(key, header, u16), + .u32 => return self.getOrPutInternal(key, header, u32), + .usize => return self.getOrPutInternal(key, header, usize), + } + } + + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { + const res = try self.getOrPut(allocator, key); + if (!res.found_existing) + res.entry.value = value; + + return res.entry; + } + + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. 
+        pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
+            try self.entries.ensureCapacity(allocator, new_capacity);
+            if (new_capacity <= linear_scan_max) return;
+
+            // Resize if indexes would be more than 75% full.
+            const needed_len = new_capacity * 4 / 3;
+            if (self.index_header) |header| {
+                if (needed_len > header.indexes_len) {
+                    var new_indexes_len = header.indexes_len;
+                    while (true) {
+                        new_indexes_len += new_indexes_len / 2 + 8;
+                        if (new_indexes_len >= needed_len) break;
+                    }
+                    const new_header = try IndexHeader.alloc(allocator, new_indexes_len);
+                    self.insertAllEntriesIntoNewHeader(new_header);
+                    header.free(allocator);
+                    self.index_header = new_header;
+                }
+            } else {
+                const header = try IndexHeader.alloc(allocator, needed_len);
+                self.insertAllEntriesIntoNewHeader(header);
+                self.index_header = header;
+            }
+        }
+
+        /// Returns the number of total elements which may be present before it is
+        /// no longer guaranteed that no allocations will be performed.
+        pub fn capacity(self: Self) usize {
+            const entry_cap = self.entries.capacity;
+            const header = self.index_header orelse return math.min(linear_scan_max, entry_cap);
+            const indexes_cap = (header.indexes_len + 1) * 3 / 4;
+            return math.min(entry_cap, indexes_cap);
+        }
+
+        /// Clobbers any existing data. To detect if a put would clobber
+        /// existing data, see `getOrPut`.
+        pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void {
+            const result = try self.getOrPut(allocator, key);
+            result.entry.value = value;
+        }
+
+        /// Inserts a key-value pair into the hash map, asserting that no previous
+        /// entry with the same key is already present.
+        pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void {
+            const result = try self.getOrPut(allocator, key);
+            assert(!result.found_existing);
+            result.entry.value = value;
+        }
+
+        /// Asserts there is enough capacity to store the new key-value pair.
+        /// Clobbers any existing data. To detect if a put would clobber
+        /// existing data, see `getOrPutAssumeCapacity`.
+        pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
+            const result = self.getOrPutAssumeCapacity(key);
+            result.entry.value = value;
+        }
+
+        /// Asserts there is enough capacity to store the new key-value pair.
+        /// Asserts that it does not clobber any existing data.
+        /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
+        pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
+            const result = self.getOrPutAssumeCapacity(key);
+            assert(!result.found_existing);
+            result.entry.value = value;
+        }
+
+        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+        pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry {
+            const gop = try self.getOrPut(allocator, key);
+            var result: ?Entry = null;
+            if (gop.found_existing) {
+                result = gop.entry.*;
+            }
+            gop.entry.value = value;
+            return result;
+        }
+
+        /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+        /// If insertion happens, asserts there is enough capacity without allocating.
+        pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
+            const gop = self.getOrPutAssumeCapacity(key);
+            var result: ?Entry = null;
+            if (gop.found_existing) {
+                result = gop.entry.*;
+            }
+            gop.entry.value = value;
+            return result;
+        }
+
+        pub fn getEntry(self: Self, key: K) ?*Entry {
+            const header = self.index_header orelse {
+                // Linear scan.
+                const h = if (store_hash) hash(key) else {};
+                for (self.entries.items) |*item| {
+                    if (item.hash == h and eql(key, item.key)) {
+                        return item;
+                    }
+                }
+                return null;
+            };
+
+            switch (header.capacityIndexType()) {
+                .u8 => return self.getInternal(key, header, u8),
+                .u16 => return self.getInternal(key, header, u16),
+                .u32 => return self.getInternal(key, header, u32),
+                .usize => return self.getInternal(key, header, usize),
+            }
+        }
+
+        pub fn get(self: Self, key: K) ?V {
+            return if (self.getEntry(key)) |entry| entry.value else null;
+        }
+
+        pub fn contains(self: Self, key: K) bool {
+            return self.getEntry(key) != null;
+        }
+
+        /// If there is an `Entry` with a matching key, it is deleted from
+        /// the hash map, and then returned from this function.
+        pub fn remove(self: *Self, key: K) ?Entry {
+            const header = self.index_header orelse {
+                // Linear scan.
+                const h = if (store_hash) hash(key) else {};
+                for (self.entries.items) |item, i| {
+                    if (item.hash == h and eql(key, item.key)) {
+                        return self.entries.swapRemove(i);
+                    }
+                }
+                return null;
+            };
+            switch (header.capacityIndexType()) {
+                .u8 => return self.removeInternal(key, header, u8),
+                .u16 => return self.removeInternal(key, header, u16),
+                .u32 => return self.removeInternal(key, header, u32),
+                .usize => return self.removeInternal(key, header, usize),
+            }
+        }
+
+        /// Asserts there is an `Entry` with matching key, deletes it from the hash map,
+        /// and discards it.
+        pub fn removeAssertDiscard(self: *Self, key: K) void {
+            assert(self.remove(key) != null);
+        }
+
+        pub fn items(self: Self) []Entry {
+            return self.entries.items;
+        }
+
+        pub fn clone(self: Self, allocator: *Allocator) !Self {
+            // TODO this can be made more efficient by directly allocating
+            // the memory slices and memcpying the elements.
+            var other: Self = .{};
+            try other.ensureCapacity(allocator, self.entries.items.len);
+            for (self.entries.items) |entry| {
+                other.putAssumeCapacityNoClobber(entry.key, entry.value);
+            }
+            return other;
+        }
 
-        fn autoCapacity(self: *Self) !void {
-            if (self.entries.len == 0) {
-                return self.ensureCapacityExact(16);
-            }
-            // if we get too full (60%), double the capacity
-            if (self.size * 5 >= self.entries.len * 3) {
-                return self.ensureCapacityExact(self.entries.len * 2);
-            }
-        }
-
-        fn initCapacity(hm: *Self, capacity: usize) !void {
-            hm.entries = try hm.allocator.alloc(Entry, capacity);
-            hm.size = 0;
-            hm.max_distance_from_start_index = 0;
-            for (hm.entries) |*entry| {
-                entry.used = false;
-            }
-        }
-
-        fn incrementModificationCount(hm: *Self) void {
-            if (want_modification_safety) {
-                hm.modification_count +%= 1;
-            }
-        }
-
-        const InternalPutResult = struct {
-            new_entry: *Entry,
-            old_kv: ?KV,
-        };
-
-        /// Returns a pointer to the new entry.
-        /// Asserts that there is enough space for the new item.
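+        // Example of the unmanaged API above (a sketch; `gpa` is a
+        // placeholder allocator). Because linear_scan_max is 8, the index
+        // header is only allocated once a ninth entry goes in; below that,
+        // lookups are a linear scan over the entries array:
+        //
+        //     var map: HashMapUnmanaged(u32, u32, getAutoHashFn(u32), getAutoEqlFn(u32), false) = .{};
+        //     defer map.deinit(gpa);
+        //     var i: u32 = 0;
+        //     while (i < 8) : (i += 1) try map.put(gpa, i, i);
+        //     assert(map.index_header == null); // 8 entries: still linear scan
+        //     try map.put(gpa, 8, 8);
+        //     assert(map.index_header != null); // 9th entry builds the index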
- fn internalPut(self: *Self, orig_key: K) InternalPutResult { - var key = orig_key; - var value: V = undefined; - const start_index = self.keyToIndex(key); + fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.hashToIndex(h); var roll_over: usize = 0; - var distance_from_start_index: usize = 0; - var got_result_entry = false; - var result = InternalPutResult{ - .new_entry = undefined, - .old_kv = null, - }; - while (roll_over < self.entries.len) : ({ - roll_over += 1; - distance_from_start_index += 1; - }) { - const index = self.constrainIndex(start_index + roll_over); - const entry = &self.entries[index]; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = (start_index + roll_over) % header.indexes_len; + var index = &indexes[index_index]; + if (index.isEmpty()) + return null; - if (entry.used and !eql(entry.kv.key, key)) { - if (entry.distance_from_start_index < distance_from_start_index) { - // robin hood to the rescue - const tmp = entry.*; - self.max_distance_from_start_index = math.max(self.max_distance_from_start_index, distance_from_start_index); - if (!got_result_entry) { - got_result_entry = true; - result.new_entry = entry; - } - entry.* = Entry{ - .used = true, - .distance_from_start_index = distance_from_start_index, - .kv = KV{ - .key = key, - .value = value, - }, - }; - key = tmp.kv.key; - value = tmp.kv.value; - distance_from_start_index = tmp.distance_from_start_index; - } + const entry = &self.entries.items[index.entry_index]; + + const hash_match = if (store_hash) h == entry.hash else true; + if (!hash_match or !eql(key, entry.key)) continue; + + const removed_entry = self.entries.swapRemove(index.entry_index); + if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) { + // Because of the swap remove, now we need to update the index that was + // pointing to the last entry and is now pointing to this removed item slot. + self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes); } - if (entry.used) { - result.old_kv = entry.kv; - } else { - // adding an entry. otherwise overwriting old value with - // same key - self.size += 1; - } - - self.max_distance_from_start_index = math.max(distance_from_start_index, self.max_distance_from_start_index); - if (!got_result_entry) { - result.new_entry = entry; - } - entry.* = Entry{ - .used = true, - .distance_from_start_index = distance_from_start_index, - .kv = KV{ - .key = key, - .value = value, - }, - }; - return result; - } - unreachable; // put into a full map - } - - fn internalGet(hm: Self, key: K) ?*KV { - const start_index = hm.keyToIndex(key); - { - var roll_over: usize = 0; - while (roll_over <= hm.max_distance_from_start_index) : (roll_over += 1) { - const index = hm.constrainIndex(start_index + roll_over); - const entry = &hm.entries[index]; - - if (!entry.used) return null; - if (eql(entry.kv.key, key)) return &entry.kv; + // Now we have to shift over the following indexes. 
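+                // (Backward-shift deletion: each following index slides back
+                // one slot until we reach an empty slot or an index that is
+                // already at its ideal position, so probe chains stay
+                // contiguous and no tombstones are needed.)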
+ roll_over += 1; + while (roll_over < header.indexes_len) : (roll_over += 1) { + const next_index_index = (start_index + roll_over) % header.indexes_len; + const next_index = &indexes[next_index_index]; + if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { + index.setEmpty(); + return removed_entry; + } + index.* = next_index.*; + index.distance_from_start_index -= 1; + index = next_index; } + unreachable; } return null; } - fn keyToIndex(hm: Self, key: K) usize { - return hm.constrainIndex(@as(usize, hash(key))); + fn updateEntryIndex( + self: *Self, + header: *IndexHeader, + old_entry_index: usize, + new_entry_index: usize, + comptime I: type, + indexes: []Index(I), + ) void { + const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); + const start_index = header.hashToIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = (start_index + roll_over) % header.indexes_len; + const index = &indexes[index_index]; + if (index.entry_index == old_entry_index) { + index.entry_index = @intCast(I, new_entry_index); + return; + } + } + unreachable; } - fn constrainIndex(hm: Self, i: usize) usize { - // this is an optimization for modulo of power of two integers; - // it requires hm.entries.len to always be a power of two - return i & (hm.entries.len - 1); + /// Must ensureCapacity before calling this. + fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.hashToIndex(h); + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over <= header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = (start_index + roll_over) % header.indexes_len; + const index = indexes[index_index]; + if (index.isEmpty()) { + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + + // This pointer survives the following append because we call + // entries.ensureCapacity before getOrPutInternal. + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) { + return .{ + .found_existing = true, + .entry = entry, + }; + } + if (index.distance_from_start_index < distance_from_start_index) { + // In this case, we did not find the item. We will put a new entry. + // However, we will use this index for the new entry, and move + // the previous index down the line, to keep the max_distance_from_start_index + // as small as possible. 
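+                    // (This is the robin hood step: the new entry takes the
+                    // slot of an index that sits closer to its ideal bucket,
+                    // and the displaced index is re-inserted further along
+                    // the probe chain by the loop below.)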
+ indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + + distance_from_start_index = index.distance_from_start_index; + var prev_entry_index = index.entry_index; + + // Find somewhere to put the index we replaced by shifting + // following indexes backwards. + roll_over += 1; + distance_from_start_index += 1; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const next_index_index = (start_index + roll_over) % header.indexes_len; + const next_index = indexes[next_index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + if (next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + prev_entry_index = next_index.entry_index; + } + } + unreachable; + } + } + unreachable; + } + + fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?*Entry { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.hashToIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = (start_index + roll_over) % header.indexes_len; + const index = indexes[index_index]; + if (index.isEmpty()) + return null; + + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) + return entry; + } + return null; + } + + fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void { + switch (header.capacityIndexType()) { + .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8), + .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16), + .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32), + .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize), + } + } + + fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void { + const indexes = header.indexes(I); + entry_loop: for (self.entries.items) |entry, i| { + const h = if (store_hash) entry.hash else hash(entry.key); + const start_index = header.hashToIndex(h); + var entry_index = i; + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = (start_index + roll_over) % header.indexes_len; + const next_index = indexes[index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + continue :entry_loop; + } + if 
(next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + entry_index = next_index.entry_index; + } + } + unreachable; + } } }; } +const CapacityIndexType = enum { u8, u16, u32, usize }; + +fn capacityIndexType(indexes_len: usize) CapacityIndexType { + if (indexes_len < math.maxInt(u8)) + return .u8; + if (indexes_len < math.maxInt(u16)) + return .u16; + if (indexes_len < math.maxInt(u32)) + return .u32; + return .usize; +} + +fn capacityIndexSize(indexes_len: usize) usize { + switch (capacityIndexType(indexes_len)) { + .u8 => return @sizeOf(Index(u8)), + .u16 => return @sizeOf(Index(u16)), + .u32 => return @sizeOf(Index(u32)), + .usize => return @sizeOf(Index(usize)), + } +} + +fn Index(comptime I: type) type { + return extern struct { + entry_index: I, + distance_from_start_index: I, + + const Self = @This(); + + fn isEmpty(idx: Self) bool { + return idx.entry_index == math.maxInt(I); + } + + fn setEmpty(idx: *Self) void { + idx.entry_index = math.maxInt(I); + } + }; +} + +/// This struct is trailed by an array of `Index(I)`, where `I` +/// and the array length are determined by `indexes_len`. +const IndexHeader = struct { + max_distance_from_start_index: usize, + indexes_len: usize, + + fn hashToIndex(header: IndexHeader, h: u32) usize { + return @as(usize, h) % header.indexes_len; + } + + fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { + const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); + return start[0..header.indexes_len]; + } + + fn capacityIndexType(header: IndexHeader) CapacityIndexType { + return hash_map.capacityIndexType(header.indexes_len); + } + + fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void { + if (distance_from_start_index > header.max_distance_from_start_index) { + header.max_distance_from_start_index = distance_from_start_index; + } + } + + fn alloc(allocator: *Allocator, len: usize) !*IndexHeader { + const index_size = hash_map.capacityIndexSize(len); + const nbytes = @sizeOf(IndexHeader) + index_size * len; + const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact); + @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader)); + const result = @ptrCast(*IndexHeader, bytes.ptr); + result.* = .{ + .max_distance_from_start_index = 0, + .indexes_len = len, + }; + return result; + } + + fn free(header: *IndexHeader, allocator: *Allocator) void { + const index_size = hash_map.capacityIndexSize(header.indexes_len); + const ptr = @ptrCast([*]u8, header); + const slice = ptr[0 .. 
@sizeOf(IndexHeader) + header.indexes_len * index_size]; + allocator.free(slice); + } +}; + test "basic hash map usage" { var map = AutoHashMap(i32, i32).init(std.testing.allocator); defer map.deinit(); - testing.expect((try map.put(1, 11)) == null); - testing.expect((try map.put(2, 22)) == null); - testing.expect((try map.put(3, 33)) == null); - testing.expect((try map.put(4, 44)) == null); + testing.expect((try map.fetchPut(1, 11)) == null); + testing.expect((try map.fetchPut(2, 22)) == null); + testing.expect((try map.fetchPut(3, 33)) == null); + testing.expect((try map.fetchPut(4, 44)) == null); try map.putNoClobber(5, 55); - testing.expect((try map.put(5, 66)).?.value == 55); - testing.expect((try map.put(5, 55)).?.value == 66); + testing.expect((try map.fetchPut(5, 66)).?.value == 55); + testing.expect((try map.fetchPut(5, 55)).?.value == 66); const gop1 = try map.getOrPut(5); testing.expect(gop1.found_existing == true); - testing.expect(gop1.kv.value == 55); - gop1.kv.value = 77; - testing.expect(map.get(5).?.value == 77); + testing.expect(gop1.entry.value == 55); + gop1.entry.value = 77; + testing.expect(map.getEntry(5).?.value == 77); const gop2 = try map.getOrPut(99); testing.expect(gop2.found_existing == false); - gop2.kv.value = 42; - testing.expect(map.get(99).?.value == 42); + gop2.entry.value = 42; + testing.expect(map.getEntry(99).?.value == 42); const gop3 = try map.getOrPutValue(5, 5); testing.expect(gop3.value == 77); @@ -454,15 +876,15 @@ test "basic hash map usage" { testing.expect(gop4.value == 41); testing.expect(map.contains(2)); - testing.expect(map.get(2).?.value == 22); - testing.expect(map.getValue(2).? == 22); + testing.expect(map.getEntry(2).?.value == 22); + testing.expect(map.get(2).? == 22); const rmv1 = map.remove(2); testing.expect(rmv1.?.key == 2); testing.expect(rmv1.?.value == 22); testing.expect(map.remove(2) == null); + testing.expect(map.getEntry(2) == null); testing.expect(map.get(2) == null); - testing.expect(map.getValue(2) == null); map.removeAssertDiscard(3); } @@ -498,8 +920,8 @@ test "iterator hash map" { it.reset(); var count: usize = 0; - while (it.next()) |kv| : (count += 1) { - buffer[@intCast(usize, kv.key)] = kv.value; + while (it.next()) |entry| : (count += 1) { + buffer[@intCast(usize, entry.key)] = entry.value; } testing.expect(count == 3); testing.expect(it.next() == null); @@ -510,8 +932,8 @@ test "iterator hash map" { it.reset(); count = 0; - while (it.next()) |kv| { - buffer[@intCast(usize, kv.key)] = kv.value; + while (it.next()) |entry| { + buffer[@intCast(usize, entry.key)] = entry.value; count += 1; if (count >= 2) break; } @@ -531,14 +953,14 @@ test "ensure capacity" { defer map.deinit(); try map.ensureCapacity(20); - const initialCapacity = map.entries.len; - testing.expect(initialCapacity >= 20); + const initial_capacity = map.capacity(); + testing.expect(initial_capacity >= 20); var i: i32 = 0; while (i < 20) : (i += 1) { - testing.expect(map.putAssumeCapacity(i, i + 10) == null); + testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null); } // shouldn't resize from putAssumeCapacity - testing.expect(initialCapacity == map.entries.len); + testing.expect(initial_capacity == map.capacity()); } pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { @@ -575,6 +997,24 @@ pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { }.eql; } +pub fn autoEqlIsCheap(comptime K: type) bool { + return switch (@typeInfo(K)) { + .Bool, + .Int, + .Float, + .Pointer, + .ComptimeFloat, + .ComptimeInt, + .Enum, + .Fn, + .ErrorSet, + 
.AnyFrame, + .EnumLiteral, + => true, + else => false, + }; +} + pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) { return struct { fn hash(key: K) u32 { From 632acffcbd96a085ea92899e6f37465e40178f44 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 4 Jul 2020 01:31:29 +0000 Subject: [PATCH 2/4] update std lib to new hash map API --- lib/std/buf_map.zig | 15 ++++--- lib/std/buf_set.zig | 8 ++-- lib/std/build.zig | 12 +++--- lib/std/hash_map.zig | 25 ++++++++---- lib/std/http/headers.zig | 72 ++++++++++++++++----------------- lib/std/json.zig | 56 ++++++++++++------------- src-self-hosted/main.zig | 4 +- src-self-hosted/translate_c.zig | 27 ++++++------- 8 files changed, 111 insertions(+), 108 deletions(-) diff --git a/lib/std/buf_map.zig b/lib/std/buf_map.zig index e8bc735b57..651561b374 100644 --- a/lib/std/buf_map.zig +++ b/lib/std/buf_map.zig @@ -33,10 +33,10 @@ pub const BufMap = struct { pub fn setMove(self: *BufMap, key: []u8, value: []u8) !void { const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { - self.free(get_or_put.kv.key); - get_or_put.kv.key = key; + self.free(get_or_put.entry.key); + get_or_put.entry.key = key; } - get_or_put.kv.value = value; + get_or_put.entry.value = value; } /// `key` and `value` are copied into the BufMap. @@ -45,19 +45,18 @@ pub const BufMap = struct { errdefer self.free(value_copy); const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { - self.free(get_or_put.kv.value); + self.free(get_or_put.entry.value); } else { - get_or_put.kv.key = self.copy(key) catch |err| { + get_or_put.entry.key = self.copy(key) catch |err| { _ = self.hash_map.remove(key); return err; }; } - get_or_put.kv.value = value_copy; + get_or_put.entry.value = value_copy; } pub fn get(self: BufMap, key: []const u8) ?[]const u8 { - const entry = self.hash_map.get(key) orelse return null; - return entry.value; + return self.hash_map.get(key); } pub fn delete(self: *BufMap, key: []const u8) void { diff --git a/lib/std/buf_set.zig b/lib/std/buf_set.zig index 89df0478ff..d8a0264bd7 100644 --- a/lib/std/buf_set.zig +++ b/lib/std/buf_set.zig @@ -14,14 +14,12 @@ pub const BufSet = struct { return self; } - pub fn deinit(self: *const BufSet) void { - var it = self.hash_map.iterator(); - while (true) { - const entry = it.next() orelse break; + pub fn deinit(self: *BufSet) void { + for (self.hash_map.items()) |entry| { self.free(entry.key); } - self.hash_map.deinit(); + self.* = undefined; } pub fn put(self: *BufSet, key: []const u8) !void { diff --git a/lib/std/build.zig b/lib/std/build.zig index df1dc6d73a..5619bc5fe6 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -422,12 +422,12 @@ pub const Builder = struct { .type_id = type_id, .description = description, }; - if ((self.available_options_map.put(name, available_option) catch unreachable) != null) { + if ((self.available_options_map.fetchPut(name, available_option) catch unreachable) != null) { panic("Option '{}' declared twice", .{name}); } self.available_options_list.append(available_option) catch unreachable; - const entry = self.user_input_options.get(name) orelse return null; + const entry = self.user_input_options.getEntry(name) orelse return null; entry.value.used = true; switch (type_id) { TypeId.Bool => switch (entry.value.value) { @@ -634,7 +634,7 @@ pub const Builder = struct { pub fn addUserInputOption(self: *Builder, name: []const u8, value: []const u8) !bool { const gop = try 
self.user_input_options.getOrPut(name); if (!gop.found_existing) { - gop.kv.value = UserInputOption{ + gop.entry.value = UserInputOption{ .name = name, .value = UserValue{ .Scalar = value }, .used = false, @@ -643,7 +643,7 @@ pub const Builder = struct { } // option already exists - switch (gop.kv.value.value) { + switch (gop.entry.value.value) { UserValue.Scalar => |s| { // turn it into a list var list = ArrayList([]const u8).init(self.allocator); @@ -675,7 +675,7 @@ pub const Builder = struct { pub fn addUserInputFlag(self: *Builder, name: []const u8) !bool { const gop = try self.user_input_options.getOrPut(name); if (!gop.found_existing) { - gop.kv.value = UserInputOption{ + gop.entry.value = UserInputOption{ .name = name, .value = UserValue{ .Flag = {} }, .used = false, @@ -684,7 +684,7 @@ pub const Builder = struct { } // option already exists - switch (gop.kv.value.value) { + switch (gop.entry.value.value) { UserValue.Scalar => |s| { warn("Flag '-D{}' conflicts with option '-D{}={}'.\n", .{ name, name, s }); return true; diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 4b91a83ba2..d0b18d9419 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -293,18 +293,22 @@ pub fn HashMapUnmanaged( pub fn clearRetainingCapacity(self: *Self) void { self.entries.items.len = 0; - if (self.header) |header| { + if (self.index_header) |header| { header.max_distance_from_start_index = 0; - const indexes = header.indexes(u8); - @memset(indexes.ptr, 0xff, indexes.len); + switch (header.capacityIndexType()) { + .u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty), + .u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty), + .u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty), + .usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty), + } } } pub fn clearAndFree(self: *Self, allocator: *Allocator) void { self.entries.shrink(allocator, 0); - if (self.header) |header| { + if (self.index_header) |header| { header.free(allocator); - self.header = null; + self.index_header = null; } } @@ -378,13 +382,13 @@ pub fn HashMapUnmanaged( try self.entries.ensureCapacity(allocator, new_capacity); if (new_capacity <= linear_scan_max) return; - // Resize if indexes would be more than 75% full. - const needed_len = new_capacity * 4 / 3; + // Resize if indexes would be more than 60% full. 
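+            // (Worked example: new_capacity = 60 requires at least
+            // 60 * 5 / 3 = 100 index slots, i.e. a load factor of
+            // 60/100 = 60%.)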
+            const needed_len = new_capacity * 5 / 3;
             if (self.index_header) |header| {
                 if (needed_len > header.indexes_len) {
                     var new_indexes_len = header.indexes_len;
                     while (true) {
                         new_indexes_len += new_indexes_len / 2 + 8;
                         if (new_indexes_len >= needed_len) break;
                     }
                     const new_header = try IndexHeader.alloc(allocator, new_indexes_len);
                     self.insertAllEntriesIntoNewHeader(new_header);
                     header.free(allocator);
                     self.index_header = new_header;
                 }
             } else {
                 const header = try IndexHeader.alloc(allocator, needed_len);
                 self.insertAllEntriesIntoNewHeader(header);
                 self.index_header = header;
             }
         }
@@ -789,6 +793,11 @@ fn Index(comptime I: type) type {
 
         const Self = @This();
 
+        const empty = Self{
+            .entry_index = math.maxInt(I),
+            .distance_from_start_index = undefined,
+        };
+
         fn isEmpty(idx: Self) bool {
             return idx.entry_index == math.maxInt(I);
         }
diff --git a/lib/std/http/headers.zig b/lib/std/http/headers.zig
index ba929a446c..1c48a07819 100644
--- a/lib/std/http/headers.zig
+++ b/lib/std/http/headers.zig
@@ -118,13 +118,12 @@ pub const Headers = struct {
         };
     }
 
-    pub fn deinit(self: Self) void {
+    pub fn deinit(self: *Self) void {
         {
-            var it = self.index.iterator();
-            while (it.next()) |kv| {
-                var dex = &kv.value;
+            for (self.index.items()) |*entry| {
+                const dex = &entry.value;
                 dex.deinit();
-                self.allocator.free(kv.key);
+                self.allocator.free(entry.key);
             }
             self.index.deinit();
         }
@@ -134,6 +133,7 @@ pub const Headers = struct {
             }
             self.data.deinit();
         }
+        self.* = undefined;
     }
 
     pub fn clone(self: Self, allocator: *Allocator) !Self {
@@ -155,10 +155,10 @@ pub const Headers = struct {
         const n = self.data.items.len + 1;
         try self.data.ensureCapacity(n);
         var entry: HeaderEntry = undefined;
-        if (self.index.get(name)) |kv| {
+        if (self.index.getEntry(name)) |kv| {
             entry = try HeaderEntry.init(self.allocator, kv.key, value, never_index);
             errdefer entry.deinit();
-            var dex = &kv.value;
+            const dex = &kv.value;
             try dex.append(n - 1);
         } else {
             const name_dup = try mem.dupe(self.allocator, u8, name);
@@ -195,7 +195,7 @@ pub const Headers = struct {
     /// Returns boolean indicating if something was deleted.
     pub fn delete(self: *Self, name: []const u8) bool {
         if (self.index.remove(name)) |kv| {
-            var dex = &kv.value;
+            const dex = &kv.value;
             // iterate backwards
             var i = dex.items.len;
             while (i > 0) {
@@ -207,7 +207,7 @@ pub const Headers = struct {
             }
             dex.deinit();
             self.allocator.free(kv.key);
-            self.rebuild_index();
+            self.rebuildIndex();
             return true;
         } else {
             return false;
@@ -216,45 +216,52 @@ pub const Headers = struct {
 
     /// Removes the element at the specified index.
     /// Moves items down to fill the empty space.
+    /// TODO this implementation can be replaced by adding
+    /// orderedRemove to the new hash table implementation as an
+    /// alternative to swapRemove.
     pub fn orderedRemove(self: *Self, i: usize) void {
         const removed = self.data.orderedRemove(i);
-        const kv = self.index.get(removed.name).?;
-        var dex = &kv.value;
+        const kv = self.index.getEntry(removed.name).?;
+        const dex = &kv.value;
         if (dex.items.len == 1) {
             // was last item; delete the index
-            _ = self.index.remove(kv.key);
             dex.deinit();
             removed.deinit();
-            self.allocator.free(kv.key);
+            const key = kv.key;
+            _ = self.index.remove(key); // invalidates `kv` and `dex`
+            self.allocator.free(key);
         } else {
             dex.shrink(dex.items.len - 1);
             removed.deinit();
         }
         // if it was the last item; no need to rebuild index
         if (i != self.data.items.len) {
-            self.rebuild_index();
+            self.rebuildIndex();
         }
     }
 
     /// Removes the element at the specified index.
    /// The empty slot is filled from the end of the list.
+    /// TODO this implementation can be replaced by simply using the
+    /// new hash table which does swap removal.
pub fn swapRemove(self: *Self, i: usize) void { const removed = self.data.swapRemove(i); - const kv = self.index.get(removed.name).?; - var dex = &kv.value; + const kv = self.index.getEntry(removed.name).?; + const dex = &kv.value; if (dex.items.len == 1) { // was last item; delete the index - _ = self.index.remove(kv.key); dex.deinit(); removed.deinit(); - self.allocator.free(kv.key); + const key = kv.key; + _ = self.index.remove(key); // invalidates `kv` and `dex` + self.allocator.free(key); } else { dex.shrink(dex.items.len - 1); removed.deinit(); } // if it was the last item; no need to rebuild index if (i != self.data.items.len) { - self.rebuild_index(); + self.rebuildIndex(); } } @@ -266,11 +273,7 @@ pub const Headers = struct { /// Returns a list of indices containing headers with the given name. /// The returned list should not be modified by the caller. pub fn getIndices(self: Self, name: []const u8) ?HeaderIndexList { - if (self.index.get(name)) |kv| { - return kv.value; - } else { - return null; - } + return self.index.get(name); } /// Returns a slice containing each header with the given name. @@ -325,25 +328,20 @@ pub const Headers = struct { return buf; } - fn rebuild_index(self: *Self) void { - { // clear out the indexes - var it = self.index.iterator(); - while (it.next()) |kv| { - var dex = &kv.value; - dex.items.len = 0; // keeps capacity available - } + fn rebuildIndex(self: *Self) void { + // clear out the indexes + for (self.index.items()) |*entry| { + entry.value.shrinkRetainingCapacity(0); } - { // fill up indexes again; we know capacity is fine from before - for (self.data.span()) |entry, i| { - var dex = &self.index.get(entry.name).?.value; - dex.appendAssumeCapacity(i); - } + // fill up indexes again; we know capacity is fine from before + for (self.data.items) |entry, i| { + self.index.getEntry(entry.name).?.value.appendAssumeCapacity(i); } } pub fn sort(self: *Self) void { std.sort.sort(HeaderEntry, self.data.items, {}, HeaderEntry.compare); - self.rebuild_index(); + self.rebuildIndex(); } pub fn format( diff --git a/lib/std/json.zig b/lib/std/json.zig index 6377b69a80..a8b19756da 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -2149,27 +2149,27 @@ test "json.parser.dynamic" { var root = tree.root; - var image = root.Object.get("Image").?.value; + var image = root.Object.get("Image").?; - const width = image.Object.get("Width").?.value; + const width = image.Object.get("Width").?; testing.expect(width.Integer == 800); - const height = image.Object.get("Height").?.value; + const height = image.Object.get("Height").?; testing.expect(height.Integer == 600); - const title = image.Object.get("Title").?.value; + const title = image.Object.get("Title").?; testing.expect(mem.eql(u8, title.String, "View from 15th Floor")); - const animated = image.Object.get("Animated").?.value; + const animated = image.Object.get("Animated").?; testing.expect(animated.Bool == false); - const array_of_object = image.Object.get("ArrayOfObject").?.value; + const array_of_object = image.Object.get("ArrayOfObject").?; testing.expect(array_of_object.Array.items.len == 1); - const obj0 = array_of_object.Array.items[0].Object.get("n").?.value; + const obj0 = array_of_object.Array.items[0].Object.get("n").?; testing.expect(mem.eql(u8, obj0.String, "m")); - const double = image.Object.get("double").?.value; + const double = image.Object.get("double").?; testing.expect(double.Float == 1.3412); } @@ -2217,12 +2217,12 @@ test "write json then parse it" { var tree = try 
parser.parse(fixed_buffer_stream.getWritten()); defer tree.deinit(); - testing.expect(tree.root.Object.get("f").?.value.Bool == false); - testing.expect(tree.root.Object.get("t").?.value.Bool == true); - testing.expect(tree.root.Object.get("int").?.value.Integer == 1234); - testing.expect(tree.root.Object.get("array").?.value.Array.items[0].Null == {}); - testing.expect(tree.root.Object.get("array").?.value.Array.items[1].Float == 12.34); - testing.expect(mem.eql(u8, tree.root.Object.get("str").?.value.String, "hello")); + testing.expect(tree.root.Object.get("f").?.Bool == false); + testing.expect(tree.root.Object.get("t").?.Bool == true); + testing.expect(tree.root.Object.get("int").?.Integer == 1234); + testing.expect(tree.root.Object.get("array").?.Array.items[0].Null == {}); + testing.expect(tree.root.Object.get("array").?.Array.items[1].Float == 12.34); + testing.expect(mem.eql(u8, tree.root.Object.get("str").?.String, "hello")); } fn test_parse(arena_allocator: *std.mem.Allocator, json_str: []const u8) !Value { @@ -2245,7 +2245,7 @@ test "integer after float has proper type" { \\ "ints": [1, 2, 3] \\} ); - std.testing.expect(json.Object.getValue("ints").?.Array.items[0] == .Integer); + std.testing.expect(json.Object.get("ints").?.Array.items[0] == .Integer); } test "escaped characters" { @@ -2271,16 +2271,16 @@ test "escaped characters" { const obj = (try test_parse(&arena_allocator.allocator, input)).Object; - testing.expectEqualSlices(u8, obj.get("backslash").?.value.String, "\\"); - testing.expectEqualSlices(u8, obj.get("forwardslash").?.value.String, "/"); - testing.expectEqualSlices(u8, obj.get("newline").?.value.String, "\n"); - testing.expectEqualSlices(u8, obj.get("carriagereturn").?.value.String, "\r"); - testing.expectEqualSlices(u8, obj.get("tab").?.value.String, "\t"); - testing.expectEqualSlices(u8, obj.get("formfeed").?.value.String, "\x0C"); - testing.expectEqualSlices(u8, obj.get("backspace").?.value.String, "\x08"); - testing.expectEqualSlices(u8, obj.get("doublequote").?.value.String, "\""); - testing.expectEqualSlices(u8, obj.get("unicode").?.value.String, "ą"); - testing.expectEqualSlices(u8, obj.get("surrogatepair").?.value.String, "😂"); + testing.expectEqualSlices(u8, obj.get("backslash").?.String, "\\"); + testing.expectEqualSlices(u8, obj.get("forwardslash").?.String, "/"); + testing.expectEqualSlices(u8, obj.get("newline").?.String, "\n"); + testing.expectEqualSlices(u8, obj.get("carriagereturn").?.String, "\r"); + testing.expectEqualSlices(u8, obj.get("tab").?.String, "\t"); + testing.expectEqualSlices(u8, obj.get("formfeed").?.String, "\x0C"); + testing.expectEqualSlices(u8, obj.get("backspace").?.String, "\x08"); + testing.expectEqualSlices(u8, obj.get("doublequote").?.String, "\""); + testing.expectEqualSlices(u8, obj.get("unicode").?.String, "ą"); + testing.expectEqualSlices(u8, obj.get("surrogatepair").?.String, "😂"); } test "string copy option" { @@ -2306,11 +2306,11 @@ test "string copy option" { const obj_copy = tree_copy.root.Object; for ([_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" }) |field_name| { - testing.expectEqualSlices(u8, obj_nocopy.getValue(field_name).?.String, obj_copy.getValue(field_name).?.String); + testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.String, obj_copy.get(field_name).?.String); } - const nocopy_addr = &obj_nocopy.getValue("noescape").?.String[0]; - const copy_addr = &obj_copy.getValue("noescape").?.String[0]; + const nocopy_addr = &obj_nocopy.get("noescape").?.String[0]; + const copy_addr = 
&obj_copy.get("noescape").?.String[0]; var found_nocopy = false; for (input) |_, index| { diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 3743b4f334..33f422692c 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -720,7 +720,7 @@ fn fmtPathDir( defer dir.close(); const stat = try dir.stat(); - if (try fmt.seen.put(stat.inode, {})) |_| return; + if (try fmt.seen.fetchPut(stat.inode, {})) |_| return; var dir_it = dir.iterate(); while (try dir_it.next()) |entry| { @@ -768,7 +768,7 @@ fn fmtPathFile( defer fmt.gpa.free(source_code); // Add to set after no longer possible to get error.IsDir. - if (try fmt.seen.put(stat.inode, {})) |_| return; + if (try fmt.seen.fetchPut(stat.inode, {})) |_| return; const tree = try std.zig.parse(fmt.gpa, source_code); defer tree.deinit(); diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig index 171846d380..261cef37b5 100644 --- a/src-self-hosted/translate_c.zig +++ b/src-self-hosted/translate_c.zig @@ -20,7 +20,7 @@ pub const Error = error{OutOfMemory}; const TypeError = Error || error{UnsupportedType}; const TransError = TypeError || error{UnsupportedTranslation}; -const DeclTable = std.HashMap(usize, []const u8, addrHash, addrEql); +const DeclTable = std.HashMap(usize, []const u8, addrHash, addrEql, false); fn addrHash(x: usize) u32 { switch (@typeInfo(usize).Int.bits) { @@ -776,8 +776,8 @@ fn checkForBuiltinTypedef(checked_name: []const u8) ?[]const u8 { } fn transTypeDef(c: *Context, typedef_decl: *const ZigClangTypedefNameDecl, top_level_visit: bool) Error!?*ast.Node { - if (c.decl_table.get(@ptrToInt(ZigClangTypedefNameDecl_getCanonicalDecl(typedef_decl)))) |kv| - return transCreateNodeIdentifier(c, kv.value); // Avoid processing this decl twice + if (c.decl_table.get(@ptrToInt(ZigClangTypedefNameDecl_getCanonicalDecl(typedef_decl)))) |name| + return transCreateNodeIdentifier(c, name); // Avoid processing this decl twice const rp = makeRestorePoint(c); const typedef_name = try c.str(ZigClangNamedDecl_getName_bytes_begin(@ptrCast(*const ZigClangNamedDecl, typedef_decl))); @@ -818,8 +818,8 @@ fn transCreateNodeTypedef(rp: RestorePoint, typedef_decl: *const ZigClangTypedef } fn transRecordDecl(c: *Context, record_decl: *const ZigClangRecordDecl) Error!?*ast.Node { - if (c.decl_table.get(@ptrToInt(ZigClangRecordDecl_getCanonicalDecl(record_decl)))) |kv| - return try transCreateNodeIdentifier(c, kv.value); // Avoid processing this decl twice + if (c.decl_table.get(@ptrToInt(ZigClangRecordDecl_getCanonicalDecl(record_decl)))) |name| + return try transCreateNodeIdentifier(c, name); // Avoid processing this decl twice const record_loc = ZigClangRecordDecl_getLocation(record_decl); var bare_name = try c.str(ZigClangNamedDecl_getName_bytes_begin(@ptrCast(*const ZigClangNamedDecl, record_decl))); @@ -969,7 +969,7 @@ fn transRecordDecl(c: *Context, record_decl: *const ZigClangRecordDecl) Error!?* fn transEnumDecl(c: *Context, enum_decl: *const ZigClangEnumDecl) Error!?*ast.Node { if (c.decl_table.get(@ptrToInt(ZigClangEnumDecl_getCanonicalDecl(enum_decl)))) |name| - return try transCreateNodeIdentifier(c, name.value); // Avoid processing this decl twice + return try transCreateNodeIdentifier(c, name); // Avoid processing this decl twice const rp = makeRestorePoint(c); const enum_loc = ZigClangEnumDecl_getLocation(enum_decl); @@ -2130,7 +2130,7 @@ fn transInitListExprRecord( var raw_name = try rp.c.str(ZigClangNamedDecl_getName_bytes_begin(@ptrCast(*const ZigClangNamedDecl, field_decl))); if 
(ZigClangFieldDecl_isAnonymousStructOrUnion(field_decl)) { const name = rp.c.decl_table.get(@ptrToInt(ZigClangFieldDecl_getCanonicalDecl(field_decl))).?; - raw_name = try mem.dupe(rp.c.arena, u8, name.value); + raw_name = try mem.dupe(rp.c.arena, u8, name); } const field_name_tok = try appendIdentifier(rp.c, raw_name); @@ -2855,7 +2855,7 @@ fn transMemberExpr(rp: RestorePoint, scope: *Scope, stmt: *const ZigClangMemberE const field_decl = @ptrCast(*const struct_ZigClangFieldDecl, member_decl); if (ZigClangFieldDecl_isAnonymousStructOrUnion(field_decl)) { const name = rp.c.decl_table.get(@ptrToInt(ZigClangFieldDecl_getCanonicalDecl(field_decl))).?; - break :blk try mem.dupe(rp.c.arena, u8, name.value); + break :blk try mem.dupe(rp.c.arena, u8, name); } } const decl = @ptrCast(*const ZigClangNamedDecl, member_decl); @@ -6040,8 +6040,8 @@ fn getContainer(c: *Context, node: *ast.Node) ?*ast.Node { } else if (node.id == .PrefixOp) { return node; } else if (node.cast(ast.Node.Identifier)) |ident| { - if (c.global_scope.sym_table.get(tokenSlice(c, ident.token))) |kv| { - if (kv.value.cast(ast.Node.VarDecl)) |var_decl| + if (c.global_scope.sym_table.get(tokenSlice(c, ident.token))) |value| { + if (value.cast(ast.Node.VarDecl)) |var_decl| return getContainer(c, var_decl.init_node.?); } } else if (node.cast(ast.Node.InfixOp)) |infix| { @@ -6064,8 +6064,8 @@ fn getContainerTypeOf(c: *Context, ref: *ast.Node) ?*ast.Node { if (ref.cast(ast.Node.Identifier)) |ident| { - if (c.global_scope.sym_table.get(tokenSlice(c, ident.token))) |kv| { - if (kv.value.cast(ast.Node.VarDecl)) |var_decl| { + if (c.global_scope.sym_table.get(tokenSlice(c, ident.token))) |value| { + if (value.cast(ast.Node.VarDecl)) |var_decl| { if (var_decl.type_node) |ty| return getContainer(c, ty); } @@ -6104,8 +6104,7 @@ fn getFnProto(c: *Context, ref: *ast.Node) ?*ast.Node.FnProto { } fn addMacros(c: *Context) !void { - var macro_it = c.global_scope.macro_table.iterator(); - while (macro_it.next()) |kv| { + for (c.global_scope.macro_table.items()) |kv| { if (getFnProto(c, kv.value)) |proto_node| { // If a macro aliases a global variable which is a function pointer, we conclude that // the macro is intended to represent a function that assumes the function pointer From 3c8b13d998c5c58c8171d36d7506ea3a181d0db9 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 4 Jul 2020 02:07:27 +0000 Subject: [PATCH 3/4] std hash map: do the pow2 improvement again it's a noticeable speedup --- lib/std/hash_map.zig | 46 ++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index d0b18d9419..0fe8ae34f1 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -382,22 +382,24 @@ pub fn HashMapUnmanaged( try self.entries.ensureCapacity(allocator, new_capacity); if (new_capacity <= linear_scan_max) return; - // Resize if indexes would be more than 60% full. + // Ensure that the indexes will be at most 60% full if + // `new_capacity` items are put into it. const needed_len = new_capacity * 5 / 3; if (self.index_header) |header| { if (needed_len > header.indexes_len) { - var new_indexes_len = header.indexes_len; - while (true) { - new_indexes_len += new_indexes_len / 2 + 8; - if (new_indexes_len >= needed_len) break; - } + // An overflow here would mean the amount of memory required would not + // be representable in the address space.
+ const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; const new_header = try IndexHeader.alloc(allocator, new_indexes_len); self.insertAllEntriesIntoNewHeader(new_header); header.free(allocator); self.index_header = new_header; } } else { - const header = try IndexHeader.alloc(allocator, needed_len); + // An overflow here would mean the amount of memory required would not + // be representable in the address space. + const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; + const header = try IndexHeader.alloc(allocator, new_indexes_len); self.insertAllEntriesIntoNewHeader(header); self.index_header = header; } @@ -540,10 +542,10 @@ pub fn HashMapUnmanaged( fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry { const indexes = header.indexes(I); const h = hash(key); - const start_index = header.hashToIndex(h); + const start_index = header.constrainIndex(h); var roll_over: usize = 0; while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = (start_index + roll_over) % header.indexes_len; + const index_index = header.constrainIndex(start_index + roll_over); var index = &indexes[index_index]; if (index.isEmpty()) return null; @@ -564,7 +566,7 @@ pub fn HashMapUnmanaged( // Now we have to shift over the following indexes. roll_over += 1; while (roll_over < header.indexes_len) : (roll_over += 1) { - const next_index_index = (start_index + roll_over) % header.indexes_len; + const next_index_index = header.constrainIndex(start_index + roll_over); const next_index = &indexes[next_index_index]; if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { index.setEmpty(); @@ -588,10 +590,10 @@ pub fn HashMapUnmanaged( indexes: []Index(I), ) void { const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); - const start_index = header.hashToIndex(h); + const start_index = header.constrainIndex(h); var roll_over: usize = 0; while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = (start_index + roll_over) % header.indexes_len; + const index_index = header.constrainIndex(start_index + roll_over); const index = &indexes[index_index]; if (index.entry_index == old_entry_index) { index.entry_index = @intCast(I, new_entry_index); @@ -605,14 +607,14 @@ pub fn HashMapUnmanaged( fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { const indexes = header.indexes(I); const h = hash(key); - const start_index = header.hashToIndex(h); + const start_index = header.constrainIndex(h); var roll_over: usize = 0; var distance_from_start_index: usize = 0; while (roll_over <= header.indexes_len) : ({ roll_over += 1; distance_from_start_index += 1; }) { - const index_index = (start_index + roll_over) % header.indexes_len; + const index_index = header.constrainIndex(start_index + roll_over); const index = indexes[index_index]; if (index.isEmpty()) { indexes[index_index] = .{ @@ -670,7 +672,7 @@ pub fn HashMapUnmanaged( roll_over += 1; distance_from_start_index += 1; }) { - const next_index_index = (start_index + roll_over) % header.indexes_len; + const next_index_index = header.constrainIndex(start_index + roll_over); const next_index = indexes[next_index_index]; if (next_index.isEmpty()) { header.maybeBumpMax(distance_from_start_index); @@ -702,10 +704,10 @@ pub fn HashMapUnmanaged( fn getInternal(self: Self, key: K, header:
*IndexHeader, comptime I: type) ?*Entry { const indexes = header.indexes(I); const h = hash(key); - const start_index = header.hashToIndex(h); + const start_index = header.constrainIndex(h); var roll_over: usize = 0; while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = (start_index + roll_over) % header.indexes_len; + const index_index = header.constrainIndex(start_index + roll_over); const index = indexes[index_index]; if (index.isEmpty()) return null; @@ -731,7 +733,7 @@ pub fn HashMapUnmanaged( const indexes = header.indexes(I); entry_loop: for (self.entries.items) |entry, i| { const h = if (store_hash) entry.hash else hash(entry.key); - const start_index = header.hashToIndex(h); + const start_index = header.constrainIndex(h); var entry_index = i; var roll_over: usize = 0; var distance_from_start_index: usize = 0; @@ -739,7 +741,7 @@ pub fn HashMapUnmanaged( roll_over += 1; distance_from_start_index += 1; }) { - const index_index = (start_index + roll_over) % header.indexes_len; + const index_index = header.constrainIndex(start_index + roll_over); const next_index = indexes[index_index]; if (next_index.isEmpty()) { header.maybeBumpMax(distance_from_start_index); @@ -814,8 +816,10 @@ const IndexHeader = struct { max_distance_from_start_index: usize, indexes_len: usize, - fn hashToIndex(header: IndexHeader, h: u32) usize { - return @as(usize, h) % header.indexes_len; + fn constrainIndex(header: IndexHeader, i: usize) usize { + // This is an optimization for modulo of power of two integers; + // it requires `indexes_len` to always be a power of two. + return i & (header.indexes_len - 1); } fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { From 3a89f214aa672c5844def1704845ad38ea60bdcd Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 4 Jul 2020 22:25:49 +0000 Subject: [PATCH 4/4] update more HashMap API usage --- doc/docgen.zig | 2 +- doc/langref.html.in | 14 +-- lib/std/debug.zig | 6 +- lib/std/hash_map.zig | 2 +- src-self-hosted/Module.zig | 197 +++++++++++++++--------------------- src-self-hosted/codegen.zig | 4 +- src-self-hosted/link.zig | 6 +- src-self-hosted/zir.zig | 34 +++---- 8 files changed, 112 insertions(+), 153 deletions(-) diff --git a/doc/docgen.zig b/doc/docgen.zig index 7886c7cc90..e2acfae768 100644 --- a/doc/docgen.zig +++ b/doc/docgen.zig @@ -392,7 +392,7 @@ fn genToc(allocator: *mem.Allocator, tokenizer: *Tokenizer) !Toc { .n = header_stack_size, }, }); - if (try urls.put(urlized, tag_token)) |entry| { + if (try urls.fetchPut(urlized, tag_token)) |entry| { parseError(tokenizer, tag_token, "duplicate header url: #{}", .{urlized}) catch {}; parseError(tokenizer, entry.value, "other tag here", .{}) catch {}; return error.ParseError; diff --git a/doc/langref.html.in b/doc/langref.html.in index dfbb93decf..d7b9de3c6e 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -5363,11 +5363,11 @@ const std = @import("std"); const assert = std.debug.assert; test "turn HashMap into a set with void" { - var map = std.HashMap(i32, void, hash_i32, eql_i32).init(std.testing.allocator); + var map = std.AutoHashMap(i32, void).init(std.testing.allocator); defer map.deinit(); - _ = try map.put(1, {}); - _ = try map.put(2, {}); + try map.put(1, {}); + try map.put(2, {}); assert(map.contains(2)); assert(!map.contains(3)); @@ -5375,14 +5375,6 @@ test "turn HashMap into a set with void" { _ = map.remove(2); assert(!map.contains(2)); } - -fn hash_i32(x: i32) u32 { - return @bitCast(u32, x); -} - -fn eql_i32(a: i32, b: 
i32) bool { - return a == b; -} {#code_end#}
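Note: the doc test above also exercises the reworked insertion API; the old `_ = try map.put(1, {});` pattern disappears because `put` no longer returns the previous entry, while `fetchPut` (used elsewhere in this series, e.g. `fmt.seen.fetchPut`) does. A minimal sketch of the difference, assuming the managed `AutoHashMap` wrapper from this series; it is not part of the patch itself:

    const std = @import("std");

    test "put vs fetchPut sketch" {
        var map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
        defer map.deinit();

        // `put` returns `!void` and silently clobbers any existing value.
        try map.put(1, 10);
        try map.put(1, 11);

        // `fetchPut` inserts the new value and returns the previous
        // entry, if any.
        const prev = try map.fetchPut(1, 12);
        std.testing.expect(prev.?.value == 11);
        std.testing.expect(map.get(1).? == 12);
    }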

Note that this is different from using a dummy value for the hash map value. By using {#syntax#}void{#endsyntax#} as the type of the value, the hash map entry type has no value field, and diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e9bafec94c..e6d0c17da4 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1132,7 +1132,7 @@ pub const DebugInfo = struct { const seg_end = seg_start + segment_cmd.vmsize; if (rebased_address >= seg_start and rebased_address < seg_end) { - if (self.address_map.getValue(base_address)) |obj_di| { + if (self.address_map.get(base_address)) |obj_di| { return obj_di; } @@ -1204,7 +1204,7 @@ pub const DebugInfo = struct { const seg_end = seg_start + info.SizeOfImage; if (address >= seg_start and address < seg_end) { - if (self.address_map.getValue(seg_start)) |obj_di| { + if (self.address_map.get(seg_start)) |obj_di| { return obj_di; } @@ -1441,7 +1441,7 @@ pub const ModuleDebugInfo = switch (builtin.os.tag) { const o_file_path = mem.spanZ(self.strings[symbol.ofile.?.n_strx..]); // Check if its debug infos are already in the cache - var o_file_di = self.ofiles.getValue(o_file_path) orelse + var o_file_di = self.ofiles.get(o_file_path) orelse (self.loadOFile(o_file_path) catch |err| switch (err) { error.FileNotFound, error.MissingDebugInfo, diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 0fe8ae34f1..aaec9a4d58 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -458,7 +458,7 @@ pub fn HashMapUnmanaged( } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. - /// If insertion happuns, asserts there is enough capacity without allocating. + /// If insertion happens, asserts there is enough capacity without allocating. pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { const gop = self.getOrPutAssumeCapacity(key); var result: ?Entry = null; diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 7a61cd5ccd..0c80803fc7 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -75,7 +75,7 @@ deletion_set: std.ArrayListUnmanaged(*Decl) = .{}, keep_source_files_loaded: bool, -const DeclTable = std.HashMap(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql); +const DeclTable = std.HashMap(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql, false); const WorkItem = union(enum) { /// Write the machine code for a Decl to the output file. 
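Note: the `DeclTable` definition above passes `false` as the trailing `store_hash` argument, presumably because `Scope.name_hash_eql` is a cheap fixed-size comparison. A minimal sketch of how that parameter might be chosen for other tables; `hashId`/`eqlId` and the table names are illustrative helpers, not from this patch:

    const std = @import("std");

    fn hashId(x: u64) u32 {
        return @truncate(u32, x ^ (x >> 32));
    }

    fn eqlId(a: u64, b: u64) bool {
        return a == b;
    }

    // Cheap `eql` (a single integer compare): storing each entry's hash
    // would only cost memory, so pass `store_hash = false`.
    const IdTable = std.HashMap(u64, usize, hashId, eqlId, false);

    // Potentially expensive `eql` (byte-wise string compare): storing the
    // hash lets the table reject most collisions without calling `eql`,
    // so a string-keyed table passes `store_hash = true`.
    const NameTable = std.HashMap([]const u8, usize, std.hash_map.hashString, std.hash_map.eqlString, true);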
@@ -795,49 +795,38 @@ pub fn deinit(self: *Module) void { const allocator = self.allocator; self.deletion_set.deinit(allocator); self.work_queue.deinit(); - { - var it = self.decl_table.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.decl_table.deinit(); + + for (self.decl_table.items()) |entry| { + entry.value.destroy(allocator); } - { - var it = self.failed_decls.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_decls.deinit(); + self.decl_table.deinit(); + + for (self.failed_decls.items()) |entry| { + entry.value.destroy(allocator); } - { - var it = self.failed_files.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_files.deinit(); + self.failed_decls.deinit(); + + for (self.failed_files.items()) |entry| { + entry.value.destroy(allocator); } - { - var it = self.failed_exports.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_exports.deinit(); + self.failed_files.deinit(); + + for (self.failed_exports.items()) |entry| { + entry.value.destroy(allocator); } - { - var it = self.decl_exports.iterator(); - while (it.next()) |kv| { - const export_list = kv.value; - allocator.free(export_list); - } - self.decl_exports.deinit(); + self.failed_exports.deinit(); + + for (self.decl_exports.items()) |entry| { + const export_list = entry.value; + allocator.free(export_list); } - { - var it = self.export_owners.iterator(); - while (it.next()) |kv| { - freeExportList(allocator, kv.value); - } - self.export_owners.deinit(); + self.decl_exports.deinit(); + + for (self.export_owners.items()) |entry| { + freeExportList(allocator, entry.value); } + self.export_owners.deinit(); + self.symbol_exports.deinit(); self.root_scope.destroy(allocator); self.* = undefined; @@ -918,9 +907,9 @@ pub fn makeBinFileWritable(self: *Module) !void { } pub fn totalErrorCount(self: *Module) usize { - const total = self.failed_decls.size + - self.failed_files.size + - self.failed_exports.size; + const total = self.failed_decls.items().len + + self.failed_files.items().len + + self.failed_exports.items().len; return if (total == 0) @boolToInt(self.link_error_flags.no_entry_point_found) else total; } @@ -931,32 +920,23 @@ pub fn getAllErrorsAlloc(self: *Module) !AllErrors { var errors = std.ArrayList(AllErrors.Message).init(self.allocator); defer errors.deinit(); - { - var it = self.failed_files.iterator(); - while (it.next()) |kv| { - const scope = kv.key; - const err_msg = kv.value; - const source = try scope.getSource(self); - try AllErrors.add(&arena, &errors, scope.subFilePath(), source, err_msg.*); - } + for (self.failed_files.items()) |entry| { + const scope = entry.key; + const err_msg = entry.value; + const source = try scope.getSource(self); + try AllErrors.add(&arena, &errors, scope.subFilePath(), source, err_msg.*); } - { - var it = self.failed_decls.iterator(); - while (it.next()) |kv| { - const decl = kv.key; - const err_msg = kv.value; - const source = try decl.scope.getSource(self); - try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*); - } + for (self.failed_decls.items()) |entry| { + const decl = entry.key; + const err_msg = entry.value; + const source = try decl.scope.getSource(self); + try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*); } - { - var it = self.failed_exports.iterator(); - while (it.next()) |kv| { - const decl = kv.key.owner_decl; - const err_msg = kv.value; - const source = try 
decl.scope.getSource(self); - try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*); - } + for (self.failed_exports.items()) |entry| { + const decl = entry.key.owner_decl; + const err_msg = entry.value; + const source = try decl.scope.getSource(self); + try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*); } if (errors.items.len == 0 and self.link_error_flags.no_entry_point_found) { @@ -1016,7 +996,7 @@ pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { decl.analysis = .dependency_failure; }, else => { - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_decls.ensureCapacity(self.failed_decls.items().len + 1); self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( self.allocator, decl.src(), @@ -1086,7 +1066,7 @@ fn ensureDeclAnalyzed(self: *Module, decl: *Decl) InnerError!void { error.OutOfMemory => return error.OutOfMemory, error.AnalysisFail => return error.AnalysisFail, else => { - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_decls.ensureCapacity(self.failed_decls.items().len + 1); self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( self.allocator, decl.src(), @@ -1636,7 +1616,7 @@ fn declareDeclDependency(self: *Module, depender: *Decl, dependee: *Decl) !void fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { switch (root_scope.status) { .never_loaded, .unloaded_success => { - try self.failed_files.ensureCapacity(self.failed_files.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.items().len + 1); const source = try root_scope.getSource(self); @@ -1677,7 +1657,7 @@ fn getAstTree(self: *Module, root_scope: *Scope.File) !*ast.Tree { switch (root_scope.status) { .never_loaded, .unloaded_success => { - try self.failed_files.ensureCapacity(self.failed_files.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.items().len + 1); const source = try root_scope.getSource(self); @@ -1745,8 +1725,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { const name = tree.tokenSliceLoc(name_loc); const name_hash = root_scope.fullyQualifiedNameHash(name); const contents_hash = std.zig.hashSrc(tree.getNodeSource(src_decl)); - if (self.decl_table.get(name_hash)) |kv| { - const decl = kv.value; + if (self.decl_table.get(name_hash)) |decl| { // Update the AST Node index of the decl, even if its contents are unchanged, it may // have been re-ordered. decl.src_index = decl_i; @@ -1774,14 +1753,11 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { // TODO also look for global variable declarations // TODO also look for comptime blocks and exported globals } - { - // Handle explicitly deleted decls from the source code. Not to be confused - // with when we delete decls because they are no longer referenced. - var it = deleted_decls.iterator(); - while (it.next()) |kv| { - //std.debug.warn("noticed '{}' deleted from source\n", .{kv.key.name}); - try self.deleteDecl(kv.key); - } + // Handle explicitly deleted decls from the source code. Not to be confused + // with when we delete decls because they are no longer referenced. + for (deleted_decls.items()) |entry| { + //std.debug.warn("noticed '{}' deleted from source\n", .{entry.key.name}); + try self.deleteDecl(entry.key); } } @@ -1800,18 +1776,14 @@ fn analyzeRootZIRModule(self: *Module, root_scope: *Scope.ZIRModule) !void { // we know which ones have been deleted. 
var deleted_decls = std.AutoHashMap(*Decl, void).init(self.allocator); defer deleted_decls.deinit(); - try deleted_decls.ensureCapacity(self.decl_table.size); - { - var it = self.decl_table.iterator(); - while (it.next()) |kv| { - deleted_decls.putAssumeCapacityNoClobber(kv.value, {}); - } + try deleted_decls.ensureCapacity(self.decl_table.items().len); + for (self.decl_table.items()) |entry| { + deleted_decls.putAssumeCapacityNoClobber(entry.value, {}); } for (src_module.decls) |src_decl, decl_i| { const name_hash = root_scope.fullyQualifiedNameHash(src_decl.name); - if (self.decl_table.get(name_hash)) |kv| { - const decl = kv.value; + if (self.decl_table.get(name_hash)) |decl| { deleted_decls.removeAssertDiscard(decl); //std.debug.warn("'{}' contents: '{}'\n", .{ src_decl.name, src_decl.contents }); if (!srcHashEql(src_decl.contents_hash, decl.contents_hash)) { @@ -1835,14 +1807,11 @@ fn analyzeRootZIRModule(self: *Module, root_scope: *Scope.ZIRModule) !void { for (exports_to_resolve.items) |export_decl| { _ = try self.resolveZirDecl(&root_scope.base, export_decl); } - { - // Handle explicitly deleted decls from the source code. Not to be confused - // with when we delete decls because they are no longer referenced. - var it = deleted_decls.iterator(); - while (it.next()) |kv| { - //std.debug.warn("noticed '{}' deleted from source\n", .{kv.key.name}); - try self.deleteDecl(kv.key); - } + // Handle explicitly deleted decls from the source code. Not to be confused + // with when we delete decls because they are no longer referenced. + for (deleted_decls.items()) |entry| { + //std.debug.warn("noticed '{}' deleted from source\n", .{entry.key.name}); + try self.deleteDecl(entry.key); } } @@ -1888,7 +1857,7 @@ fn deleteDeclExports(self: *Module, decl: *Decl) void { const kv = self.export_owners.remove(decl) orelse return; for (kv.value) |exp| { - if (self.decl_exports.get(exp.exported_decl)) |decl_exports_kv| { + if (self.decl_exports.getEntry(exp.exported_decl)) |decl_exports_kv| { // Remove exports with owner_decl matching the regenerating decl. const list = decl_exports_kv.value; var i: usize = 0; @@ -1983,7 +1952,7 @@ fn createNewDecl( name_hash: Scope.NameHash, contents_hash: std.zig.SrcHash, ) !*Decl { - try self.decl_table.ensureCapacity(self.decl_table.size + 1); + try self.decl_table.ensureCapacity(self.decl_table.items().len + 1); const new_decl = try self.allocateNewDecl(scope, src_index, contents_hash); errdefer self.allocator.destroy(new_decl); new_decl.name = try mem.dupeZ(self.allocator, u8, decl_name); @@ -2043,7 +2012,7 @@ fn resolveZirDecl(self: *Module, scope: *Scope, src_decl: *zir.Decl) InnerError! 
fn resolveZirDeclHavingIndex(self: *Module, scope: *Scope, src_decl: *zir.Decl, src_index: usize) InnerError!*Decl { const name_hash = scope.namespace().fullyQualifiedNameHash(src_decl.name); - const decl = self.decl_table.getValue(name_hash).?; + const decl = self.decl_table.get(name_hash).?; decl.src_index = src_index; try self.ensureDeclAnalyzed(decl); return decl; @@ -2148,8 +2117,8 @@ fn analyzeExport(self: *Module, scope: *Scope, src: usize, symbol_name: []const else => return self.fail(scope, src, "unable to export type '{}'", .{typed_value.ty}), } - try self.decl_exports.ensureCapacity(self.decl_exports.size + 1); - try self.export_owners.ensureCapacity(self.export_owners.size + 1); + try self.decl_exports.ensureCapacity(self.decl_exports.items().len + 1); + try self.export_owners.ensureCapacity(self.export_owners.items().len + 1); const new_export = try self.allocator.create(Export); errdefer self.allocator.destroy(new_export); @@ -2168,23 +2137,23 @@ fn analyzeExport(self: *Module, scope: *Scope, src: usize, symbol_name: []const // Add to export_owners table. const eo_gop = self.export_owners.getOrPut(owner_decl) catch unreachable; if (!eo_gop.found_existing) { - eo_gop.kv.value = &[0]*Export{}; + eo_gop.entry.value = &[0]*Export{}; } - eo_gop.kv.value = try self.allocator.realloc(eo_gop.kv.value, eo_gop.kv.value.len + 1); - eo_gop.kv.value[eo_gop.kv.value.len - 1] = new_export; - errdefer eo_gop.kv.value = self.allocator.shrink(eo_gop.kv.value, eo_gop.kv.value.len - 1); + eo_gop.entry.value = try self.allocator.realloc(eo_gop.entry.value, eo_gop.entry.value.len + 1); + eo_gop.entry.value[eo_gop.entry.value.len - 1] = new_export; + errdefer eo_gop.entry.value = self.allocator.shrink(eo_gop.entry.value, eo_gop.entry.value.len - 1); // Add to exported_decl table. 
const de_gop = self.decl_exports.getOrPut(exported_decl) catch unreachable; if (!de_gop.found_existing) { - de_gop.kv.value = &[0]*Export{}; + de_gop.entry.value = &[0]*Export{}; } - de_gop.kv.value = try self.allocator.realloc(de_gop.kv.value, de_gop.kv.value.len + 1); - de_gop.kv.value[de_gop.kv.value.len - 1] = new_export; - errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1); + de_gop.entry.value = try self.allocator.realloc(de_gop.entry.value, de_gop.entry.value.len + 1); + de_gop.entry.value[de_gop.entry.value.len - 1] = new_export; + errdefer de_gop.entry.value = self.allocator.shrink(de_gop.entry.value, de_gop.entry.value.len - 1); if (self.symbol_exports.get(symbol_name)) |_| { - try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); + try self.failed_exports.ensureCapacity(self.failed_exports.items().len + 1); self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( self.allocator, src, @@ -2197,10 +2166,10 @@ fn analyzeExport(self: *Module, scope: *Scope, src: usize, symbol_name: []const } try self.symbol_exports.putNoClobber(symbol_name, new_export); - self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value) catch |err| switch (err) { + self.bin_file.updateDeclExports(self, exported_decl, de_gop.entry.value) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, else => { - try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); + try self.failed_exports.ensureCapacity(self.failed_exports.items().len + 1); self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( self.allocator, src, @@ -2494,7 +2463,7 @@ fn getNextAnonNameIndex(self: *Module) usize { fn lookupDeclName(self: *Module, scope: *Scope, ident_name: []const u8) ?*Decl { const namespace = scope.namespace(); const name_hash = namespace.fullyQualifiedNameHash(ident_name); - return self.decl_table.getValue(name_hash); + return self.decl_table.get(name_hash); } fn analyzeInstExport(self: *Module, scope: *Scope, export_inst: *zir.Inst.Export) InnerError!*Inst { @@ -3489,8 +3458,8 @@ fn failNode( fn failWithOwnedErrorMsg(self: *Module, scope: *Scope, src: usize, err_msg: *ErrorMsg) InnerError { { errdefer err_msg.destroy(self.allocator); - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); - try self.failed_files.ensureCapacity(self.failed_files.size + 1); + try self.failed_decls.ensureCapacity(self.failed_decls.items().len + 1); + try self.failed_files.ensureCapacity(self.failed_files.items().len + 1); } switch (scope.tag) { .decl => { diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 73758bda87..8885ed2825 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -705,7 +705,7 @@ const Function = struct { } fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { - if (self.inst_table.getValue(inst)) |mcv| { + if (self.inst_table.get(inst)) |mcv| { return mcv; } if (inst.cast(ir.Inst.Constant)) |const_inst| { @@ -713,7 +713,7 @@ const Function = struct { try self.inst_table.putNoClobber(inst, mcvalue); return mcvalue; } else { - return self.inst_table.getValue(inst).?; + return self.inst_table.get(inst).?; } } diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index c6acf21b84..c615ad35fd 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -1071,7 +1071,7 @@ pub const ElfFile = struct { try self.file.?.pwriteAll(code, file_offset); // Since we updated the vaddr and the size, each 
corresponding export symbol also needs to be updated. - const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*Module.Export{}; + const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; return self.updateDeclExports(module, decl, decl_exports); } @@ -1093,7 +1093,7 @@ pub const ElfFile = struct { for (exports) |exp| { if (exp.options.section) |section_name| { if (!mem.eql(u8, section_name, ".text")) { - try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); + try module.failed_exports.ensureCapacity(module.failed_exports.items().len + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}), @@ -1111,7 +1111,7 @@ pub const ElfFile = struct { }, .Weak => elf.STB_WEAK, .LinkOnce => { - try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); + try module.failed_exports.ensureCapacity(module.failed_exports.items().len + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 92dbc66e2b..7dceaaea1b 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -758,7 +758,7 @@ pub const Module = struct { } fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void { - if (inst_table.getValue(inst)) |info| { + if (inst_table.get(inst)) |info| { if (info.index) |i| { try stream.print("%{}", .{info.index}); } else { @@ -843,7 +843,7 @@ const Parser = struct { skipSpace(self); const decl = try parseInstruction(self, &body_context, ident); const ident_index = body_context.instructions.items.len; - if (try body_context.name_map.put(ident, decl.inst)) |_| { + if (try body_context.name_map.fetchPut(ident, decl.inst)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } try body_context.instructions.append(decl.inst); @@ -929,7 +929,7 @@ const Parser = struct { skipSpace(self); const decl = try parseInstruction(self, null, ident); const ident_index = self.decls.items.len; - if (try self.global_name_map.put(ident, decl.inst)) |_| { + if (try self.global_name_map.fetchPut(ident, decl.inst)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } try self.decls.append(self.allocator, decl); @@ -1153,7 +1153,7 @@ const Parser = struct { else => continue, }; const ident = self.source[name_start..self.i]; - const kv = map.get(ident) orelse { + return map.get(ident) orelse { const bad_name = self.source[name_start - 1 .. self.i]; const src = name_start - 1; if (local_ref) { @@ -1172,7 +1172,6 @@ const Parser = struct { return &declval.base; } }; - return kv.value; } fn generateName(self: *Parser) ![]u8 { @@ -1219,13 +1218,12 @@ const EmitZIR = struct { // by the hash table. 
var src_decls = std.ArrayList(*IrModule.Decl).init(self.allocator); defer src_decls.deinit(); - try src_decls.ensureCapacity(self.old_module.decl_table.size); - try self.decls.ensureCapacity(self.allocator, self.old_module.decl_table.size); - try self.names.ensureCapacity(self.old_module.decl_table.size); + try src_decls.ensureCapacity(self.old_module.decl_table.items().len); + try self.decls.ensureCapacity(self.allocator, self.old_module.decl_table.items().len); + try self.names.ensureCapacity(self.old_module.decl_table.items().len); - var decl_it = self.old_module.decl_table.iterator(); - while (decl_it.next()) |kv| { - const decl = kv.value; + for (self.old_module.decl_table.items()) |entry| { + const decl = entry.value; src_decls.appendAssumeCapacity(decl); self.names.putAssumeCapacityNoClobber(mem.spanZ(decl.name), {}); } @@ -1248,7 +1246,7 @@ const EmitZIR = struct { .codegen_failure, .dependency_failure, .codegen_failure_retryable, - => if (self.old_module.failed_decls.getValue(ir_decl)) |err_msg| { + => if (self.old_module.failed_decls.get(ir_decl)) |err_msg| { const fail_inst = try self.arena.allocator.create(Inst.CompileError); fail_inst.* = .{ .base = .{ @@ -1270,7 +1268,7 @@ const EmitZIR = struct { continue; }, } - if (self.old_module.export_owners.getValue(ir_decl)) |exports| { + if (self.old_module.export_owners.get(ir_decl)) |exports| { for (exports) |module_export| { const symbol_name = try self.emitStringLiteral(module_export.src, module_export.options.name); const export_inst = try self.arena.allocator.create(Inst.Export); @@ -1314,7 +1312,7 @@ const EmitZIR = struct { try new_body.inst_table.putNoClobber(inst, new_inst); return new_inst; } else { - return new_body.inst_table.getValue(inst).?; + return new_body.inst_table.get(inst).?; } } @@ -1424,7 +1422,7 @@ const EmitZIR = struct { try self.emitBody(body, &inst_table, &instructions); }, .sema_failure => { - const err_msg = self.old_module.failed_decls.getValue(module_fn.owner_decl).?; + const err_msg = self.old_module.failed_decls.get(module_fn.owner_decl).?; const fail_inst = try self.arena.allocator.create(Inst.CompileError); fail_inst.* = .{ .base = .{ @@ -1841,7 +1839,7 @@ const EmitZIR = struct { self.next_auto_name += 1; const gop = try self.names.getOrPut(proposed_name); if (!gop.found_existing) { - gop.kv.value = {}; + gop.entry.value = {}; return proposed_name; } } @@ -1861,9 +1859,9 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - gop.kv.value = try self.emitUnnamedDecl(&primitive_inst.base); + gop.entry.value = try self.emitUnnamedDecl(&primitive_inst.base); } - return gop.kv.value; + return gop.entry.value; } fn emitStringLiteral(self: *EmitZIR, src: usize, str: []const u8) !*Decl {
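Note: the hunks above show the other recurring mechanical migration in this series: `iterator()`/`next()` loops become plain `for` loops over the slice returned by `items()`. A minimal before/after sketch under those assumptions; `sumValues` is an illustrative helper, not from the patch:

    const std = @import("std");

    fn sumValues(map: *std.StringHashMap(u64)) u64 {
        var total: u64 = 0;
        // Old API, as removed throughout this patch:
        //     var it = map.iterator();
        //     while (it.next()) |kv| total += kv.value;
        // New API: iterate the entries array returned by `items()`.
        for (map.items()) |entry| {
            total += entry.value;
        }
        return total;
    }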