std: make ArrayHashMap eql function accept an additional param

which is the index of the key that already exists in the hash map.

This enables the use case of using `AutoArrayHashMap(void, void)` which
may seem surprising at first, but is actually pretty handy!
This commit includes a proof-of-concept of how I want to use it, with a
new InternArena abstraction for stage2 that provides a compact way to
store values (and types) in an "internment arena", thus making types
stored exactly once (per arena), representable with a single u32 as a
reference to a type within an InternArena, and comparable with a
simple u32 integer comparison. If both types are in the same
InternArena, you can check if they are equal by seeing if their index is
the same.

What's neat about `AutoArrayHashMap(void, void)` is that it allows us to
look up the indexes by key, *without actually storing the keys*.
Instead, keys are treated as ephemeral values that are constructed as
needed.

As a result, we have an extremely efficient encoding of types and
values, represented only by three arrays, which has no pointers, and can
therefore be serialized and deserialized by a single writev/readv call.
The `map` field is denormalized data and can be computed from the other
two fields.

This is in contrast to our current Type/Value system which makes
extensive use of pointers.

The test at the bottom of InternArena.zig passes in this commit.
This commit is contained in:
Andrew Kelley 2022-01-31 01:10:50 -07:00
parent 91ad96b88a
commit cf88cf2657
5 changed files with 357 additions and 25 deletions

View File

@ -37,8 +37,9 @@ pub const StringContext = struct {
_ = self; _ = self;
return hashString(s); return hashString(s);
} }
pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { pub fn eql(self: @This(), a: []const u8, b: []const u8, b_index: usize) bool {
_ = self; _ = self;
_ = b_index;
return eqlString(a, b); return eqlString(a, b);
} }
}; };
@ -76,7 +77,7 @@ pub fn ArrayHashMap(
comptime Context: type, comptime Context: type,
comptime store_hash: bool, comptime store_hash: bool,
) type { ) type {
comptime std.hash_map.verifyContext(Context, K, K, u32); comptime std.hash_map.verifyContext(Context, K, K, u32, true);
return struct { return struct {
unmanaged: Unmanaged, unmanaged: Unmanaged,
allocator: Allocator, allocator: Allocator,
@ -462,7 +463,7 @@ pub fn ArrayHashMapUnmanaged(
comptime Context: type, comptime Context: type,
comptime store_hash: bool, comptime store_hash: bool,
) type { ) type {
comptime std.hash_map.verifyContext(Context, K, K, u32); comptime std.hash_map.verifyContext(Context, K, K, u32, true);
return struct { return struct {
/// It is permitted to access this field directly. /// It is permitted to access this field directly.
entries: DataList = .{}, entries: DataList = .{},
@ -700,7 +701,7 @@ pub fn ArrayHashMapUnmanaged(
const hashes_array = slice.items(.hash); const hashes_array = slice.items(.hash);
const keys_array = slice.items(.key); const keys_array = slice.items(.key);
for (keys_array) |*item_key, i| { for (keys_array) |*item_key, i| {
if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*)) { if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*, i)) {
return GetOrPutResult{ return GetOrPutResult{
.key_ptr = item_key, .key_ptr = item_key,
// workaround for #6974 // workaround for #6974
@ -933,7 +934,7 @@ pub fn ArrayHashMapUnmanaged(
const hashes_array = slice.items(.hash); const hashes_array = slice.items(.hash);
const keys_array = slice.items(.key); const keys_array = slice.items(.key);
for (keys_array) |*item_key, i| { for (keys_array) |*item_key, i| {
if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*)) { if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*, i)) {
return i; return i;
} }
} }
@ -1245,7 +1246,7 @@ pub fn ArrayHashMapUnmanaged(
const keys_array = slice.items(.key); const keys_array = slice.items(.key);
for (keys_array) |*item_key, i| { for (keys_array) |*item_key, i| {
const hash_match = if (store_hash) hashes_array[i] == key_hash else true; const hash_match = if (store_hash) hashes_array[i] == key_hash else true;
if (hash_match and key_ctx.eql(key, item_key.*)) { if (hash_match and key_ctx.eql(key, item_key.*, i)) {
const removed_entry: KV = .{ const removed_entry: KV = .{
.key = keys_array[i], .key = keys_array[i],
.value = slice.items(.value)[i], .value = slice.items(.value)[i],
@ -1286,7 +1287,7 @@ pub fn ArrayHashMapUnmanaged(
const keys_array = slice.items(.key); const keys_array = slice.items(.key);
for (keys_array) |*item_key, i| { for (keys_array) |*item_key, i| {
const hash_match = if (store_hash) hashes_array[i] == key_hash else true; const hash_match = if (store_hash) hashes_array[i] == key_hash else true;
if (hash_match and key_ctx.eql(key, item_key.*)) { if (hash_match and key_ctx.eql(key, item_key.*, i)) {
switch (removal_type) { switch (removal_type) {
.swap => self.entries.swapRemove(i), .swap => self.entries.swapRemove(i),
.ordered => self.entries.orderedRemove(i), .ordered => self.entries.orderedRemove(i),
@ -1483,8 +1484,9 @@ pub fn ArrayHashMapUnmanaged(
// This pointer survives the following append because we call // This pointer survives the following append because we call
// entries.ensureTotalCapacity before getOrPutInternal. // entries.ensureTotalCapacity before getOrPutInternal.
const hash_match = if (store_hash) h == hashes_array[slot_data.entry_index] else true; const i = slot_data.entry_index;
if (hash_match and checkedEql(ctx, key, keys_array[slot_data.entry_index])) { const hash_match = if (store_hash) h == hashes_array[i] else true;
if (hash_match and checkedEql(ctx, key, keys_array[i], i)) {
return .{ return .{
.found_existing = true, .found_existing = true,
.key_ptr = &keys_array[slot_data.entry_index], .key_ptr = &keys_array[slot_data.entry_index],
@ -1571,8 +1573,9 @@ pub fn ArrayHashMapUnmanaged(
if (slot_data.isEmpty() or slot_data.distance_from_start_index < distance_from_start_index) if (slot_data.isEmpty() or slot_data.distance_from_start_index < distance_from_start_index)
return null; return null;
const hash_match = if (store_hash) h == hashes_array[slot_data.entry_index] else true; const i = slot_data.entry_index;
if (hash_match and checkedEql(ctx, key, keys_array[slot_data.entry_index])) const hash_match = if (store_hash) h == hashes_array[i] else true;
if (hash_match and checkedEql(ctx, key, keys_array[i], i))
return slot; return slot;
} }
unreachable; unreachable;
@ -1624,7 +1627,7 @@ pub fn ArrayHashMapUnmanaged(
} }
inline fn checkedHash(ctx: anytype, key: anytype) u32 { inline fn checkedHash(ctx: anytype, key: anytype) u32 {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32); comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32, true);
// If you get a compile error on the next line, it means that // If you get a compile error on the next line, it means that
const hash = ctx.hash(key); // your generic hash function doesn't accept your key const hash = ctx.hash(key); // your generic hash function doesn't accept your key
if (@TypeOf(hash) != u32) { if (@TypeOf(hash) != u32) {
@ -1633,10 +1636,10 @@ pub fn ArrayHashMapUnmanaged(
} }
return hash; return hash;
} }
inline fn checkedEql(ctx: anytype, a: anytype, b: K) bool { inline fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32); comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32, true);
// If you get a compile error on the next line, it means that // If you get a compile error on the next line, it means that
const eql = ctx.eql(a, b); // your generic eql function doesn't accept (self, adapt key, K) const eql = ctx.eql(a, b, b_index); // your generic eql function doesn't accept (self, adapt key, K, index)
if (@TypeOf(eql) != bool) { if (@TypeOf(eql) != bool) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++ @compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++
@typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql))); @typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql)));
@ -2255,9 +2258,10 @@ pub fn getAutoHashFn(comptime K: type, comptime Context: type) (fn (Context, K)
}.hash; }.hash;
} }
pub fn getAutoEqlFn(comptime K: type, comptime Context: type) (fn (Context, K, K) bool) { pub fn getAutoEqlFn(comptime K: type, comptime Context: type) (fn (Context, K, K, usize) bool) {
return struct { return struct {
fn eql(ctx: Context, a: K, b: K) bool { fn eql(ctx: Context, a: K, b: K, b_index: usize) bool {
_ = b_index;
_ = ctx; _ = ctx;
return meta.eql(a, b); return meta.eql(a, b);
} }

View File

@ -202,12 +202,14 @@ pub const TypeInfo = union(enum) {
/// therefore must be kept in sync with the compiler implementation. /// therefore must be kept in sync with the compiler implementation.
pub const Int = struct { pub const Int = struct {
signedness: Signedness, signedness: Signedness,
/// TODO make this u16 instead of comptime_int
bits: comptime_int, bits: comptime_int,
}; };
/// This data structure is used by the Zig language code generation and /// This data structure is used by the Zig language code generation and
/// therefore must be kept in sync with the compiler implementation. /// therefore must be kept in sync with the compiler implementation.
pub const Float = struct { pub const Float = struct {
/// TODO make this u16 instead of comptime_int
bits: comptime_int, bits: comptime_int,
}; };
@ -217,6 +219,7 @@ pub const TypeInfo = union(enum) {
size: Size, size: Size,
is_const: bool, is_const: bool,
is_volatile: bool, is_volatile: bool,
/// TODO make this u16 instead of comptime_int
alignment: comptime_int, alignment: comptime_int,
address_space: AddressSpace, address_space: AddressSpace,
child: type, child: type,

View File

@ -81,7 +81,6 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
.NoReturn, .NoReturn,
.Opaque, .Opaque,
.Undefined, .Undefined,
.Void,
.Null, .Null,
.ComptimeFloat, .ComptimeFloat,
.ComptimeInt, .ComptimeInt,
@ -91,6 +90,8 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
.Float, .Float,
=> @compileError("unable to hash type " ++ @typeName(Key)), => @compileError("unable to hash type " ++ @typeName(Key)),
.Void => return,
// Help the optimizer see that hashing an int is easy by inlining! // Help the optimizer see that hashing an int is easy by inlining!
// TODO Check if the situation is better after #561 is resolved. // TODO Check if the situation is better after #561 is resolved.
.Int => { .Int => {

View File

@ -131,7 +131,13 @@ pub const default_max_load_percentage = 80;
/// If you are passing a context to a *Adapted function, PseudoKey is the type /// If you are passing a context to a *Adapted function, PseudoKey is the type
/// of the key parameter. Otherwise, when creating a HashMap or HashMapUnmanaged /// of the key parameter. Otherwise, when creating a HashMap or HashMapUnmanaged
/// type, PseudoKey = Key = K. /// type, PseudoKey = Key = K.
pub fn verifyContext(comptime RawContext: type, comptime PseudoKey: type, comptime Key: type, comptime Hash: type) void { pub fn verifyContext(
comptime RawContext: type,
comptime PseudoKey: type,
comptime Key: type,
comptime Hash: type,
comptime is_array: bool,
) void {
comptime { comptime {
var allow_const_ptr = false; var allow_const_ptr = false;
var allow_mutable_ptr = false; var allow_mutable_ptr = false;
@ -166,7 +172,9 @@ pub fn verifyContext(comptime RawContext: type, comptime PseudoKey: type, compti
const prefix = "\n "; const prefix = "\n ";
const deep_prefix = prefix ++ " "; const deep_prefix = prefix ++ " ";
const hash_signature = "fn (self, " ++ @typeName(PseudoKey) ++ ") " ++ @typeName(Hash); const hash_signature = "fn (self, " ++ @typeName(PseudoKey) ++ ") " ++ @typeName(Hash);
const eql_signature = "fn (self, " ++ @typeName(PseudoKey) ++ ", " ++ @typeName(Key) ++ ") bool"; const index_param = if (is_array) ", b_index: usize" else "";
const eql_signature = "fn (self, " ++ @typeName(PseudoKey) ++ ", " ++
@typeName(Key) ++ index_param ++ ") bool";
const err_invalid_hash_signature = prefix ++ @typeName(Context) ++ ".hash must be " ++ hash_signature ++ const err_invalid_hash_signature = prefix ++ @typeName(Context) ++ ".hash must be " ++ hash_signature ++
deep_prefix ++ "but is actually " ++ @typeName(@TypeOf(Context.hash)); deep_prefix ++ "but is actually " ++ @typeName(@TypeOf(Context.hash));
const err_invalid_eql_signature = prefix ++ @typeName(Context) ++ ".eql must be " ++ eql_signature ++ const err_invalid_eql_signature = prefix ++ @typeName(Context) ++ ".eql must be " ++ eql_signature ++
@ -255,7 +263,8 @@ pub fn verifyContext(comptime RawContext: type, comptime PseudoKey: type, compti
const info = @typeInfo(@TypeOf(eql)); const info = @typeInfo(@TypeOf(eql));
if (info == .Fn) { if (info == .Fn) {
const func = info.Fn; const func = info.Fn;
if (func.args.len != 3) { const args_len = if (is_array) 4 else 3;
if (func.args.len != args_len) {
errors = errors ++ lazy.err_invalid_eql_signature; errors = errors ++ lazy.err_invalid_eql_signature;
} else { } else {
var emitted_signature = false; var emitted_signature = false;
@ -360,7 +369,7 @@ pub fn HashMap(
comptime Context: type, comptime Context: type,
comptime max_load_percentage: u64, comptime max_load_percentage: u64,
) type { ) type {
comptime verifyContext(Context, K, K, u64); comptime verifyContext(Context, K, K, u64, false);
return struct { return struct {
unmanaged: Unmanaged, unmanaged: Unmanaged,
allocator: Allocator, allocator: Allocator,
@ -683,7 +692,7 @@ pub fn HashMapUnmanaged(
) type { ) type {
if (max_load_percentage <= 0 or max_load_percentage >= 100) if (max_load_percentage <= 0 or max_load_percentage >= 100)
@compileError("max_load_percentage must be between 0 and 100."); @compileError("max_load_percentage must be between 0 and 100.");
comptime verifyContext(Context, K, K, u64); comptime verifyContext(Context, K, K, u64, false);
return struct { return struct {
const Self = @This(); const Self = @This();
@ -1108,7 +1117,7 @@ pub fn HashMapUnmanaged(
/// from this function. To encourage that, this function is /// from this function. To encourage that, this function is
/// marked as inline. /// marked as inline.
inline fn getIndex(self: Self, key: anytype, ctx: anytype) ?usize { inline fn getIndex(self: Self, key: anytype, ctx: anytype) ?usize {
comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash); comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash, false);
if (self.size == 0) { if (self.size == 0) {
return null; return null;
@ -1291,7 +1300,7 @@ pub fn HashMapUnmanaged(
return result; return result;
} }
pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult {
comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash); comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash, false);
// If you get a compile error on this line, it means that your generic hash // If you get a compile error on this line, it means that your generic hash
// function is invalid for these parameters. // function is invalid for these parameters.

315
src/InternArena.zig Normal file
View File

@ -0,0 +1,315 @@
map: std.AutoArrayHashMapUnmanaged(void, void) = .{},
items: std.MultiArrayList(Item) = .{},
extra: std.ArrayListUnmanaged(u32) = .{},
const InternArena = @This();
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const KeyAdapter = struct {
intern_arena: *const InternArena,
pub fn eql(ctx: @This(), a: Key, b_void: void, b_map_index: usize) bool {
_ = b_void;
return ctx.intern_arena.indexToKey(@intToEnum(Index, b_map_index)).eql(a);
}
pub fn hash(ctx: @This(), a: Key) u32 {
_ = ctx;
return a.hash();
}
};
pub const Key = union(enum) {
int_type: struct {
signedness: std.builtin.Signedness,
bits: u16,
},
ptr_type: struct {
elem_type: Index,
sentinel: Index,
alignment: u16,
size: std.builtin.TypeInfo.Pointer.Size,
is_const: bool,
is_volatile: bool,
is_allowzero: bool,
address_space: std.builtin.AddressSpace,
},
array_type: struct {
len: u64,
child: Index,
sentinel: Index,
},
vector_type: struct {
len: u32,
child: Index,
},
optional_type: struct {
payload_type: Index,
},
error_union_type: struct {
error_set_type: Index,
payload_type: Index,
},
simple: Simple,
pub fn hash(key: Key) u32 {
var hasher = std.hash.Wyhash.init(0);
switch (key) {
.int_type => |int_type| {
std.hash.autoHash(&hasher, int_type);
},
.array_type => |array_type| {
std.hash.autoHash(&hasher, array_type);
},
else => @panic("TODO"),
}
return @truncate(u32, hasher.final());
}
pub fn eql(a: Key, b: Key) bool {
const KeyTag = std.meta.Tag(Key);
const a_tag: KeyTag = a;
const b_tag: KeyTag = b;
if (a_tag != b_tag) return false;
switch (a) {
.int_type => |a_info| {
const b_info = b.int_type;
return std.meta.eql(a_info, b_info);
},
.array_type => |a_info| {
const b_info = b.array_type;
return std.meta.eql(a_info, b_info);
},
else => @panic("TODO"),
}
}
};
pub const Item = struct {
tag: Tag,
/// The doc comments on the respective Tag explain how to interpret this.
data: u32,
};
/// Represents an index into `map`. It represents the canonical index
/// of a `Value` within this `InternArena`. The values are typed.
/// Two values which have the same type can be equality compared simply
/// by checking if their indexes are equal, provided they are both in
/// the same `InternArena`.
pub const Index = enum(u32) {
none = std.math.maxInt(u32),
_,
};
pub const Tag = enum(u8) {
/// An integer type.
/// data is number of bits
type_int_signed,
/// An integer type.
/// data is number of bits
type_int_unsigned,
/// An array type.
/// data is payload to Array.
type_array,
/// A type or value that can be represented with only an enum tag.
/// data is Simple enum value
simple,
/// An unsigned integer value that can be represented by u32.
/// data is integer value
int_u32,
/// An unsigned integer value that can be represented by i32.
/// data is integer value bitcasted to u32.
int_i32,
/// A positive integer value that does not fit in 32 bits.
/// data is a extra index to BigInt.
int_big_positive,
/// A negative integer value that does not fit in 32 bits.
/// data is a extra index to BigInt.
int_big_negative,
/// A float value that can be represented by f32.
/// data is float value bitcasted to u32.
float_f32,
/// A float value that can be represented by f64.
/// data is payload index to Float64.
float_f64,
/// A float value that can be represented by f128.
/// data is payload index to Float128.
float_f128,
};
pub const Simple = enum(u32) {
f16,
f32,
f64,
f80,
f128,
usize,
isize,
c_short,
c_ushort,
c_int,
c_uint,
c_long,
c_ulong,
c_longlong,
c_ulonglong,
c_longdouble,
anyopaque,
bool,
void,
type,
anyerror,
comptime_int,
comptime_float,
noreturn,
@"anyframe",
null_type,
undefined_type,
enum_literal_type,
@"undefined",
void_value,
@"null",
bool_true,
bool_false,
};
pub const Array = struct {
len: u32,
child: Index,
};
pub fn deinit(ia: *InternArena, gpa: Allocator) void {
ia.map.deinit(gpa);
ia.items.deinit(gpa);
ia.extra.deinit(gpa);
}
pub fn indexToKey(ia: InternArena, index: Index) Key {
const data = ia.items.items(.data)[@enumToInt(index)];
return switch (ia.items.items(.tag)[@enumToInt(index)]) {
.type_int_signed => .{
.int_type = .{
.signedness = .signed,
.bits = @intCast(u16, data),
},
},
.type_int_unsigned => .{
.int_type = .{
.signedness = .unsigned,
.bits = @intCast(u16, data),
},
},
.type_array => {
const array_info = ia.extraData(Array, data);
return .{ .array_type = .{
.len = array_info.len,
.child = array_info.child,
.sentinel = .none,
} };
},
.simple => .{ .simple = @intToEnum(Simple, data) },
else => @panic("TODO"),
};
}
pub fn get(ia: *InternArena, gpa: Allocator, key: Key) Allocator.Error!Index {
const adapter: KeyAdapter = .{ .intern_arena = ia };
const gop = try ia.map.getOrPutAdapted(gpa, key, adapter);
if (gop.found_existing) {
return @intToEnum(Index, gop.index);
}
switch (key) {
.int_type => |int_type| {
const tag: Tag = switch (int_type.signedness) {
.signed => .type_int_signed,
.unsigned => .type_int_unsigned,
};
try ia.items.append(gpa, .{
.tag = tag,
.data = int_type.bits,
});
},
.array_type => |array_type| {
const len = @intCast(u32, array_type.len); // TODO have a big_array encoding
assert(array_type.sentinel == .none); // TODO have a sentinel_array encoding
try ia.items.append(gpa, .{
.tag = .type_array,
.data = try ia.addExtra(gpa, Array{
.len = len,
.child = array_type.child,
}),
});
},
else => @panic("TODO"),
}
return @intToEnum(Index, ia.items.len - 1);
}
fn addExtra(ia: *InternArena, gpa: Allocator, extra: anytype) Allocator.Error!u32 {
const fields = std.meta.fields(@TypeOf(extra));
try ia.extra.ensureUnusedCapacity(gpa, fields.len);
return ia.addExtraAssumeCapacity(extra);
}
fn addExtraAssumeCapacity(ia: *InternArena, extra: anytype) u32 {
const fields = std.meta.fields(@TypeOf(extra));
const result = @intCast(u32, ia.extra.items.len);
inline for (fields) |field| {
ia.extra.appendAssumeCapacity(switch (field.field_type) {
u32 => @field(extra, field.name),
Index => @enumToInt(@field(extra, field.name)),
i32 => @bitCast(u32, @field(extra, field.name)),
else => @compileError("bad field type"),
});
}
return result;
}
fn extraData(ia: InternArena, comptime T: type, index: usize) T {
const fields = std.meta.fields(T);
var i: usize = index;
var result: T = undefined;
inline for (fields) |field| {
@field(result, field.name) = switch (field.field_type) {
u32 => ia.extra.items[i],
Index => @intToEnum(Index, ia.extra.items[i]),
i32 => @bitCast(i32, ia.extra.items[i]),
else => @compileError("bad field type"),
};
i += 1;
}
return result;
}
test "basic usage" {
const gpa = std.testing.allocator;
var ia: InternArena = .{};
defer ia.deinit(gpa);
const i32_type = try ia.get(gpa, .{ .int_type = .{
.signedness = .signed,
.bits = 32,
} });
const array_i32 = try ia.get(gpa, .{ .array_type = .{
.len = 10,
.child = i32_type,
.sentinel = .none,
} });
const another_i32_type = try ia.get(gpa, .{ .int_type = .{
.signedness = .signed,
.bits = 32,
} });
try std.testing.expect(another_i32_type == i32_type);
const another_array_i32 = try ia.get(gpa, .{ .array_type = .{
.len = 10,
.child = i32_type,
.sentinel = .none,
} });
try std.testing.expect(another_array_i32 == array_i32);
}