mirror of
https://github.com/ziglang/zig.git
synced 2025-12-30 18:13:19 +00:00
This commit removes the `field_call_bind` and `field_call_bind_named` ZIR
instructions, replacing them with a `field_call` instruction which does the bind
and call in one.
`field_call_bind` is an unfortunate instruction. It's tied into one very
specific usage pattern - its result can only be used as a callee. This means
that it creates a value of a "pseudo-type" of sorts, `bound_fn` - this type used
to exist in Zig, but now we just hide it from the user and have AstGen ensure
it's only used in one way. This is quite silly - `Type` and `Value` should, as
much as possible, reflect real Zig types and values.
It makes sense to instead encode the `a.b()` syntax as its own ZIR instruction,
so that's what we do here. This commit introduces a new instruction,
`field_call`. It's like `call`, but rather than a callee ref, it contains a ref
to the object pointer (`&a` in `a.b()`) and the string field name (`b`). This
eliminates `bound_fn` from the language, and slightly decreases the size of
generated ZIR - stats below.
This commit does remove a few usages which used to be allowed:
- `@field(a, "b")()`
- `@call(.auto, a.b, .{})`
- `@call(.auto, @field(a, "b"), .{})`
These forms used to work just like `a.b()`, but are no longer allowed. I believe
this is the correct choice for a few reasons:
- `a.b()` is a purely *syntactic* form; for instance, `(a.b)()` is not valid.
This means it is *not* inconsistent to not allow it in these cases; the
special case here isn't "a field access as a callee", but rather this exact
syntactic form.
- The second argument to `@call` looks much more visually distinct from the
callee in standard call syntax. To me, this makes it seem strange for that
argument to not work like a normal expression in this context.
- A more practical argument: it's confusing! `@field` and `@call` are used in
very different contexts to standard function calls: the former normally hints
at some comptime machinery, and the latter that you want more precise control
over parts of a function call. In these contexts, you don't want implicit
arguments adding extra confusion: you want to be very explicit about what
you're doing.
Lastly, some stats. I mentioned before that this change slightly reduces the
size of ZIR - this is due to two instructions (`field_call_bind` then `call`)
being replaced with one (`field_call`). Here are some numbers:
+--------------+----------+----------+--------+
| File | Before | After | Change |
+--------------+----------+----------+--------+
| Sema.zig | 4.72M | 4.53M | -4% |
| AstGen.zig | 1.52M | 1.48M | -3% |
| hash_map.zig | 283.9K | 276.2K | -3% |
| math.zig | 312.6K | 305.3K | -2% |
+--------------+----------+----------+--------+
231 lines
9.9 KiB
Zig
231 lines
9.9 KiB
Zig
const std = @import("std");
|
|
const mem = std.mem;
|
|
|
|
const primes = [_]u64{
|
|
0xa0761d6478bd642f,
|
|
0xe7037ed1a0b428db,
|
|
0x8ebc6af09c88c6e3,
|
|
0x589965cc75374cc3,
|
|
0x1d8e4e27c47d124f,
|
|
};
|
|
|
|
fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
|
|
const T = std.meta.Int(.unsigned, 8 * bytes);
|
|
return mem.readIntLittle(T, data[0..bytes]);
|
|
}
|
|
|
|
fn read_8bytes_swapped(data: []const u8) u64 {
|
|
return (read_bytes(4, data) << 32 | read_bytes(4, data[4..]));
|
|
}
|
|
|
|
fn mum(a: u64, b: u64) u64 {
|
|
var r = std.math.mulWide(u64, a, b);
|
|
r = (r >> 64) ^ r;
|
|
return @truncate(u64, r);
|
|
}
|
|
|
|
fn mix0(a: u64, b: u64, seed: u64) u64 {
|
|
return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]);
|
|
}
|
|
|
|
fn mix1(a: u64, b: u64, seed: u64) u64 {
|
|
return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
|
|
}
|
|
|
|
// Wyhash version which does not store internal state for handling partial buffers.
|
|
// This is needed so that we can maximize the speed for the short key case, which will
|
|
// use the non-iterative api which the public Wyhash exposes.
|
|
const WyhashStateless = struct {
|
|
seed: u64,
|
|
msg_len: usize,
|
|
|
|
pub fn init(seed: u64) WyhashStateless {
|
|
return WyhashStateless{
|
|
.seed = seed,
|
|
.msg_len = 0,
|
|
};
|
|
}
|
|
|
|
fn round(self: *WyhashStateless, b: []const u8) void {
|
|
std.debug.assert(b.len == 32);
|
|
|
|
self.seed = mix0(
|
|
read_bytes(8, b[0..]),
|
|
read_bytes(8, b[8..]),
|
|
self.seed,
|
|
) ^ mix1(
|
|
read_bytes(8, b[16..]),
|
|
read_bytes(8, b[24..]),
|
|
self.seed,
|
|
);
|
|
}
|
|
|
|
pub fn update(self: *WyhashStateless, b: []const u8) void {
|
|
std.debug.assert(b.len % 32 == 0);
|
|
|
|
var off: usize = 0;
|
|
while (off < b.len) : (off += 32) {
|
|
@call(.always_inline, round, .{ self, b[off..][0..32] });
|
|
}
|
|
|
|
self.msg_len += b.len;
|
|
}
|
|
|
|
pub fn final(self: *WyhashStateless, b: []const u8) u64 {
|
|
std.debug.assert(b.len < 32);
|
|
|
|
const seed = self.seed;
|
|
const rem_len = @intCast(u5, b.len);
|
|
const rem_key = b[0..rem_len];
|
|
|
|
self.seed = switch (rem_len) {
|
|
0 => seed,
|
|
1 => mix0(read_bytes(1, rem_key), primes[4], seed),
|
|
2 => mix0(read_bytes(2, rem_key), primes[4], seed),
|
|
3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
|
|
4 => mix0(read_bytes(4, rem_key), primes[4], seed),
|
|
5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
|
|
6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
|
|
7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
|
|
8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
|
|
9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
|
|
10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
|
|
11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
|
|
12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
|
|
13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
|
|
14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
|
|
15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
|
|
16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
|
|
17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
|
|
18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
|
|
19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
|
|
20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
|
|
21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
|
|
22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
|
|
23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
|
|
24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
|
|
25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
|
|
26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
|
|
27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
|
|
28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
|
|
29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
|
|
30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
|
|
31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
|
|
};
|
|
|
|
self.msg_len += b.len;
|
|
return mum(self.seed ^ self.msg_len, primes[4]);
|
|
}
|
|
|
|
pub fn hash(seed: u64, input: []const u8) u64 {
|
|
const aligned_len = input.len - (input.len % 32);
|
|
|
|
var c = WyhashStateless.init(seed);
|
|
@call(.always_inline, update, .{ &c, input[0..aligned_len] });
|
|
return @call(.always_inline, final, .{ &c, input[aligned_len..] });
|
|
}
|
|
};
|
|
|
|
/// Fast non-cryptographic 64bit hash function.
|
|
/// See https://github.com/wangyi-fudan/wyhash
|
|
pub const Wyhash = struct {
|
|
state: WyhashStateless,
|
|
|
|
buf: [32]u8,
|
|
buf_len: usize,
|
|
|
|
pub fn init(seed: u64) Wyhash {
|
|
return Wyhash{
|
|
.state = WyhashStateless.init(seed),
|
|
.buf = undefined,
|
|
.buf_len = 0,
|
|
};
|
|
}
|
|
|
|
pub fn update(self: *Wyhash, b: []const u8) void {
|
|
var off: usize = 0;
|
|
|
|
if (self.buf_len != 0 and self.buf_len + b.len >= 32) {
|
|
off += 32 - self.buf_len;
|
|
@memcpy(self.buf[self.buf_len..][0..off], b[0..off]);
|
|
self.state.update(self.buf[0..]);
|
|
self.buf_len = 0;
|
|
}
|
|
|
|
const remain_len = b.len - off;
|
|
const aligned_len = remain_len - (remain_len % 32);
|
|
self.state.update(b[off .. off + aligned_len]);
|
|
|
|
const src = b[off + aligned_len ..];
|
|
@memcpy(self.buf[self.buf_len..][0..src.len], src);
|
|
self.buf_len += @intCast(u8, b[off + aligned_len ..].len);
|
|
}
|
|
|
|
pub fn final(self: *Wyhash) u64 {
|
|
const rem_key = self.buf[0..self.buf_len];
|
|
|
|
return self.state.final(rem_key);
|
|
}
|
|
|
|
pub fn hash(seed: u64, input: []const u8) u64 {
|
|
return WyhashStateless.hash(seed, input);
|
|
}
|
|
};
|
|
|
|
const expectEqual = std.testing.expectEqual;
|
|
|
|
test "test vectors" {
|
|
const hash = Wyhash.hash;
|
|
|
|
try expectEqual(hash(0, ""), 0x0);
|
|
try expectEqual(hash(1, "a"), 0xbed235177f41d328);
|
|
try expectEqual(hash(2, "abc"), 0xbe348debe59b27c3);
|
|
try expectEqual(hash(3, "message digest"), 0x37320f657213a290);
|
|
try expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c);
|
|
try expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
|
|
try expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
|
|
}
|
|
|
|
test "test vectors streaming" {
|
|
var wh = Wyhash.init(5);
|
|
for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| {
|
|
wh.update(mem.asBytes(&e));
|
|
}
|
|
try expectEqual(wh.final(), 0x602a1894d3bbfe7f);
|
|
|
|
const pattern = "1234567890";
|
|
const count = 8;
|
|
const result = 0x829e9c148b75970e;
|
|
try expectEqual(Wyhash.hash(6, pattern ** 8), result);
|
|
|
|
wh = Wyhash.init(6);
|
|
var i: u32 = 0;
|
|
while (i < count) : (i += 1) {
|
|
wh.update(pattern);
|
|
}
|
|
try expectEqual(wh.final(), result);
|
|
}
|
|
|
|
test "iterative non-divisible update" {
|
|
var buf: [8192]u8 = undefined;
|
|
for (&buf, 0..) |*e, i| {
|
|
e.* = @truncate(u8, i);
|
|
}
|
|
|
|
const seed = 0x128dad08f;
|
|
|
|
var end: usize = 32;
|
|
while (end < buf.len) : (end += 32) {
|
|
const non_iterative_hash = Wyhash.hash(seed, buf[0..end]);
|
|
|
|
var wy = Wyhash.init(seed);
|
|
var i: usize = 0;
|
|
while (i < end) : (i += 33) {
|
|
wy.update(buf[i..std.math.min(i + 33, end)]);
|
|
}
|
|
const iterative_hash = wy.final();
|
|
|
|
try std.testing.expectEqual(iterative_hash, non_iterative_hash);
|
|
}
|
|
}
|