mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 21:08:36 +00:00
Replace hand-written endian-specific loads with std.mem.readInt*() (#16431)
And when we have the choice, favor little-endian because it's 2023. Gives a slight performance improvement: md5: 552 -> 555 MiB/s; sha1: 768 -> 786 MiB/s; sha512: 211 -> 217 MiB/s.
This commit is contained in:
parent
a86f589a9f
commit
a0b35249a2
@ -122,14 +122,14 @@ pub const Block = struct {
|
||||
|
||||
// Last round uses s-box directly and XORs to produce output.
|
||||
var x: [4]u8 = undefined;
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s3 >> 24)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s0)));
|
||||
var t0 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s0 >> 24)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s1)));
|
||||
var t1 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s1 >> 24)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s2)));
|
||||
var t2 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s2 >> 24)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s3)));
|
||||
var t3 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s0)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s3 >> 24)));
|
||||
var t0 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s1)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s0 >> 24)));
|
||||
var t1 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s2)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s1 >> 24)));
|
||||
var t2 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s3)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s2 >> 24)));
|
||||
var t3 = mem.readIntLittle(u32, &x);
|
||||
|
||||
t0 ^= round_key.repr[0];
|
||||
t1 ^= round_key.repr[1];
|
||||
@ -218,14 +218,14 @@ pub const Block = struct {
|
||||
|
||||
// Last round uses s-box directly and XORs to produce output.
|
||||
var x: [4]u8 = undefined;
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s1 >> 24)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s0)));
|
||||
var t0 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s2 >> 24)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s1)));
|
||||
var t1 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s3 >> 24)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s2)));
|
||||
var t2 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s0 >> 24)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s3)));
|
||||
var t3 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s0)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s1 >> 24)));
|
||||
var t0 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s1)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s2 >> 24)));
|
||||
var t1 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s2)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s3 >> 24)));
|
||||
var t2 = mem.readIntLittle(u32, &x);
|
||||
x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s3)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s0 >> 24)));
|
||||
var t3 = mem.readIntLittle(u32, &x);
|
||||
|
||||
t0 ^= round_key.repr[0];
|
||||
t1 ^= round_key.repr[1];
|
||||
@ -349,7 +349,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
// Apply the S-box substitution (looked up through sbox_key_schedule) to each byte in w.
|
||||
fn func(w: u32) u32 {
|
||||
const x = sbox_lookup(&sbox_key_schedule, @as(u8, @truncate(w)), @as(u8, @truncate(w >> 8)), @as(u8, @truncate(w >> 16)), @as(u8, @truncate(w >> 24)));
|
||||
return @as(u32, x[3]) << 24 | @as(u32, x[2]) << 16 | @as(u32, x[1]) << 8 | @as(u32, x[0]);
|
||||
return mem.readIntLittle(u32, &x);
|
||||
}
|
||||
}.func;
|
||||
|
||||
|
||||
@ -121,12 +121,7 @@ pub const Md5 = struct {
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < 16) : (i += 1) {
|
||||
// NOTE: Performing or's separately improves perf by ~10%
|
||||
s[i] = 0;
|
||||
s[i] |= @as(u32, b[i * 4 + 0]);
|
||||
s[i] |= @as(u32, b[i * 4 + 1]) << 8;
|
||||
s[i] |= @as(u32, b[i * 4 + 2]) << 16;
|
||||
s[i] |= @as(u32, b[i * 4 + 3]) << 24;
|
||||
s[i] = mem.readIntLittle(u32, b[i * 4 ..][0..4]);
|
||||
}
|
||||
|
||||
var v: [4]u32 = [_]u32{
|
||||
|
||||
@ -151,7 +151,7 @@ pub const Sha1 = struct {
|
||||
roundParam(0, 1, 2, 3, 4, 15),
|
||||
};
|
||||
inline for (round0a) |r| {
|
||||
s[r.i] = (@as(u32, b[r.i * 4 + 0]) << 24) | (@as(u32, b[r.i * 4 + 1]) << 16) | (@as(u32, b[r.i * 4 + 2]) << 8) | (@as(u32, b[r.i * 4 + 3]) << 0);
|
||||
s[r.i] = mem.readIntBig(u32, b[r.i * 4 ..][0..4]);
|
||||
|
||||
v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], @as(u32, 5)) +% 0x5A827999 +% s[r.i & 0xf] +% ((v[r.b] & v[r.c]) | (~v[r.b] & v[r.d]));
|
||||
v[r.b] = math.rotl(u32, v[r.b], @as(u32, 30));
|
||||
|
||||
@ -678,15 +678,7 @@ fn Sha2x64(comptime params: Sha2Params64) type {
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < 16) : (i += 1) {
|
||||
s[i] = 0;
|
||||
s[i] |= @as(u64, b[i * 8 + 0]) << 56;
|
||||
s[i] |= @as(u64, b[i * 8 + 1]) << 48;
|
||||
s[i] |= @as(u64, b[i * 8 + 2]) << 40;
|
||||
s[i] |= @as(u64, b[i * 8 + 3]) << 32;
|
||||
s[i] |= @as(u64, b[i * 8 + 4]) << 24;
|
||||
s[i] |= @as(u64, b[i * 8 + 5]) << 16;
|
||||
s[i] |= @as(u64, b[i * 8 + 6]) << 8;
|
||||
s[i] |= @as(u64, b[i * 8 + 7]) << 0;
|
||||
s[i] = mem.readIntBig(u64, b[i * 8 ..][0..8]);
|
||||
}
|
||||
while (i < 80) : (i += 1) {
|
||||
s[i] = s[i - 16] +% s[i - 7] +%
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user