From 868a46eb43e68971634c046c8317c1b83cae21ae Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Mon, 28 Sep 2020 23:23:32 +0200 Subject: [PATCH 1/4] std/crypto: make gimli slightly faster Before: gimli-hash: 120 MiB/s gimli-aead: 130 MiB/s After: gimli-hash: 195 MiB/s gimli-aead: 208 MiB/s Also fixes in-place decryption by the way. If the input & output buffers were the same, decryption used to fail. Return on decryption error in the benchmark to detect similar issues in future AEADs even in non release-fast mode. --- lib/std/crypto/benchmark.zig | 2 +- lib/std/crypto/gimli.zig | 34 +++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 860f1269f0..4397f7312a 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -168,7 +168,7 @@ pub fn benchmarkAead(comptime Aead: anytype, comptime bytes: comptime_int) !u64 const start = timer.lap(); while (offset < bytes) : (offset += in.len) { Aead.encrypt(in[0..], tag[0..], in[0..], &[_]u8{}, nonce, key); - Aead.decrypt(in[0..], in[0..], tag, &[_]u8{}, nonce, key) catch unreachable; + try Aead.decrypt(in[0..], in[0..], tag, &[_]u8{}, nonce, key); } mem.doNotOptimizeAway(&in); const end = timer.read(); diff --git a/lib/std/crypto/gimli.zig b/lib/std/crypto/gimli.zig index 5b572aad7d..e5f93f5833 100644 --- a/lib/std/crypto/gimli.zig +++ b/lib/std/crypto/gimli.zig @@ -40,8 +40,8 @@ pub const State = struct { pub fn permute(self: *Self) void { const state = &self.data; - var round = @as(u32, 24); - while (round > 0) : (round -= 1) { + comptime var round = @as(u32, 24); + inline while (round > 0) : (round -= 1) { var column = @as(usize, 0); while (column < 4) : (column += 1) { const x = math.rotl(u32, state[column], 24); @@ -249,15 +249,19 @@ pub const Aead = struct { in = in[State.RATE..]; out = out[State.RATE..]; }) { - for (buf[0..State.RATE]) |*p, i| { - p.* ^= in[i]; - out[i] = p.*; + const d = in[0..State.RATE]; + for (d) |v, i| { + buf[i] ^= v; + } + for (d) |_, i| { + out[i] = buf[i]; } state.permute(); } - for (buf[0..in.len]) |*p, i| { - p.* ^= in[i]; - out[i] = p.*; + const d = in[0..]; + for (d) |v, i| { + buf[i] ^= v; + out[i] = buf[i]; } // XOR 1 into the next byte of the state @@ -291,15 +295,19 @@ pub const Aead = struct { in = in[State.RATE..]; out = out[State.RATE..]; }) { - for (buf[0..State.RATE]) |*p, i| { - out[i] = p.* ^ in[i]; - p.* = in[i]; + const d = in[0..State.RATE].*; + for (d) |v, i| { + out[i] = buf[i] ^ v; + } + for (d) |v, i| { + buf[i] = v; } state.permute(); } for (buf[0..in.len]) |*p, i| { - out[i] = p.* ^ in[i]; - p.* = in[i]; + const d = in[i]; + out[i] = p.* ^ d; + p.* = d; } // XOR 1 into the next byte of the state From 613f8fe83fc2db4bc39f18ad1a8190d33a4a1181 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Tue, 29 Sep 2020 00:41:37 +0200 Subject: [PATCH 2/4] Use mem.copy() instead of manual iterations --- lib/std/crypto/gimli.zig | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/std/crypto/gimli.zig b/lib/std/crypto/gimli.zig index e5f93f5833..181aa1ed53 100644 --- a/lib/std/crypto/gimli.zig +++ b/lib/std/crypto/gimli.zig @@ -249,17 +249,13 @@ pub const Aead = struct { in = in[State.RATE..]; out = out[State.RATE..]; }) { - const d = in[0..State.RATE]; - for (d) |v, i| { + for (in[0..State.RATE]) |v, i| { buf[i] ^= v; } - for (d) |_, i| { - out[i] = buf[i]; - } + mem.copy(u8, out[0..State.RATE], buf[0..State.RATE]); state.permute(); } - const d = in[0..]; - for (d) |v, i| { + for (in[0..]) |v, i| { buf[i] ^= v; out[i] = buf[i]; } @@ -299,9 +295,7 @@ pub const Aead = struct { for (d) |v, i| { out[i] = buf[i] ^ v; } - for (d) |v, i| { - buf[i] = v; - } + mem.copy(u8, buf[0..State.RATE], d[0..State.RATE]); state.permute(); } for (buf[0..in.len]) |*p, i| { From 4194714965a8080e6faa87d9859cc90aab07fe54 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Tue, 29 Sep 2020 13:09:11 +0200 Subject: [PATCH 3/4] Don't unroll the gimli permutation on release-small --- lib/std/crypto/gimli.zig | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/lib/std/crypto/gimli.zig b/lib/std/crypto/gimli.zig index 181aa1ed53..847562e395 100644 --- a/lib/std/crypto/gimli.zig +++ b/lib/std/crypto/gimli.zig @@ -38,7 +38,7 @@ pub const State = struct { return mem.sliceAsBytes(self.data[0..]); } - pub fn permute(self: *Self) void { + fn _permute_unrolled(self: *Self) void { const state = &self.data; comptime var round = @as(u32, 24); inline while (round > 0) : (round -= 1) { @@ -66,6 +66,42 @@ pub const State = struct { } } + fn _permute_small(self: *Self) void { + const state = &self.data; + var round = @as(u32, 24); + while (round > 0) : (round -= 1) { + var column = @as(usize, 0); + while (column < 4) : (column += 1) { + const x = math.rotl(u32, state[column], 24); + const y = math.rotl(u32, state[4 + column], 9); + const z = state[8 + column]; + state[8 + column] = ((x ^ (z << 1)) ^ ((y & z) << 2)); + state[4 + column] = ((y ^ x) ^ ((x | z) << 1)); + state[column] = ((z ^ y) ^ ((x & y) << 3)); + } + switch (round & 3) { + 0 => { + mem.swap(u32, &state[0], &state[1]); + mem.swap(u32, &state[2], &state[3]); + state[0] ^= round | 0x9e377900; + }, + 2 => { + mem.swap(u32, &state[0], &state[2]); + mem.swap(u32, &state[1], &state[3]); + }, + else => {}, + } + } + } + + pub fn permute(self: *Self) void { + if (std.builtin.mode == .ReleaseSmall) { + self._permute_small(); + } else { + self._permute_unrolled(); + } + } + pub fn squeeze(self: *Self, out: []u8) void { var i = @as(usize, 0); while (i + RATE <= out.len) : (i += RATE) { From 56d820087d712c3b3e93e8aeed8d556509050479 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Tue, 29 Sep 2020 14:01:08 +0200 Subject: [PATCH 4/4] gimli: make permute a constant, remove leading underscore --- lib/std/crypto/gimli.zig | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/std/crypto/gimli.zig b/lib/std/crypto/gimli.zig index 847562e395..10e8a7dff0 100644 --- a/lib/std/crypto/gimli.zig +++ b/lib/std/crypto/gimli.zig @@ -38,7 +38,7 @@ pub const State = struct { return mem.sliceAsBytes(self.data[0..]); } - fn _permute_unrolled(self: *Self) void { + fn permute_unrolled(self: *Self) void { const state = &self.data; comptime var round = @as(u32, 24); inline while (round > 0) : (round -= 1) { @@ -66,7 +66,7 @@ pub const State = struct { } } - fn _permute_small(self: *Self) void { + fn permute_small(self: *Self) void { const state = &self.data; var round = @as(u32, 24); while (round > 0) : (round -= 1) { @@ -94,13 +94,7 @@ pub const State = struct { } } - pub fn permute(self: *Self) void { - if (std.builtin.mode == .ReleaseSmall) { - self._permute_small(); - } else { - self._permute_unrolled(); - } - } + pub const permute = if (std.builtin.mode == .ReleaseSmall) permute_small else permute_unrolled; pub fn squeeze(self: *Self, out: []u8) void { var i = @as(usize, 0);