crypto.aescrypto.encrypt: do not add the round key in an asm block (#14899)

Apple M1/M2 have an EOR3 instruction that XORs three operands in a single
instruction, and LLVM knows how to take advantage of it.

However, two EOR instructions can't be automatically combined into an EOR3 if
one of them is inside an inline assembly block.

This simple change speeds up ciphers that perform an AES round immediately
followed by an XOR operation on Apple Silicon.

Before:

   aegis-128l mac:      12534 MiB/s
    aegis-256 mac:       6722 MiB/s
       aegis-128l:      10634 MiB/s
        aegis-256:       6133 MiB/s
       aes128-gcm:       3890 MiB/s
       aes256-gcm:       3122 MiB/s
       aes128-ocb:       2832 MiB/s
       aes256-ocb:       2057 MiB/s

After:

   aegis-128l mac:      15667 MiB/s
    aegis-256 mac:       8240 MiB/s
       aegis-128l:      12656 MiB/s
        aegis-256:       7214 MiB/s
       aes128-gcm:       3976 MiB/s
       aes256-gcm:       3202 MiB/s
       aes128-ocb:       2835 MiB/s
       aes256-ocb:       2118 MiB/s
This commit is contained in:
Frank Denis 2023-03-13 08:06:27 +01:00 committed by GitHub
parent 10c74631b3
commit 1d96a17af4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -32,62 +32,54 @@ pub const Block = struct {
/// Encrypt a block with a round key.
///
/// The inline assembly copies `block` into the output register, then runs
/// `aese` with `zero` as its key operand, followed by `aesmc`. The round key
/// is XORed in afterwards, outside the assembly block, so that the compiler
/// can see that XOR and fuse it with an adjacent one (e.g. into a single
/// EOR3 on Apple Silicon).
pub inline fn encrypt(block: Block, round_key: Block) Block {
    return Block{
        // The round key must NOT also be added with an `eor` inside the
        // assembly: applying it both there and here would cancel it out.
        .repr = (asm (
            \\ mov %[out].16b, %[in].16b
            \\ aese %[out].16b, %[zero].16b
            \\ aesmc %[out].16b, %[out].16b
            : [out] "=&x" (-> BlockVec),
            : [in] "x" (block.repr),
              [zero] "x" (zero),
        )) ^ round_key.repr,
    };
}
/// Encrypt a block with the last round key.
///
/// Same as a regular encryption round but without the `aesmc` step: the
/// inline assembly copies `block` into the output register and runs `aese`
/// with `zero` as its key operand. The round key is XORed in afterwards,
/// outside the assembly block, so the compiler can combine that XOR with
/// neighbouring ones.
pub inline fn encryptLast(block: Block, round_key: Block) Block {
    return Block{
        // The round key must NOT also be added with an `eor` inside the
        // assembly: applying it both there and here would cancel it out.
        .repr = (asm (
            \\ mov %[out].16b, %[in].16b
            \\ aese %[out].16b, %[zero].16b
            : [out] "=&x" (-> BlockVec),
            : [in] "x" (block.repr),
              [zero] "x" (zero),
        )) ^ round_key.repr,
    };
}
/// Decrypt a block with a round key.
///
/// The inline assembly copies `block` into the output register, then runs
/// `aesd` with `zero` as its key operand, followed by `aesimc`. The inverse
/// round key is XORed in afterwards, outside the assembly block, so that the
/// compiler can see that XOR and fuse it with an adjacent one.
pub inline fn decrypt(block: Block, inv_round_key: Block) Block {
    return Block{
        // The round key must NOT also be added with an `eor` inside the
        // assembly: applying it both there and here would cancel it out.
        .repr = (asm (
            \\ mov %[out].16b, %[in].16b
            \\ aesd %[out].16b, %[zero].16b
            \\ aesimc %[out].16b, %[out].16b
            : [out] "=&x" (-> BlockVec),
            : [in] "x" (block.repr),
              [zero] "x" (zero),
        )) ^ inv_round_key.repr,
    };
}
/// Decrypt a block with the last round key.
///
/// Same as a regular decryption round but without the `aesimc` step: the
/// inline assembly copies `block` into the output register and runs `aesd`
/// with `zero` as its key operand. The inverse round key is XORed in
/// afterwards, outside the assembly block, so the compiler can combine that
/// XOR with neighbouring ones.
pub inline fn decryptLast(block: Block, inv_round_key: Block) Block {
    return Block{
        // The round key must NOT also be added with an `eor` inside the
        // assembly: applying it both there and here would cancel it out.
        .repr = (asm (
            \\ mov %[out].16b, %[in].16b
            \\ aesd %[out].16b, %[zero].16b
            : [out] "=&x" (-> BlockVec),
            : [in] "x" (block.repr),
              [zero] "x" (zero),
        )) ^ inv_round_key.repr,
    };
}