summaryrefslogtreecommitdiff
path: root/crypto/aegis128-neon-inner.c
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2019-10-14 18:16:45 +0200
committerHerbert Xu <herbert@gondor.apana.org.au>2019-10-26 02:06:05 +1100
commit3d6fa83591686877180c3b32c976d54fa7bf3525 (patch)
treecf766a20dad226d2ed4abb5a668ad0fc833031e4 /crypto/aegis128-neon-inner.c
parent4fcc8127e025f266de29eaea47fa29457d42fe07 (diff)
downloadlinux-crypto-3d6fa83591686877180c3b32c976d54fa7bf3525.tar.gz
linux-crypto-3d6fa83591686877180c3b32c976d54fa7bf3525.zip
crypto: aegis128 - duplicate init() and final() hooks in SIMD code
In order to speed up aegis128 processing even more, duplicate the init() and final() routines as SIMD versions in their entirety. This results in a 2x speedup on ARM Cortex-A57 for ~1500 byte packets (using AES instructions). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
-rw-r--r--crypto/aegis128-neon-inner.c50
1 files changed, 50 insertions, 0 deletions
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index f05310ca..2a660ac1 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -132,6 +132,36 @@ void preload_sbox(void)
:: "r"(crypto_aes_sbox));
}
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
+{
+ static const uint8_t const0[] = {
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+ 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+ };
+ static const uint8_t const1[] = {
+ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+ 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+ };
+ uint8x16_t k = vld1q_u8(key);
+ uint8x16_t kiv = k ^ vld1q_u8(iv);
+ struct aegis128_state st = {{
+ kiv,
+ vld1q_u8(const1),
+ vld1q_u8(const0),
+ k ^ vld1q_u8(const0),
+ k ^ vld1q_u8(const1),
+ }};
+ int i;
+
+ preload_sbox();
+
+ for (i = 0; i < 5; i++) {
+ st = aegis128_update_neon(st, k);
+ st = aegis128_update_neon(st, kiv);
+ }
+ aegis128_save_state_neon(st, state);
+}
+
void crypto_aegis128_update_neon(void *state, const void *msg)
{
struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
aegis128_save_state_neon(st, state);
}
+
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+ uint64_t cryptlen)
+{
+ struct aegis128_state st = aegis128_load_state_neon(state);
+ uint8x16_t v;
+ int i;
+
+ preload_sbox();
+
+ v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
+ vmov_n_u64(8 * cryptlen));
+
+ for (i = 0; i < 7; i++)
+ st = aegis128_update_neon(st, v);
+
+ v = vld1q_u8(tag_xor);
+ v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
+ vst1q_u8(tag_xor, v);
+}