summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2019-10-14 18:16:45 +0200
committerHerbert Xu <herbert@gondor.apana.org.au>2019-10-26 02:06:05 +1100
commit3d6fa83591686877180c3b32c976d54fa7bf3525 (patch)
treecf766a20dad226d2ed4abb5a668ad0fc833031e4
parent4fcc8127e025f266de29eaea47fa29457d42fe07 (diff)
downloadlinux-crypto-3d6fa83591686877180c3b32c976d54fa7bf3525.tar.gz
linux-crypto-3d6fa83591686877180c3b32c976d54fa7bf3525.zip
crypto: aegis128 - duplicate init() and final() hooks in SIMD code
In order to speed up aegis128 processing even more, duplicate the init() and final() routines as SIMD versions in their entirety. This results in a 2x speedup on ARM Cortex-A57 for ~1500 byte packets (using AES instructions). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--crypto/aegis128-core.c38
-rw-r--r--crypto/aegis128-neon-inner.c50
-rw-r--r--crypto/aegis128-neon.c21
3 files changed, 97 insertions, 12 deletions
diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c
index fe7ab66d..71c11cb5 100644
--- a/crypto/aegis128-core.c
+++ b/crypto/aegis128-core.c
@@ -60,10 +60,16 @@ static bool aegis128_do_simd(void)
bool crypto_aegis128_have_simd(void);
void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
+void crypto_aegis128_init_simd(struct aegis_state *state,
+ const union aegis_block *key,
+ const u8 *iv);
void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
+void crypto_aegis128_final_simd(struct aegis_state *state,
+ union aegis_block *tag_xor,
+ u64 assoclen, u64 cryptlen);
static void crypto_aegis128_update(struct aegis_state *state)
{
@@ -395,17 +401,21 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
struct skcipher_walk walk;
struct aegis_state state;
- crypto_aegis128_init(&state, &ctx->key, req->iv);
- crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
skcipher_walk_aead_encrypt(&walk, req, false);
- if (aegis128_do_simd())
+ if (aegis128_do_simd()) {
+ crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+ crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk_simd);
- else
+ crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+ cryptlen);
+ } else {
+ crypto_aegis128_init(&state, &ctx->key, req->iv);
+ crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk);
- crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+ crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+ }
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
@@ -426,17 +436,21 @@ static int crypto_aegis128_decrypt(struct aead_request *req)
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);
- crypto_aegis128_init(&state, &ctx->key, req->iv);
- crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
skcipher_walk_aead_decrypt(&walk, req, false);
- if (aegis128_do_simd())
+ if (aegis128_do_simd()) {
+ crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+ crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk_simd);
- else
+ crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+ cryptlen);
+ } else {
+ crypto_aegis128_init(&state, &ctx->key, req->iv);
+ crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk);
- crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+ crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+ }
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index f05310ca..2a660ac1 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -132,6 +132,36 @@ void preload_sbox(void)
:: "r"(crypto_aes_sbox));
}
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
+{
+ static const uint8_t const0[] = {
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+ 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+ };
+ static const uint8_t const1[] = {
+ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+ 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+ };
+ uint8x16_t k = vld1q_u8(key);
+ uint8x16_t kiv = k ^ vld1q_u8(iv);
+ struct aegis128_state st = {{
+ kiv,
+ vld1q_u8(const1),
+ vld1q_u8(const0),
+ k ^ vld1q_u8(const0),
+ k ^ vld1q_u8(const1),
+ }};
+ int i;
+
+ preload_sbox();
+
+ for (i = 0; i < 5; i++) {
+ st = aegis128_update_neon(st, k);
+ st = aegis128_update_neon(st, kiv);
+ }
+ aegis128_save_state_neon(st, state);
+}
+
void crypto_aegis128_update_neon(void *state, const void *msg)
{
struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
aegis128_save_state_neon(st, state);
}
+
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+ uint64_t cryptlen)
+{
+ struct aegis128_state st = aegis128_load_state_neon(state);
+ uint8x16_t v;
+ int i;
+
+ preload_sbox();
+
+ v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
+ vmov_n_u64(8 * cryptlen));
+
+ for (i = 0; i < 7; i++)
+ st = aegis128_update_neon(st, v);
+
+ v = vld1q_u8(tag_xor);
+ v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
+ vst1q_u8(tag_xor, v);
+}
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index 751f9c19..8271b1fa 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -8,11 +8,14 @@
#include "aegis.h"
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv);
void crypto_aegis128_update_neon(void *state, const void *msg);
void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+ uint64_t cryptlen);
int aegis128_have_aes_insn __ro_after_init;
@@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
return IS_ENABLED(CONFIG_ARM64);
}
+void crypto_aegis128_init_simd(union aegis_block *state,
+ const union aegis_block *key,
+ const u8 *iv)
+{
+ kernel_neon_begin();
+ crypto_aegis128_init_neon(state, key, iv);
+ kernel_neon_end();
+}
+
void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
{
kernel_neon_begin();
@@ -47,3 +59,12 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
kernel_neon_end();
}
+
+void crypto_aegis128_final_simd(union aegis_block *state,
+ union aegis_block *tag_xor,
+ u64 assoclen, u64 cryptlen)
+{
+ kernel_neon_begin();
+ crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen);
+ kernel_neon_end();
+}