From 50a0dceaa2634cd5a2a645ae0714da22fbbb2358 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:33 +0000 Subject: crypto: sha3-generic - fixes for alignment and big endian operation Ensure that the input is byte swabbed before injecting it into the SHA3 transform. Use the get_unaligned() accessor for this so that we don't perform unaligned access inadvertently on architectures that do not support that. Cc: Fixes: dc4b27a1bc222c3b ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/sha3_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'crypto/sha3_generic.c') diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 7e8ed962..a68be626 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KECCAK_ROUNDS 24 @@ -149,7 +150,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int i; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) src)[i]; + sctx->st[i] ^= get_unaligned_le64(src + 8 * i); keccakf(sctx->st); done += sctx->rsiz; @@ -174,7 +175,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) sctx->buf[sctx->rsiz - 1] |= 0x80; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i); keccakf(sctx->st); -- cgit v1.2.3 From 116121f60112aefdcda040aae3f8677e778e7303 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:34 +0000 Subject: crypto: sha3-generic - rewrite KECCAK transform to help the compiler optimize The way the KECCAK transform is currently coded involves many references into the state array using indexes that are calculated at runtime using simple but non-trivial arithmetic. This forces the compiler to treat the state matrix as an array in memory rather than keep it in registers, which results in poor performance. So instead, let's rephrase the algorithm using fixed array indexes only. This helps the compiler keep the state matrix in registers, resulting in the following speedup (SHA3-256 performance in cycles per byte): before after speedup Intel Core i7 @ 2.0 GHz (2.9 turbo) 100.6 35.7 2.8x Cortex-A57 @ 2.0 GHz (64-bit mode) 101.6 12.7 8.0x Cortex-A53 @ 1.0 GHz 224.4 15.8 14.2x Cortex-A57 @ 2.0 GHz (32-bit mode) 201.8 63.0 3.2x Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/sha3_generic.c | 134 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 38 deletions(-) (limited to 'crypto/sha3_generic.c') diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index a68be626..5fecb609 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -5,6 +5,7 @@ * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf * * SHA-3 code by Jeff Garzik + * Ard Biesheuvel * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -22,8 +23,6 @@ #define KECCAK_ROUNDS 24 -#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) - static const u64 keccakf_rndc[24] = { 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, @@ -35,53 +34,112 @@ static const u64 keccakf_rndc[24] = { 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; -static const int keccakf_rotc[24] = { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 -}; - -static const int keccakf_piln[24] = { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 -}; - /* update the state with given number of rounds */ -static void keccakf(u64 st[25]) +static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) { - int i, j, round; - u64 t, bc[5]; + u64 t[5], tt, bc[5]; + int round; for (round = 0; round < KECCAK_ROUNDS; round++) { /* Theta */ - for (i = 0; i < 5; i++) - bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] - ^ st[i + 20]; - - for (i = 0; i < 5; i++) { - t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); - for (j = 0; j < 25; j += 5) - st[j + i] ^= t; - } + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + + t[0] = bc[4] ^ rol64(bc[1], 1); + t[1] = bc[0] ^ rol64(bc[2], 1); + t[2] = bc[1] ^ rol64(bc[3], 1); + t[3] = bc[2] ^ rol64(bc[4], 1); + t[4] = bc[3] ^ rol64(bc[0], 1); + + st[0] ^= t[0]; /* Rho Pi */ - t = st[1]; - for (i = 0; i < 24; i++) { - j = keccakf_piln[i]; - bc[0] = st[j]; - st[j] = ROTL64(t, keccakf_rotc[i]); - t = bc[0]; - } + tt = st[1]; + st[ 1] = rol64(st[ 6] ^ t[1], 44); + st[ 6] = rol64(st[ 9] ^ t[4], 20); + st[ 9] = rol64(st[22] ^ t[2], 61); + st[22] = rol64(st[14] ^ t[4], 39); + st[14] = rol64(st[20] ^ t[0], 18); + st[20] = rol64(st[ 2] ^ t[2], 62); + st[ 2] = rol64(st[12] ^ t[2], 43); + st[12] = rol64(st[13] ^ t[3], 25); + st[13] = rol64(st[19] ^ t[4], 8); + st[19] = rol64(st[23] ^ t[3], 56); + st[23] = rol64(st[15] ^ t[0], 41); + st[15] = rol64(st[ 4] ^ t[4], 27); + st[ 4] = rol64(st[24] ^ t[4], 14); + st[24] = rol64(st[21] ^ t[1], 2); + st[21] = rol64(st[ 8] ^ t[3], 55); + st[ 8] = rol64(st[16] ^ t[1], 45); + st[16] = rol64(st[ 5] ^ t[0], 36); + st[ 5] = rol64(st[ 3] ^ t[3], 28); + st[ 3] = rol64(st[18] ^ t[3], 21); + st[18] = rol64(st[17] ^ t[2], 15); + st[17] = rol64(st[11] ^ t[1], 10); + st[11] = rol64(st[ 7] ^ t[2], 6); + st[ 7] = rol64(st[10] ^ t[0], 3); + st[10] = rol64( tt ^ t[1], 1); /* Chi */ - for (j = 0; j < 25; j += 5) { - for (i = 0; i < 5; i++) - bc[i] = st[j + i]; - for (i = 0; i < 5; i++) - st[j + i] ^= (~bc[(i + 1) % 5]) & - bc[(i + 2) % 5]; - } + bc[ 0] = ~st[ 1] & st[ 2]; + bc[ 1] = ~st[ 2] & st[ 3]; + bc[ 2] = ~st[ 3] & st[ 4]; + bc[ 3] = ~st[ 4] & st[ 0]; + bc[ 4] = ~st[ 0] & st[ 1]; + st[ 0] ^= bc[ 0]; + st[ 1] ^= bc[ 1]; + st[ 2] ^= bc[ 2]; + st[ 3] ^= bc[ 3]; + st[ 4] ^= bc[ 4]; + + bc[ 0] = ~st[ 6] & st[ 7]; + bc[ 1] = ~st[ 7] & st[ 8]; + bc[ 2] = ~st[ 8] & st[ 9]; + bc[ 3] = ~st[ 9] & st[ 5]; + bc[ 4] = ~st[ 5] & st[ 6]; + st[ 5] ^= bc[ 0]; + st[ 6] ^= bc[ 1]; + st[ 7] ^= bc[ 2]; + st[ 8] ^= bc[ 3]; + st[ 9] ^= bc[ 4]; + + bc[ 0] = ~st[11] & st[12]; + bc[ 1] = ~st[12] & st[13]; + bc[ 2] = ~st[13] & st[14]; + bc[ 3] = ~st[14] & st[10]; + bc[ 4] = ~st[10] & st[11]; + st[10] ^= bc[ 0]; + st[11] ^= bc[ 1]; + st[12] ^= bc[ 2]; + st[13] ^= bc[ 3]; + st[14] ^= bc[ 4]; + + bc[ 0] = ~st[16] & st[17]; + bc[ 1] = ~st[17] & st[18]; + bc[ 2] = ~st[18] & st[19]; + bc[ 3] = ~st[19] & st[15]; + bc[ 4] = ~st[15] & st[16]; + st[15] ^= bc[ 0]; + st[16] ^= bc[ 1]; + st[17] ^= bc[ 2]; + st[18] ^= bc[ 3]; + st[19] ^= bc[ 4]; + + bc[ 0] = ~st[21] & st[22]; + bc[ 1] = ~st[22] & st[23]; + bc[ 2] = ~st[23] & st[24]; + bc[ 3] = ~st[24] & st[20]; + bc[ 4] = ~st[20] & st[21]; + st[20] ^= bc[ 0]; + st[21] ^= bc[ 1]; + st[22] ^= bc[ 2]; + st[23] ^= bc[ 3]; + st[24] ^= bc[ 4]; /* Iota */ st[0] ^= keccakf_rndc[round]; -- cgit v1.2.3 From 9fb5b09b8d34b49c67c05eab0d3d92e806c03b24 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:35 +0000 Subject: crypto: sha3-generic - simplify code In preparation of exposing the generic SHA3 implementation to other versions as a fallback, simplify the code, and remove an inconsistency in the output handling (endian swabbing rsizw words of state before writing the output does not make sense) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/sha3_generic.c | 184 ++++++++++++++++---------------------------------- 1 file changed, 59 insertions(+), 125 deletions(-) (limited to 'crypto/sha3_generic.c') diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 5fecb609..c7084a24 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #define KECCAK_ROUNDS 24 @@ -146,43 +145,16 @@ static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) } } -static void sha3_init(struct sha3_state *sctx, unsigned int digest_sz) -{ - memset(sctx, 0, sizeof(*sctx)); - sctx->md_len = digest_sz; - sctx->rsiz = 200 - 2 * digest_sz; - sctx->rsizw = sctx->rsiz / 8; -} - -static int sha3_224_init(struct shash_desc *desc) +static int sha3_init(struct shash_desc *desc) { struct sha3_state *sctx = shash_desc_ctx(desc); + unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - sha3_init(sctx, SHA3_224_DIGEST_SIZE); - return 0; -} - -static int sha3_256_init(struct shash_desc *desc) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - - sha3_init(sctx, SHA3_256_DIGEST_SIZE); - return 0; -} - -static int sha3_384_init(struct shash_desc *desc) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - - sha3_init(sctx, SHA3_384_DIGEST_SIZE); - return 0; -} - -static int sha3_512_init(struct shash_desc *desc) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); + sctx->rsiz = 200 - 2 * digest_size; + sctx->rsizw = sctx->rsiz / 8; + sctx->partial = 0; - sha3_init(sctx, SHA3_512_DIGEST_SIZE); + memset(sctx->st, 0, sizeof(sctx->st)); return 0; } @@ -227,6 +199,8 @@ static int sha3_final(struct shash_desc *desc, u8 *out) { struct sha3_state *sctx = shash_desc_ctx(desc); unsigned int i, inlen = sctx->partial; + unsigned int digest_size = crypto_shash_digestsize(desc->tfm); + __le64 *digest = (__le64 *)out; sctx->buf[inlen++] = 0x06; memset(sctx->buf + inlen, 0, sctx->rsiz - inlen); @@ -237,110 +211,70 @@ static int sha3_final(struct shash_desc *desc, u8 *out) keccakf(sctx->st); - for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] = cpu_to_le64(sctx->st[i]); + for (i = 0; i < digest_size / 8; i++) + put_unaligned_le64(sctx->st[i], digest++); - memcpy(out, sctx->st, sctx->md_len); + if (digest_size & 4) + put_unaligned_le32(sctx->st[i], (__le32 *)digest); memset(sctx, 0, sizeof(*sctx)); return 0; } -static struct shash_alg sha3_224 = { - .digestsize = SHA3_224_DIGEST_SIZE, - .init = sha3_224_init, - .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), - .base = { - .cra_name = "sha3-224", - .cra_driver_name = "sha3-224-generic", - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA3_224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static struct shash_alg sha3_256 = { - .digestsize = SHA3_256_DIGEST_SIZE, - .init = sha3_256_init, - .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), - .base = { - .cra_name = "sha3-256", - .cra_driver_name = "sha3-256-generic", - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA3_256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static struct shash_alg sha3_384 = { - .digestsize = SHA3_384_DIGEST_SIZE, - .init = sha3_384_init, - .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), - .base = { - .cra_name = "sha3-384", - .cra_driver_name = "sha3-384-generic", - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA3_384_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static struct shash_alg sha3_512 = { - .digestsize = SHA3_512_DIGEST_SIZE, - .init = sha3_512_init, - .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), - .base = { - .cra_name = "sha3-512", - .cra_driver_name = "sha3-512-generic", - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA3_512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; +static struct shash_alg algs[] = { { + .digestsize = SHA3_224_DIGEST_SIZE, + .init = sha3_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base.cra_name = "sha3-224", + .base.cra_driver_name = "sha3-224-generic", + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA3_224_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}, { + .digestsize = SHA3_256_DIGEST_SIZE, + .init = sha3_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base.cra_name = "sha3-256", + .base.cra_driver_name = "sha3-256-generic", + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA3_256_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}, { + .digestsize = SHA3_384_DIGEST_SIZE, + .init = sha3_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base.cra_name = "sha3-384", + .base.cra_driver_name = "sha3-384-generic", + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA3_384_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}, { + .digestsize = SHA3_512_DIGEST_SIZE, + .init = sha3_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base.cra_name = "sha3-512", + .base.cra_driver_name = "sha3-512-generic", + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA3_512_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +} }; static int __init sha3_generic_mod_init(void) { - int ret; - - ret = crypto_register_shash(&sha3_224); - if (ret < 0) - goto err_out; - ret = crypto_register_shash(&sha3_256); - if (ret < 0) - goto err_out_224; - ret = crypto_register_shash(&sha3_384); - if (ret < 0) - goto err_out_256; - ret = crypto_register_shash(&sha3_512); - if (ret < 0) - goto err_out_384; - - return 0; - -err_out_384: - crypto_unregister_shash(&sha3_384); -err_out_256: - crypto_unregister_shash(&sha3_256); -err_out_224: - crypto_unregister_shash(&sha3_224); -err_out: - return ret; + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } static void __exit sha3_generic_mod_fini(void) { - crypto_unregister_shash(&sha3_224); - crypto_unregister_shash(&sha3_256); - crypto_unregister_shash(&sha3_384); - crypto_unregister_shash(&sha3_512); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha3_generic_mod_init); -- cgit v1.2.3 From 05c0463c8af9154c19d7824aed5fe1b540b7c396 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:36 +0000 Subject: crypto: sha3-generic - export init/update/final routines To allow accelerated implementations to fall back to the generic routines, e.g., in contexts where a SIMD based implementation is not allowed to run, expose the generic SHA3 init/update/final routines to other modules. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/sha3_generic.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'crypto/sha3_generic.c') diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index c7084a24..a965b9d8 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -145,7 +145,7 @@ static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) } } -static int sha3_init(struct shash_desc *desc) +int crypto_sha3_init(struct shash_desc *desc) { struct sha3_state *sctx = shash_desc_ctx(desc); unsigned int digest_size = crypto_shash_digestsize(desc->tfm); @@ -157,8 +157,9 @@ static int sha3_init(struct shash_desc *desc) memset(sctx->st, 0, sizeof(sctx->st)); return 0; } +EXPORT_SYMBOL(crypto_sha3_init); -static int sha3_update(struct shash_desc *desc, const u8 *data, +int crypto_sha3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha3_state *sctx = shash_desc_ctx(desc); @@ -194,8 +195,9 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, return 0; } +EXPORT_SYMBOL(crypto_sha3_update); -static int sha3_final(struct shash_desc *desc, u8 *out) +int crypto_sha3_final(struct shash_desc *desc, u8 *out) { struct sha3_state *sctx = shash_desc_ctx(desc); unsigned int i, inlen = sctx->partial; @@ -220,12 +222,13 @@ static int sha3_final(struct shash_desc *desc, u8 *out) memset(sctx, 0, sizeof(*sctx)); return 0; } +EXPORT_SYMBOL(crypto_sha3_final); static struct shash_alg algs[] = { { .digestsize = SHA3_224_DIGEST_SIZE, - .init = sha3_init, - .update = sha3_update, - .final = sha3_final, + .init = crypto_sha3_init, + .update = crypto_sha3_update, + .final = crypto_sha3_final, .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-224", .base.cra_driver_name = "sha3-224-generic", @@ -234,9 +237,9 @@ static struct shash_alg algs[] = { { .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_256_DIGEST_SIZE, - .init = sha3_init, - .update = sha3_update, - .final = sha3_final, + .init = crypto_sha3_init, + .update = crypto_sha3_update, + .final = crypto_sha3_final, .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-256", .base.cra_driver_name = "sha3-256-generic", @@ -245,9 +248,9 @@ static struct shash_alg algs[] = { { .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_384_DIGEST_SIZE, - .init = sha3_init, - .update = sha3_update, - .final = sha3_final, + .init = crypto_sha3_init, + .update = crypto_sha3_update, + .final = crypto_sha3_final, .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-384", .base.cra_driver_name = "sha3-384-generic", @@ -256,9 +259,9 @@ static struct shash_alg algs[] = { { .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_512_DIGEST_SIZE, - .init = sha3_init, - .update = sha3_update, - .final = sha3_final, + .init = crypto_sha3_init, + .update = crypto_sha3_update, + .final = crypto_sha3_final, .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-512", .base.cra_driver_name = "sha3-512-generic", -- cgit v1.2.3