From 4923750a382297097b6654585728c68d77bbb2af Mon Sep 17 00:00:00 2001 From: Gideon Israel Dsouza Date: Sat, 31 Dec 2016 21:26:23 +0530 Subject: crypto: Replaced gcc specific attributes with macros from compiler.h Continuing from this commit: 52f5684c8e1e ("kernel: use macros from compiler.h instead of __attribute__((...))") I submitted 4 total patches. They are part of task I've taken up to increase compiler portability in the kernel. I've cleaned up the subsystems under /kernel /mm /block and /security, this patch targets /crypto. There is which provides macros for various gcc specific constructs. Eg: __weak for __attribute__((weak)). I've cleaned all instances of gcc specific attributes with the right macros for the crypto subsystem. I had to make one additional change into compiler-gcc.h for the case when one wants to use this: __attribute__((aligned) and not specify an alignment factor. From the gcc docs, this will result in the largest alignment for that data type on the target machine so I've named the macro __aligned_largest. Please advise if another name is more appropriate. Signed-off-by: Gideon Israel Dsouza Signed-off-by: Herbert Xu --- crypto/cts.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'crypto/cts.c') diff --git a/crypto/cts.c b/crypto/cts.c index 00254d76..a1335d6c 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -49,6 +49,7 @@ #include #include #include +#include struct crypto_cts_ctx { struct crypto_skcipher *child; @@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; @@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; -- cgit v1.2.3 From 34bd71c0061c774d52b7cac58120ff37f112da58 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 5 Feb 2017 10:06:12 +0000 Subject: crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic Instead of unconditionally forcing 4 byte alignment for all generic chaining modes that rely on crypto_xor() or crypto_inc() (which may result in unnecessary copying of data when the underlying hardware can perform unaligned accesses efficiently), make those functions deal with unaligned input explicitly, but only if the Kconfig symbol HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers. For crypto_inc(), this simply involves making the 4-byte stride conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that it typically operates on 16 byte buffers. For crypto_xor(), an algorithm is implemented that simply runs through the input using the largest strides possible if unaligned accesses are allowed. If they are not, an optimal sequence of memory accesses is emitted that takes the relative alignment of the input buffers into account, e.g., if the relative misalignment of dst and src is 4 bytes, the entire xor operation will be completed using 4 byte loads and stores (modulo unaligned bits at the start and end). Note that all expressions involving misalign are simply eliminated by the compiler when HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/algapi.c | 68 ++++++++++++++++++++++++++++++++++++++++++--------------- crypto/cbc.c | 3 --- crypto/cmac.c | 3 +-- crypto/ctr.c | 2 +- crypto/cts.c | 3 --- crypto/pcbc.c | 3 --- crypto/seqiv.c | 2 -- 7 files changed, 52 insertions(+), 32 deletions(-) (limited to 'crypto/cts.c') diff --git a/crypto/algapi.c b/crypto/algapi.c index 1fad2a6b..6b52e8f0 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size) __be32 *b = (__be32 *)(a + size); u32 c; - for (; size >= 4; size -= 4) { - c = be32_to_cpu(*--b) + 1; - *b = cpu_to_be32(c); - if (c) - return; - } + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !((unsigned long)b & (__alignof__(*b) - 1))) + for (; size >= 4; size -= 4) { + c = be32_to_cpu(*--b) + 1; + *b = cpu_to_be32(c); + if (c) + return; + } crypto_inc_byte(a, size); } EXPORT_SYMBOL_GPL(crypto_inc); -static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size) +void __crypto_xor(u8 *dst, const u8 *src, unsigned int len) { - for (; size; size--) - *a++ ^= *b++; -} + int relalign = 0; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + int size = sizeof(unsigned long); + int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1); + + relalign = d ? 1 << __ffs(d) : size; + + /* + * If we care about alignment, process as many bytes as + * needed to advance dst and src to values whose alignments + * equal their relative alignment. This will allow us to + * process the remainder of the input using optimal strides. + */ + while (((unsigned long)dst & (relalign - 1)) && len > 0) { + *dst++ ^= *src++; + len--; + } + } -void crypto_xor(u8 *dst, const u8 *src, unsigned int size) -{ - u32 *a = (u32 *)dst; - u32 *b = (u32 *)src; + while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) { + *(u64 *)dst ^= *(u64 *)src; + dst += 8; + src += 8; + len -= 8; + } - for (; size >= 4; size -= 4) - *a++ ^= *b++; + while (len >= 4 && !(relalign & 3)) { + *(u32 *)dst ^= *(u32 *)src; + dst += 4; + src += 4; + len -= 4; + } + + while (len >= 2 && !(relalign & 1)) { + *(u16 *)dst ^= *(u16 *)src; + dst += 2; + src += 2; + len -= 2; + } - crypto_xor_byte((u8 *)a, (u8 *)b, size); + while (len--) + *dst++ ^= *src++; } -EXPORT_SYMBOL_GPL(crypto_xor); +EXPORT_SYMBOL_GPL(__crypto_xor); unsigned int crypto_alg_extsize(struct crypto_alg *alg) { diff --git a/crypto/cbc.c b/crypto/cbc.c index 68f751a4..bc160a31 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - /* We access the data as u32s when xoring. */ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->cra_blocksize; inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; diff --git a/crypto/cmac.c b/crypto/cmac.c index 04080dca..16301f52 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (err) goto out_free_inst; - /* We access the data as u32s when xoring. */ - alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); + alignmask = alg->cra_alignmask; inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; diff --git a/crypto/ctr.c b/crypto/ctr.c index a9a7a44f..a4f4a898 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb) inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; inst->alg.cra_priority = alg->cra_priority; inst->alg.cra_blocksize = 1; - inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); + inst->alg.cra_alignmask = alg->cra_alignmask; inst->alg.cra_type = &crypto_blkcipher_type; inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; diff --git a/crypto/cts.c b/crypto/cts.c index a1335d6c..243f591d 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -374,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->base.cra_blocksize; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - /* We access the data as u32s when xoring. */ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->base.cra_blocksize; inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index 11d24867..29dd2b4a 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -260,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - /* We access the data as u32s when xoring. */ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->cra_blocksize; inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; diff --git a/crypto/seqiv.c b/crypto/seqiv.c index c7049231..570b7d1a 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) if (IS_ERR(inst)) return PTR_ERR(inst); - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - spawn = aead_instance_ctx(inst); alg = crypto_spawn_aead_alg(spawn); -- cgit v1.2.3