From 899635655e977a2688ec3bf2430a08444a2a0004 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 25 Nov 2013 22:00:41 -0200 Subject: crypto: more robust crypto_memneq Disabling compiler optimizations can be fragile, since a new optimization could be added to -O0 or -Os that breaks the assumptions the code is making. Instead of disabling compiler optimizations, use a dummy inline assembly (based on RELOC_HIDE) to block the problematic kinds of optimization, while still allowing other optimizations to be applied to the code. The dummy inline assembly is added after every OR, and has the accumulator variable as its input and output. The compiler is forced to assume that the dummy inline assembly could both depend on the accumulator variable and change the accumulator variable, so it is forced to compute the value correctly before the inline assembly, and cannot assume anything about its value after the inline assembly. This change should be enough to make crypto_memneq work correctly (with data-independent timing) even if it is inlined at its call sites. That can be done later in a followup patch. Compile-tested on x86_64. Signed-off-by: Cesar Eduardo Barros Acked-by: Daniel Borkmann Signed-off-by: Herbert Xu --- crypto/Makefile | 5 ---- crypto/memneq.c | 79 +++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/crypto/Makefile b/crypto/Makefile index 989c510d..b29402a7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -2,11 +2,6 @@ # Cryptographic API # -# memneq MUST be built with -Os or -O0 to prevent early-return optimizations -# that will defeat memneq's actual purpose to prevent timing attacks. -CFLAGS_REMOVE_memneq.o := -O1 -O2 -O3 -CFLAGS_memneq.o := -Os - obj-$(CONFIG_CRYPTO) += crypto.o crypto-y := api.o cipher.o compress.o memneq.o diff --git a/crypto/memneq.c b/crypto/memneq.c index cd016222..570f6f34 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c @@ -72,6 +72,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) while (size >= sizeof(unsigned long)) { neq |= *(unsigned long *)a ^ *(unsigned long *)b; + OPTIMIZER_HIDE_VAR(neq); a += sizeof(unsigned long); b += sizeof(unsigned long); size -= sizeof(unsigned long); @@ -79,6 +80,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ while (size > 0) { neq |= *(unsigned char *)a ^ *(unsigned char *)b; + OPTIMIZER_HIDE_VAR(neq); a += 1; b += 1; size -= 1; @@ -89,33 +91,60 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) /* Loop-free fast-path for frequently used 16-byte size */ static inline unsigned long __crypto_memneq_16(const void *a, const void *b) { + unsigned long neq = 0; + #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (sizeof(unsigned long) == 8) - return ((*(unsigned long *)(a) ^ *(unsigned long *)(b)) - | (*(unsigned long *)(a+8) ^ *(unsigned long *)(b+8))); - else if (sizeof(unsigned int) == 4) - return ((*(unsigned int *)(a) ^ *(unsigned int *)(b)) - | (*(unsigned int *)(a+4) ^ *(unsigned int *)(b+4)) - | (*(unsigned int *)(a+8) ^ *(unsigned int *)(b+8)) - | (*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12))); - else + if (sizeof(unsigned long) == 8) { + neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + } else if (sizeof(unsigned int) == 4) { + neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); + OPTIMIZER_HIDE_VAR(neq); + } else { #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - return ((*(unsigned char *)(a) ^ *(unsigned char *)(b)) - | (*(unsigned char *)(a+1) ^ *(unsigned char *)(b+1)) - | (*(unsigned char *)(a+2) ^ *(unsigned char *)(b+2)) - | (*(unsigned char *)(a+3) ^ *(unsigned char *)(b+3)) - | (*(unsigned char *)(a+4) ^ *(unsigned char *)(b+4)) - | (*(unsigned char *)(a+5) ^ *(unsigned char *)(b+5)) - | (*(unsigned char *)(a+6) ^ *(unsigned char *)(b+6)) - | (*(unsigned char *)(a+7) ^ *(unsigned char *)(b+7)) - | (*(unsigned char *)(a+8) ^ *(unsigned char *)(b+8)) - | (*(unsigned char *)(a+9) ^ *(unsigned char *)(b+9)) - | (*(unsigned char *)(a+10) ^ *(unsigned char *)(b+10)) - | (*(unsigned char *)(a+11) ^ *(unsigned char *)(b+11)) - | (*(unsigned char *)(a+12) ^ *(unsigned char *)(b+12)) - | (*(unsigned char *)(a+13) ^ *(unsigned char *)(b+13)) - | (*(unsigned char *)(a+14) ^ *(unsigned char *)(b+14)) - | (*(unsigned char *)(a+15) ^ *(unsigned char *)(b+15))); + neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15); + OPTIMIZER_HIDE_VAR(neq); + } + + return neq; } /* Compare two areas of memory without leaking timing information, -- cgit v1.2.3 From 0a7be0a5838101af7d24f47c715ed353341e88bf Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 28 Nov 2013 19:20:04 +0100 Subject: crypto: pcrypt - Fix wrong usage of rcu_dereference() A kernel with enabled lockdep complains about the wrong usage of rcu_dereference() under a rcu_read_lock_bh() protected region. =============================== [ INFO: suspicious RCU usage. ] 3.13.0-rc1+ #126 Not tainted ------------------------------- linux/crypto/pcrypt.c:81 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by cryptomgr_test/153: #0: (rcu_read_lock_bh){.+....}, at: [] pcrypt_do_parallel.isra.2+0x5/0x200 Fix that by using rcu_dereference_bh() instead. Signed-off-by: Mathias Krause Cc: "David S. Miller" Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- crypto/pcrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8c920ca..309d345e 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -78,7 +78,7 @@ static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu, cpu = *cb_cpu; rcu_read_lock_bh(); - cpumask = rcu_dereference(pcrypt->cb_cpumask); + cpumask = rcu_dereference_bh(pcrypt->cb_cpumask); if (cpumask_test_cpu(cpu, cpumask->mask)) goto out; -- cgit v1.2.3 From cbab377a633a32bcb18d3cee2905ab720e7e921f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 00:33:33 +0100 Subject: crypto: memneq - fix for archs without efficient unaligned access Commit 899635655e97 introduced a possible build error for archs that do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set. :/ Fix this up by bringing else braces outside of the ifdef. Reported-by: Fengguang Wu Fixes: 899635655e97 ("crypto: more robust crypto_memneq") Signed-off-by: Daniel Borkmann Acked-By: Cesar Eduardo Barros Signed-off-by: Herbert Xu --- crypto/memneq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/memneq.c b/crypto/memneq.c index 570f6f34..afed1bd1 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c @@ -108,8 +108,9 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b) OPTIMIZER_HIDE_VAR(neq); neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); OPTIMIZER_HIDE_VAR(neq); - } else { + } else #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + { neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); OPTIMIZER_HIDE_VAR(neq); neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); -- cgit v1.2.3 From 78470cf88700c8629e17b2935ab1a4ac00e5819b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 11 Dec 2013 14:28:47 -0800 Subject: crypto: tcrypt - Added speed tests for AEAD crypto alogrithms in tcrypt test suite Adding simple speed tests for a range of block sizes for AEAD crypto algorithms. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ crypto/tcrypt.h | 10 +++ 2 files changed, 280 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 001f07cd..0d9003ae 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -137,7 +137,272 @@ out: return ret; } +static int test_aead_jiffies(struct aead_request *req, int enc, + int blen, int sec) +{ + unsigned long start, end; + int bcount; + int ret; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + + if (ret) + return ret; + } + + printk("%d operations in %d seconds (%ld bytes)\n", + bcount, sec, (long)bcount * blen); + return 0; +} + +static int test_aead_cycles(struct aead_request *req, int enc, int blen) +{ + unsigned long cycles = 0; + int ret = 0; + int i; + + local_irq_disable(); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + end = get_cycles(); + + if (ret) + goto out; + + cycles += end - start; + } + +out: + local_irq_enable(); + + if (ret == 0) + printk("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / 8, blen); + + return ret; +} + static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; +static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; + +#define XBUFSIZE 8 +#define MAX_IVLEN 32 + +static int testmgr_alloc_buf(char *buf[XBUFSIZE]) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) { + buf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!buf[i]) + goto err_free_buf; + } + + return 0; + +err_free_buf: + while (i-- > 0) + free_page((unsigned long)buf[i]); + + return -ENOMEM; +} + +static void testmgr_free_buf(char *buf[XBUFSIZE]) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)buf[i]); +} + +static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], + unsigned int buflen) +{ + int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE; + int k, rem; + + np = (np > XBUFSIZE) ? XBUFSIZE : np; + rem = buflen % PAGE_SIZE; + if (np > XBUFSIZE) { + rem = PAGE_SIZE; + np = XBUFSIZE; + } + sg_init_table(sg, np); + for (k = 0; k < np; ++k) { + if (k == (np-1)) + sg_set_buf(&sg[k], xbuf[k], rem); + else + sg_set_buf(&sg[k], xbuf[k], PAGE_SIZE); + } +} + +static void test_aead_speed(const char *algo, int enc, unsigned int sec, + struct aead_speed_template *template, + unsigned int tcount, u8 authsize, + unsigned int aad_size, u8 *keysize) +{ + unsigned int i, j; + struct crypto_aead *tfm; + int ret = -ENOMEM; + const char *key; + struct aead_request *req; + struct scatterlist *sg; + struct scatterlist *asg; + struct scatterlist *sgout; + const char *e; + void *assoc; + char iv[MAX_IVLEN]; + char *xbuf[XBUFSIZE]; + char *xoutbuf[XBUFSIZE]; + char *axbuf[XBUFSIZE]; + unsigned int *b_size; + unsigned int iv_len; + + if (enc == ENCRYPT) + e = "encryption"; + else + e = "decryption"; + + if (testmgr_alloc_buf(xbuf)) + goto out_noxbuf; + if (testmgr_alloc_buf(axbuf)) + goto out_noaxbuf; + if (testmgr_alloc_buf(xoutbuf)) + goto out_nooutbuf; + + sg = kmalloc(sizeof(*sg) * 8 * 3, GFP_KERNEL); + if (!sg) + goto out_nosg; + asg = &sg[8]; + sgout = &asg[8]; + + + printk(KERN_INFO "\ntesting speed of %s %s\n", algo, e); + + tfm = crypto_alloc_aead(algo, 0, 0); + + if (IS_ERR(tfm)) { + pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); + return; + } + + req = aead_request_alloc(tfm, GFP_KERNEL); + if (!req) { + pr_err("alg: aead: Failed to allocate request for %s\n", + algo); + goto out; + } + + i = 0; + do { + b_size = aead_sizes; + do { + assoc = axbuf[0]; + + if (aad_size < PAGE_SIZE) + memset(assoc, 0xff, aad_size); + else { + pr_err("associate data length (%u) too big\n", + aad_size); + goto out_nosg; + } + sg_init_one(&asg[0], assoc, aad_size); + + if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + *keysize + *b_size, + TVMEMSIZE * PAGE_SIZE); + goto out; + } + + key = tvmem[0]; + for (j = 0; j < tcount; j++) { + if (template[j].klen == *keysize) { + key = template[j].key; + break; + } + } + ret = crypto_aead_setkey(tfm, key, *keysize); + ret = crypto_aead_setauthsize(tfm, authsize); + + iv_len = crypto_aead_ivsize(tfm); + if (iv_len) + memset(&iv, 0xff, iv_len); + + crypto_aead_clear_flags(tfm, ~0); + printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", + i, *keysize * 8, *b_size); + + + memset(tvmem[0], 0xff, PAGE_SIZE); + + if (ret) { + pr_err("setkey() failed flags=%x\n", + crypto_aead_get_flags(tfm)); + goto out; + } + + sg_init_aead(&sg[0], xbuf, + *b_size + (enc ? authsize : 0)); + + sg_init_aead(&sgout[0], xoutbuf, + *b_size + (enc ? authsize : 0)); + + aead_request_set_crypt(req, sg, sgout, *b_size, iv); + aead_request_set_assoc(req, asg, aad_size); + + if (sec) + ret = test_aead_jiffies(req, enc, *b_size, sec); + else + ret = test_aead_cycles(req, enc, *b_size); + + if (ret) { + pr_err("%s() failed return code=%d\n", e, ret); + break; + } + b_size++; + i++; + } while (*b_size); + keysize++; + } while (*keysize); + +out: + crypto_free_aead(tfm); + kfree(sg); +out_nosg: + testmgr_free_buf(xoutbuf); +out_nooutbuf: + testmgr_free_buf(axbuf); +out_noaxbuf: + testmgr_free_buf(xbuf); +out_noxbuf: + return; +} static void test_cipher_speed(const char *algo, int enc, unsigned int sec, struct cipher_speed_template *template, @@ -1427,6 +1692,11 @@ static int do_test(int m) speed_template_32_64); break; + case 211: + test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, + NULL, 0, 16, 8, aead_speed_template_20); + break; + case 300: /* fall through */ diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index ecdeeb1a..6c7e21a0 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -22,6 +22,11 @@ struct cipher_speed_template { unsigned int klen; }; +struct aead_speed_template { + const char *key; + unsigned int klen; +}; + struct hash_speed { unsigned int blen; /* buffer length */ unsigned int plen; /* per-update length */ @@ -57,6 +62,11 @@ static u8 speed_template_32_48[] = {32, 48, 0}; static u8 speed_template_32_48_64[] = {32, 48, 64, 0}; static u8 speed_template_32_64[] = {32, 64, 0}; +/* + * AEAD speed tests + */ +static u8 aead_speed_template_20[] = {20, 0}; + /* * Digest speed tests */ -- cgit v1.2.3 From ea99853924f8abacac7f4b65254cfb2165f403ba Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 10 Dec 2013 20:26:19 +0100 Subject: crypto: ahash - Fully restore ahash request before completing When finishing the ahash request, the ahash_op_unaligned_done() will call complete() on the request. Yet, this will not call the correct complete callback. The correct complete callback was previously stored in the requests' private data, as seen in ahash_op_unaligned(). This patch restores the correct complete callback and .data field of the request before calling complete() on it. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Shawn Guo Signed-off-by: Herbert Xu --- crypto/ahash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 793a27f2..a92dc382 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -213,7 +213,10 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) ahash_op_unaligned_finish(areq, err); - complete(data, err); + areq->base.complete = complete; + areq->base.data = data; + + complete(&areq->base, err); } static int ahash_op_unaligned(struct ahash_request *req, -- cgit v1.2.3