mirror of
				git://git.openwrt.org/openwrt/openwrt.git
				synced 2025-11-03 22:44:27 -05:00 
			
		
		
		
	Without this patch, the chacha block counter is not incremented on neon rounds, resulting in incorrect calculations and corrupt packets. This also switches to using `--no-numbered --zero-commit` so that future diffs are smaller. Reported-by: Hans Geiblinger <cybrnook2002@yahoo.com> Reviewed-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> Cc: David Bauer <mail@david-bauer.net> Cc: Petr Štetiar <ynezz@true.cz> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
		
			
				
	
	
		
			1165 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			1165 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 | 
						|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
 | 
						|
Date: Sun, 5 Jan 2020 22:40:46 -0500
 | 
						|
Subject: [PATCH] crypto: poly1305 - add new 32 and 64-bit generic versions
 | 
						|
 | 
						|
commit 1c08a104360f3e18f4ee6346c21cc3923efb952e upstream.
 | 
						|
 | 
						|
These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
 | 
						|
depending on the platform -- come from Andrew Moon's public domain
 | 
						|
poly1305-donna portable code, modified for usage in the kernel. The
 | 
						|
precomputation in the 32-bit version and the use of 64x64 multiplies in
 | 
						|
the 64-bit version make these perform better than the code it replaces.
 | 
						|
Moon's code is also very widespread and has received many eyeballs of
 | 
						|
scrutiny.
 | 
						|
 | 
						|
There's a bit of interference with the x86 implementation, which
 | 
						|
relies on internal details of the old scalar implementation. In the next
 | 
						|
commit, the x86 implementation will be replaced with a faster one that
 | 
						|
doesn't rely on this, so none of this matters much. But for now, to keep
 | 
						|
this passing the tests, we inline the bits of the old implementation
 | 
						|
that the x86 implementation relied on. Also, since we now support a
 | 
						|
slightly larger key space, via the union, some offsets had to be fixed
 | 
						|
up.
 | 
						|
 | 
						|
Nonce calculation was folded in with the emit function, to take
 | 
						|
advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
 | 
						|
nonce handling in emit, so this path was conditionalized. We also
 | 
						|
introduced a new struct, poly1305_core_key, to represent the precise
 | 
						|
amount of space that particular implementation uses.
 | 
						|
 | 
						|
Testing with kbench9000, depending on the CPU, the update function for
 | 
						|
the 32x32 version has been improved by 4%-7%, and for the 64x64 by
 | 
						|
19%-30%. The 32x32 gains are small, but I think there's great value in
 | 
						|
having a parallel implementation to the 64x64 one so that the two can be
 | 
						|
compared side-by-side as nice stand-alone units.
 | 
						|
 | 
						|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
 | 
						|
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
 | 
						|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
 | 
						|
---
 | 
						|
 arch/x86/crypto/poly1305-avx2-x86_64.S |  20 +--
 | 
						|
 arch/x86/crypto/poly1305_glue.c        | 215 +++++++++++++++++++++++--
 | 
						|
 crypto/adiantum.c                      |   4 +-
 | 
						|
 crypto/nhpoly1305.c                    |   2 +-
 | 
						|
 crypto/poly1305_generic.c              |  25 ++-
 | 
						|
 include/crypto/internal/poly1305.h     |  45 ++----
 | 
						|
 include/crypto/nhpoly1305.h            |   4 +-
 | 
						|
 include/crypto/poly1305.h              |  26 ++-
 | 
						|
 lib/crypto/Makefile                    |   4 +-
 | 
						|
 lib/crypto/poly1305-donna32.c          | 204 +++++++++++++++++++++++
 | 
						|
 lib/crypto/poly1305-donna64.c          | 185 +++++++++++++++++++++
 | 
						|
 lib/crypto/poly1305.c                  | 169 +------------------
 | 
						|
 12 files changed, 675 insertions(+), 228 deletions(-)
 | 
						|
 create mode 100644 lib/crypto/poly1305-donna32.c
 | 
						|
 create mode 100644 lib/crypto/poly1305-donna64.c
 | 
						|
 | 
						|
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
 | 
						|
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
 | 
						|
@@ -34,16 +34,16 @@ ORMASK:	.octa 0x000000000100000000000000
 | 
						|
 #define u2 0x08(%r8)
 | 
						|
 #define u3 0x0c(%r8)
 | 
						|
 #define u4 0x10(%r8)
 | 
						|
-#define w0 0x14(%r8)
 | 
						|
-#define w1 0x18(%r8)
 | 
						|
-#define w2 0x1c(%r8)
 | 
						|
-#define w3 0x20(%r8)
 | 
						|
-#define w4 0x24(%r8)
 | 
						|
-#define y0 0x28(%r8)
 | 
						|
-#define y1 0x2c(%r8)
 | 
						|
-#define y2 0x30(%r8)
 | 
						|
-#define y3 0x34(%r8)
 | 
						|
-#define y4 0x38(%r8)
 | 
						|
+#define w0 0x18(%r8)
 | 
						|
+#define w1 0x1c(%r8)
 | 
						|
+#define w2 0x20(%r8)
 | 
						|
+#define w3 0x24(%r8)
 | 
						|
+#define w4 0x28(%r8)
 | 
						|
+#define y0 0x30(%r8)
 | 
						|
+#define y1 0x34(%r8)
 | 
						|
+#define y2 0x38(%r8)
 | 
						|
+#define y3 0x3c(%r8)
 | 
						|
+#define y4 0x40(%r8)
 | 
						|
 #define m %rsi
 | 
						|
 #define hc0 %ymm0
 | 
						|
 #define hc1 %ymm1
 | 
						|
--- a/arch/x86/crypto/poly1305_glue.c
 | 
						|
+++ b/arch/x86/crypto/poly1305_glue.c
 | 
						|
@@ -25,6 +25,21 @@ asmlinkage void poly1305_4block_avx2(u32
 | 
						|
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
 | 
						|
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
 | 
						|
 
 | 
						|
+static inline u64 mlt(u64 a, u64 b)
 | 
						|
+{
 | 
						|
+	return a * b;
 | 
						|
+}
 | 
						|
+
 | 
						|
+static inline u32 sr(u64 v, u_char n)
 | 
						|
+{
 | 
						|
+	return v >> n;
 | 
						|
+}
 | 
						|
+
 | 
						|
+static inline u32 and(u32 v, u32 mask)
 | 
						|
+{
 | 
						|
+	return v & mask;
 | 
						|
+}
 | 
						|
+
 | 
						|
 static void poly1305_simd_mult(u32 *a, const u32 *b)
 | 
						|
 {
 | 
						|
 	u8 m[POLY1305_BLOCK_SIZE];
 | 
						|
@@ -36,6 +51,168 @@ static void poly1305_simd_mult(u32 *a, c
 | 
						|
 	poly1305_block_sse2(a, m, b, 1);
 | 
						|
 }
 | 
						|
 
 | 
						|
+static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key)
 | 
						|
+{
 | 
						|
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 | 
						|
+	key->r[0] = (get_unaligned_le32(raw_key +  0) >> 0) & 0x3ffffff;
 | 
						|
+	key->r[1] = (get_unaligned_le32(raw_key +  3) >> 2) & 0x3ffff03;
 | 
						|
+	key->r[2] = (get_unaligned_le32(raw_key +  6) >> 4) & 0x3ffc0ff;
 | 
						|
+	key->r[3] = (get_unaligned_le32(raw_key +  9) >> 6) & 0x3f03fff;
 | 
						|
+	key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
 | 
						|
+}
 | 
						|
+
 | 
						|
+static void poly1305_integer_blocks(struct poly1305_state *state,
 | 
						|
+				    const struct poly1305_key *key,
 | 
						|
+				    const void *src,
 | 
						|
+				    unsigned int nblocks, u32 hibit)
 | 
						|
+{
 | 
						|
+	u32 r0, r1, r2, r3, r4;
 | 
						|
+	u32 s1, s2, s3, s4;
 | 
						|
+	u32 h0, h1, h2, h3, h4;
 | 
						|
+	u64 d0, d1, d2, d3, d4;
 | 
						|
+
 | 
						|
+	if (!nblocks)
 | 
						|
+		return;
 | 
						|
+
 | 
						|
+	r0 = key->r[0];
 | 
						|
+	r1 = key->r[1];
 | 
						|
+	r2 = key->r[2];
 | 
						|
+	r3 = key->r[3];
 | 
						|
+	r4 = key->r[4];
 | 
						|
+
 | 
						|
+	s1 = r1 * 5;
 | 
						|
+	s2 = r2 * 5;
 | 
						|
+	s3 = r3 * 5;
 | 
						|
+	s4 = r4 * 5;
 | 
						|
+
 | 
						|
+	h0 = state->h[0];
 | 
						|
+	h1 = state->h[1];
 | 
						|
+	h2 = state->h[2];
 | 
						|
+	h3 = state->h[3];
 | 
						|
+	h4 = state->h[4];
 | 
						|
+
 | 
						|
+	do {
 | 
						|
+		/* h += m[i] */
 | 
						|
+		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
 | 
						|
+		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
 | 
						|
+		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
 | 
						|
+		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
 | 
						|
+		h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
 | 
						|
+
 | 
						|
+		/* h *= r */
 | 
						|
+		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
 | 
						|
+		     mlt(h3, s2) + mlt(h4, s1);
 | 
						|
+		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
 | 
						|
+		     mlt(h3, s3) + mlt(h4, s2);
 | 
						|
+		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
 | 
						|
+		     mlt(h3, s4) + mlt(h4, s3);
 | 
						|
+		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
 | 
						|
+		     mlt(h3, r0) + mlt(h4, s4);
 | 
						|
+		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
 | 
						|
+		     mlt(h3, r1) + mlt(h4, r0);
 | 
						|
+
 | 
						|
+		/* (partial) h %= p */
 | 
						|
+		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
 | 
						|
+		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
 | 
						|
+		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
 | 
						|
+		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
 | 
						|
+		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
 | 
						|
+		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
 | 
						|
+
 | 
						|
+		src += POLY1305_BLOCK_SIZE;
 | 
						|
+	} while (--nblocks);
 | 
						|
+
 | 
						|
+	state->h[0] = h0;
 | 
						|
+	state->h[1] = h1;
 | 
						|
+	state->h[2] = h2;
 | 
						|
+	state->h[3] = h3;
 | 
						|
+	state->h[4] = h4;
 | 
						|
+}
 | 
						|
+
 | 
						|
+static void poly1305_integer_emit(const struct poly1305_state *state, void *dst)
 | 
						|
+{
 | 
						|
+	u32 h0, h1, h2, h3, h4;
 | 
						|
+	u32 g0, g1, g2, g3, g4;
 | 
						|
+	u32 mask;
 | 
						|
+
 | 
						|
+	/* fully carry h */
 | 
						|
+	h0 = state->h[0];
 | 
						|
+	h1 = state->h[1];
 | 
						|
+	h2 = state->h[2];
 | 
						|
+	h3 = state->h[3];
 | 
						|
+	h4 = state->h[4];
 | 
						|
+
 | 
						|
+	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 | 
						|
+	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
 | 
						|
+	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
 | 
						|
+	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
 | 
						|
+	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
 | 
						|
+
 | 
						|
+	/* compute h + -p */
 | 
						|
+	g0 = h0 + 5;
 | 
						|
+	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
 | 
						|
+	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
 | 
						|
+	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
 | 
						|
+	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
 | 
						|
+
 | 
						|
+	/* select h if h < p, or h + -p if h >= p */
 | 
						|
+	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
 | 
						|
+	g0 &= mask;
 | 
						|
+	g1 &= mask;
 | 
						|
+	g2 &= mask;
 | 
						|
+	g3 &= mask;
 | 
						|
+	g4 &= mask;
 | 
						|
+	mask = ~mask;
 | 
						|
+	h0 = (h0 & mask) | g0;
 | 
						|
+	h1 = (h1 & mask) | g1;
 | 
						|
+	h2 = (h2 & mask) | g2;
 | 
						|
+	h3 = (h3 & mask) | g3;
 | 
						|
+	h4 = (h4 & mask) | g4;
 | 
						|
+
 | 
						|
+	/* h = h % (2^128) */
 | 
						|
+	put_unaligned_le32((h0 >>  0) | (h1 << 26), dst +  0);
 | 
						|
+	put_unaligned_le32((h1 >>  6) | (h2 << 20), dst +  4);
 | 
						|
+	put_unaligned_le32((h2 >> 12) | (h3 << 14), dst +  8);
 | 
						|
+	put_unaligned_le32((h3 >> 18) | (h4 <<  8), dst + 12);
 | 
						|
+}
 | 
						|
+
 | 
						|
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
 | 
						|
+{
 | 
						|
+	poly1305_integer_setkey(desc->opaque_r, key);
 | 
						|
+	desc->s[0] = get_unaligned_le32(key + 16);
 | 
						|
+	desc->s[1] = get_unaligned_le32(key + 20);
 | 
						|
+	desc->s[2] = get_unaligned_le32(key + 24);
 | 
						|
+	desc->s[3] = get_unaligned_le32(key + 28);
 | 
						|
+	poly1305_core_init(&desc->h);
 | 
						|
+	desc->buflen = 0;
 | 
						|
+	desc->sset = true;
 | 
						|
+	desc->rset = 1;
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL_GPL(poly1305_init_arch);
 | 
						|
+
 | 
						|
+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 | 
						|
+					       const u8 *src, unsigned int srclen)
 | 
						|
+{
 | 
						|
+	if (!dctx->sset) {
 | 
						|
+		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
+			poly1305_integer_setkey(dctx->r, src);
 | 
						|
+			src += POLY1305_BLOCK_SIZE;
 | 
						|
+			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
+			dctx->rset = 1;
 | 
						|
+		}
 | 
						|
+		if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
+			dctx->s[0] = get_unaligned_le32(src +  0);
 | 
						|
+			dctx->s[1] = get_unaligned_le32(src +  4);
 | 
						|
+			dctx->s[2] = get_unaligned_le32(src +  8);
 | 
						|
+			dctx->s[3] = get_unaligned_le32(src + 12);
 | 
						|
+			src += POLY1305_BLOCK_SIZE;
 | 
						|
+			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
+			dctx->sset = true;
 | 
						|
+		}
 | 
						|
+	}
 | 
						|
+	return srclen;
 | 
						|
+}
 | 
						|
+
 | 
						|
 static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
 | 
						|
 					   const u8 *src, unsigned int srclen)
 | 
						|
 {
 | 
						|
@@ -47,8 +224,8 @@ static unsigned int poly1305_scalar_bloc
 | 
						|
 		srclen = datalen;
 | 
						|
 	}
 | 
						|
 	if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
-		poly1305_core_blocks(&dctx->h, dctx->r, src,
 | 
						|
-				     srclen / POLY1305_BLOCK_SIZE, 1);
 | 
						|
+		poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src,
 | 
						|
+					srclen / POLY1305_BLOCK_SIZE, 1);
 | 
						|
 		srclen %= POLY1305_BLOCK_SIZE;
 | 
						|
 	}
 | 
						|
 	return srclen;
 | 
						|
@@ -105,12 +282,6 @@ static unsigned int poly1305_simd_blocks
 | 
						|
 	return srclen;
 | 
						|
 }
 | 
						|
 
 | 
						|
-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
 | 
						|
-{
 | 
						|
-	poly1305_init_generic(desc, key);
 | 
						|
-}
 | 
						|
-EXPORT_SYMBOL(poly1305_init_arch);
 | 
						|
-
 | 
						|
 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
 | 
						|
 			  unsigned int srclen)
 | 
						|
 {
 | 
						|
@@ -158,9 +329,31 @@ void poly1305_update_arch(struct poly130
 | 
						|
 }
 | 
						|
 EXPORT_SYMBOL(poly1305_update_arch);
 | 
						|
 
 | 
						|
-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
 | 
						|
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst)
 | 
						|
 {
 | 
						|
-	poly1305_final_generic(desc, digest);
 | 
						|
+	__le32 digest[4];
 | 
						|
+	u64 f = 0;
 | 
						|
+
 | 
						|
+	if (unlikely(desc->buflen)) {
 | 
						|
+		desc->buf[desc->buflen++] = 1;
 | 
						|
+		memset(desc->buf + desc->buflen, 0,
 | 
						|
+		       POLY1305_BLOCK_SIZE - desc->buflen);
 | 
						|
+		poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0);
 | 
						|
+	}
 | 
						|
+
 | 
						|
+	poly1305_integer_emit(&desc->h, digest);
 | 
						|
+
 | 
						|
+	/* mac = (h + s) % (2^128) */
 | 
						|
+	f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
 | 
						|
+	put_unaligned_le32(f, dst + 0);
 | 
						|
+	f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
 | 
						|
+	put_unaligned_le32(f, dst + 4);
 | 
						|
+	f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
 | 
						|
+	put_unaligned_le32(f, dst + 8);
 | 
						|
+	f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
 | 
						|
+	put_unaligned_le32(f, dst + 12);
 | 
						|
+
 | 
						|
+	*desc = (struct poly1305_desc_ctx){};
 | 
						|
 }
 | 
						|
 EXPORT_SYMBOL(poly1305_final_arch);
 | 
						|
 
 | 
						|
@@ -183,7 +376,7 @@ static int crypto_poly1305_final(struct
 | 
						|
 	if (unlikely(!dctx->sset))
 | 
						|
 		return -ENOKEY;
 | 
						|
 
 | 
						|
-	poly1305_final_generic(dctx, dst);
 | 
						|
+	poly1305_final_arch(dctx, dst);
 | 
						|
 	return 0;
 | 
						|
 }
 | 
						|
 
 | 
						|
--- a/crypto/adiantum.c
 | 
						|
+++ b/crypto/adiantum.c
 | 
						|
@@ -72,7 +72,7 @@ struct adiantum_tfm_ctx {
 | 
						|
 	struct crypto_skcipher *streamcipher;
 | 
						|
 	struct crypto_cipher *blockcipher;
 | 
						|
 	struct crypto_shash *hash;
 | 
						|
-	struct poly1305_key header_hash_key;
 | 
						|
+	struct poly1305_core_key header_hash_key;
 | 
						|
 };
 | 
						|
 
 | 
						|
 struct adiantum_request_ctx {
 | 
						|
@@ -249,7 +249,7 @@ static void adiantum_hash_header(struct
 | 
						|
 	poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
 | 
						|
 			     TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
 | 
						|
 
 | 
						|
-	poly1305_core_emit(&state, &rctx->header_hash);
 | 
						|
+	poly1305_core_emit(&state, NULL, &rctx->header_hash);
 | 
						|
 }
 | 
						|
 
 | 
						|
 /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
 | 
						|
--- a/crypto/nhpoly1305.c
 | 
						|
+++ b/crypto/nhpoly1305.c
 | 
						|
@@ -210,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struc
 | 
						|
 	if (state->nh_remaining)
 | 
						|
 		process_nh_hash_value(state, key);
 | 
						|
 
 | 
						|
-	poly1305_core_emit(&state->poly_state, dst);
 | 
						|
+	poly1305_core_emit(&state->poly_state, NULL, dst);
 | 
						|
 	return 0;
 | 
						|
 }
 | 
						|
 EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
 | 
						|
--- a/crypto/poly1305_generic.c
 | 
						|
+++ b/crypto/poly1305_generic.c
 | 
						|
@@ -31,6 +31,29 @@ static int crypto_poly1305_init(struct s
 | 
						|
 	return 0;
 | 
						|
 }
 | 
						|
 
 | 
						|
+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 | 
						|
+					       const u8 *src, unsigned int srclen)
 | 
						|
+{
 | 
						|
+	if (!dctx->sset) {
 | 
						|
+		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
+			poly1305_core_setkey(&dctx->core_r, src);
 | 
						|
+			src += POLY1305_BLOCK_SIZE;
 | 
						|
+			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
+			dctx->rset = 2;
 | 
						|
+		}
 | 
						|
+		if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
+			dctx->s[0] = get_unaligned_le32(src +  0);
 | 
						|
+			dctx->s[1] = get_unaligned_le32(src +  4);
 | 
						|
+			dctx->s[2] = get_unaligned_le32(src +  8);
 | 
						|
+			dctx->s[3] = get_unaligned_le32(src + 12);
 | 
						|
+			src += POLY1305_BLOCK_SIZE;
 | 
						|
+			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
+			dctx->sset = true;
 | 
						|
+		}
 | 
						|
+	}
 | 
						|
+	return srclen;
 | 
						|
+}
 | 
						|
+
 | 
						|
 static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
 | 
						|
 			    unsigned int srclen)
 | 
						|
 {
 | 
						|
@@ -42,7 +65,7 @@ static void poly1305_blocks(struct poly1
 | 
						|
 		srclen = datalen;
 | 
						|
 	}
 | 
						|
 
 | 
						|
-	poly1305_core_blocks(&dctx->h, dctx->r, src,
 | 
						|
+	poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
 | 
						|
 			     srclen / POLY1305_BLOCK_SIZE, 1);
 | 
						|
 }
 | 
						|
 
 | 
						|
--- a/include/crypto/internal/poly1305.h
 | 
						|
+++ b/include/crypto/internal/poly1305.h
 | 
						|
@@ -11,48 +11,23 @@
 | 
						|
 #include <crypto/poly1305.h>
 | 
						|
 
 | 
						|
 /*
 | 
						|
- * Poly1305 core functions.  These implement the ε-almost-∆-universal hash
 | 
						|
- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
 | 
						|
- * ("s key") at the end.  They also only support block-aligned inputs.
 | 
						|
+ * Poly1305 core functions.  These only accept whole blocks; the caller must
 | 
						|
+ * handle any needed block buffering and padding.  'hibit' must be 1 for any
 | 
						|
+ * full blocks, or 0 for the final block if it had to be padded.  If 'nonce' is
 | 
						|
+ * non-NULL, then it's added at the end to compute the Poly1305 MAC.  Otherwise,
 | 
						|
+ * only the ε-almost-∆-universal hash function (not the full MAC) is computed.
 | 
						|
  */
 | 
						|
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
 | 
						|
+
 | 
						|
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
 | 
						|
 static inline void poly1305_core_init(struct poly1305_state *state)
 | 
						|
 {
 | 
						|
 	*state = (struct poly1305_state){};
 | 
						|
 }
 | 
						|
 
 | 
						|
 void poly1305_core_blocks(struct poly1305_state *state,
 | 
						|
-			  const struct poly1305_key *key, const void *src,
 | 
						|
+			  const struct poly1305_core_key *key, const void *src,
 | 
						|
 			  unsigned int nblocks, u32 hibit);
 | 
						|
-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
 | 
						|
-
 | 
						|
-/*
 | 
						|
- * Poly1305 requires a unique key for each tag, which implies that we can't set
 | 
						|
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
 | 
						|
- * expect the key as the first 32 bytes in the update() call.
 | 
						|
- */
 | 
						|
-static inline
 | 
						|
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 | 
						|
-					const u8 *src, unsigned int srclen)
 | 
						|
-{
 | 
						|
-	if (!dctx->sset) {
 | 
						|
-		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
-			poly1305_core_setkey(dctx->r, src);
 | 
						|
-			src += POLY1305_BLOCK_SIZE;
 | 
						|
-			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
-			dctx->rset = 1;
 | 
						|
-		}
 | 
						|
-		if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
-			dctx->s[0] = get_unaligned_le32(src +  0);
 | 
						|
-			dctx->s[1] = get_unaligned_le32(src +  4);
 | 
						|
-			dctx->s[2] = get_unaligned_le32(src +  8);
 | 
						|
-			dctx->s[3] = get_unaligned_le32(src + 12);
 | 
						|
-			src += POLY1305_BLOCK_SIZE;
 | 
						|
-			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
-			dctx->sset = true;
 | 
						|
-		}
 | 
						|
-	}
 | 
						|
-	return srclen;
 | 
						|
-}
 | 
						|
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
 | 
						|
+			void *dst);
 | 
						|
 
 | 
						|
 #endif
 | 
						|
--- a/include/crypto/nhpoly1305.h
 | 
						|
+++ b/include/crypto/nhpoly1305.h
 | 
						|
@@ -7,7 +7,7 @@
 | 
						|
 #define _NHPOLY1305_H
 | 
						|
 
 | 
						|
 #include <crypto/hash.h>
 | 
						|
-#include <crypto/poly1305.h>
 | 
						|
+#include <crypto/internal/poly1305.h>
 | 
						|
 
 | 
						|
 /* NH parameterization: */
 | 
						|
 
 | 
						|
@@ -33,7 +33,7 @@
 | 
						|
 #define NHPOLY1305_KEY_SIZE	(POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
 | 
						|
 
 | 
						|
 struct nhpoly1305_key {
 | 
						|
-	struct poly1305_key poly_key;
 | 
						|
+	struct poly1305_core_key poly_key;
 | 
						|
 	u32 nh_key[NH_KEY_WORDS];
 | 
						|
 };
 | 
						|
 
 | 
						|
--- a/include/crypto/poly1305.h
 | 
						|
+++ b/include/crypto/poly1305.h
 | 
						|
@@ -13,12 +13,29 @@
 | 
						|
 #define POLY1305_KEY_SIZE	32
 | 
						|
 #define POLY1305_DIGEST_SIZE	16
 | 
						|
 
 | 
						|
+/* The poly1305_key and poly1305_state types are mostly opaque and
 | 
						|
+ * implementation-defined. Limbs might be in base 2^64 or base 2^26, or
 | 
						|
+ * different yet. The union type provided keeps these 64-bit aligned for the
 | 
						|
+ * case in which this is implemented using 64x64 multiplies.
 | 
						|
+ */
 | 
						|
+
 | 
						|
 struct poly1305_key {
 | 
						|
-	u32 r[5];	/* key, base 2^26 */
 | 
						|
+	union {
 | 
						|
+		u32 r[5];
 | 
						|
+		u64 r64[3];
 | 
						|
+	};
 | 
						|
+};
 | 
						|
+
 | 
						|
+struct poly1305_core_key {
 | 
						|
+	struct poly1305_key key;
 | 
						|
+	struct poly1305_key precomputed_s;
 | 
						|
 };
 | 
						|
 
 | 
						|
 struct poly1305_state {
 | 
						|
-	u32 h[5];	/* accumulator, base 2^26 */
 | 
						|
+	union {
 | 
						|
+		u32 h[5];
 | 
						|
+		u64 h64[3];
 | 
						|
+	};
 | 
						|
 };
 | 
						|
 
 | 
						|
 struct poly1305_desc_ctx {
 | 
						|
@@ -35,7 +52,10 @@ struct poly1305_desc_ctx {
 | 
						|
 	/* accumulator */
 | 
						|
 	struct poly1305_state h;
 | 
						|
 	/* key */
 | 
						|
-	struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
 | 
						|
+	union {
 | 
						|
+		struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
 | 
						|
+		struct poly1305_core_key core_r;
 | 
						|
+	};
 | 
						|
 };
 | 
						|
 
 | 
						|
 void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
 | 
						|
--- a/lib/crypto/Makefile
 | 
						|
+++ b/lib/crypto/Makefile
 | 
						|
@@ -28,7 +28,9 @@ obj-$(CONFIG_CRYPTO_LIB_DES)			+= libdes
 | 
						|
 libdes-y					:= des.o
 | 
						|
 
 | 
						|
 obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC)	+= libpoly1305.o
 | 
						|
-libpoly1305-y					:= poly1305.o
 | 
						|
+libpoly1305-y					:= poly1305-donna32.o
 | 
						|
+libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128)	:= poly1305-donna64.o
 | 
						|
+libpoly1305-y					+= poly1305.o
 | 
						|
 
 | 
						|
 obj-$(CONFIG_CRYPTO_LIB_SHA256)			+= libsha256.o
 | 
						|
 libsha256-y					:= sha256.o
 | 
						|
--- /dev/null
 | 
						|
+++ b/lib/crypto/poly1305-donna32.c
 | 
						|
@@ -0,0 +1,204 @@
 | 
						|
+// SPDX-License-Identifier: GPL-2.0 OR MIT
 | 
						|
+/*
 | 
						|
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | 
						|
+ *
 | 
						|
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
 | 
						|
+ * public domain.
 | 
						|
+ */
 | 
						|
+
 | 
						|
+#include <linux/kernel.h>
 | 
						|
+#include <asm/unaligned.h>
 | 
						|
+#include <crypto/internal/poly1305.h>
 | 
						|
+
 | 
						|
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
 | 
						|
+{
 | 
						|
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 | 
						|
+	key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff;
 | 
						|
+	key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03;
 | 
						|
+	key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff;
 | 
						|
+	key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff;
 | 
						|
+	key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff;
 | 
						|
+
 | 
						|
+	/* s = 5*r */
 | 
						|
+	key->precomputed_s.r[0] = key->key.r[1] * 5;
 | 
						|
+	key->precomputed_s.r[1] = key->key.r[2] * 5;
 | 
						|
+	key->precomputed_s.r[2] = key->key.r[3] * 5;
 | 
						|
+	key->precomputed_s.r[3] = key->key.r[4] * 5;
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_setkey);
 | 
						|
+
 | 
						|
+void poly1305_core_blocks(struct poly1305_state *state,
 | 
						|
+			  const struct poly1305_core_key *key, const void *src,
 | 
						|
+			  unsigned int nblocks, u32 hibit)
 | 
						|
+{
 | 
						|
+	const u8 *input = src;
 | 
						|
+	u32 r0, r1, r2, r3, r4;
 | 
						|
+	u32 s1, s2, s3, s4;
 | 
						|
+	u32 h0, h1, h2, h3, h4;
 | 
						|
+	u64 d0, d1, d2, d3, d4;
 | 
						|
+	u32 c;
 | 
						|
+
 | 
						|
+	if (!nblocks)
 | 
						|
+		return;
 | 
						|
+
 | 
						|
+	hibit <<= 24;
 | 
						|
+
 | 
						|
+	r0 = key->key.r[0];
 | 
						|
+	r1 = key->key.r[1];
 | 
						|
+	r2 = key->key.r[2];
 | 
						|
+	r3 = key->key.r[3];
 | 
						|
+	r4 = key->key.r[4];
 | 
						|
+
 | 
						|
+	s1 = key->precomputed_s.r[0];
 | 
						|
+	s2 = key->precomputed_s.r[1];
 | 
						|
+	s3 = key->precomputed_s.r[2];
 | 
						|
+	s4 = key->precomputed_s.r[3];
 | 
						|
+
 | 
						|
+	h0 = state->h[0];
 | 
						|
+	h1 = state->h[1];
 | 
						|
+	h2 = state->h[2];
 | 
						|
+	h3 = state->h[3];
 | 
						|
+	h4 = state->h[4];
 | 
						|
+
 | 
						|
+	do {
 | 
						|
+		/* h += m[i] */
 | 
						|
+		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
 | 
						|
+		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
 | 
						|
+		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
 | 
						|
+		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
 | 
						|
+		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
 | 
						|
+
 | 
						|
+		/* h *= r */
 | 
						|
+		d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
 | 
						|
+		     ((u64)h2 * s3) + ((u64)h3 * s2) +
 | 
						|
+		     ((u64)h4 * s1);
 | 
						|
+		d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
 | 
						|
+		     ((u64)h2 * s4) + ((u64)h3 * s3) +
 | 
						|
+		     ((u64)h4 * s2);
 | 
						|
+		d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
 | 
						|
+		     ((u64)h2 * r0) + ((u64)h3 * s4) +
 | 
						|
+		     ((u64)h4 * s3);
 | 
						|
+		d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
 | 
						|
+		     ((u64)h2 * r1) + ((u64)h3 * r0) +
 | 
						|
+		     ((u64)h4 * s4);
 | 
						|
+		d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
 | 
						|
+		     ((u64)h2 * r2) + ((u64)h3 * r1) +
 | 
						|
+		     ((u64)h4 * r0);
 | 
						|
+
 | 
						|
+		/* (partial) h %= p */
 | 
						|
+		c = (u32)(d0 >> 26);
 | 
						|
+		h0 = (u32)d0 & 0x3ffffff;
 | 
						|
+		d1 += c;
 | 
						|
+		c = (u32)(d1 >> 26);
 | 
						|
+		h1 = (u32)d1 & 0x3ffffff;
 | 
						|
+		d2 += c;
 | 
						|
+		c = (u32)(d2 >> 26);
 | 
						|
+		h2 = (u32)d2 & 0x3ffffff;
 | 
						|
+		d3 += c;
 | 
						|
+		c = (u32)(d3 >> 26);
 | 
						|
+		h3 = (u32)d3 & 0x3ffffff;
 | 
						|
+		d4 += c;
 | 
						|
+		c = (u32)(d4 >> 26);
 | 
						|
+		h4 = (u32)d4 & 0x3ffffff;
 | 
						|
+		h0 += c * 5;
 | 
						|
+		c = (h0 >> 26);
 | 
						|
+		h0 = h0 & 0x3ffffff;
 | 
						|
+		h1 += c;
 | 
						|
+
 | 
						|
+		input += POLY1305_BLOCK_SIZE;
 | 
						|
+	} while (--nblocks);
 | 
						|
+
 | 
						|
+	state->h[0] = h0;
 | 
						|
+	state->h[1] = h1;
 | 
						|
+	state->h[2] = h2;
 | 
						|
+	state->h[3] = h3;
 | 
						|
+	state->h[4] = h4;
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_blocks);
 | 
						|
+
 | 
						|
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
 | 
						|
+			void *dst)
 | 
						|
+{
 | 
						|
+	u8 *mac = dst;
 | 
						|
+	u32 h0, h1, h2, h3, h4, c;
 | 
						|
+	u32 g0, g1, g2, g3, g4;
 | 
						|
+	u64 f;
 | 
						|
+	u32 mask;
 | 
						|
+
 | 
						|
+	/* fully carry h */
 | 
						|
+	h0 = state->h[0];
 | 
						|
+	h1 = state->h[1];
 | 
						|
+	h2 = state->h[2];
 | 
						|
+	h3 = state->h[3];
 | 
						|
+	h4 = state->h[4];
 | 
						|
+
 | 
						|
+	c = h1 >> 26;
 | 
						|
+	h1 = h1 & 0x3ffffff;
 | 
						|
+	h2 += c;
 | 
						|
+	c = h2 >> 26;
 | 
						|
+	h2 = h2 & 0x3ffffff;
 | 
						|
+	h3 += c;
 | 
						|
+	c = h3 >> 26;
 | 
						|
+	h3 = h3 & 0x3ffffff;
 | 
						|
+	h4 += c;
 | 
						|
+	c = h4 >> 26;
 | 
						|
+	h4 = h4 & 0x3ffffff;
 | 
						|
+	h0 += c * 5;
 | 
						|
+	c = h0 >> 26;
 | 
						|
+	h0 = h0 & 0x3ffffff;
 | 
						|
+	h1 += c;
 | 
						|
+
 | 
						|
+	/* compute h + -p */
 | 
						|
+	g0 = h0 + 5;
 | 
						|
+	c = g0 >> 26;
 | 
						|
+	g0 &= 0x3ffffff;
 | 
						|
+	g1 = h1 + c;
 | 
						|
+	c = g1 >> 26;
 | 
						|
+	g1 &= 0x3ffffff;
 | 
						|
+	g2 = h2 + c;
 | 
						|
+	c = g2 >> 26;
 | 
						|
+	g2 &= 0x3ffffff;
 | 
						|
+	g3 = h3 + c;
 | 
						|
+	c = g3 >> 26;
 | 
						|
+	g3 &= 0x3ffffff;
 | 
						|
+	g4 = h4 + c - (1UL << 26);
 | 
						|
+
 | 
						|
+	/* select h if h < p, or h + -p if h >= p */
 | 
						|
+	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
 | 
						|
+	g0 &= mask;
 | 
						|
+	g1 &= mask;
 | 
						|
+	g2 &= mask;
 | 
						|
+	g3 &= mask;
 | 
						|
+	g4 &= mask;
 | 
						|
+	mask = ~mask;
 | 
						|
+
 | 
						|
+	h0 = (h0 & mask) | g0;
 | 
						|
+	h1 = (h1 & mask) | g1;
 | 
						|
+	h2 = (h2 & mask) | g2;
 | 
						|
+	h3 = (h3 & mask) | g3;
 | 
						|
+	h4 = (h4 & mask) | g4;
 | 
						|
+
 | 
						|
+	/* h = h % (2^128) */
 | 
						|
+	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
 | 
						|
+	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
 | 
						|
+	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
 | 
						|
+	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
 | 
						|
+
 | 
						|
+	if (likely(nonce)) {
 | 
						|
+		/* mac = (h + nonce) % (2^128) */
 | 
						|
+		f = (u64)h0 + nonce[0];
 | 
						|
+		h0 = (u32)f;
 | 
						|
+		f = (u64)h1 + nonce[1] + (f >> 32);
 | 
						|
+		h1 = (u32)f;
 | 
						|
+		f = (u64)h2 + nonce[2] + (f >> 32);
 | 
						|
+		h2 = (u32)f;
 | 
						|
+		f = (u64)h3 + nonce[3] + (f >> 32);
 | 
						|
+		h3 = (u32)f;
 | 
						|
+	}
 | 
						|
+
 | 
						|
+	put_unaligned_le32(h0, &mac[0]);
 | 
						|
+	put_unaligned_le32(h1, &mac[4]);
 | 
						|
+	put_unaligned_le32(h2, &mac[8]);
 | 
						|
+	put_unaligned_le32(h3, &mac[12]);
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_emit);
 | 
						|
--- /dev/null
 | 
						|
+++ b/lib/crypto/poly1305-donna64.c
 | 
						|
@@ -0,0 +1,185 @@
 | 
						|
+// SPDX-License-Identifier: GPL-2.0 OR MIT
 | 
						|
+/*
 | 
						|
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | 
						|
+ *
 | 
						|
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
 | 
						|
+ * public domain.
 | 
						|
+ */
 | 
						|
+
 | 
						|
+#include <linux/kernel.h>
 | 
						|
+#include <asm/unaligned.h>
 | 
						|
+#include <crypto/internal/poly1305.h>
 | 
						|
+
 | 
						|
+typedef __uint128_t u128;
 | 
						|
+
 | 
						|
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
 | 
						|
+{
 | 
						|
+	u64 t0, t1;	/* raw_key[0..7] and raw_key[8..15], read little-endian */
 | 
						|
+	/* Store the clamped r from raw_key in *key as three limbs, plus 20*r multiples. */
 | 
						|
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff (applied below, split per limb) */
 | 
						|
+	t0 = get_unaligned_le64(&raw_key[0]);
 | 
						|
+	t1 = get_unaligned_le64(&raw_key[8]);
 | 
						|
+
 | 
						|
+	key->key.r64[0] = t0 & 0xffc0fffffffULL;	/* bits 0..43 of r */
 | 
						|
+	key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL; /* bits 44..87 */
 | 
						|
+	key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;	/* bits 88..127 */
 | 
						|
+
 | 
						|
+	/* s = 20*r: multiples of r1 and r2 that poly1305_core_blocks() loads as s1, s2 */
 | 
						|
+	key->precomputed_s.r64[0] = key->key.r64[1] * 20;
 | 
						|
+	key->precomputed_s.r64[1] = key->key.r64[2] * 20;
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_setkey);
 | 
						|
+
 | 
						|
+void poly1305_core_blocks(struct poly1305_state *state,
 | 
						|
+			  const struct poly1305_core_key *key, const void *src,
 | 
						|
+			  unsigned int nblocks, u32 hibit)
 | 
						|
+{
 | 
						|
+	const u8 *input = src;	/* byte cursor over the message blocks */
 | 
						|
+	u64 hibit64;		/* hibit positioned inside the top limb */
 | 
						|
+	u64 r0, r1, r2;		/* key limbs at bit offsets 0, 44 and 88 */
 | 
						|
+	u64 s1, s2;		/* precomputed 20*r1 and 20*r2 */
 | 
						|
+	u64 h0, h1, h2;		/* accumulator limbs, same layout as r */
 | 
						|
+	u64 c;			/* carry between limbs */
 | 
						|
+	u128 d0, d1, d2, d;	/* 128-bit column sums; d holds one product */
 | 
						|
+	/* Absorb nblocks blocks from src into state: per block, h = (h + m) * r. */
 | 
						|
+	if (!nblocks)	/* the do-while below always runs at least once */
 | 
						|
+		return;
 | 
						|
+
 | 
						|
+	hibit64 = ((u64)hibit) << 40;	/* bit 40 of the limb at bit 88 = 2^128 */
 | 
						|
+
 | 
						|
+	r0 = key->key.r64[0];
 | 
						|
+	r1 = key->key.r64[1];
 | 
						|
+	r2 = key->key.r64[2];
 | 
						|
+
 | 
						|
+	h0 = state->h64[0];
 | 
						|
+	h1 = state->h64[1];
 | 
						|
+	h2 = state->h64[2];
 | 
						|
+
 | 
						|
+	s1 = key->precomputed_s.r64[0];
 | 
						|
+	s2 = key->precomputed_s.r64[1];
 | 
						|
+
 | 
						|
+	do {
 | 
						|
+		u64 t0, t1;	/* bytes 0..7 and 8..15 of the block */
 | 
						|
+
 | 
						|
+		/* h += m[i]: block bits 0..43, 44..87, 88..127 go to h0, h1, h2 */
 | 
						|
+		t0 = get_unaligned_le64(&input[0]);
 | 
						|
+		t1 = get_unaligned_le64(&input[8]);
 | 
						|
+
 | 
						|
+		h0 += t0 & 0xfffffffffffULL;
 | 
						|
+		h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
 | 
						|
+		h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64;
 | 
						|
+
 | 
						|
+		/* h *= r: limb products per column; the s1/s2 terms are the wrapped ones */
 | 
						|
+		d0 = (u128)h0 * r0;
 | 
						|
+		d = (u128)h1 * s2;
 | 
						|
+		d0 += d;
 | 
						|
+		d = (u128)h2 * s1;
 | 
						|
+		d0 += d;
 | 
						|
+		d1 = (u128)h0 * r1;
 | 
						|
+		d = (u128)h1 * r0;
 | 
						|
+		d1 += d;
 | 
						|
+		d = (u128)h2 * s2;
 | 
						|
+		d1 += d;
 | 
						|
+		d2 = (u128)h0 * r2;
 | 
						|
+		d = (u128)h1 * r1;
 | 
						|
+		d2 += d;
 | 
						|
+		d = (u128)h2 * r0;
 | 
						|
+		d2 += d;
 | 
						|
+
 | 
						|
+		/* (partial) h %= p: carry d0 -> d1 -> d2, then top overflow * 5 -> h0 */
 | 
						|
+		c = (u64)(d0 >> 44);
 | 
						|
+		h0 = (u64)d0 & 0xfffffffffffULL;
 | 
						|
+		d1 += c;
 | 
						|
+		c = (u64)(d1 >> 44);
 | 
						|
+		h1 = (u64)d1 & 0xfffffffffffULL;
 | 
						|
+		d2 += c;
 | 
						|
+		c = (u64)(d2 >> 42);	/* the top limb keeps 42 bits */
 | 
						|
+		h2 = (u64)d2 & 0x3ffffffffffULL;
 | 
						|
+		h0 += c * 5;
 | 
						|
+		c = h0 >> 44;
 | 
						|
+		h0 = h0 & 0xfffffffffffULL;
 | 
						|
+		h1 += c;	/* h1 is not re-masked here, hence "partial" */
 | 
						|
+
 | 
						|
+		input += POLY1305_BLOCK_SIZE;
 | 
						|
+	} while (--nblocks);
 | 
						|
+
 | 
						|
+	state->h64[0] = h0;
 | 
						|
+	state->h64[1] = h1;
 | 
						|
+	state->h64[2] = h2;
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_blocks);
 | 
						|
+
 | 
						|
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
 | 
						|
+			void *dst)
 | 
						|
+{
 | 
						|
+	u8 *mac = dst;		/* 16 output bytes are written here */
 | 
						|
+	u64 h0, h1, h2, c;	/* accumulator limbs; c is carry, then select mask */
 | 
						|
+	u64 g0, g1, g2;		/* limbs of h + -p */
 | 
						|
+	u64 t0, t1;		/* nonce as two 64-bit words */
 | 
						|
+	/* Reduce h fully mod p, add nonce if given, write the low 128 bits to dst. */
 | 
						|
+	/* fully carry h: two rounds of h1 -> h2 -> (times 5) h0 -> h1 */
 | 
						|
+	h0 = state->h64[0];
 | 
						|
+	h1 = state->h64[1];
 | 
						|
+	h2 = state->h64[2];
 | 
						|
+
 | 
						|
+	c = h1 >> 44;
 | 
						|
+	h1 &= 0xfffffffffffULL;
 | 
						|
+	h2 += c;
 | 
						|
+	c = h2 >> 42;
 | 
						|
+	h2 &= 0x3ffffffffffULL;
 | 
						|
+	h0 += c * 5;
 | 
						|
+	c = h0 >> 44;
 | 
						|
+	h0 &= 0xfffffffffffULL;
 | 
						|
+	h1 += c;
 | 
						|
+	c = h1 >> 44;
 | 
						|
+	h1 &= 0xfffffffffffULL;
 | 
						|
+	h2 += c;
 | 
						|
+	c = h2 >> 42;
 | 
						|
+	h2 &= 0x3ffffffffffULL;
 | 
						|
+	h0 += c * 5;
 | 
						|
+	c = h0 >> 44;
 | 
						|
+	h0 &= 0xfffffffffffULL;
 | 
						|
+	h1 += c;
 | 
						|
+
 | 
						|
+	/* compute h + -p, as g = h + 5 - 2^130 (1 << 42 in the limb at bit 88) */
 | 
						|
+	g0 = h0 + 5;
 | 
						|
+	c  = g0 >> 44;
 | 
						|
+	g0 &= 0xfffffffffffULL;
 | 
						|
+	g1 = h1 + c;
 | 
						|
+	c  = g1 >> 44;
 | 
						|
+	g1 &= 0xfffffffffffULL;
 | 
						|
+	g2 = h2 + c - (1ULL << 42);
 | 
						|
+
 | 
						|
+	/* select h if h < p, or h + -p if h >= p */
 | 
						|
+	c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; /* 0 if g2 wrapped, else ~0 */
 | 
						|
+	g0 &= c;
 | 
						|
+	g1 &= c;
 | 
						|
+	g2 &= c;
 | 
						|
+	c  = ~c;	/* inverted mask: keeps h exactly when g was zeroed */
 | 
						|
+	h0 = (h0 & c) | g0;
 | 
						|
+	h1 = (h1 & c) | g1;
 | 
						|
+	h2 = (h2 & c) | g2;
 | 
						|
+
 | 
						|
+	if (likely(nonce)) {	/* a NULL nonce skips the addition */
 | 
						|
+		/* h = (h + nonce); nonce[0] is the least significant 32-bit word */
 | 
						|
+		t0 = ((u64)nonce[1] << 32) | nonce[0];
 | 
						|
+		t1 = ((u64)nonce[3] << 32) | nonce[2];
 | 
						|
+
 | 
						|
+		h0 += t0 & 0xfffffffffffULL;
 | 
						|
+		c = h0 >> 44;
 | 
						|
+		h0 &= 0xfffffffffffULL;
 | 
						|
+		h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
 | 
						|
+		c = h1 >> 44;
 | 
						|
+		h1 &= 0xfffffffffffULL;
 | 
						|
+		h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
 | 
						|
+		h2 &= 0x3ffffffffffULL;	/* carry out of the top limb is dropped */
 | 
						|
+	}
 | 
						|
+
 | 
						|
+	/* mac = h % (2^128): repack the three limbs into two 64-bit words */
 | 
						|
+	h0 = h0 | (h1 << 44);
 | 
						|
+	h1 = (h1 >> 20) | (h2 << 24);
 | 
						|
+
 | 
						|
+	put_unaligned_le64(h0, &mac[0]);
 | 
						|
+	put_unaligned_le64(h1, &mac[8]);
 | 
						|
+}
 | 
						|
+EXPORT_SYMBOL(poly1305_core_emit);
 | 
						|
--- a/lib/crypto/poly1305.c
 | 
						|
+++ b/lib/crypto/poly1305.c
 | 
						|
@@ -12,151 +12,9 @@
 | 
						|
 #include <linux/module.h>
 | 
						|
 #include <asm/unaligned.h>
 | 
						|
 
 | 
						|
-static inline u64 mlt(u64 a, u64 b)
 | 
						|
-{
 | 
						|
-	return a * b;
 | 
						|
-}
 | 
						|
-
 | 
						|
-static inline u32 sr(u64 v, u_char n)
 | 
						|
-{
 | 
						|
-	return v >> n;
 | 
						|
-}
 | 
						|
-
 | 
						|
-static inline u32 and(u32 v, u32 mask)
 | 
						|
-{
 | 
						|
-	return v & mask;
 | 
						|
-}
 | 
						|
-
 | 
						|
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
 | 
						|
-{
 | 
						|
-	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 | 
						|
-	key->r[0] = (get_unaligned_le32(raw_key +  0) >> 0) & 0x3ffffff;
 | 
						|
-	key->r[1] = (get_unaligned_le32(raw_key +  3) >> 2) & 0x3ffff03;
 | 
						|
-	key->r[2] = (get_unaligned_le32(raw_key +  6) >> 4) & 0x3ffc0ff;
 | 
						|
-	key->r[3] = (get_unaligned_le32(raw_key +  9) >> 6) & 0x3f03fff;
 | 
						|
-	key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
 | 
						|
-}
 | 
						|
-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
 | 
						|
-
 | 
						|
-void poly1305_core_blocks(struct poly1305_state *state,
 | 
						|
-			  const struct poly1305_key *key, const void *src,
 | 
						|
-			  unsigned int nblocks, u32 hibit)
 | 
						|
-{
 | 
						|
-	u32 r0, r1, r2, r3, r4;
 | 
						|
-	u32 s1, s2, s3, s4;
 | 
						|
-	u32 h0, h1, h2, h3, h4;
 | 
						|
-	u64 d0, d1, d2, d3, d4;
 | 
						|
-
 | 
						|
-	if (!nblocks)
 | 
						|
-		return;
 | 
						|
-
 | 
						|
-	r0 = key->r[0];
 | 
						|
-	r1 = key->r[1];
 | 
						|
-	r2 = key->r[2];
 | 
						|
-	r3 = key->r[3];
 | 
						|
-	r4 = key->r[4];
 | 
						|
-
 | 
						|
-	s1 = r1 * 5;
 | 
						|
-	s2 = r2 * 5;
 | 
						|
-	s3 = r3 * 5;
 | 
						|
-	s4 = r4 * 5;
 | 
						|
-
 | 
						|
-	h0 = state->h[0];
 | 
						|
-	h1 = state->h[1];
 | 
						|
-	h2 = state->h[2];
 | 
						|
-	h3 = state->h[3];
 | 
						|
-	h4 = state->h[4];
 | 
						|
-
 | 
						|
-	do {
 | 
						|
-		/* h += m[i] */
 | 
						|
-		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
 | 
						|
-		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
 | 
						|
-		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
 | 
						|
-		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
 | 
						|
-		h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
 | 
						|
-
 | 
						|
-		/* h *= r */
 | 
						|
-		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
 | 
						|
-		     mlt(h3, s2) + mlt(h4, s1);
 | 
						|
-		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
 | 
						|
-		     mlt(h3, s3) + mlt(h4, s2);
 | 
						|
-		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
 | 
						|
-		     mlt(h3, s4) + mlt(h4, s3);
 | 
						|
-		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
 | 
						|
-		     mlt(h3, r0) + mlt(h4, s4);
 | 
						|
-		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
 | 
						|
-		     mlt(h3, r1) + mlt(h4, r0);
 | 
						|
-
 | 
						|
-		/* (partial) h %= p */
 | 
						|
-		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
 | 
						|
-		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
 | 
						|
-		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
 | 
						|
-		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
 | 
						|
-		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
 | 
						|
-		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
 | 
						|
-
 | 
						|
-		src += POLY1305_BLOCK_SIZE;
 | 
						|
-	} while (--nblocks);
 | 
						|
-
 | 
						|
-	state->h[0] = h0;
 | 
						|
-	state->h[1] = h1;
 | 
						|
-	state->h[2] = h2;
 | 
						|
-	state->h[3] = h3;
 | 
						|
-	state->h[4] = h4;
 | 
						|
-}
 | 
						|
-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
 | 
						|
-
 | 
						|
-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
 | 
						|
-{
 | 
						|
-	u32 h0, h1, h2, h3, h4;
 | 
						|
-	u32 g0, g1, g2, g3, g4;
 | 
						|
-	u32 mask;
 | 
						|
-
 | 
						|
-	/* fully carry h */
 | 
						|
-	h0 = state->h[0];
 | 
						|
-	h1 = state->h[1];
 | 
						|
-	h2 = state->h[2];
 | 
						|
-	h3 = state->h[3];
 | 
						|
-	h4 = state->h[4];
 | 
						|
-
 | 
						|
-	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 | 
						|
-	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
 | 
						|
-	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
 | 
						|
-	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
 | 
						|
-	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
 | 
						|
-
 | 
						|
-	/* compute h + -p */
 | 
						|
-	g0 = h0 + 5;
 | 
						|
-	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
 | 
						|
-	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
 | 
						|
-	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
 | 
						|
-	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
 | 
						|
-
 | 
						|
-	/* select h if h < p, or h + -p if h >= p */
 | 
						|
-	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
 | 
						|
-	g0 &= mask;
 | 
						|
-	g1 &= mask;
 | 
						|
-	g2 &= mask;
 | 
						|
-	g3 &= mask;
 | 
						|
-	g4 &= mask;
 | 
						|
-	mask = ~mask;
 | 
						|
-	h0 = (h0 & mask) | g0;
 | 
						|
-	h1 = (h1 & mask) | g1;
 | 
						|
-	h2 = (h2 & mask) | g2;
 | 
						|
-	h3 = (h3 & mask) | g3;
 | 
						|
-	h4 = (h4 & mask) | g4;
 | 
						|
-
 | 
						|
-	/* h = h % (2^128) */
 | 
						|
-	put_unaligned_le32((h0 >>  0) | (h1 << 26), dst +  0);
 | 
						|
-	put_unaligned_le32((h1 >>  6) | (h2 << 20), dst +  4);
 | 
						|
-	put_unaligned_le32((h2 >> 12) | (h3 << 14), dst +  8);
 | 
						|
-	put_unaligned_le32((h3 >> 18) | (h4 <<  8), dst + 12);
 | 
						|
-}
 | 
						|
-EXPORT_SYMBOL_GPL(poly1305_core_emit);
 | 
						|
-
 | 
						|
 void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
 | 
						|
 {
 | 
						|
-	poly1305_core_setkey(desc->r, key);
 | 
						|
+	poly1305_core_setkey(&desc->core_r, key);
 | 
						|
 	desc->s[0] = get_unaligned_le32(key + 16);
 | 
						|
 	desc->s[1] = get_unaligned_le32(key + 20);
 | 
						|
 	desc->s[2] = get_unaligned_le32(key + 24);
 | 
						|
@@ -164,7 +22,7 @@ void poly1305_init_generic(struct poly13
 | 
						|
 	poly1305_core_init(&desc->h);
 | 
						|
 	desc->buflen = 0;
 | 
						|
 	desc->sset = true;
 | 
						|
-	desc->rset = 1;
 | 
						|
+	desc->rset = 2;
 | 
						|
 }
 | 
						|
 EXPORT_SYMBOL_GPL(poly1305_init_generic);
 | 
						|
 
 | 
						|
@@ -181,13 +39,14 @@ void poly1305_update_generic(struct poly
 | 
						|
 		desc->buflen += bytes;
 | 
						|
 
 | 
						|
 		if (desc->buflen == POLY1305_BLOCK_SIZE) {
 | 
						|
-			poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
 | 
						|
+			poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf,
 | 
						|
+					     1, 1);
 | 
						|
 			desc->buflen = 0;
 | 
						|
 		}
 | 
						|
 	}
 | 
						|
 
 | 
						|
 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
 | 
						|
-		poly1305_core_blocks(&desc->h, desc->r, src,
 | 
						|
+		poly1305_core_blocks(&desc->h, &desc->core_r, src,
 | 
						|
 				     nbytes / POLY1305_BLOCK_SIZE, 1);
 | 
						|
 		src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
 | 
						|
 		nbytes %= POLY1305_BLOCK_SIZE;
 | 
						|
@@ -202,28 +61,14 @@ EXPORT_SYMBOL_GPL(poly1305_update_generi
 | 
						|
 
 | 
						|
 void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
 | 
						|
 {
 | 
						|
-	__le32 digest[4];
 | 
						|
-	u64 f = 0;
 | 
						|
-
 | 
						|
 	if (unlikely(desc->buflen)) {
 | 
						|
 		desc->buf[desc->buflen++] = 1;
 | 
						|
 		memset(desc->buf + desc->buflen, 0,
 | 
						|
 		       POLY1305_BLOCK_SIZE - desc->buflen);
 | 
						|
-		poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
 | 
						|
+		poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0);
 | 
						|
 	}
 | 
						|
 
 | 
						|
-	poly1305_core_emit(&desc->h, digest);
 | 
						|
-
 | 
						|
-	/* mac = (h + s) % (2^128) */
 | 
						|
-	f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
 | 
						|
-	put_unaligned_le32(f, dst + 0);
 | 
						|
-	f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
 | 
						|
-	put_unaligned_le32(f, dst + 4);
 | 
						|
-	f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
 | 
						|
-	put_unaligned_le32(f, dst + 8);
 | 
						|
-	f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
 | 
						|
-	put_unaligned_le32(f, dst + 12);
 | 
						|
-
 | 
						|
+	poly1305_core_emit(&desc->h, desc->s, dst);
 | 
						|
 	*desc = (struct poly1305_desc_ctx){};
 | 
						|
 }
 | 
						|
 EXPORT_SYMBOL_GPL(poly1305_final_generic);
 |