Without this patch, the chacha block counter is not incremented on neon
rounds, resulting in incorrect calculations and corrupt packets. This also
switches to using `--no-numbered --zero-commit` so that future diffs are
smaller.

Reported-by: Hans Geiblinger <cybrnook2002@yahoo.com>
Reviewed-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: David Bauer <mail@david-bauer.net>
Cc: Petr Štetiar <ynezz@true.cz>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
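
A note for context (not part of the patch file that follows): the ChaCha block
counter is word 12 of the cipher state, and every 64-byte block of keystream
produced by the NEON routines has to advance it, exactly as the chacha_doneon()
glue in this patch does. A minimal illustrative sketch of that bookkeeping,
reusing the asmlinkage NEON helpers declared in the patch (the function name
chacha_counter_sketch is hypothetical):

/*
 * Illustrative sketch (not from the patch): state[12] is the 32-bit
 * block counter; it must advance once per 64-byte block of keystream,
 * otherwise later blocks reuse the same counter value and the output
 * is corrupt.
 */
static void chacha_counter_sketch(u32 *state, u8 *dst, const u8 *src,
				  unsigned int bytes, int nrounds)
{
	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha_4block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;		/* four blocks of keystream emitted */
	}
	while (bytes >= CHACHA_BLOCK_SIZE) {
		chacha_block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE;
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;		/* one more block consumed */
	}
}

The `--no-numbered --zero-commit` flags mentioned above are git format-patch
options: the all-zero hash in the "From" line of the patch below is what
--zero-commit produces, and --no-numbered drops the [PATCH n/m] numbering,
which keeps regenerated patches stable across refreshes.
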
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:14 +0100
Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha
 driver

commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.

Instead of falling back to the generic ChaCha skcipher driver for
non-SIMD cases, use a fast scalar implementation for ARM authored
by Eric Biggers. This removes the module dependency on chacha-generic
altogether, which also simplifies things when we expose the ChaCha
library interface from this module.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 arch/arm/crypto/Kconfig              |   4 +-
 arch/arm/crypto/Makefile             |   3 +-
 arch/arm/crypto/chacha-glue.c        | 304 +++++++++++++++++++++++++++
 arch/arm/crypto/chacha-neon-glue.c   | 202 ------------------
 arch/arm/crypto/chacha-scalar-core.S |  65 +++---
 arch/arm64/crypto/chacha-neon-glue.c |   2 +-
 6 files changed, 340 insertions(+), 240 deletions(-)
 create mode 100644 arch/arm/crypto/chacha-glue.c
 delete mode 100644 arch/arm/crypto/chacha-neon-glue.c
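
For orientation before the hunks (an illustrative, condensed sketch only, not a
hunk of this patch): with this change the non-SIMD case no longer falls back to
the chacha-generic module; both paths stay inside this driver, roughly as
follows (the name chacha_crypt_sketch is hypothetical; chacha_doarm() and
chacha_doneon() are the routines added below):

/*
 * Illustrative sketch (not from the patch): dispatch between the scalar
 * ARM code and the NEON code within the same module.
 */
static void chacha_crypt_sketch(u32 *state, u8 *dst, const u8 *src,
				unsigned int bytes, int nrounds)
{
	if (!crypto_simd_usable()) {
		/* Scalar path: Eric Biggers' ARM code, no chacha-generic. */
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/* NEON path: NEON registers may only be used inside this pair. */
	kernel_neon_begin();
	chacha_doneon(state, dst, src, bytes, nrounds);
	kernel_neon_end();
}

The full logic, including the skcipher walk and the Cortex-A5/A7 priority
handling, is in the chacha-glue.c hunk below.
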
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE
 	select CRYPTO_HASH
 
 config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha stream cipher algorithms"
-	depends on KERNEL_MODE_NEON
+	tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
 
 config CRYPTO_NHPOLY1305_NEON
 	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -53,7 +53,8 @@ aes-arm-ce-y	:= aes-ce-core.o aes-ce-glu
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+			     const u32 *state, int nrounds);
+
+static inline bool neon_usable(void)
+{
+	return crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+			     const struct chacha_ctx *ctx, const u8 *iv,
+			     bool neon)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		if (!neon) {
+			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+				     nbytes, state, ctx->nrounds);
+			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+		} else {
+			kernel_neon_begin();
+			chacha_doneon(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			kernel_neon_end();
+		}
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+	return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+	return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (!neon) {
+		hchacha_block_arm(state, subctx.key, ctx->nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, subctx.key, ctx->nrounds);
+		kernel_neon_end();
+	}
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+	return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+	return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_arm,
+		.decrypt		= chacha_arm,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	},
+};
+
+static struct skcipher_alg neon_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+		int i;
+
+		switch (read_cpuid_part()) {
+		case ARM_CPU_PART_CORTEX_A7:
+		case ARM_CPU_PART_CORTEX_A5:
+			/*
+			 * The Cortex-A7 and Cortex-A5 do not perform well with
+			 * the NEON implementation but do incredibly with the
+			 * scalar one and use less power.
+			 */
+			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+				neon_algs[i].base.cra_priority = 0;
+			break;
+		}
+
+		err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+		if (err)
+			crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	}
+	return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+		crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				      int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				       int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
-			  unsigned int bytes, int nrounds)
-{
-	u8 buf[CHACHA_BLOCK_SIZE];
-
-	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
-		chacha_4block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE * 4;
-		src += CHACHA_BLOCK_SIZE * 4;
-		dst += CHACHA_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha_block_xor_neon(state, buf, buf, nrounds);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
-{
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	crypto_chacha_init(state, ctx, iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		kernel_neon_begin();
-		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes, ctx->nrounds);
-		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
-	return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct chacha_ctx subctx;
-	u32 state[16];
-	u8 real_iv[16];
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_neon_begin();
-	hchacha_block_neon(state, subctx.key, ctx->nrounds);
-	kernel_neon_end();
-	subctx.nrounds = ctx->nrounds;
-
-	memcpy(&real_iv[0], req->iv + 24, 8);
-	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
-	{
-		.base.cra_name		= "chacha20",
-		.base.cra_driver_name	= "chacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= CHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= chacha_neon,
-		.decrypt		= chacha_neon,
-	}, {
-		.base.cra_name		= "xchacha20",
-		.base.cra_driver_name	= "xchacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}, {
-		.base.cra_name		= "xchacha12",
-		.base.cra_driver_name	= "xchacha12-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}
-};
-
-static int __init chacha_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,14 +41,6 @@
 	X14	.req	r12
 	X15	.req	r14
 
-.Lexpand_32byte_k:
-	// "expand 32-byte k"
-	.word	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
-
-#ifdef __thumb2__
-#  define adrl adr
-#endif
-
 .macro __rev		out, in,  t0, t1, t2
 .if __LINUX_ARM_ARCH__ >= 6
 	rev		\out, \in
@@ -391,61 +383,65 @@
 .endm	// _chacha
 
 /*
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
- *		     const u32 iv[4]);
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ *		     const u32 *state, int nrounds);
  */
-ENTRY(chacha20_arm)
+ENTRY(chacha_doarm)
 	cmp		r2, #0			// len == 0?
 	reteq		lr
 
+	ldr		ip, [sp]
+	cmp		ip, #12
+
 	push		{r0-r2,r4-r11,lr}
 
 	// Push state x0-x15 onto stack.
 	// Also store an extra copy of x10-x11 just before the state.
 
-	ldr		r4, [sp, #48]		// iv
-	mov		r0, sp
-	sub		sp, #80
-
-	// iv: x12-x15
-	ldm		r4, {X12,X13,X14,X15}
-	stmdb		r0!, {X12,X13,X14,X15}
+	add		X12, r3, #48
+	ldm		X12, {X12,X13,X14,X15}
+	push		{X12,X13,X14,X15}
+	sub		sp, sp, #64
 
-	// key: x4-x11
-	__ldrd		X8_X10, X9_X11, r3, 24
+	__ldrd		X8_X10, X9_X11, r3, 40
 	__strd		X8_X10, X9_X11, sp, 8
-	stmdb		r0!, {X8_X10, X9_X11}
-	ldm		r3, {X4-X9_X11}
-	stmdb		r0!, {X4-X9_X11}
-
-	// constants: x0-x3
-	adrl		X3, .Lexpand_32byte_k
-	ldm		X3, {X0-X3}
+	__strd		X8_X10, X9_X11, sp, 56
+	ldm		r3, {X0-X9_X11}
 	__strd		X0, X1, sp, 16
 	__strd		X2, X3, sp, 24
+	__strd		X4, X5, sp, 32
+	__strd		X6, X7, sp, 40
+	__strd		X8_X10, X9_X11, sp, 48
 
+	beq		1f
 	_chacha		20
 
-	add		sp, #76
+0:	add		sp, #76
 	pop		{r4-r11, pc}
-ENDPROC(chacha20_arm)
+
+1:	_chacha		12
+	b		0b
+ENDPROC(chacha_doarm)
 
 /*
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
 */
-ENTRY(hchacha20_arm)
+ENTRY(hchacha_block_arm)
 	push		{r1,r4-r11,lr}
 
+	cmp		r2, #12			// ChaCha12 ?
+
 	mov		r14, r0
 	ldmia		r14!, {r0-r11}		// load x0-x11
 	push		{r10-r11}		// store x10-x11 to stack
 	ldm		r14, {r10-r12,r14}	// load x12-x15
 	sub		sp, #8
 
+	beq		1f
 	_chacha_permute	20
 
 	// Skip over (unused0-unused1, x10-x11)
-	add		sp, #16
+0:	add		sp, #16
 
 	// Fix up rotations of x12-x15
 	ror		X12, X12, #drot
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
 	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}
 
 	pop		{r4-r11,pc}
-ENDPROC(hchacha20_arm)
+
+1:	_chacha_permute	12
+	b		0b
+ENDPROC(hchacha_block_arm)
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
 /*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
  * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>