diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c
index 977127368a7dbd10d3785d4383840be2b72f840d..ef6cedd52e3c2e71fdfb6dd8e68158b8ef622420 100644
--- a/arch/arm/mach-at91/at91sam9g45.c
+++ b/arch/arm/mach-at91/at91sam9g45.c
@@ -183,6 +183,13 @@ static struct clk adc_op_clk = {
 	.rate_hz	= 13200000,
 };
 
+/* AES/TDES/SHA clock - Only for sam9m11/sam9g46 */
+static struct clk aestdessha_clk = {
+	.name		= "aestdessha_clk",
+	.pmc_mask	= 1 << AT91SAM9G45_ID_AESTDESSHA,
+	.type		= CLK_TYPE_PERIPHERAL,
+};
+
 static struct clk *periph_clocks[] __initdata = {
 	&pioA_clk,
 	&pioB_clk,
@@ -212,6 +219,7 @@ static struct clk *periph_clocks[] __initdata = {
 	&udphs_clk,
 	&mmc1_clk,
 	&adc_op_clk,
+	&aestdessha_clk,
 	// irq0
 };
 
@@ -232,6 +240,9 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
 	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
 	CLKDEV_CON_DEV_ID(NULL, "atmel-trng", &trng_clk),
+	CLKDEV_CON_DEV_ID(NULL, "atmel_sha", &aestdessha_clk),
+	CLKDEV_CON_DEV_ID(NULL, "atmel_tdes", &aestdessha_clk),
+	CLKDEV_CON_DEV_ID(NULL, "atmel_aes", &aestdessha_clk),
 	/* more usart lookup table for DT entries */
 	CLKDEV_CON_DEV_ID("usart", "ffffee00.serial", &mck),
 	CLKDEV_CON_DEV_ID("usart", "fff8c000.serial", &usart0_clk),
@@ -388,7 +399,7 @@ static unsigned int at91sam9g45_default_irq_priority[NR_AIC_IRQS] __initdata = {
 	3,	/* Ethernet */
 	0,	/* Image Sensor Interface */
 	2,	/* USB Device High speed port */
-	0,
+	0,	/* AESTDESSHA Crypto HW Accelerators */
 	0,	/* Multimedia Card Interface 1 */
 	0,
 	0,	/* Advanced Interrupt Controller (IRQ0) */
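
The three clkdev entries added above deliberately map one peripheral clock to three device names, because the AES, TDES and SHA blocks share a single PMC peripheral ID (AT91SAM9G45_ID_AESTDESSHA). A minimal sketch of how a driver probe picks that clock up, assuming the usual clk API of this kernel generation (the function name is illustrative; dev_name() resolves to "atmel_sha", "atmel_tdes" or "atmel_aes" because the platform devices below register with id == -1):

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int example_crypto_probe(struct platform_device *pdev)
{
	struct clk *iclk;

	/* con_id NULL + matching dev_name() hits the lookups above,
	 * so all three drivers end up sharing aestdessha_clk. */
	iclk = clk_get(&pdev->dev, NULL);
	if (IS_ERR(iclk))
		return PTR_ERR(iclk);

	clk_enable(iclk);	/* ungate the AESTDESSHA peripheral clock */

	/* ... ioremap registers, request the shared IRQ, register algs ... */

	return 0;
}

The remove path would mirror this with clk_disable()/clk_put().
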
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 40fb79df2de0b0ebb19c2d4eb4258490d9839cc9..06073996a38241d50fb1595c52017a50567d7e44 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 #include <linux/atmel-mci.h>
+#include <linux/platform_data/atmel-aes.h>
 
 #include <linux/platform_data/at91_adc.h>
 
@@ -1830,6 +1831,130 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) {}
 void __init at91_add_device_serial(void) {}
 #endif
 
+/* --------------------------------------------------------------------
+ *  SHA1/SHA256
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CRYPTO_DEV_ATMEL_SHA) || defined(CONFIG_CRYPTO_DEV_ATMEL_SHA_MODULE)
+static struct resource sha_resources[] = {
+	[0] = {
+		.start	= AT91SAM9G45_BASE_SHA,
+		.end	= AT91SAM9G45_BASE_SHA + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9G45_ID_AESTDESSHA,
+		.end	= AT91SAM9G45_ID_AESTDESSHA,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9g45_sha_device = {
+	.name	= "atmel_sha",
+	.id		= -1,
+	.resource	= sha_resources,
+	.num_resources	= ARRAY_SIZE(sha_resources),
+};
+
+static void __init at91_add_device_sha(void)
+{
+	platform_device_register(&at91sam9g45_sha_device);
+}
+#else
+static void __init at91_add_device_sha(void) {}
+#endif
+
+/* --------------------------------------------------------------------
+ *  DES/TDES
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CRYPTO_DEV_ATMEL_TDES) || defined(CONFIG_CRYPTO_DEV_ATMEL_TDES_MODULE)
+static struct resource tdes_resources[] = {
+	[0] = {
+		.start	= AT91SAM9G45_BASE_TDES,
+		.end	= AT91SAM9G45_BASE_TDES + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9G45_ID_AESTDESSHA,
+		.end	= AT91SAM9G45_ID_AESTDESSHA,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9g45_tdes_device = {
+	.name	= "atmel_tdes",
+	.id		= -1,
+	.resource	= tdes_resources,
+	.num_resources	= ARRAY_SIZE(tdes_resources),
+};
+
+static void __init at91_add_device_tdes(void)
+{
+	platform_device_register(&at91sam9g45_tdes_device);
+}
+#else
+static void __init at91_add_device_tdes(void) {}
+#endif
+
+/* --------------------------------------------------------------------
+ *  AES
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE)
+static struct aes_platform_data aes_data;
+static u64 aes_dmamask = DMA_BIT_MASK(32);
+
+static struct resource aes_resources[] = {
+	[0] = {
+		.start	= AT91SAM9G45_BASE_AES,
+		.end	= AT91SAM9G45_BASE_AES + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9G45_ID_AESTDESSHA,
+		.end	= AT91SAM9G45_ID_AESTDESSHA,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9g45_aes_device = {
+	.name	= "atmel_aes",
+	.id		= -1,
+	.dev	= {
+		.dma_mask		= &aes_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= &aes_data,
+	},
+	.resource	= aes_resources,
+	.num_resources	= ARRAY_SIZE(aes_resources),
+};
+
+static void __init at91_add_device_aes(void)
+{
+	struct at_dma_slave	*atslave;
+	struct aes_dma_data	*alt_atslave;
+
+	alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL);
+
+	/* DMA TX slave channel configuration */
+	atslave = &alt_atslave->txdata;
+	atslave->dma_dev = &at_hdmac_device.dev;
+	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE	| ATC_SRC_H2SEL_HW |
+						ATC_SRC_PER(AT_DMA_ID_AES_RX);
+
+	/* DMA RX slave channel configuration */
+	atslave = &alt_atslave->rxdata;
+	atslave->dma_dev = &at_hdmac_device.dev;
+	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE	| ATC_DST_H2SEL_HW |
+						ATC_DST_PER(AT_DMA_ID_AES_TX);
+
+	aes_data.dma_slave = alt_atslave;
+	platform_device_register(&at91sam9g45_aes_device);
+}
+#else
+static void __init at91_add_device_aes(void) {}
+#endif
 
 /* -------------------------------------------------------------------- */
 /*
@@ -1847,6 +1972,9 @@ static int __init at91_add_standard_devices(void)
 	at91_add_device_trng();
 	at91_add_device_watchdog();
 	at91_add_device_tc();
+	at91_add_device_sha();
+	at91_add_device_tdes();
+	at91_add_device_aes();
 	return 0;
 }
 
diff --git a/arch/arm/mach-at91/include/mach/at91sam9g45.h b/arch/arm/mach-at91/include/mach/at91sam9g45.h
index 3a4da24d59112209613a6cab1ed132103b4c8772..8eba1021f533ac9f87ad45da184b1301c3d98f40 100644
--- a/arch/arm/mach-at91/include/mach/at91sam9g45.h
+++ b/arch/arm/mach-at91/include/mach/at91sam9g45.h
@@ -136,6 +136,8 @@
 #define AT_DMA_ID_SSC1_RX	 8
 #define AT_DMA_ID_AC97_TX	 9
 #define AT_DMA_ID_AC97_RX	10
+#define AT_DMA_ID_AES_TX	11
+#define AT_DMA_ID_AES_RX	12
 #define AT_DMA_ID_MCI1		13
 
 #endif
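
AT_DMA_ID_AES_TX/RX are the DMA controller request lines that at91_add_device_aes() wires into the at_dma_slave cfg words above. A hedged sketch of how the atmel_aes driver side would be expected to claim those channels (the function names are illustrative; handing the at_dma_slave through chan->private is the at_hdmac slave convention of this era, and the header carrying struct at_dma_slave is platform-specific):

#include <linux/dmaengine.h>
#include <linux/errno.h>
#include <linux/platform_data/atmel-aes.h>
#include <mach/at_hdmac.h>	/* struct at_dma_slave (AT91-specific location) */

static bool example_aes_dma_filter(struct dma_chan *chan, void *slave)
{
	struct at_dma_slave *sl = slave;

	/* Only accept channels belonging to the DMA controller named in
	 * the platform data (the at_hdmac device set up by board code). */
	if (sl && sl->dma_dev == chan->device->dev) {
		chan->private = sl;	/* pass the ATC_* cfg word down */
		return true;
	}
	return false;
}

static int example_aes_request_dma(struct aes_platform_data *pdata,
				   struct dma_chan **tx, struct dma_chan **rx)
{
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

	*tx = dma_request_channel(mask, example_aes_dma_filter,
				  &pdata->dma_slave->txdata);
	*rx = dma_request_channel(mask, example_aes_dma_filter,
				  &pdata->dma_slave->rxdata);
	if (!*tx || !*rx)
		return -ENODEV;	/* error unwinding omitted in this sketch */

	return 0;
}
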
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 950d1f7a5a395baf7f5deae6d1e2888e9b92138b..159e94f4b22aa3b808587812ccc013ff7bc18ee7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -149,7 +149,6 @@ core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
 core-$(CONFIG_PERF_EVENTS)	+= arch/powerpc/perf/
 
 drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
-drivers-$(CONFIG_CRYPTO_DEV_NX) += drivers/crypto/nx/
 
 # Default to zImage, override when needed
 all: zImage
diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h
deleted file mode 100644
index 6210457ceebb89c597994064af8dab46a26ad0cc..0000000000000000000000000000000000000000
--- a/arch/s390/crypto/crypto_des.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#ifndef __CRYPTO_DES_H__
-#define __CRYPTO_DES_H__
-
-extern int crypto_des_check_key(const u8*, unsigned int, u32*);
-
-#endif /*__CRYPTO_DES_H__*/
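
The deleted header only declared crypto_des_check_key(), which appears to have no remaining users; the generic DES code performs weak-key rejection by expanding the key with des_ekey() from <crypto/des.h> and treating a zero return as a weak key. A hedged sketch of that pattern, modeled on the generic setkey path (the function name is illustrative):

#include <linux/crypto.h>
#include <linux/errno.h>
#include <crypto/des.h>

static int example_des_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	u32 tmp[DES_EXPKEY_WORDS];
	u32 *flags = &tfm->crt_flags;

	/* des_ekey() returns 0 for the known weak keys; only reject them
	 * if the user asked for weak-key checking. */
	if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	/* ... load the key into the hardware / context ... */
	return 0;
}
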
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b597f9c9715aaea0e65640b65bf6152..e908e5de82d3caace8e418e2de3ff70efe12245d 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
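
The serpent/twofish AVX modules added above are built whenever their Kconfig symbols are set, so usability has to be established at run time: glue modules of this era check at init that the CPU advertises AVX and that the OS has enabled YMM state saving before registering any algorithms. A hedged sketch of that check (the init function name is illustrative; cpu_has_avx/cpu_has_osxsave, xgetbv() and the XSTATE_* masks are the x86 facilities of this kernel generation):

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <asm/cpufeature.h>	/* cpu_has_avx, cpu_has_osxsave */
#include <asm/xcr.h>		/* xgetbv(), XCR_XFEATURE_ENABLED_MASK */
#include <asm/xsave.h>		/* XSTATE_SSE, XSTATE_YMM */

static int __init example_avx_glue_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	/* AVX is only usable if the OS saves/restores YMM state (XSAVE). */
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	/* ... crypto_register_algs() for the AVX implementations ... */
	return 0;
}
module_init(example_avx_glue_init);
MODULE_LICENSE("GPL");
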
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 0000000000000000000000000000000000000000..43282fe04a8b726e57048d8e67fd96d209c76041
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+					crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
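
With these helpers exported, each SIMD cipher module only needs to point the async entry points at its internal "__driver-..." implementation; the aesni-intel changes later in this patch do exactly that. A hedged sketch of such a registration (the algorithm names, key sizes and block size are illustrative):

#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <asm/crypto/ablk_helper.h>

static int example_ablk_init(struct crypto_tfm *tfm)
{
	/* Bind the async tfm to the internal, FPU-using blkcipher. */
	return ablk_init_common(tfm, "__driver-ecb-example-avx");
}

static struct crypto_alg example_async_ecb_alg = {
	.cra_name		= "ecb(example)",
	.cra_driver_name	= "ecb-example-avx",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 16,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= example_ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= 16,
			.max_keysize	= 32,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
};

When the FPU is unusable (interrupt context), ablk_encrypt()/ablk_decrypt() defer the request to cryptd; otherwise they call straight into the synchronous child transform.
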
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7e318b4cddb93a820cd01d1bd01f4e..59b37deb8c8df03bc4360feb4cde2eae7db83a4a 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
 
 #include <linux/module.h>
 #include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e876f44eaaa3330bc9ef87f61b6c3d..34fdcff4d2c8e42d7ed2c1908b5409d1982b942a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
 #include <crypto/ctr.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <linux/workqueue.h>
@@ -52,10 +53,6 @@
 #define HAS_XTS
 #endif
 
-struct async_aes_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 }
 #endif
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->encrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static int ablk_ecb_init(struct crypto_tfm *tfm)
 {
 	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 	struct aesni_rfc4106_gcm_ctx *child_ctx =
                                  aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
+	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		if (!new_key_mem)
 			return -ENOMEM;
 
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
+		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+		memcpy(new_key_align, key, key_len);
+		key = new_key_align;
 	}
 
 	if (!irq_fpu_usable())
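
The new_key_align change above is an invalid-free fix: PTR_ALIGN() may advance the pointer, and the kfree() on this function's exit path (not shown in the hunk) must still receive the address kmalloc() returned. The pattern in isolation (a sketch; buf_mem/buf_align are illustrative names):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>

static int example_copy_key_aligned(const u8 *key, unsigned int key_len)
{
	u8 *buf_mem, *buf_align;

	buf_mem = kmalloc(key_len + 16 - 1, GFP_KERNEL);  /* slack for 16-byte alignment */
	if (!buf_mem)
		return -ENOMEM;

	buf_align = PTR_ALIGN(buf_mem, 16);
	memcpy(buf_align, key, key_len);

	/* ... use buf_align ... */

	kfree(buf_mem);		/* free the original pointer, never buf_align */
	return 0;
}
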
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 3306dc0b139e4c3e9b08a0a2689df729fdc9cf3a..eeb2b3b743e909855ffe6626d1e9d78efe8fd94f 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -5,10 +5,6 @@
  *
  * Camellia parts based on code by:
  *  Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,9 +30,9 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/b128ops.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 				 &tfm->crt_flags);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
-		     void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes;
-	int err;
-
-	err = blkcipher_walk_virt(desc, walk);
-
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
-
-		/* Process two block batch */
-		if (nbytes >= bsize * 2) {
-			do {
-				fn_2way(ctx, wdst, wsrc);
-
-				wsrc += bsize * 2;
-				wdst += bsize * 2;
-				nbytes -= bsize * 2;
-			} while (nbytes >= bsize * 2);
-
-			if (nbytes < bsize)
-				goto done;
-		}
-
-		/* Handle leftovers */
-		do {
-			fn(ctx, wdst, wsrc);
-
-			wsrc += bsize;
-			wdst += bsize;
-			nbytes -= bsize;
-		} while (nbytes >= bsize);
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
-}
+	u128 iv = *src;
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
-}
+	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
-	return nbytes;
+	u128_xor(&dst[1], &dst[1], &iv);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
 {
-	struct blkcipher_walk walk;
-	int err;
+	be128 ctrblk;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	if (dst != src)
+		*dst = *src;
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
 
-	return err;
+	camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+				    u128 *iv)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ivs[2 - 1];
-	u128 last_iv;
+	be128 ctrblks[2];
 
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
-
-	last_iv = *src;
-
-	/* Process two block batch */
-	if (nbytes >= bsize * 2) {
-		do {
-			nbytes -= bsize * (2 - 1);
-			src -= 2 - 1;
-			dst -= 2 - 1;
-
-			ivs[0] = src[0];
-
-			camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-
-			u128_xor(dst + 1, dst + 1, ivs + 0);
-
-			nbytes -= bsize;
-			if (nbytes < bsize)
-				goto done;
-
-			u128_xor(dst, dst, src - 1);
-			src -= 1;
-			dst -= 1;
-		} while (nbytes >= bsize * 2);
-
-		if (nbytes < bsize)
-			goto done;
+	if (dst != src) {
+		dst[0] = src[0];
+		dst[1] = src[1];
 	}
 
-	/* Handle leftovers */
-	for (;;) {
-		camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
-		nbytes -= bsize;
-		if (nbytes < bsize)
-			break;
+	u128_to_be128(&ctrblks[0], iv);
+	u128_inc(iv);
+	u128_to_be128(&ctrblks[1], iv);
+	u128_inc(iv);
 
-		u128_xor(dst, dst, src - 1);
-		src -= 1;
-		dst -= 1;
-	}
-
-done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
+	camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+static const struct common_glue_ctx camellia_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+	} }
+};
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+static const struct common_glue_ctx camellia_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+	} }
+};
 
-	return err;
-}
+static const struct common_glue_ctx camellia_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
 
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
-	dst->a = cpu_to_be64(src->a);
-	dst->b = cpu_to_be64(src->b);
-}
+static const struct common_glue_ctx camellia_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
 
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = be64_to_cpu(src->a);
-	dst->b = be64_to_cpu(src->b);
+	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
 }
 
-static inline void u128_inc(u128 *i)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	i->b++;
-	if (!i->b)
-		i->a++;
+	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 keystream[CAMELLIA_BLOCK_SIZE];
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	u128 ctrblk;
-
-	memcpy(keystream, src, nbytes);
-	camellia_enc_blk_xor(ctx, keystream, walk->iv);
-	memcpy(dst, keystream, nbytes);
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-	u128_inc(&ctrblk);
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
+				       dst, src, nbytes);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ctrblk;
-	be128 ctrblocks[2];
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
-	/* Process two block batch */
-	if (nbytes >= bsize * 2) {
-		do {
-			if (dst != src) {
-				dst[0] = src[0];
-				dst[1] = src[1];
-			}
-
-			/* create ctrblks for parallel encrypt */
-			u128_to_be128(&ctrblocks[0], &ctrblk);
-			u128_inc(&ctrblk);
-			u128_to_be128(&ctrblocks[1], &ctrblk);
-			u128_inc(&ctrblk);
-
-			camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
-						 (u8 *)ctrblocks);
-
-			src += 2;
-			dst += 2;
-			nbytes -= bsize * 2;
-		} while (nbytes >= bsize * 2);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	do {
-		if (dst != src)
-			*dst = *src;
-
-		u128_to_be128(&ctrblocks[0], &ctrblk);
-		u128_inc(&ctrblk);
-
-		camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-done:
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
+	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
+				       nbytes);
 }
 
 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		     struct scatterlist *src, unsigned int nbytes)
 {
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
-
-	while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
+	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
 }
 
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
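
The per-mode walk loops and the u128/be128 helpers removed from this file are not lost: the loops move into glue_helper.c (added below), and the conversion helpers are expected to be provided alongside it for all glue users. For reference, the callbacks kept above depend on exactly these operations; the sketch matches the code removed here (u128/be128 come from <crypto/b128ops.h>):

#include <asm/byteorder.h>
#include <crypto/b128ops.h>	/* u128, be128, u128_xor() */

static inline void u128_to_be128(be128 *dst, const u128 *src)
{
	dst->a = cpu_to_be64(src->a);
	dst->b = cpu_to_be64(src->b);
}

static inline void be128_to_u128(u128 *dst, const be128 *src)
{
	dst->a = be64_to_cpu(src->a);
	dst->b = be64_to_cpu(src->b);
}

static inline void u128_inc(u128 *i)
{
	/* 128-bit counter increment with carry from the low into the high half */
	i->b++;
	if (!i->b)
		i->a++;
}
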
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
new file mode 100644
index 0000000000000000000000000000000000000000..4854f0f31e4fa99bc65dc555060e9c913c7e3b70
--- /dev/null
+++ b/arch/x86/crypto/glue_helper.c
@@ -0,0 +1,306 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+
+static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+				   struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes, i, func_bytes;
+	bool fpu_enabled = false;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+
+		for (i = 0; i < gctx->num_funcs; i++) {
+			func_bytes = bsize * gctx->funcs[i].num_blocks;
+
+			/* Process multi-block batch */
+			if (nbytes >= func_bytes) {
+				do {
+					gctx->funcs[i].fn_u.ecb(ctx, wdst,
+								wsrc);
+
+					wsrc += func_bytes;
+					wdst += func_bytes;
+					nbytes -= func_bytes;
+				} while (nbytes >= func_bytes);
+
+				if (nbytes < bsize)
+					goto done;
+			}
+		}
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+	return err;
+}
+
+int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
+}
+EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
+
+static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+					      struct blkcipher_desc *desc,
+					      struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		fn(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	return nbytes;
+}
+
+int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+
+static unsigned int
+__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc,
+			  struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 last_iv;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		/* Process multi-block batch */
+		if (nbytes >= func_bytes) {
+			do {
+				nbytes -= func_bytes - bsize;
+				src -= num_blocks - 1;
+				dst -= num_blocks - 1;
+
+				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+
+				nbytes -= bsize;
+				if (nbytes < bsize)
+					goto done;
+
+				u128_xor(dst, dst, src - 1);
+				src -= 1;
+				dst -= 1;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	u128_xor(dst, dst, (u128 *)walk->iv);
+	*(u128 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+
+static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
+					struct blkcipher_desc *desc,
+					struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *src = (u8 *)walk->src.virt.addr;
+	u8 *dst = (u8 *)walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+	u128 ctrblk;
+	u128 tmp;
+
+	be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+	memcpy(&tmp, src, nbytes);
+	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
+	memcpy(dst, &tmp, nbytes);
+
+	u128_to_be128((be128 *)walk->iv, &ctrblk);
+}
+
+static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+					    struct blkcipher_desc *desc,
+					    struct blkcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 ctrblk;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	u128_to_be128((be128 *)walk->iv, &ctrblk);
+	return nbytes;
+}
+
+int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, bsize);
+
+	while ((nbytes = walk.nbytes) >= bsize) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	if (walk.nbytes) {
+		glue_ctr_crypt_final_128bit(
+			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
+
+MODULE_LICENSE("GPL");
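
glue_helper.c relies on two inlines that are not in this file, glue_fpu_begin() and glue_fpu_end(); they come from the asm/crypto/glue_helper.h header it includes. A hedged sketch of their expected behaviour, inferred from the call sites above (illustrative names with an example_ prefix): a negative fpu_blocks_limit, as camellia uses, means the cipher never touches SIMD state; otherwise the FPU is only claimed once a walk chunk spans at least fpu_blocks_limit blocks, since kernel_fpu_begin() is too expensive for short inputs.

#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/i387.h>		/* kernel_fpu_begin()/kernel_fpu_end() */

static inline bool example_glue_fpu_begin(unsigned int bsize,
					  int fpu_blocks_limit,
					  struct blkcipher_desc *desc,
					  bool fpu_enabled, unsigned int nbytes)
{
	if (likely(fpu_blocks_limit < 0))
		return false;		/* cipher does not use SIMD registers */

	if (fpu_enabled)
		return true;		/* already inside an FPU section */

	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
		return false;		/* chunk too small to be worth it */

	if (desc)			/* must not sleep while the FPU is held */
		desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	kernel_fpu_begin();
	return true;
}

static inline void example_glue_fpu_end(bool fpu_enabled)
{
	if (fpu_enabled)
		kernel_fpu_end();
}
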
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
new file mode 100644
index 0000000000000000000000000000000000000000..504106bf04a289c2e1d2d1c7de6bf970062e1279
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -0,0 +1,704 @@
+/*
+ * Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
+ *  Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "serpent-avx-x86_64-asm_64.S"
+.text
+
+#define CTX %rdi
+
+/**********************************************************************
+  8-way AVX serpent
+ **********************************************************************/
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+#define RE1 %xmm4
+
+#define tp  %xmm5
+
+#define RA2 %xmm6
+#define RB2 %xmm7
+#define RC2 %xmm8
+#define RD2 %xmm9
+#define RE2 %xmm10
+
+#define RNOT %xmm11
+
+#define RK0 %xmm12
+#define RK1 %xmm13
+#define RK2 %xmm14
+#define RK3 %xmm15
+
+
+#define S0_1(x0, x1, x2, x3, x4)      \
+	vpor		x0,   x3, tp; \
+	vpxor		x3,   x0, x0; \
+	vpxor		x2,   x3, x4; \
+	vpxor		RNOT, x4, x4; \
+	vpxor		x1,   tp, x3; \
+	vpand		x0,   x1, x1; \
+	vpxor		x4,   x1, x1; \
+	vpxor		x0,   x2, x2;
+#define S0_2(x0, x1, x2, x3, x4)      \
+	vpxor		x3,   x0, x0; \
+	vpor		x0,   x4, x4; \
+	vpxor		x2,   x0, x0; \
+	vpand		x1,   x2, x2; \
+	vpxor		x2,   x3, x3; \
+	vpxor		RNOT, x1, x1; \
+	vpxor		x4,   x2, x2; \
+	vpxor		x2,   x1, x1;
+
+#define S1_1(x0, x1, x2, x3, x4)      \
+	vpxor		x0,   x1, tp; \
+	vpxor		x3,   x0, x0; \
+	vpxor		RNOT, x3, x3; \
+	vpand		tp,   x1, x4; \
+	vpor		tp,   x0, x0; \
+	vpxor		x2,   x3, x3; \
+	vpxor		x3,   x0, x0; \
+	vpxor		x3,   tp, x1;
+#define S1_2(x0, x1, x2, x3, x4)      \
+	vpxor		x4,   x3, x3; \
+	vpor		x4,   x1, x1; \
+	vpxor		x2,   x4, x4; \
+	vpand		x0,   x2, x2; \
+	vpxor		x1,   x2, x2; \
+	vpor		x0,   x1, x1; \
+	vpxor		RNOT, x0, x0; \
+	vpxor		x2,   x0, x0; \
+	vpxor		x1,   x4, x4;
+
+#define S2_1(x0, x1, x2, x3, x4)      \
+	vpxor		RNOT, x3, x3; \
+	vpxor		x0,   x1, x1; \
+	vpand		x2,   x0, tp; \
+	vpxor		x3,   tp, tp; \
+	vpor		x0,   x3, x3; \
+	vpxor		x1,   x2, x2; \
+	vpxor		x1,   x3, x3; \
+	vpand		tp,   x1, x1;
+#define S2_2(x0, x1, x2, x3, x4)      \
+	vpxor		x2,   tp, tp; \
+	vpand		x3,   x2, x2; \
+	vpor		x1,   x3, x3; \
+	vpxor		RNOT, tp, tp; \
+	vpxor		tp,   x3, x3; \
+	vpxor		tp,   x0, x4; \
+	vpxor		x2,   tp, x0; \
+	vpor		x2,   x1, x1;
+
+#define S3_1(x0, x1, x2, x3, x4)      \
+	vpxor		x3,   x1, tp; \
+	vpor		x0,   x3, x3; \
+	vpand		x0,   x1, x4; \
+	vpxor		x2,   x0, x0; \
+	vpxor		tp,   x2, x2; \
+	vpand		x3,   tp, x1; \
+	vpxor		x3,   x2, x2; \
+	vpor		x4,   x0, x0; \
+	vpxor		x3,   x4, x4;
+#define S3_2(x0, x1, x2, x3, x4)      \
+	vpxor		x0,   x1, x1; \
+	vpand		x3,   x0, x0; \
+	vpand		x4,   x3, x3; \
+	vpxor		x2,   x3, x3; \
+	vpor		x1,   x4, x4; \
+	vpand		x1,   x2, x2; \
+	vpxor		x3,   x4, x4; \
+	vpxor		x3,   x0, x0; \
+	vpxor		x2,   x3, x3;
+
+#define S4_1(x0, x1, x2, x3, x4)      \
+	vpand		x0,   x3, tp; \
+	vpxor		x3,   x0, x0; \
+	vpxor		x2,   tp, tp; \
+	vpor		x3,   x2, x2; \
+	vpxor		x1,   x0, x0; \
+	vpxor		tp,   x3, x4; \
+	vpor		x0,   x2, x2; \
+	vpxor		x1,   x2, x2;
+#define S4_2(x0, x1, x2, x3, x4)      \
+	vpand		x0,   x1, x1; \
+	vpxor		x4,   x1, x1; \
+	vpand		x2,   x4, x4; \
+	vpxor		tp,   x2, x2; \
+	vpxor		x0,   x4, x4; \
+	vpor		x1,   tp, x3; \
+	vpxor		RNOT, x1, x1; \
+	vpxor		x0,   x3, x3;
+
+#define S5_1(x0, x1, x2, x3, x4)      \
+	vpor		x0,   x1, tp; \
+	vpxor		tp,   x2, x2; \
+	vpxor		RNOT, x3, x3; \
+	vpxor		x0,   x1, x4; \
+	vpxor		x2,   x0, x0; \
+	vpand		x4,   tp, x1; \
+	vpor		x3,   x4, x4; \
+	vpxor		x0,   x4, x4;
+#define S5_2(x0, x1, x2, x3, x4)      \
+	vpand		x3,   x0, x0; \
+	vpxor		x3,   x1, x1; \
+	vpxor		x2,   x3, x3; \
+	vpxor		x1,   x0, x0; \
+	vpand		x4,   x2, x2; \
+	vpxor		x2,   x1, x1; \
+	vpand		x0,   x2, x2; \
+	vpxor		x2,   x3, x3;
+
+#define S6_1(x0, x1, x2, x3, x4)      \
+	vpxor		x0,   x3, x3; \
+	vpxor		x2,   x1, tp; \
+	vpxor		x0,   x2, x2; \
+	vpand		x3,   x0, x0; \
+	vpor		x3,   tp, tp; \
+	vpxor		RNOT, x1, x4; \
+	vpxor		tp,   x0, x0; \
+	vpxor		x2,   tp, x1;
+#define S6_2(x0, x1, x2, x3, x4)      \
+	vpxor		x4,   x3, x3; \
+	vpxor		x0,   x4, x4; \
+	vpand		x0,   x2, x2; \
+	vpxor		x1,   x4, x4; \
+	vpxor		x3,   x2, x2; \
+	vpand		x1,   x3, x3; \
+	vpxor		x0,   x3, x3; \
+	vpxor		x2,   x1, x1;
+
+#define S7_1(x0, x1, x2, x3, x4)      \
+	vpxor		RNOT, x1, tp; \
+	vpxor		RNOT, x0, x0; \
+	vpand		x2,   tp, x1; \
+	vpxor		x3,   x1, x1; \
+	vpor		tp,   x3, x3; \
+	vpxor		x2,   tp, x4; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x0,   x3, x3; \
+	vpor		x1,   x0, x0;
+#define S7_2(x0, x1, x2, x3, x4)      \
+	vpand		x0,   x2, x2; \
+	vpxor		x4,   x0, x0; \
+	vpxor		x3,   x4, x4; \
+	vpand		x0,   x3, x3; \
+	vpxor		x1,   x4, x4; \
+	vpxor		x4,   x2, x2; \
+	vpxor		x1,   x3, x3; \
+	vpor		x0,   x4, x4; \
+	vpxor		x1,   x4, x4;
+
+#define SI0_1(x0, x1, x2, x3, x4)     \
+	vpxor		x0,   x1, x1; \
+	vpor		x1,   x3, tp; \
+	vpxor		x1,   x3, x4; \
+	vpxor		RNOT, x0, x0; \
+	vpxor		tp,   x2, x2; \
+	vpxor		x0,   tp, x3; \
+	vpand		x1,   x0, x0; \
+	vpxor		x2,   x0, x0;
+#define SI0_2(x0, x1, x2, x3, x4)     \
+	vpand		x3,   x2, x2; \
+	vpxor		x4,   x3, x3; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x3,   x1, x1; \
+	vpand		x0,   x3, x3; \
+	vpxor		x0,   x1, x1; \
+	vpxor		x2,   x0, x0; \
+	vpxor		x3,   x4, x4;
+
+#define SI1_1(x0, x1, x2, x3, x4)     \
+	vpxor		x3,   x1, x1; \
+	vpxor		x2,   x0, tp; \
+	vpxor		RNOT, x2, x2; \
+	vpor		x1,   x0, x4; \
+	vpxor		x3,   x4, x4; \
+	vpand		x1,   x3, x3; \
+	vpxor		x2,   x1, x1; \
+	vpand		x4,   x2, x2;
+#define SI1_2(x0, x1, x2, x3, x4)     \
+	vpxor		x1,   x4, x4; \
+	vpor		x3,   x1, x1; \
+	vpxor		tp,   x3, x3; \
+	vpxor		tp,   x2, x2; \
+	vpor		x4,   tp, x0; \
+	vpxor		x4,   x2, x2; \
+	vpxor		x0,   x1, x1; \
+	vpxor		x1,   x4, x4;
+
+#define SI2_1(x0, x1, x2, x3, x4)     \
+	vpxor		x1,   x2, x2; \
+	vpxor		RNOT, x3, tp; \
+	vpor		x2,   tp, tp; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x0,   x3, x4; \
+	vpxor		x1,   tp, x3; \
+	vpor		x2,   x1, x1; \
+	vpxor		x0,   x2, x2;
+#define SI2_2(x0, x1, x2, x3, x4)     \
+	vpxor		x4,   x1, x1; \
+	vpor		x3,   x4, x4; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x2,   x4, x4; \
+	vpand		x1,   x2, x2; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x4,   x3, x3; \
+	vpxor		x0,   x4, x4;
+
+#define SI3_1(x0, x1, x2, x3, x4)     \
+	vpxor		x1,   x2, x2; \
+	vpand		x2,   x1, tp; \
+	vpxor		x0,   tp, tp; \
+	vpor		x1,   x0, x0; \
+	vpxor		x3,   x1, x4; \
+	vpxor		x3,   x0, x0; \
+	vpor		tp,   x3, x3; \
+	vpxor		x2,   tp, x1;
+#define SI3_2(x0, x1, x2, x3, x4)     \
+	vpxor		x3,   x1, x1; \
+	vpxor		x2,   x0, x0; \
+	vpxor		x3,   x2, x2; \
+	vpand		x1,   x3, x3; \
+	vpxor		x0,   x1, x1; \
+	vpand		x2,   x0, x0; \
+	vpxor		x3,   x4, x4; \
+	vpxor		x0,   x3, x3; \
+	vpxor		x1,   x0, x0;
+
+#define SI4_1(x0, x1, x2, x3, x4)     \
+	vpxor		x3,   x2, x2; \
+	vpand		x1,   x0, tp; \
+	vpxor		x2,   tp, tp; \
+	vpor		x3,   x2, x2; \
+	vpxor		RNOT, x0, x4; \
+	vpxor		tp,   x1, x1; \
+	vpxor		x2,   tp, x0; \
+	vpand		x4,   x2, x2;
+#define SI4_2(x0, x1, x2, x3, x4)     \
+	vpxor		x0,   x2, x2; \
+	vpor		x4,   x0, x0; \
+	vpxor		x3,   x0, x0; \
+	vpand		x2,   x3, x3; \
+	vpxor		x3,   x4, x4; \
+	vpxor		x1,   x3, x3; \
+	vpand		x0,   x1, x1; \
+	vpxor		x1,   x4, x4; \
+	vpxor		x3,   x0, x0;
+
+#define SI5_1(x0, x1, x2, x3, x4)     \
+	vpor		x2,   x1, tp; \
+	vpxor		x1,   x2, x2; \
+	vpxor		x3,   tp, tp; \
+	vpand		x1,   x3, x3; \
+	vpxor		x3,   x2, x2; \
+	vpor		x0,   x3, x3; \
+	vpxor		RNOT, x0, x0; \
+	vpxor		x2,   x3, x3; \
+	vpor		x0,   x2, x2;
+#define SI5_2(x0, x1, x2, x3, x4)     \
+	vpxor		tp,   x1, x4; \
+	vpxor		x4,   x2, x2; \
+	vpand		x0,   x4, x4; \
+	vpxor		tp,   x0, x0; \
+	vpxor		x3,   tp, x1; \
+	vpand		x2,   x0, x0; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x2,   x0, x0; \
+	vpxor		x4,   x2, x2; \
+	vpxor		x3,   x4, x4;
+
+#define SI6_1(x0, x1, x2, x3, x4)     \
+	vpxor		x2,   x0, x0; \
+	vpand		x3,   x0, tp; \
+	vpxor		x3,   x2, x2; \
+	vpxor		x2,   tp, tp; \
+	vpxor		x1,   x3, x3; \
+	vpor		x0,   x2, x2; \
+	vpxor		x3,   x2, x2; \
+	vpand		tp,   x3, x3;
+#define SI6_2(x0, x1, x2, x3, x4)     \
+	vpxor		RNOT, tp, tp; \
+	vpxor		x1,   x3, x3; \
+	vpand		x2,   x1, x1; \
+	vpxor		tp,   x0, x4; \
+	vpxor		x4,   x3, x3; \
+	vpxor		x2,   x4, x4; \
+	vpxor		x1,   tp, x0; \
+	vpxor		x0,   x2, x2;
+
+#define SI7_1(x0, x1, x2, x3, x4)     \
+	vpand		x0,   x3, tp; \
+	vpxor		x2,   x0, x0; \
+	vpor		x3,   x2, x2; \
+	vpxor		x1,   x3, x4; \
+	vpxor		RNOT, x0, x0; \
+	vpor		tp,   x1, x1; \
+	vpxor		x0,   x4, x4; \
+	vpand		x2,   x0, x0; \
+	vpxor		x1,   x0, x0;
+#define SI7_2(x0, x1, x2, x3, x4)     \
+	vpand		x2,   x1, x1; \
+	vpxor		x2,   tp, x3; \
+	vpxor		x3,   x4, x4; \
+	vpand		x3,   x2, x2; \
+	vpor		x0,   x3, x3; \
+	vpxor		x4,   x1, x1; \
+	vpxor		x4,   x3, x3; \
+	vpand		x0,   x4, x4; \
+	vpxor		x2,   x4, x4;
+
+#define get_key(i, j, t) \
+	vbroadcastss (4*(i)+(j))*4(CTX), t;
+
+#define K2(x0, x1, x2, x3, x4, i) \
+	get_key(i, 0, RK0); \
+	get_key(i, 1, RK1); \
+	get_key(i, 2, RK2); \
+	get_key(i, 3, RK3); \
+	vpxor RK0,	x0 ## 1, x0 ## 1; \
+	vpxor RK1,	x1 ## 1, x1 ## 1; \
+	vpxor RK2,	x2 ## 1, x2 ## 1; \
+	vpxor RK3,	x3 ## 1, x3 ## 1; \
+		vpxor RK0,	x0 ## 2, x0 ## 2; \
+		vpxor RK1,	x1 ## 2, x1 ## 2; \
+		vpxor RK2,	x2 ## 2, x2 ## 2; \
+		vpxor RK3,	x3 ## 2, x3 ## 2;
+
+#define LK2(x0, x1, x2, x3, x4, i) \
+	vpslld $13,		x0 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 13),	x0 ## 1, x0 ## 1;          \
+	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			x0 ## 1, x1 ## 1, x1 ## 1; \
+	vpslld $3,		x2 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 3),	x2 ## 1, x2 ## 1;          \
+	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
+	vpxor			x2 ## 1, x1 ## 1, x1 ## 1; \
+		vpslld $13,		x0 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 13),	x0 ## 2, x0 ## 2;          \
+		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			x0 ## 2, x1 ## 2, x1 ## 2; \
+		vpslld $3,		x2 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 3),	x2 ## 2, x2 ## 2;          \
+		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
+		vpxor			x2 ## 2, x1 ## 2, x1 ## 2; \
+	vpslld $1,		x1 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 1),	x1 ## 1, x1 ## 1;          \
+	vpor			x4 ## 1, x1 ## 1, x1 ## 1; \
+	vpslld $3,		x0 ## 1, x4 ## 1;          \
+	vpxor			x2 ## 1, x3 ## 1, x3 ## 1; \
+	vpxor			x4 ## 1, x3 ## 1, x3 ## 1; \
+	get_key(i, 1, RK1); \
+		vpslld $1,		x1 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 1),	x1 ## 2, x1 ## 2;          \
+		vpor			x4 ## 2, x1 ## 2, x1 ## 2; \
+		vpslld $3,		x0 ## 2, x4 ## 2;          \
+		vpxor			x2 ## 2, x3 ## 2, x3 ## 2; \
+		vpxor			x4 ## 2, x3 ## 2, x3 ## 2; \
+		get_key(i, 3, RK3); \
+	vpslld $7,		x3 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 7),	x3 ## 1, x3 ## 1;          \
+	vpor			x4 ## 1, x3 ## 1, x3 ## 1; \
+	vpslld $7,		x1 ## 1, x4 ## 1;          \
+	vpxor			x1 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			x3 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			x3 ## 1, x2 ## 1, x2 ## 1; \
+	vpxor			x4 ## 1, x2 ## 1, x2 ## 1; \
+	get_key(i, 0, RK0); \
+		vpslld $7,		x3 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 7),	x3 ## 2, x3 ## 2;          \
+		vpor			x4 ## 2, x3 ## 2, x3 ## 2; \
+		vpslld $7,		x1 ## 2, x4 ## 2;          \
+		vpxor			x1 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			x3 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			x3 ## 2, x2 ## 2, x2 ## 2; \
+		vpxor			x4 ## 2, x2 ## 2, x2 ## 2; \
+		get_key(i, 2, RK2); \
+	vpxor			RK1, x1 ## 1, x1 ## 1;     \
+	vpxor			RK3, x3 ## 1, x3 ## 1;     \
+	vpslld $5,		x0 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 5),	x0 ## 1, x0 ## 1;          \
+	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
+	vpslld $22,		x2 ## 1, x4 ## 1;          \
+	vpsrld $(32 - 22),	x2 ## 1, x2 ## 1;          \
+	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
+	vpxor			RK0, x0 ## 1, x0 ## 1;     \
+	vpxor			RK2, x2 ## 1, x2 ## 1;     \
+		vpxor			RK1, x1 ## 2, x1 ## 2;     \
+		vpxor			RK3, x3 ## 2, x3 ## 2;     \
+		vpslld $5,		x0 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 5),	x0 ## 2, x0 ## 2;          \
+		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
+		vpslld $22,		x2 ## 2, x4 ## 2;          \
+		vpsrld $(32 - 22),	x2 ## 2, x2 ## 2;          \
+		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
+		vpxor			RK0, x0 ## 2, x0 ## 2;     \
+		vpxor			RK2, x2 ## 2, x2 ## 2;
+
+#define KL2(x0, x1, x2, x3, x4, i) \
+	vpxor			RK0, x0 ## 1, x0 ## 1;     \
+	vpxor			RK2, x2 ## 1, x2 ## 1;     \
+	vpsrld $5,		x0 ## 1, x4 ## 1;          \
+	vpslld $(32 - 5),	x0 ## 1, x0 ## 1;          \
+	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			RK3, x3 ## 1, x3 ## 1;     \
+	vpxor			RK1, x1 ## 1, x1 ## 1;     \
+	vpsrld $22,		x2 ## 1, x4 ## 1;          \
+	vpslld $(32 - 22),	x2 ## 1, x2 ## 1;          \
+	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
+	vpxor			x3 ## 1, x2 ## 1, x2 ## 1; \
+		vpxor			RK0, x0 ## 2, x0 ## 2;     \
+		vpxor			RK2, x2 ## 2, x2 ## 2;     \
+		vpsrld $5,		x0 ## 2, x4 ## 2;          \
+		vpslld $(32 - 5),	x0 ## 2, x0 ## 2;          \
+		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			RK3, x3 ## 2, x3 ## 2;     \
+		vpxor			RK1, x1 ## 2, x1 ## 2;     \
+		vpsrld $22,		x2 ## 2, x4 ## 2;          \
+		vpslld $(32 - 22),	x2 ## 2, x2 ## 2;          \
+		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
+		vpxor			x3 ## 2, x2 ## 2, x2 ## 2; \
+	vpxor			x3 ## 1, x0 ## 1, x0 ## 1; \
+	vpslld $7,		x1 ## 1, x4 ## 1;          \
+	vpxor			x1 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			x4 ## 1, x2 ## 1, x2 ## 1; \
+	vpsrld $1,		x1 ## 1, x4 ## 1;          \
+	vpslld $(32 - 1),	x1 ## 1, x1 ## 1;          \
+	vpor			x4 ## 1, x1 ## 1, x1 ## 1; \
+		vpxor			x3 ## 2, x0 ## 2, x0 ## 2; \
+		vpslld $7,		x1 ## 2, x4 ## 2;          \
+		vpxor			x1 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			x4 ## 2, x2 ## 2, x2 ## 2; \
+		vpsrld $1,		x1 ## 2, x4 ## 2;          \
+		vpslld $(32 - 1),	x1 ## 2, x1 ## 2;          \
+		vpor			x4 ## 2, x1 ## 2, x1 ## 2; \
+	vpsrld $7,		x3 ## 1, x4 ## 1;          \
+	vpslld $(32 - 7),	x3 ## 1, x3 ## 1;          \
+	vpor			x4 ## 1, x3 ## 1, x3 ## 1; \
+	vpxor			x0 ## 1, x1 ## 1, x1 ## 1; \
+	vpslld $3,		x0 ## 1, x4 ## 1;          \
+	vpxor			x4 ## 1, x3 ## 1, x3 ## 1; \
+		vpsrld $7,		x3 ## 2, x4 ## 2;          \
+		vpslld $(32 - 7),	x3 ## 2, x3 ## 2;          \
+		vpor			x4 ## 2, x3 ## 2, x3 ## 2; \
+		vpxor			x0 ## 2, x1 ## 2, x1 ## 2; \
+		vpslld $3,		x0 ## 2, x4 ## 2;          \
+		vpxor			x4 ## 2, x3 ## 2, x3 ## 2; \
+	vpsrld $13,		x0 ## 1, x4 ## 1;          \
+	vpslld $(32 - 13),	x0 ## 1, x0 ## 1;          \
+	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
+	vpxor			x2 ## 1, x1 ## 1, x1 ## 1; \
+	vpxor			x2 ## 1, x3 ## 1, x3 ## 1; \
+	vpsrld $3,		x2 ## 1, x4 ## 1;          \
+	vpslld $(32 - 3),	x2 ## 1, x2 ## 1;          \
+	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
+		vpsrld $13,		x0 ## 2, x4 ## 2;          \
+		vpslld $(32 - 13),	x0 ## 2, x0 ## 2;          \
+		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
+		vpxor			x2 ## 2, x1 ## 2, x1 ## 2; \
+		vpxor			x2 ## 2, x3 ## 2, x3 ## 2; \
+		vpsrld $3,		x2 ## 2, x4 ## 2;          \
+		vpslld $(32 - 3),	x2 ## 2, x2 ## 2;          \
+		vpor			x4 ## 2, x2 ## 2, x2 ## 2;
+
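+/* apply one S-box (its _1/_2 halves) to both four-block groups */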
+#define S(SBOX, x0, x1, x2, x3, x4) \
+	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
+	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
+
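+/* like S(), but interleaves the get_key() loads for round i with the S-box */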
+#define SP(SBOX, x0, x1, x2, x3, x4, i) \
+	get_key(i, 0, RK0); \
+	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+	get_key(i, 2, RK2); \
+	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+	get_key(i, 3, RK3); \
+	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
+	get_key(i, 1, RK1); \
+	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
+
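+/*
+ * 4x4 32-bit transpose: afterwards each register holds the same word
+ * from four different blocks
+ */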
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t2; \
+	vpunpckldq		x3, x2, t1; \
+	vpunpckhdq		x3, x2, x3; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1; \
+	vpunpcklqdq		x3, t2, x2; \
+	vpunpckhqdq		x3, t2, x3;
+
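+/* load four 16-byte blocks and transpose them into the sliced layout */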
+#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vmovdqu (2*4*4)(in),	x2; \
+	vmovdqu (3*4*4)(in),	x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
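+/* transpose back to block order and store the four blocks */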
+#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vmovdqu x0,		(0*4*4)(out); \
+	vmovdqu x1,		(1*4*4)(out); \
+	vmovdqu x2,		(2*4*4)(out); \
+	vmovdqu x3,		(3*4*4)(out);
+
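+/* transpose back, XOR with the data at 'out' and store (xor output path) */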
+#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpxor (0*4*4)(out),	x0, x0;       \
+	vmovdqu x0,		(0*4*4)(out); \
+	vpxor (1*4*4)(out),	x1, x1;       \
+	vmovdqu x1,		(1*4*4)(out); \
+	vpxor (2*4*4)(out),	x2, x2;       \
+	vmovdqu x2,		(2*4*4)(out); \
+	vpxor (3*4*4)(out),	x3, x3;       \
+	vmovdqu x3,		(3*4*4)(out);
+
+.align 8
+.global __serpent_enc_blk_8way_avx
+.type   __serpent_enc_blk_8way_avx,@function;
+
+__serpent_enc_blk_8way_avx:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	vpcmpeqd RNOT, RNOT, RNOT;
+
+	leaq (4*4*4)(%rdx), %rax;
+	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
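+	/*
+	 * 32 rounds: mix in key 0, then S-box (S0..S7 repeating) followed by
+	 * linear transform + next key (LK2); the last round only mixes key 32.
+	 */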
+						 K2(RA, RB, RC, RD, RE, 0);
+	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
+	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
+	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
+	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
+	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
+	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
+	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
+	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
+	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
+	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
+	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
+	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
+	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
+	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
+	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
+	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
+	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
+	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
+	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
+	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
+	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
+	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
+	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
+	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
+	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
+	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
+	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
+	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
+	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
+	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
+	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
+	S(S7, RD, RE, RB, RC, RA);		 K2(RA, RB, RC, RD, RE, 32);
+
+	leaq (4*4*4)(%rsi), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+	write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+	ret;
+
+__enc_xor8:
+	xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+	ret;
+
+.align 8
+.global serpent_dec_blk_8way_avx
+.type   serpent_dec_blk_8way_avx,@function;
+
+serpent_dec_blk_8way_avx:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	vpcmpeqd RNOT, RNOT, RNOT;
+
+	leaq (4*4*4)(%rdx), %rax;
+	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
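+	/*
+	 * Decryption: remove key 32, then run the rounds backwards with the
+	 * inverse S-boxes (SI7..SI0) and KL2 (key removal + inverse transform).
+	 */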
+						 K2(RA, RB, RC, RD, RE, 32);
+	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
+	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
+	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
+	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
+	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
+	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
+	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
+	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
+	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
+	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
+	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
+	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
+	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
+	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
+	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
+	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
+	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
+	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
+	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
+	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
+	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
+	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
+	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
+	SP(SI0, RA, RD, RB, RC, RE, 8);		KL2(RB, RE, RD, RA, RC, 8);
+	SP(SI7, RB, RE, RD, RA, RC, 7);		KL2(RE, RA, RB, RC, RD, 7);
+	SP(SI6, RE, RA, RB, RC, RD, 6);		KL2(RB, RD, RC, RE, RA, 6);
+	SP(SI5, RB, RD, RC, RE, RA, 5);		KL2(RD, RA, RB, RC, RE, 5);
+	SP(SI4, RD, RA, RB, RC, RE, 4);		KL2(RD, RB, RE, RC, RA, 4);
+	SP(SI3, RD, RB, RE, RC, RA, 3);		KL2(RE, RD, RA, RC, RB, 3);
+	SP(SI2, RE, RD, RA, RC, RB, 2);		KL2(RD, RB, RC, RA, RE, 2);
+	SP(SI1, RD, RB, RC, RA, RE, 1);		KL2(RE, RB, RC, RA, RD, 1);
+	S(SI0, RE, RB, RC, RA, RD);		 K2(RC, RD, RB, RE, RA, 0);
+
+	leaq (4*4*4)(%rsi), %rax;
+	write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+	write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+	ret;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
new file mode 100644
index 0000000000000000000000000000000000000000..b36bdac237eb0ee736f93ca09b62c14defabde36
--- /dev/null
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -0,0 +1,636 @@
+/*
+ * Glue Code for AVX assembler versions of Serpent Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Glue code based on serpent_sse2_glue.c by:
+ *  Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/serpent.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/serpent-avx.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
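+/*
+ * CBC decryption of a full 8-block batch: save the ciphertext blocks that
+ * act as IVs for blocks 1..7, bulk-decrypt, then XOR them back in.  The
+ * chaining of block 0 is handled by the caller.
+ */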
+static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
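+/* encrypt the big-endian counter, XOR it into the data, bump the counter */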
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
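+/*
+ * 8-block CTR: build eight big-endian counter blocks and let the xor
+ * variant of the 8-way encrypt apply the keystream in one call.
+ */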
+static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				   u128 *iv)
+{
+	be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
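+/*
+ * Dispatch tables for the common glue code: use the 8-way AVX routines
+ * while at least SERPENT_PARALLEL_BLOCKS remain, then fall back to the
+ * generic one-block functions for the tail.
+ */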
+static const struct common_glue_ctx serpent_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+	} }
+};
+
+static const struct common_glue_ctx serpent_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx serpent_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+	} }
+};
+
+static const struct common_glue_ctx serpent_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
+				     dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void serpent_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
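+/* per-request state for the LRW/XTS callbacks: cipher context + FPU state */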
+struct crypt_priv {
+	struct serpent_ctx *ctx;
+	bool fpu_enabled;
+};
+
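+/* use the 8-way path for a full batch, otherwise one block at a time */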
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = SERPENT_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = SERPENT_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct serpent_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct serpent_ctx serpent_ctx;
+};
+
+static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
+							SERPENT_BLOCK_SIZE);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen -
+						SERPENT_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[SERPENT_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->serpent_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[SERPENT_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->serpent_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+struct serpent_xts_ctx {
+	struct serpent_ctx tweak_ctx;
+	struct serpent_ctx crypt_ctx;
+};
+
+static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[SERPENT_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[SERPENT_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg serpent_algs[10] = { {
+	.cra_name		= "__ecb-serpent-avx",
+	.cra_driver_name	= "__driver-ecb-serpent-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-serpent-avx",
+	.cra_driver_name	= "__driver-cbc-serpent-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-serpent-avx",
+	.cra_driver_name	= "__driver-ctr-serpent-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-serpent-avx",
+	.cra_driver_name	= "__driver-lrw-serpent-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= lrw_serpent_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-serpent-avx",
+	.cra_driver_name	= "__driver-xts-serpent-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= xts_serpent_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(serpent)",
+	.cra_driver_name	= "ecb-serpent-avx",
+	.cra_priority		= 500,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(serpent)",
+	.cra_driver_name	= "cbc-serpent-avx",
+	.cra_priority		= 500,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(serpent)",
+	.cra_driver_name	= "ctr-serpent-avx",
+	.cra_priority		= 500,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+}, {
+	.cra_name		= "lrw(serpent)",
+	.cra_driver_name	= "lrw-serpent-avx",
+	.cra_priority		= 500,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "xts(serpent)",
+	.cra_driver_name	= "xts-serpent-avx",
+	.cra_priority		= 500,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+} };
+
+static int __init serpent_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		printk(KERN_INFO "AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
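+	/* make sure the OS saves both XMM and YMM state on context switches */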
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		printk(KERN_INFO "AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+}
+
+static void __exit serpent_exit(void)
+{
+	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+}
+
+module_init(serpent_init);
+module_exit(serpent_exit);
+
+MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("serpent");
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 4b21be85e0a139787f92d5d739c2c259dd7133fa..d679c8675f4ad1994b8e3e71b47fe318c611babe 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -41,358 +41,145 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/i387.h>
-#include <asm/serpent.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
-
-struct async_serpent_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
+#include <asm/crypto/serpent-sse2.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
-	if (fpu_enabled)
-		return true;
-
-	/* SSE2 is only used when chunk to be processed is large enough, so
-	 * do not enable FPU until it is necessary.
-	 */
-	if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
-		return false;
-
-	kernel_fpu_begin();
-	return true;
-}
-
-static inline void serpent_fpu_end(bool fpu_enabled)
+static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 {
-	if (fpu_enabled)
-		kernel_fpu_end();
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     bool enc)
-{
-	bool fpu_enabled = false;
-	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	unsigned int nbytes;
-	int err;
-
-	err = blkcipher_walk_virt(desc, walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
-
-		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-
-		/* Process multi-block batch */
-		if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-			do {
-				if (enc)
-					serpent_enc_blk_xway(ctx, wdst, wsrc);
-				else
-					serpent_dec_blk_xway(ctx, wdst, wsrc);
-
-				wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
-				wdst += bsize * SERPENT_PARALLEL_BLOCKS;
-				nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-			} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
-			if (nbytes < bsize)
-				goto done;
-		}
-
-		/* Handle leftovers */
-		do {
-			if (enc)
-				__serpent_encrypt(ctx, wdst, wsrc);
-			else
-				__serpent_decrypt(ctx, wdst, wsrc);
-
-			wsrc += bsize;
-			wdst += bsize;
-			nbytes -= bsize;
-		} while (nbytes >= bsize);
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
-	}
+	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+	unsigned int j;
 
-	serpent_fpu_end(fpu_enabled);
-	return err;
-}
+	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, true);
+	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
 {
-	struct blkcipher_walk walk;
+	be128 ctrblk;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, false);
-}
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		__serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
-	return nbytes;
+	__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				   u128 *iv)
 {
-	struct blkcipher_walk walk;
-	int err;
+	be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+	unsigned int i;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
 	}
 
-	return err;
+	serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
-	u128 last_iv;
-	int i;
-
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
-
-	last_iv = *src;
-
-	/* Process multi-block batch */
-	if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-		do {
-			nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
-			src -= SERPENT_PARALLEL_BLOCKS - 1;
-			dst -= SERPENT_PARALLEL_BLOCKS - 1;
-
-			for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
-				ivs[i] = src[i];
-
-			serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
-			for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
-				u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
-
-			nbytes -= bsize;
-			if (nbytes < bsize)
-				goto done;
+static const struct common_glue_ctx serpent_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
 
-			u128_xor(dst, dst, src - 1);
-			src -= 1;
-			dst -= 1;
-		} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	for (;;) {
-		__serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
-
-		nbytes -= bsize;
-		if (nbytes < bsize)
-			break;
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+	} }
+};
 
-		u128_xor(dst, dst, src - 1);
-		src -= 1;
-		dst -= 1;
-	}
+static const struct common_glue_ctx serpent_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+	} }
+};
 
-done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
+static const struct common_glue_ctx serpent_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+	} }
+};
 
-	return nbytes;
-}
+static const struct common_glue_ctx serpent_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+	} }
+};
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	serpent_fpu_end(fpu_enabled);
-	return err;
+	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
 }
 
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = cpu_to_be64(src->a);
-	dst->b = cpu_to_be64(src->b);
+	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
 }
 
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = be64_to_cpu(src->a);
-	dst->b = be64_to_cpu(src->b);
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
+				     dst, src, nbytes);
 }
 
-static inline void u128_inc(u128 *i)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	i->b++;
-	if (!i->b)
-		i->a++;
+	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
+				       nbytes);
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
 {
-	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 *ctrblk = walk->iv;
-	u8 keystream[SERPENT_BLOCK_SIZE];
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-
-	__serpent_encrypt(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
-
-	crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
+	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
 {
-	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ctrblk;
-	be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
-	int i;
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
-	/* Process multi-block batch */
-	if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-		do {
-			/* create ctrblks for parallel encrypt */
-			for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
-				if (dst != src)
-					dst[i] = src[i];
-
-				u128_to_be128(&ctrblocks[i], &ctrblk);
-				u128_inc(&ctrblk);
-			}
-
-			serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
-						 (u8 *)ctrblocks);
-
-			src += SERPENT_PARALLEL_BLOCKS;
-			dst += SERPENT_PARALLEL_BLOCKS;
-			nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-		} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	do {
-		if (dst != src)
-			*dst = *src;
-
-		u128_to_be128(&ctrblocks[0], &ctrblk);
-		u128_inc(&ctrblk);
-
-		__serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
-		u128_xor(dst, dst, (u128 *)ctrblocks);
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-done:
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
+	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static inline void serpent_fpu_end(bool fpu_enabled)
 {
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
-		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	serpent_fpu_end(fpu_enabled);
-
-	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
+	glue_fpu_end(fpu_enabled);
 }
 
 struct crypt_priv {
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ret;
 }
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int __ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct blkcipher_desc desc;
-
-	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-	desc.info = req->info;
-	desc.flags = 0;
-
-	return crypto_blkcipher_crt(desc.tfm)->encrypt(
-		&desc, req->dst, req->src, req->nbytes);
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		return __ablk_encrypt(req);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init(struct crypto_tfm *tfm)
-{
-	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-	char drv_name[CRYPTO_MAX_ALG_NAME];
-
-	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
-					crypto_tfm_alg_driver_name(tfm));
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__ecb-serpent-sse2",
 	.cra_driver_name	= "__driver-ecb-serpent-sse2",
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index b2c2f57d70e8447e593fc9de00fbb2ca3e05756f..49d6987a73d9dca7360471f146447615e32d0256 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3
  */
 SHA1_VECTOR_ASM     sha1_transform_ssse3
 
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
 
 .macro W_PRECALC_AVX
 
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f916499d0abecec3a30f717fd0232e97c621b28a..4a11a9d72451625c747e3111a6e09593f15c40b1 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -35,7 +35,7 @@
 
 asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
 				     unsigned int rounds);
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
 asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
 				   unsigned int rounds);
 #endif
@@ -184,7 +184,7 @@ static struct shash_alg alg = {
 	}
 };
 
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
 {
 	u64 xcr0;
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void)
 	if (cpu_has_ssse3)
 		sha1_transform_asm = sha1_transform_ssse3;
 
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
 	/* allow AVX to override SSSE3, it's a little faster */
 	if (avx_usable())
 		sha1_transform_asm = sha1_transform_avx;
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
new file mode 100644
index 0000000000000000000000000000000000000000..35f45574390d1d0da07305ac031e88119d2eb9c7
--- /dev/null
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -0,0 +1,300 @@
+/*
+ * Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "twofish-avx-x86_64-asm_64.S"
+.text
+
+/*
+ * structure of crypto context: byte offsets of the four 1024-byte
+ * key-dependent s-box tables, the whitening keys (w) and the round keys (k)
+ */
+#define s0	0
+#define s1	1024
+#define s2	2048
+#define s3	3072
+#define w	4096
+#define k	4128
+
+/**********************************************************************
+  8-way AVX twofish
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX %xmm8
+#define RY %xmm9
+
+#define RK1 %xmm10
+#define RK2 %xmm11
+
+#define RID1  %rax
+#define RID1b %al
+#define RID2  %rbx
+#define RID2b %bl
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RGS1  %r8
+#define RGS1d %r8d
+#define RGS2  %r9
+#define RGS2d %r9d
+#define RGS3  %r10
+#define RGS3d %r10d
+
+
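+/*
+ * four key-dependent s-box lookups, one per byte of the low 32 bits of
+ * src, XORed together into dst
+ */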
+#define lookup_32bit(t0, t1, t2, t3, src, dst) \
+	movb		src ## bl,        RID1b;     \
+	movb		src ## bh,        RID2b;     \
+	movl		t0(CTX, RID1, 4), dst ## d;  \
+	xorl		t1(CTX, RID2, 4), dst ## d;  \
+	shrq $16,	src;                         \
+	movb		src ## bl,        RID1b;     \
+	movb		src ## bh,        RID2b;     \
+	xorl		t2(CTX, RID1, 4), dst ## d;  \
+	xorl		t3(CTX, RID2, 4), dst ## d;
+
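+/*
+ * g-function for the four 32-bit words in vector a: move them to
+ * general-purpose registers, run lookup_32bit on each and merge the
+ * results back into x.
+ */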
+#define G(a, x, t0, t1, t2, t3) \
+	vmovq		a,    RGI1;               \
+	vpsrldq $8,	a,    x;                  \
+	vmovq		x,    RGI2;               \
+	\
+	lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
+	shrq $16,	RGI1;                     \
+	lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
+	shlq $32,	RGS2;                     \
+	orq		RGS1, RGS2;               \
+	\
+	lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
+	shrq $16,	RGI2;                     \
+	lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
+	shlq $32,	RGS3;                     \
+	orq		RGS1, RGS3;               \
+	\
+	vmovq		RGS2, x;                  \
+	vpinsrq $1,	RGS3, x, x;
+
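+/*
+ * one Twofish round on a four-block group: g-functions of a and b, PHT
+ * additions with the round keys, then the XOR and 1-bit rotates of c and
+ * d.  decround is the exact inverse.
+ */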
+#define encround(a, b, c, d, x, y) \
+	G(a, x, s0, s1, s2, s3);           \
+	G(b, y, s1, s2, s3, s0);           \
+	vpaddd			x, y,   x; \
+	vpaddd			y, x,   y; \
+	vpaddd			x, RK1, x; \
+	vpaddd			y, RK2, y; \
+	vpxor			x, c,   c; \
+	vpsrld $1,		c, x;      \
+	vpslld $(32 - 1),	c, c;      \
+	vpor			c, x,   c; \
+	vpslld $1,		d, x;      \
+	vpsrld $(32 - 1),	d, d;      \
+	vpor			d, x,   d; \
+	vpxor			d, y,   d;
+
+#define decround(a, b, c, d, x, y) \
+	G(a, x, s0, s1, s2, s3);           \
+	G(b, y, s1, s2, s3, s0);           \
+	vpaddd			x, y,   x; \
+	vpaddd			y, x,   y; \
+	vpaddd			y, RK2, y; \
+	vpxor			d, y,   d; \
+	vpsrld $1,		d, y;      \
+	vpslld $(32 - 1),	d, d;      \
+	vpor			d, y,   d; \
+	vpslld $1,		c, y;      \
+	vpsrld $(32 - 1),	c, c;      \
+	vpor			c, y,   c; \
+	vpaddd			x, RK1, x; \
+	vpxor			x, c,   c;
+
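+/* broadcast the two subkeys of round n and run the round on both groups */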
+#define encrypt_round(n, a, b, c, d) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
+	encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
+	encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define decrypt_round(n, a, b, c, d) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
+	decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
+	decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define encrypt_cycle(n) \
+	encrypt_round((2*n), RA, RB, RC, RD);       \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB);
+
+#define decrypt_cycle(n) \
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
+	decrypt_round((2*n), RA, RB, RC, RD);
+
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t2; \
+	vpunpckldq		x3, x2, t1; \
+	vpunpckhdq		x3, x2, x3; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1; \
+	vpunpcklqdq		x3, t2, x2; \
+	vpunpckhqdq		x3, t2, x3;
+
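+/* input whitening: XOR each block with the whitening key, then transpose */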
+#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
+	vpxor (0*4*4)(in),	wkey, x0; \
+	vpxor (1*4*4)(in),	wkey, x1; \
+	vpxor (2*4*4)(in),	wkey, x2; \
+	vpxor (3*4*4)(in),	wkey, x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpxor		x0, wkey, x0;     \
+	vmovdqu		x0, (0*4*4)(out); \
+	vpxor		x1, wkey, x1;     \
+	vmovdqu		x1, (1*4*4)(out); \
+	vpxor		x2, wkey, x2;     \
+	vmovdqu		x2, (2*4*4)(out); \
+	vpxor		x3, wkey, x3;     \
+	vmovdqu		x3, (3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpxor		x0, wkey, x0;         \
+	vpxor		(0*4*4)(out), x0, x0; \
+	vmovdqu		x0, (0*4*4)(out);     \
+	vpxor		x1, wkey, x1;         \
+	vpxor		(1*4*4)(out), x1, x1; \
+	vmovdqu		x1, (1*4*4)(out);     \
+	vpxor		x2, wkey, x2;         \
+	vpxor		(2*4*4)(out), x2, x2; \
+	vmovdqu		x2, (2*4*4)(out);     \
+	vpxor		x3, wkey, x3;         \
+	vpxor		(3*4*4)(out), x3, x3; \
+	vmovdqu		x3, (3*4*4)(out);
+
+.align 8
+.global __twofish_enc_blk_8way
+.type   __twofish_enc_blk_8way,@function;
+
+__twofish_enc_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqu w(CTX), RK1;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	encrypt_cycle(0);
+	encrypt_cycle(1);
+	encrypt_cycle(2);
+	encrypt_cycle(3);
+	encrypt_cycle(4);
+	encrypt_cycle(5);
+	encrypt_cycle(6);
+	encrypt_cycle(7);
+
+	vmovdqu (w+4*4)(CTX), RK1;
+
+	popq %rcx;
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+	ret;
+
+__enc_xor8:
+	outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+	ret;
+
+.align 8
+.global twofish_dec_blk_8way
+.type   twofish_dec_blk_8way,@function;
+
+twofish_dec_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu (w+4*4)(CTX), RK1;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	decrypt_cycle(7);
+	decrypt_cycle(6);
+	decrypt_cycle(5);
+	decrypt_cycle(4);
+	decrypt_cycle(3);
+	decrypt_cycle(2);
+	decrypt_cycle(1);
+	decrypt_cycle(0);
+
+	vmovdqu (w)(CTX), RK1;
+
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+
+	ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
new file mode 100644
index 0000000000000000000000000000000000000000..782b67ddaf6a833724973c9959e42d2e0cb41ec1
--- /dev/null
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -0,0 +1,624 @@
+/*
+ * Glue Code for AVX assembler version of Twofish Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/twofish.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/twofish.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#define TWOFISH_PARALLEL_BLOCKS 8
+
+static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__twofish_enc_blk_3way(ctx, dst, src, false);
+}
+
+/* 8-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
+				       const u8 *src, bool xor);
+asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
+				     const u8 *src);
+
+static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__twofish_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
+					    const u8 *src)
+{
+	__twofish_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	twofish_dec_blk_8way(ctx, dst, src);
+}
+
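+/*
+ * CBC decryption of a full 8-block batch: save the ciphertext blocks used
+ * as IVs for blocks 1..7, bulk-decrypt, then XOR them back in.
+ */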
+static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				     u128 *iv)
+{
+	be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx twofish_enc = {
+	.num_funcs = 3,
+	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) }
+	}, {
+		.num_blocks = 3,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+	} }
+};
+
+static const struct common_glue_ctx twofish_ctr = {
+	.num_funcs = 3,
+	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) }
+	}, {
+		.num_blocks = 3,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx twofish_dec = {
+	.num_funcs = 3,
+	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) }
+	}, {
+		.num_blocks = 3,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+	} }
+};
+
+static const struct common_glue_ctx twofish_dec_cbc = {
+	.num_funcs = 3,
+	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) }
+	}, {
+		.num_blocks = 3,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
+			      fpu_enabled, nbytes);
+}
+
+static inline void twofish_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+	struct twofish_ctx *ctx;
+	bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = TF_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
+		twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+		twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+
+	nbytes %= bsize * 3;
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = TF_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
+		twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+		twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
+
+	nbytes %= bsize * 3;
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[TWOFISH_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->twofish_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[TWOFISH_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->twofish_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[TWOFISH_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[TWOFISH_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg twofish_algs[10] = { {
+	.cra_name		= "__ecb-twofish-avx",
+	.cra_driver_name	= "__driver-ecb-twofish-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[0].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-twofish-avx",
+	.cra_driver_name	= "__driver-cbc-twofish-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-twofish-avx",
+	.cra_driver_name	= "__driver-ctr-twofish-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-twofish-avx",
+	.cra_driver_name	= "__driver-lrw-twofish-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[3].cra_list),
+	.cra_exit		= lrw_twofish_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE +
+					  TF_BLOCK_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE +
+					  TF_BLOCK_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= lrw_twofish_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-twofish-avx",
+	.cra_driver_name	= "__driver-xts-twofish-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[4].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE * 2,
+			.max_keysize	= TF_MAX_KEY_SIZE * 2,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= xts_twofish_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(twofish)",
+	.cra_driver_name	= "ecb-twofish-avx",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[5].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(twofish)",
+	.cra_driver_name	= "cbc-twofish-avx",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[6].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(twofish)",
+	.cra_driver_name	= "ctr-twofish-avx",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[7].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+}, {
+	.cra_name		= "lrw(twofish)",
+	.cra_driver_name	= "lrw-twofish-avx",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[8].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE +
+					  TF_BLOCK_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE +
+					  TF_BLOCK_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "xts(twofish)",
+	.cra_driver_name	= "xts-twofish-avx",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(twofish_algs[9].cra_list),
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE * 2,
+			.max_keysize	= TF_MAX_KEY_SIZE * 2,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+} };
+
+static int __init twofish_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		printk(KERN_INFO "AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		printk(KERN_INFO "AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+}
+
+static void __exit twofish_exit(void)
+{
+	crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+}
+
+module_init(twofish_init);
+module_exit(twofish_exit);
+
+MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("twofish");
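
Note: the asynchronous "ecb/cbc/ctr/lrw/xts(twofish)" instances registered above are reached through the ordinary ablkcipher API; at priority 400 they should normally win over the generic and 3-way drivers once this module is loaded. The sketch below is illustrative only and is not part of the patch: the demo module name, the all-zero key and the 64-byte buffer are made up, and it simply follows the usual completion-wait pattern for asynchronous requests.

	/*
	 * Throwaway demo (not part of this patch): encrypt four Twofish blocks
	 * through whatever "cbc(twofish)" implementation currently has the
	 * highest priority -- the AVX glue above, if it is loaded.
	 */
	#include <linux/module.h>
	#include <linux/err.h>
	#include <linux/crypto.h>
	#include <linux/completion.h>
	#include <linux/scatterlist.h>
	#include <crypto/twofish.h>

	struct demo_result {
		struct completion done;
		int err;
	};

	static void demo_complete(struct crypto_async_request *areq, int err)
	{
		struct demo_result *res = areq->data;

		if (err == -EINPROGRESS)
			return;		/* backlogged request has only started */
		res->err = err;
		complete(&res->done);
	}

	static int __init tf_avx_demo_init(void)
	{
		struct crypto_ablkcipher *tfm;
		struct ablkcipher_request *req;
		struct demo_result res;
		struct scatterlist sg;
		static u8 buf[4 * TF_BLOCK_SIZE];	/* encrypted in place */
		u8 key[32] = { 0 };			/* 256-bit demo key */
		u8 iv[TF_BLOCK_SIZE] = { 0 };
		int err;

		tfm = crypto_alloc_ablkcipher("cbc(twofish)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_ablkcipher_setkey(tfm, key, sizeof(key));
		if (err)
			goto out_tfm;

		req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_tfm;
		}

		init_completion(&res.done);
		sg_init_one(&sg, buf, sizeof(buf));
		ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
						demo_complete, &res);
		ablkcipher_request_set_crypt(req, &sg, &sg, sizeof(buf), iv);

		err = crypto_ablkcipher_encrypt(req);
		if (err == -EINPROGRESS || err == -EBUSY) {
			wait_for_completion(&res.done);
			err = res.err;
		}
		pr_info("cbc(twofish) demo encrypt: %d\n", err);

		ablkcipher_request_free(req);
	out_tfm:
		crypto_free_ablkcipher(tfm);
		return err;
	}
	module_init(tf_avx_demo_init);
	MODULE_LICENSE("GPL");
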
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 922ab24cce3152c1a2737043d316e015ed794e27..15f9347316c8409a243f7db60b91f128de4618a9 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -3,11 +3,6 @@
  *
  * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
  *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -33,20 +28,13 @@
 #include <crypto/algapi.h>
 #include <crypto/twofish.h>
 #include <crypto/b128ops.h>
+#include <asm/crypto/twofish.h>
+#include <asm/crypto/glue_helper.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 
-/* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
-
-/* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				       const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
+EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
+EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
 
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
 	__twofish_enc_blk_3way(ctx, dst, src, true);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
-		     void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
-{
-	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = TF_BLOCK_SIZE;
-	unsigned int nbytes;
-	int err;
-
-	err = blkcipher_walk_virt(desc, walk);
-
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
-
-		/* Process three block batch */
-		if (nbytes >= bsize * 3) {
-			do {
-				fn_3way(ctx, wdst, wsrc);
-
-				wsrc += bsize * 3;
-				wdst += bsize * 3;
-				nbytes -= bsize * 3;
-			} while (nbytes >= bsize * 3);
-
-			if (nbytes < bsize)
-				goto done;
-		}
-
-		/* Handle leftovers */
-		do {
-			fn(ctx, wdst, wsrc);
-
-			wsrc += bsize;
-			wdst += bsize;
-			nbytes -= bsize;
-		} while (nbytes >= bsize);
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
 {
-	struct blkcipher_walk walk;
+	u128 ivs[2];
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
-}
+	ivs[0] = src[0];
+	ivs[1] = src[1];
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
+	u128_xor(&dst[1], &dst[1], &ivs[0]);
+	u128_xor(&dst[2], &dst[2], &ivs[1]);
 }
+EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = TF_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
-	return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
 {
-	struct blkcipher_walk walk;
-	int err;
+	be128 ctrblk;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	if (dst != src)
+		*dst = *src;
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
 
-	return err;
+	twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, dst, (u128 *)&ctrblk);
 }
+EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+				     u128 *iv)
 {
-	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = TF_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ivs[3 - 1];
-	u128 last_iv;
-
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
-
-	last_iv = *src;
-
-	/* Process three block batch */
-	if (nbytes >= bsize * 3) {
-		do {
-			nbytes -= bsize * (3 - 1);
-			src -= 3 - 1;
-			dst -= 3 - 1;
-
-			ivs[0] = src[0];
-			ivs[1] = src[1];
-
-			twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
-
-			u128_xor(dst + 1, dst + 1, ivs + 0);
-			u128_xor(dst + 2, dst + 2, ivs + 1);
-
-			nbytes -= bsize;
-			if (nbytes < bsize)
-				goto done;
-
-			u128_xor(dst, dst, src - 1);
-			src -= 1;
-			dst -= 1;
-		} while (nbytes >= bsize * 3);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	for (;;) {
-		twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
-		nbytes -= bsize;
-		if (nbytes < bsize)
-			break;
+	be128 ctrblks[3];
 
-		u128_xor(dst, dst, src - 1);
-		src -= 1;
-		dst -= 1;
+	if (dst != src) {
+		dst[0] = src[0];
+		dst[1] = src[1];
+		dst[2] = src[2];
 	}
 
-done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
+	u128_to_be128(&ctrblks[0], iv);
+	u128_inc(iv);
+	u128_to_be128(&ctrblks[1], iv);
+	u128_inc(iv);
+	u128_to_be128(&ctrblks[2], iv);
+	u128_inc(iv);
 
-	return nbytes;
+	twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
+EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
+
+static const struct common_glue_ctx twofish_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 3,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+	} }
+};
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+static const struct common_glue_ctx twofish_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 3,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
+	} }
+};
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+static const struct common_glue_ctx twofish_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 3,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+	} }
+};
 
-	return err;
-}
+static const struct common_glue_ctx twofish_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 3,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+	} }
+};
 
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = cpu_to_be64(src->a);
-	dst->b = cpu_to_be64(src->b);
+	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
 }
 
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = be64_to_cpu(src->a);
-	dst->b = be64_to_cpu(src->b);
+	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
 }
 
-static inline void u128_inc(u128 *i)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	i->b++;
-	if (!i->b)
-		i->a++;
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
+				       dst, src, nbytes);
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 *ctrblk = walk->iv;
-	u8 keystream[TF_BLOCK_SIZE];
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-
-	twofish_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
-
-	crypto_inc(ctrblk, TF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
-{
-	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = TF_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ctrblk;
-	be128 ctrblocks[3];
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
-	/* Process three block batch */
-	if (nbytes >= bsize * 3) {
-		do {
-			if (dst != src) {
-				dst[0] = src[0];
-				dst[1] = src[1];
-				dst[2] = src[2];
-			}
-
-			/* create ctrblks for parallel encrypt */
-			u128_to_be128(&ctrblocks[0], &ctrblk);
-			u128_inc(&ctrblk);
-			u128_to_be128(&ctrblocks[1], &ctrblk);
-			u128_inc(&ctrblk);
-			u128_to_be128(&ctrblocks[2], &ctrblk);
-			u128_inc(&ctrblk);
-
-			twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
-						 (u8 *)ctrblocks);
-
-			src += 3;
-			dst += 3;
-			nbytes -= bsize * 3;
-		} while (nbytes >= bsize * 3);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	do {
-		if (dst != src)
-			*dst = *src;
-
-		u128_to_be128(&ctrblocks[0], &ctrblk);
-		u128_inc(&ctrblk);
-
-		twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
-		u128_xor(dst, dst, (u128 *)ctrblocks);
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-done:
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
+	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
+				       nbytes);
 }
 
 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		     struct scatterlist *src, unsigned int nbytes)
 {
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
-
-	while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
+	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
 }
 
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 		twofish_dec_blk(ctx, srcdst, srcdst);
 }
 
-struct twofish_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct twofish_ctx twofish_ctx;
-};
-
-static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
+int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int keylen)
 {
 	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
 	int err;
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
 
 	return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
 }
+EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
 
 static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return lrw_crypt(desc, dst, src, nbytes, &req);
 }
 
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
+void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	lrw_free_table(&ctx->lrw_table);
 }
+EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
 
-struct twofish_xts_ctx {
-	struct twofish_ctx tweak_ctx;
-	struct twofish_ctx crypt_ctx;
-};
-
-static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
+int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int keylen)
 {
 	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
 				flags);
 }
+EXPORT_SYMBOL_GPL(xts_twofish_setkey);
 
 static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
 	.cra_list		= LIST_HEAD_INIT(tf_algs[3].cra_list),
-	.cra_exit		= lrw_exit_tfm,
+	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f93df50c23e4401339bc7d9714e6bdaee62fec9
--- /dev/null
+++ b/arch/x86/include/asm/crypto/ablk_helper.h
@@ -0,0 +1,31 @@
+/*
+ * Shared async block cipher helpers
+ */
+
+#ifndef _CRYPTO_ABLK_HELPER_H
+#define _CRYPTO_ABLK_HELPER_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <crypto/cryptd.h>
+
+struct async_helper_ctx {
+	struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+			unsigned int key_len);
+
+extern int __ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_decrypt(struct ablkcipher_request *req);
+
+extern void ablk_exit(struct crypto_tfm *tfm);
+
+extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
+
+extern int ablk_init(struct crypto_tfm *tfm);
+
+#endif /* _CRYPTO_ABLK_HELPER_H */
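
Note: the helpers declared here are only half of the picture; a glue module wires them into its crypto_alg entries through .cra_init/.cra_exit and the ablk_* entry points, exactly as the Twofish AVX glue above does with plain ablk_init(). Judging by how that glue names its internal algorithms "__driver-...-avx" and still uses ablk_init(), the generic init presumably resolves the cryptd target from the driver name; ablk_init_common() lets a module name the target explicitly. The sketch below is hypothetical -- "mycipher" does not exist and only stands in for a real AVX glue driver.

	/* Hypothetical use of ablk_init_common(): bind the async wrapper to an
	 * explicitly named internal implementation. */
	#include <asm/crypto/ablk_helper.h>

	static int ablk_ecb_mycipher_init(struct crypto_tfm *tfm)
	{
		return ablk_init_common(tfm, "__driver-ecb-mycipher-avx");
	}

	/* in the corresponding crypto_alg entry:
	 *	.cra_init = ablk_ecb_mycipher_init,
	 *	.cra_exit = ablk_exit,
	 */
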
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h
similarity index 100%
rename from arch/x86/include/asm/aes.h
rename to arch/x86/include/asm/crypto/aes.h
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..3e408bddc96f1f80ae66c60152c14533fdd12012
--- /dev/null
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -0,0 +1,115 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ */
+
+#ifndef _CRYPTO_GLUE_HELPER_H
+#define _CRYPTO_GLUE_HELPER_H
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <asm/i387.h>
+#include <crypto/b128ops.h>
+
+typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
+typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+				       u128 *iv);
+
+#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
+#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
+#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+
+struct common_glue_func_entry {
+	unsigned int num_blocks; /* number of blocks that @fn will process */
+	union {
+		common_glue_func_t ecb;
+		common_glue_cbc_func_t cbc;
+		common_glue_ctr_func_t ctr;
+	} fn_u;
+};
+
+struct common_glue_ctx {
+	unsigned int num_funcs;
+	int fpu_blocks_limit; /* -1 means fpu not needed at all */
+
+	/*
+	 * The first funcs entry must have the largest num_blocks and the
+	 * last funcs entry must have num_blocks == 1!
+	 */
+	struct common_glue_func_entry funcs[];
+};
+
+static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
+				  struct blkcipher_desc *desc,
+				  bool fpu_enabled, unsigned int nbytes)
+{
+	if (likely(fpu_blocks_limit < 0))
+		return false;
+
+	if (fpu_enabled)
+		return true;
+
+	/*
+	 * Vector registers are only used when the chunk to be processed is
+	 * large enough, so do not enable the FPU until it is necessary.
+	 */
+	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+		return false;
+
+	if (desc) {
+		/* prevent sleeping if FPU is in use */
+		desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	}
+
+	kernel_fpu_begin();
+	return true;
+}
+
+static inline void glue_fpu_end(bool fpu_enabled)
+{
+	if (fpu_enabled)
+		kernel_fpu_end();
+}
+
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+	dst->a = cpu_to_be64(src->a);
+	dst->b = cpu_to_be64(src->b);
+}
+
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+	dst->a = be64_to_cpu(src->a);
+	dst->b = be64_to_cpu(src->b);
+}
+
+static inline void u128_inc(u128 *i)
+{
+	i->b++;
+	if (!i->b)
+		i->a++;
+}
+
+extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes);
+
+extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+				   struct blkcipher_desc *desc,
+				   struct scatterlist *dst,
+				   struct scatterlist *src,
+				   unsigned int nbytes);
+
+extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+				   struct blkcipher_desc *desc,
+				   struct scatterlist *dst,
+				   struct scatterlist *src,
+				   unsigned int nbytes);
+
+extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes);
+
+#endif /* _CRYPTO_GLUE_HELPER_H */
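
Note: the u128 helpers above keep the CTR counter as two host-endian 64-bit halves, with .a holding the high half and .b the low half, and only convert to big-endian when the counter block is actually built. A stand-alone userspace illustration of the carry behaviour follows; the struct and function names are stand-ins, not the kernel types.

	#include <stdio.h>
	#include <stdint.h>

	/* Mirrors u128_inc(): .b is the low 64 bits, .a the high 64 bits. */
	struct demo_u128 { uint64_t a, b; };

	static void demo_u128_inc(struct demo_u128 *i)
	{
		i->b++;
		if (!i->b)	/* low half wrapped: carry into high half */
			i->a++;
	}

	int main(void)
	{
		struct demo_u128 ctr = { .a = 0, .b = 0xffffffffffffffffULL };

		demo_u128_inc(&ctr);
		printf("a=%016llx b=%016llx\n",
		       (unsigned long long)ctr.a, (unsigned long long)ctr.b);
		/* prints a=0000000000000001 b=0000000000000000 */
		return 0;
	}
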
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
new file mode 100644
index 0000000000000000000000000000000000000000..432deedd294570bd1b0807491fca242027d70ab7
--- /dev/null
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -0,0 +1,32 @@
+#ifndef ASM_X86_SERPENT_AVX_H
+#define ASM_X86_SERPENT_AVX_H
+
+#include <linux/crypto.h>
+#include <crypto/serpent.h>
+
+#define SERPENT_PARALLEL_BLOCKS 8
+
+asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+					   const u8 *src, bool xor);
+asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+					 const u8 *src);
+
+static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+				   const u8 *src)
+{
+	__serpent_enc_blk_8way_avx(ctx, dst, src, false);
+}
+
+static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+				       const u8 *src)
+{
+	__serpent_enc_blk_8way_avx(ctx, dst, src, true);
+}
+
+static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+				   const u8 *src)
+{
+	serpent_dec_blk_8way_avx(ctx, dst, src);
+}
+
+#endif
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h
similarity index 95%
rename from arch/x86/include/asm/serpent.h
rename to arch/x86/include/asm/crypto/serpent-sse2.h
index d3ef63fe0c8193b9a50204d3d2106b4e24801bc2..e6e77dffbdabf10e830c6c7be5a70cdc016c862a 100644
--- a/arch/x86/include/asm/serpent.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -1,5 +1,5 @@
-#ifndef ASM_X86_SERPENT_H
-#define ASM_X86_SERPENT_H
+#ifndef ASM_X86_SERPENT_SSE2_H
+#define ASM_X86_SERPENT_SSE2_H
 
 #include <linux/crypto.h>
 #include <crypto/serpent.h>
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d2c514bd5f90020b3d22036a73ab816735b3416
--- /dev/null
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -0,0 +1,46 @@
+#ifndef ASM_X86_TWOFISH_H
+#define ASM_X86_TWOFISH_H
+
+#include <linux/crypto.h>
+#include <crypto/twofish.h>
+#include <crypto/lrw.h>
+#include <crypto/b128ops.h>
+
+struct twofish_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct twofish_ctx twofish_ctx;
+};
+
+struct twofish_xts_ctx {
+	struct twofish_ctx tweak_ctx;
+	struct twofish_ctx crypt_ctx;
+};
+
+/* regular block cipher functions from twofish_x86_64 module */
+asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+
+/* 3-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+				       const u8 *src, bool xor);
+asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+				     const u8 *src);
+
+/* helpers from twofish_x86_64-3way module */
+extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
+extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+				u128 *iv);
+extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+				     u128 *iv);
+
+extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen);
+
+extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
+
+extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen);
+
+#endif /* ASM_X86_TWOFISH_H */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 8e84225c096b6adfafcde59d08e066e5751af9c2..a3238051b03e73b2ab3f79e267b1e47a8716dc74 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -174,6 +174,16 @@ config CRYPTO_TEST
 	help
 	  Quick & dirty crypto test module.
 
+config CRYPTO_ABLK_HELPER_X86
+	tristate
+	depends on X86
+	select CRYPTO_CRYPTD
+
+config CRYPTO_GLUE_HELPER_X86
+	tristate
+	depends on X86
+	select CRYPTO_ALGAPI
+
 comment "Authenticated Encryption with Associated Data"
 
 config CRYPTO_CCM
@@ -552,6 +562,7 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_AES_X86_64 if 64BIT
 	select CRYPTO_AES_586 if !64BIT
 	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
 	select CRYPTO_ALGAPI
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
@@ -593,7 +604,7 @@ config CRYPTO_ANUBIS
 
 config CRYPTO_ARC4
 	tristate "ARC4 cipher algorithm"
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	help
 	  ARC4 cipher algorithm.
 
@@ -660,6 +671,7 @@ config CRYPTO_CAMELLIA_X86_64
 	depends on X86 && 64BIT
 	depends on CRYPTO
 	select CRYPTO_ALGAPI
+	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_LRW
 	select CRYPTO_XTS
 	help
@@ -786,6 +798,8 @@ config CRYPTO_SERPENT_SSE2_X86_64
 	depends on X86 && 64BIT
 	select CRYPTO_ALGAPI
 	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
 	select CRYPTO_LRW
 	select CRYPTO_XTS
@@ -806,6 +820,8 @@ config CRYPTO_SERPENT_SSE2_586
 	depends on X86 && !64BIT
 	select CRYPTO_ALGAPI
 	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
 	select CRYPTO_LRW
 	select CRYPTO_XTS
@@ -821,6 +837,28 @@ config CRYPTO_SERPENT_SSE2_586
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
+config CRYPTO_SERPENT_AVX_X86_64
+	tristate "Serpent cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SERPENT
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
+
+	  Keys are allowed to be from 0 to 256 bits in length, in steps
+	  of 8 bits.
+
+	  This module provides the Serpent cipher algorithm that processes
+	  eight blocks in parallel using the AVX instruction set.
+
+	  See also:
+	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -897,6 +935,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 	select CRYPTO_ALGAPI
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
+	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_LRW
 	select CRYPTO_XTS
 	help
@@ -913,6 +952,32 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 	  See also:
 	  <http://www.schneier.com/twofish.html>
 
+config CRYPTO_TWOFISH_AVX_X86_64
+	tristate "Twofish cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_TWOFISH_COMMON
+	select CRYPTO_TWOFISH_X86_64
+	select CRYPTO_TWOFISH_X86_64_3WAY
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  Twofish cipher algorithm (x86_64/AVX).
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16-round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  This module provides the Twofish cipher algorithm that processes
+	  eight blocks in parallel using the AVX instruction set.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
 comment "Compression"
 
 config CRYPTO_DEFLATE
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 056571b85445a20bf80f0054c0b2365a666edfde..c3b9bfeeb7ffcf7ab04d11a2c9a1e053da6c972e 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -24,22 +24,6 @@
 
 static LIST_HEAD(crypto_template_list);
 
-void crypto_larval_error(const char *name, u32 type, u32 mask)
-{
-	struct crypto_alg *alg;
-
-	alg = crypto_alg_lookup(name, type, mask);
-
-	if (alg) {
-		if (crypto_is_larval(alg)) {
-			struct crypto_larval *larval = (void *)alg;
-			complete_all(&larval->completion);
-		}
-		crypto_mod_put(alg);
-	}
-}
-EXPORT_SYMBOL_GPL(crypto_larval_error);
-
 static inline int crypto_set_driver_name(struct crypto_alg *alg)
 {
 	static const char suffix[] = "-generic";
@@ -295,7 +279,6 @@ void crypto_alg_tested(const char *name, int err)
 				continue;
 
 			larval->adult = alg;
-			complete_all(&larval->completion);
 			continue;
 		}
 
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 791d194958fa18ac0fb8006e5d565f789c1e4f92..769219b293098d43bd8406e296ff8b388869e8c1 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -11,6 +11,7 @@
  */
 
 #include <crypto/internal/aead.h>
+#include <linux/completion.h>
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -47,6 +48,8 @@ struct cryptomgr_param {
 	char larval[CRYPTO_MAX_ALG_NAME];
 	char template[CRYPTO_MAX_ALG_NAME];
 
+	struct completion *completion;
+
 	u32 otype;
 	u32 omask;
 };
@@ -66,7 +69,7 @@ static int cryptomgr_probe(void *data)
 
 	tmpl = crypto_lookup_template(param->template);
 	if (!tmpl)
-		goto err;
+		goto out;
 
 	do {
 		if (tmpl->create) {
@@ -83,16 +86,10 @@ static int cryptomgr_probe(void *data)
 
 	crypto_tmpl_put(tmpl);
 
-	if (err)
-		goto err;
-
 out:
+	complete_all(param->completion);
 	kfree(param);
 	module_put_and_exit(0);
-
-err:
-	crypto_larval_error(param->larval, param->otype, param->omask);
-	goto out;
 }
 
 static int cryptomgr_schedule_probe(struct crypto_larval *larval)
@@ -192,10 +189,14 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 
 	memcpy(param->larval, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME);
 
+	param->completion = &larval->completion;
+
 	thread = kthread_run(cryptomgr_probe, param, "cryptomgr_probe");
 	if (IS_ERR(thread))
 		goto err_free_param;
 
+	wait_for_completion_interruptible(&larval->completion);
+
 	return NOTIFY_STOP;
 
 err_free_param:
diff --git a/crypto/arc4.c b/crypto/arc4.c
index 0d12a96da1d8629fc1b226f8f7d181a56332168b..5a772c3657d58d55c5bb453dbd6cc73a2e2a7bea 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -11,17 +11,19 @@
  * (at your option) any later version.
  *
  */
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/crypto.h>
+#include <crypto/algapi.h>
 
 #define ARC4_MIN_KEY_SIZE	1
 #define ARC4_MAX_KEY_SIZE	256
 #define ARC4_BLOCK_SIZE		1
 
 struct arc4_ctx {
-	u8 S[256];
-	u8 x, y;
+	u32 S[256];
+	u32 x, y;
 };
 
 static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -37,7 +39,7 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		ctx->S[i] = i;
 
 	for (i = 0; i < 256; i++) {
-		u8 a = ctx->S[i];
+		u32 a = ctx->S[i];
 		j = (j + in_key[k] + a) & 0xff;
 		ctx->S[i] = ctx->S[j];
 		ctx->S[j] = a;
@@ -48,51 +50,114 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static void arc4_crypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in,
+		       unsigned int len)
 {
-	struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *const S = ctx->S;
+	u32 x, y, a, b;
+	u32 ty, ta, tb;
+
+	if (len == 0)
+		return;
 
-	u8 *const S = ctx->S;
-	u8 x = ctx->x;
-	u8 y = ctx->y;
-	u8 a, b;
+	x = ctx->x;
+	y = ctx->y;
 
 	a = S[x];
 	y = (y + a) & 0xff;
 	b = S[y];
-	S[x] = b;
-	S[y] = a;
-	x = (x + 1) & 0xff;
-	*out++ = *in ^ S[(a + b) & 0xff];
+
+	do {
+		S[y] = a;
+		a = (a + b) & 0xff;
+		S[x] = b;
+		x = (x + 1) & 0xff;
+		ta = S[x];
+		ty = (y + ta) & 0xff;
+		tb = S[ty];
+		*out++ = *in++ ^ S[a];
+		if (--len == 0)
+			break;
+		y = ty;
+		a = ta;
+		b = tb;
+	} while (true);
 
 	ctx->x = x;
 	ctx->y = y;
 }
 
-static struct crypto_alg arc4_alg = {
+static void arc4_crypt_one(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	arc4_crypt(crypto_tfm_ctx(tfm), out, in, 1);
+}
+
+static int ecb_arc4_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	struct arc4_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while (walk.nbytes > 0) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
+
+		arc4_crypt(ctx, wdst, wsrc, walk.nbytes);
+
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+static struct crypto_alg arc4_algs[2] = { {
 	.cra_name		=	"arc4",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	ARC4_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct arc4_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(arc4_alg.cra_list),
-	.cra_u			=	{ .cipher = {
-	.cia_min_keysize	=	ARC4_MIN_KEY_SIZE,
-	.cia_max_keysize	=	ARC4_MAX_KEY_SIZE,
-	.cia_setkey		=	arc4_set_key,
-	.cia_encrypt		=	arc4_crypt,
-	.cia_decrypt		=	arc4_crypt } }
-};
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	ARC4_MIN_KEY_SIZE,
+			.cia_max_keysize	=	ARC4_MAX_KEY_SIZE,
+			.cia_setkey		=	arc4_set_key,
+			.cia_encrypt		=	arc4_crypt_one,
+			.cia_decrypt		=	arc4_crypt_one,
+		},
+	},
+}, {
+	.cra_name		=	"ecb(arc4)",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	ARC4_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct arc4_ctx),
+	.cra_alignmask		=	0,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize	=	ARC4_MIN_KEY_SIZE,
+			.max_keysize	=	ARC4_MAX_KEY_SIZE,
+			.setkey		=	arc4_set_key,
+			.encrypt	=	ecb_arc4_crypt,
+			.decrypt	=	ecb_arc4_crypt,
+		},
+	},
+} };
 
 static int __init arc4_init(void)
 {
-	return crypto_register_alg(&arc4_alg);
+	return crypto_register_algs(arc4_algs, ARRAY_SIZE(arc4_algs));
 }
 
-
 static void __exit arc4_exit(void)
 {
-	crypto_unregister_alg(&arc4_alg);
+	crypto_unregister_algs(arc4_algs, ARRAY_SIZE(arc4_algs));
 }
 
 module_init(arc4_init);
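
Note: the rewritten driver still produces the standard RC4 keystream; widening the state words to u32 only avoids byte-sized partial-register operations, since every value stays in the range 0..255. A stand-alone userspace reference (textbook KSA + PRGA, arbitrary test key) can be used to sanity-check the output; it is not kernel code.

	#include <stdio.h>
	#include <stdint.h>

	/* Textbook RC4 reference.  The uint32_t state mirrors the widened
	 * arc4_ctx fields; all values remain in 0..255. */
	struct rc4 { uint32_t S[256]; uint32_t x, y; };

	static void rc4_setkey(struct rc4 *c, const uint8_t *key, size_t klen)
	{
		uint32_t i, j = 0, k = 0, t;

		for (i = 0; i < 256; i++)
			c->S[i] = i;
		for (i = 0; i < 256; i++) {
			t = c->S[i];
			j = (j + key[k] + t) & 0xff;
			c->S[i] = c->S[j];
			c->S[j] = t;
			if (++k >= klen)
				k = 0;
		}
		c->x = 0;
		c->y = 0;
	}

	static void rc4_crypt(struct rc4 *c, uint8_t *out, const uint8_t *in,
			      size_t len)
	{
		uint32_t i = c->x, j = c->y, a, b;

		while (len--) {
			i = (i + 1) & 0xff;
			a = c->S[i];
			j = (j + a) & 0xff;
			b = c->S[j];
			c->S[i] = b;
			c->S[j] = a;
			*out++ = *in++ ^ c->S[(a + b) & 0xff];
		}
		c->x = i;
		c->y = j;
	}

	int main(void)
	{
		const uint8_t key[3] = { 'K', 'e', 'y' };
		const uint8_t pt[9] = "Plaintext";
		uint8_t ct[9];
		struct rc4 c;
		int n;

		rc4_setkey(&c, key, sizeof(key));
		rc4_crypt(&c, ct, pt, sizeof(pt));
		for (n = 0; n < 9; n++)
			printf("%02x", ct[n]);
		printf("\n");	/* well-known vector: bbf316e8d940af0ad3 */
		return 0;
	}
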
diff --git a/crypto/internal.h b/crypto/internal.h
index b865ca1a8613b33baa9b64ffa6c32b89039f3bf8..9ebedae3fb54abc26a97918b3fe7aaabb2b56b82 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -83,7 +83,6 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
 struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
-void crypto_larval_error(const char *name, u32 type, u32 mask);
 void crypto_alg_tested(const char *name, int err);
 
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 8f147bff09800d494b9336c381d871904255a815..5cf2ccb1540cb1731e68e3b9575ce00abe3554e0 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -809,7 +809,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec,
 			       struct cipher_speed_template *template,
 			       unsigned int tcount, u8 *keysize)
 {
-	unsigned int ret, i, j, iv_len;
+	unsigned int ret, i, j, k, iv_len;
 	struct tcrypt_result tresult;
 	const char *key;
 	char iv[128];
@@ -883,11 +883,23 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec,
 			}
 
 			sg_init_table(sg, TVMEMSIZE);
-			sg_set_buf(sg, tvmem[0] + *keysize,
+
+			k = *keysize + *b_size;
+			if (k > PAGE_SIZE) {
+				sg_set_buf(sg, tvmem[0] + *keysize,
 				   PAGE_SIZE - *keysize);
-			for (j = 1; j < TVMEMSIZE; j++) {
-				sg_set_buf(sg + j, tvmem[j], PAGE_SIZE);
-				memset(tvmem[j], 0xff, PAGE_SIZE);
+				k -= PAGE_SIZE;
+				j = 1;
+				while (k > PAGE_SIZE) {
+					sg_set_buf(sg + j, tvmem[j], PAGE_SIZE);
+					memset(tvmem[j], 0xff, PAGE_SIZE);
+					j++;
+					k -= PAGE_SIZE;
+				}
+				sg_set_buf(sg + j, tvmem[j], k);
+				memset(tvmem[j], 0xff, k);
+			} else {
+				sg_set_buf(sg, tvmem[0] + *keysize, *b_size);
 			}
 
 			iv_len = crypto_ablkcipher_ivsize(tfm);
@@ -1192,6 +1204,9 @@ static int do_test(int m)
 	case 109:
 		ret += tcrypt_test("vmac(aes)");
 		break;
+	case 110:
+		ret += tcrypt_test("hmac(crc32)");
+		break;
 
 	case 150:
 		ret += tcrypt_test("ansi_cprng");
@@ -1339,6 +1354,11 @@ static int do_test(int m)
 				  speed_template_32_64);
 		break;
 
+	case 208:
+		test_cipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
+				  speed_template_8);
+		break;
+
 	case 300:
 		/* fall through */
 
@@ -1512,6 +1532,14 @@ static int do_test(int m)
 				   speed_template_16_24_32);
 		test_acipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0,
 				   speed_template_16_24_32);
+		test_acipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("ofb(aes)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("ofb(aes)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
 		break;
 
 	case 501:
@@ -1527,6 +1555,18 @@ static int do_test(int m)
 		test_acipher_speed("cbc(des3_ede)", DECRYPT, sec,
 				   des3_speed_template, DES3_SPEED_VECTORS,
 				   speed_template_24);
+		test_acipher_speed("cfb(des3_ede)", ENCRYPT, sec,
+				   des3_speed_template, DES3_SPEED_VECTORS,
+				   speed_template_24);
+		test_acipher_speed("cfb(des3_ede)", DECRYPT, sec,
+				   des3_speed_template, DES3_SPEED_VECTORS,
+				   speed_template_24);
+		test_acipher_speed("ofb(des3_ede)", ENCRYPT, sec,
+				   des3_speed_template, DES3_SPEED_VECTORS,
+				   speed_template_24);
+		test_acipher_speed("ofb(des3_ede)", DECRYPT, sec,
+				   des3_speed_template, DES3_SPEED_VECTORS,
+				   speed_template_24);
 		break;
 
 	case 502:
@@ -1538,6 +1578,14 @@ static int do_test(int m)
 				   speed_template_8);
 		test_acipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
 				   speed_template_8);
+		test_acipher_speed("cfb(des)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8);
+		test_acipher_speed("cfb(des)", DECRYPT, sec, NULL, 0,
+				   speed_template_8);
+		test_acipher_speed("ofb(des)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8);
+		test_acipher_speed("ofb(des)", DECRYPT, sec, NULL, 0,
+				   speed_template_8);
 		break;
 
 	case 503:
@@ -1563,6 +1611,34 @@ static int do_test(int m)
 				   speed_template_32_64);
 		break;
 
+	case 504:
+		test_acipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_24_32);
+		test_acipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0,
+				   speed_template_32_40_48);
+		test_acipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0,
+				   speed_template_32_40_48);
+		test_acipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0,
+				   speed_template_32_48_64);
+		test_acipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0,
+				   speed_template_32_48_64);
+		break;
+
+	case 505:
+		test_acipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8);
+		break;
+
 	case 1000:
 		test_available();
 		break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5674878ff6c18d244ec23b4f8b7815647a814c9b..a2ca7431760a245374a269ed19563ccc4d3ece91 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "__cbc-serpent-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__cbc-serpent-sse2",
 		.test = alg_test_null,
 		.suite = {
@@ -1548,9 +1563,40 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__cbc-twofish-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-aes-aesni",
 		.test = alg_test_null,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "__driver-cbc-serpent-avx",
+		.test = alg_test_null,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1578,9 +1624,40 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-twofish-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-aes-aesni",
 		.test = alg_test_null,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "__driver-ecb-serpent-avx",
+		.test = alg_test_null,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1608,9 +1685,25 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-twofish-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__ghash-pclmulqdqni",
 		.test = alg_test_null,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = NULL,
@@ -1627,6 +1720,42 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = ANSI_CPRNG_AES_TEST_VECTORS
 			}
 		}
+	}, {
+		.alg = "authenc(hmac(sha1),cbc(aes))",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = hmac_sha1_aes_cbc_enc_tv_template,
+					.count = HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
+		.alg = "authenc(hmac(sha256),cbc(aes))",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = hmac_sha256_aes_cbc_enc_tv_template,
+					.count = HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
+		.alg = "authenc(hmac(sha512),cbc(aes))",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = hmac_sha512_aes_cbc_enc_tv_template,
+					.count = HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS
+				}
+			}
+		}
 	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
@@ -1775,9 +1904,41 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = CRC32C_TEST_VECTORS
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-cbc-aes-aesni)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-aes-aesni)",
 		.test = alg_test_null,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "cryptd(__driver-ecb-serpent-avx)",
+		.test = alg_test_null,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1805,9 +1966,41 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-twofish-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "cryptd(__driver-gcm-aes-aesni)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__ghash-pclmulqdqni)",
 		.test = alg_test_null,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = NULL,
@@ -1923,6 +2116,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ecb(__aes-aesni)",
 		.test = alg_test_null,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -2219,6 +2413,15 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = GHASH_TEST_VECTORS
 			}
 		}
+	}, {
+		.alg = "hmac(crc32)",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = bfin_crc_tv_template,
+				.count = BFIN_CRC_TEST_VECTORS
+			}
+		}
 	}, {
 		.alg = "hmac(md5)",
 		.test = alg_test_hash,
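The testmgr.c hunks above do three things: they add null-test placeholders for the internal helper algorithm names ("__cbc-serpent-avx", "__driver-ecb-twofish-avx", the "cryptd(...)" wrappers, and so on) so the new AVX implementations do not trigger the "no test for ..." warning, they mark the AES-NI and PCLMULQDQ helpers as fips_allowed, and they register AEAD test suites for authenc(hmac(sha1),cbc(aes)), authenc(hmac(sha256),cbc(aes)) and authenc(hmac(sha512),cbc(aes)), plus an hmac(crc32) suite backed by the Blackfin CRC vectors. The vectors referenced by the new suites are added to testmgr.h below.

What follows is a minimal sketch of driving one of the new authenc() transforms through the in-kernel AEAD interface, which is also the interface the self-test uses when it replays these vectors. It uses current API names (crypto_alloc_aead(), aead_request_set_ad(), crypto_wait_req()); the kernels this patch targeted still used aead_request_set_assoc(), and authenc_encrypt_sketch() is an illustrative helper, not code from the patch.

#include <crypto/aead.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/types.h>

/*
 * Encrypt assoclen bytes of associated data plus ptlen bytes of plaintext
 * in place.  buf must hold assoclen + ptlen + authsize bytes; key is the
 * rtattr-prefixed authenc key blob used by the vectors in testmgr.h.
 */
static int authenc_encrypt_sketch(const u8 *key, unsigned int keylen,
				  u8 *iv, u8 *buf, unsigned int assoclen,
				  unsigned int ptlen, unsigned int authsize)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_aead("authenc(hmac(sha1),cbc(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_aead_setauthsize(tfm, authsize);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* src == dst: associated data, then plaintext, then room for the ICV */
	sg_init_one(&sg, buf, assoclen + ptlen + authsize);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
				       CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &wait);
	aead_request_set_crypt(req, &sg, &sg, ptlen, iv);
	aead_request_set_ad(req, assoclen);

	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}

With the RFC 3602 case 1 vector added to testmgr.h below, authsize is the full SHA-1 digest size (20 bytes), which matches that entry's rlen of 16 + 20.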
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 36e5a8ee0e1e8ef8d3c85e9cadbb435ecc51530d..f8179e0344ed6b76b286b65eec5e82a1fe49d922 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2765,8 +2765,62 @@ static struct cipher_testvec tf_enc_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.ilen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.ilen	= 496,
 		.result	= "\x88\xCB\x1E\xC2\xAF\x8A\x97\xFF"
 			  "\xF6\x90\x46\x9C\x4A\x0F\x08\xDC"
 			  "\xDE\xAB\xAD\xFA\xFC\xA8\xC2\x3D"
@@ -2774,8 +2828,62 @@ static struct cipher_testvec tf_enc_tv_template[] = {
 			  "\x34\x9E\xB6\x08\xB2\xDD\xA8\xF5"
 			  "\xDF\xFA\xC7\xE8\x09\x50\x76\x08"
 			  "\xA2\xB6\x6A\x59\xC0\x2B\x6D\x05"
-			  "\x89\xF6\x82\xF0\xD3\xDB\x06\x02",
-		.rlen	= 64,
+			  "\x89\xF6\x82\xF0\xD3\xDB\x06\x02"
+			  "\xB5\x11\x5C\x5E\x79\x1A\xAC\x43"
+			  "\x5C\xC0\x30\x4B\x6B\x16\xA1\x40"
+			  "\x80\x27\x88\xBA\x2C\x74\x42\xE0"
+			  "\x1B\xA5\x85\x08\xB9\xE6\x22\x7A"
+			  "\x36\x3B\x0D\x9F\xA0\x22\x6C\x2A"
+			  "\x91\x75\x47\xBC\x67\x21\x4E\xF9"
+			  "\xEA\xFF\xD9\xD5\xC0\xFC\x9E\x2C"
+			  "\x3E\xAD\xC6\x61\x0E\x93\x7A\x22"
+			  "\x09\xC8\x8D\xC1\x8E\xB4\x8B\x5C"
+			  "\xC6\x24\x42\xB8\x23\x66\x80\xA9"
+			  "\x32\x0B\x7A\x29\xBF\xB3\x0B\x63"
+			  "\x43\x27\x13\xA9\xBE\xEB\xBD\xF3"
+			  "\x33\x62\x70\xE2\x1B\x86\x7A\xA1"
+			  "\x51\x4A\x16\xFE\x29\x63\x7E\xD0"
+			  "\x7A\xA4\x6E\x2C\xF8\xC1\xDB\xE8"
+			  "\xCB\x4D\xD2\x8C\x04\x14\xB4\x66"
+			  "\x41\xB7\x3A\x96\x16\x7C\x1D\x5B"
+			  "\xB6\x41\x42\x64\x43\xEE\x6E\x7C"
+			  "\x8B\xAF\x01\x9C\xA4\x6E\x75\x8F"
+			  "\xDE\x10\x9F\xA6\xE7\xD6\x44\x97"
+			  "\x66\xA3\x96\x0F\x1C\x25\x60\xF5"
+			  "\x3C\x2E\x32\x69\x0E\x82\xFF\x27"
+			  "\x0F\xB5\x06\xDA\xD8\x31\x15\x6C"
+			  "\xDF\x18\x6C\x87\xF5\x3B\x11\x9A"
+			  "\x1B\x42\x1F\x5B\x29\x19\x96\x13"
+			  "\x68\x2E\x5E\x08\x1C\x8F\x32\x4B"
+			  "\x81\x77\x6D\xF4\xA0\x01\x42\xEC"
+			  "\xDD\x5B\xFD\x3A\x8E\x6A\x14\xFB"
+			  "\x83\x54\xDF\x0F\x86\xB7\xEA\x40"
+			  "\x46\x39\xF7\x2A\x89\x8D\x4E\x96"
+			  "\x5F\x5F\x6D\x76\xC6\x13\x9D\x3D"
+			  "\x1D\x5F\x0C\x7D\xE2\xBC\xC2\x16"
+			  "\x16\xBE\x89\x3E\xB0\x61\xA2\x5D"
+			  "\xAF\xD1\x40\x5F\x1A\xB8\x26\x41"
+			  "\xC6\xBD\x36\xEF\xED\x29\x50\x6D"
+			  "\x10\xEF\x26\xE8\xA8\x93\x11\x3F"
+			  "\x2D\x1F\x88\x20\x77\x45\xF5\x66"
+			  "\x08\xB9\xF1\xEF\xB1\x93\xA8\x81"
+			  "\x65\xC5\xCD\x3E\x8C\x06\x60\x2C"
+			  "\xB2\x10\x7A\xCA\x05\x25\x59\xDB"
+			  "\xC7\x28\xF5\x20\x35\x52\x9E\x62"
+			  "\xF8\x88\x24\x1C\x4D\x84\x12\x39"
+			  "\x39\xE4\x2E\xF4\xD4\x9D\x2B\xBC"
+			  "\x87\x66\xE6\xC0\x6B\x31\x9A\x66"
+			  "\x03\xDC\x95\xD8\x6B\xD0\x30\x8F"
+			  "\xDF\x8F\x8D\xFA\xEC\x1F\x08\xBD"
+			  "\xA3\x63\xE2\x71\x4F\x03\x94\x87"
+			  "\x50\xDF\x15\x1F\xED\x3A\xA3\x7F"
+			  "\x1F\x2A\xB5\xA1\x69\xAC\x4B\x0D"
+			  "\x84\x9B\x2A\xE9\x55\xDD\x46\x91"
+			  "\x15\x33\xF3\x2B\x9B\x46\x97\x00"
+			  "\xF0\x29\xD8\x59\x5D\x33\x37\xF9"
+			  "\x58\x33\x9B\x78\xC7\x58\x48\x6B"
+			  "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5",
+		.rlen	= 496,
 	},
 };
 
@@ -2822,8 +2930,62 @@ static struct cipher_testvec tf_dec_tv_template[] = {
 			  "\x34\x9E\xB6\x08\xB2\xDD\xA8\xF5"
 			  "\xDF\xFA\xC7\xE8\x09\x50\x76\x08"
 			  "\xA2\xB6\x6A\x59\xC0\x2B\x6D\x05"
-			  "\x89\xF6\x82\xF0\xD3\xDB\x06\x02",
-		.ilen	= 64,
+			  "\x89\xF6\x82\xF0\xD3\xDB\x06\x02"
+			  "\xB5\x11\x5C\x5E\x79\x1A\xAC\x43"
+			  "\x5C\xC0\x30\x4B\x6B\x16\xA1\x40"
+			  "\x80\x27\x88\xBA\x2C\x74\x42\xE0"
+			  "\x1B\xA5\x85\x08\xB9\xE6\x22\x7A"
+			  "\x36\x3B\x0D\x9F\xA0\x22\x6C\x2A"
+			  "\x91\x75\x47\xBC\x67\x21\x4E\xF9"
+			  "\xEA\xFF\xD9\xD5\xC0\xFC\x9E\x2C"
+			  "\x3E\xAD\xC6\x61\x0E\x93\x7A\x22"
+			  "\x09\xC8\x8D\xC1\x8E\xB4\x8B\x5C"
+			  "\xC6\x24\x42\xB8\x23\x66\x80\xA9"
+			  "\x32\x0B\x7A\x29\xBF\xB3\x0B\x63"
+			  "\x43\x27\x13\xA9\xBE\xEB\xBD\xF3"
+			  "\x33\x62\x70\xE2\x1B\x86\x7A\xA1"
+			  "\x51\x4A\x16\xFE\x29\x63\x7E\xD0"
+			  "\x7A\xA4\x6E\x2C\xF8\xC1\xDB\xE8"
+			  "\xCB\x4D\xD2\x8C\x04\x14\xB4\x66"
+			  "\x41\xB7\x3A\x96\x16\x7C\x1D\x5B"
+			  "\xB6\x41\x42\x64\x43\xEE\x6E\x7C"
+			  "\x8B\xAF\x01\x9C\xA4\x6E\x75\x8F"
+			  "\xDE\x10\x9F\xA6\xE7\xD6\x44\x97"
+			  "\x66\xA3\x96\x0F\x1C\x25\x60\xF5"
+			  "\x3C\x2E\x32\x69\x0E\x82\xFF\x27"
+			  "\x0F\xB5\x06\xDA\xD8\x31\x15\x6C"
+			  "\xDF\x18\x6C\x87\xF5\x3B\x11\x9A"
+			  "\x1B\x42\x1F\x5B\x29\x19\x96\x13"
+			  "\x68\x2E\x5E\x08\x1C\x8F\x32\x4B"
+			  "\x81\x77\x6D\xF4\xA0\x01\x42\xEC"
+			  "\xDD\x5B\xFD\x3A\x8E\x6A\x14\xFB"
+			  "\x83\x54\xDF\x0F\x86\xB7\xEA\x40"
+			  "\x46\x39\xF7\x2A\x89\x8D\x4E\x96"
+			  "\x5F\x5F\x6D\x76\xC6\x13\x9D\x3D"
+			  "\x1D\x5F\x0C\x7D\xE2\xBC\xC2\x16"
+			  "\x16\xBE\x89\x3E\xB0\x61\xA2\x5D"
+			  "\xAF\xD1\x40\x5F\x1A\xB8\x26\x41"
+			  "\xC6\xBD\x36\xEF\xED\x29\x50\x6D"
+			  "\x10\xEF\x26\xE8\xA8\x93\x11\x3F"
+			  "\x2D\x1F\x88\x20\x77\x45\xF5\x66"
+			  "\x08\xB9\xF1\xEF\xB1\x93\xA8\x81"
+			  "\x65\xC5\xCD\x3E\x8C\x06\x60\x2C"
+			  "\xB2\x10\x7A\xCA\x05\x25\x59\xDB"
+			  "\xC7\x28\xF5\x20\x35\x52\x9E\x62"
+			  "\xF8\x88\x24\x1C\x4D\x84\x12\x39"
+			  "\x39\xE4\x2E\xF4\xD4\x9D\x2B\xBC"
+			  "\x87\x66\xE6\xC0\x6B\x31\x9A\x66"
+			  "\x03\xDC\x95\xD8\x6B\xD0\x30\x8F"
+			  "\xDF\x8F\x8D\xFA\xEC\x1F\x08\xBD"
+			  "\xA3\x63\xE2\x71\x4F\x03\x94\x87"
+			  "\x50\xDF\x15\x1F\xED\x3A\xA3\x7F"
+			  "\x1F\x2A\xB5\xA1\x69\xAC\x4B\x0D"
+			  "\x84\x9B\x2A\xE9\x55\xDD\x46\x91"
+			  "\x15\x33\xF3\x2B\x9B\x46\x97\x00"
+			  "\xF0\x29\xD8\x59\x5D\x33\x37\xF9"
+			  "\x58\x33\x9B\x78\xC7\x58\x48\x6B"
+			  "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5",
+		.ilen	= 496,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -2831,8 +2993,62 @@ static struct cipher_testvec tf_dec_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.rlen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
 	},
 };
 
@@ -2894,8 +3110,62 @@ static struct cipher_testvec tf_cbc_enc_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.ilen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.ilen	= 496,
 		.result	= "\xC8\xFF\xF2\x53\xA6\x27\x09\xD1"
 			  "\x33\x38\xC2\xC0\x0C\x14\x7E\xB5"
 			  "\x26\x1B\x05\x0C\x05\x12\x3F\xC0"
@@ -2903,8 +3173,62 @@ static struct cipher_testvec tf_cbc_enc_tv_template[] = {
 			  "\x3D\x32\xDF\xDA\x56\x00\x6E\xEE"
 			  "\x5B\x2A\x72\x9D\xC2\x4D\x19\xBC"
 			  "\x8C\x53\xFA\x87\x6F\xDD\x81\xA3"
-			  "\xB1\xD3\x44\x65\xDF\xE7\x63\x38",
-		.rlen	= 64,
+			  "\xB1\xD3\x44\x65\xDF\xE7\x63\x38"
+			  "\x4A\xFC\xDC\xEC\x3F\x26\x8E\xB8"
+			  "\x43\xFC\xFE\x18\xB5\x11\x6D\x31"
+			  "\x81\x8B\x0D\x75\xF6\x80\xEC\x84"
+			  "\x04\xB9\xE6\x09\x63\xED\x39\xDB"
+			  "\xC3\xF6\x14\xD6\x6E\x5E\x8B\xBD"
+			  "\x3E\xFA\xD7\x98\x50\x6F\xD9\x63"
+			  "\x02\xCD\x0D\x39\x4B\x0D\xEC\x80"
+			  "\xE3\x6A\x17\xF4\xCC\xAD\xFF\x68"
+			  "\x45\xDD\xC8\x83\x1D\x41\x96\x0D"
+			  "\x91\x2E\x05\xD3\x59\x82\xE0\x43"
+			  "\x90\x4F\xB9\xF7\xAD\x6B\x2E\xAF"
+			  "\xA7\x84\x00\x53\xCD\x6F\xD1\x0C"
+			  "\x4E\xF9\x5A\x23\xFB\xCA\xC7\xD3"
+			  "\xA9\xAA\x9D\xB2\x3F\x66\xF1\xAC"
+			  "\x25\x21\x8F\xF7\xEF\xF2\x6A\xDF"
+			  "\xE8\xDA\x75\x1A\x8A\xF1\xDD\x38"
+			  "\x1F\xF9\x3D\x68\x4A\xBB\x9E\x34"
+			  "\x1F\x66\x1F\x9C\x2B\x54\xFF\x60"
+			  "\x7F\x29\x4B\x55\x80\x8F\x4E\xA7"
+			  "\xA6\x9A\x0A\xD9\x0D\x19\x00\xF8"
+			  "\x1F\xBC\x0C\x40\x6B\xEC\x99\x25"
+			  "\x94\x70\x74\x0E\x1D\xC5\xBC\x12"
+			  "\xF3\x42\xBE\x95\xBF\xFB\x4E\x55"
+			  "\x9A\xB9\xCE\x14\x16\x5B\xDC\xD3"
+			  "\x75\x42\x62\x04\x31\x1F\x95\x7C"
+			  "\x66\x1A\x97\xDC\x2F\x40\x5C\x39"
+			  "\x78\xE6\x02\xDB\x49\xE1\xC6\x47"
+			  "\xC2\x78\x9A\xBB\xF3\xBE\xCB\x93"
+			  "\xD8\xB8\xE8\xBB\x8C\xB3\x9B\xA7"
+			  "\xC2\x89\xF3\x91\x88\x83\x3D\xF0"
+			  "\x29\xA2\xCD\xB5\x79\x16\xC2\x40"
+			  "\x11\x03\x8E\x9C\xFD\xC9\x43\xC4"
+			  "\xC2\x19\xF0\x4A\x32\xEF\x0C\x2B"
+			  "\xD3\x2B\xE9\xD4\x4C\xDE\x95\xCF"
+			  "\x04\x03\xD3\x2C\x7F\x82\xC8\xFA"
+			  "\x0F\xD8\x7A\x39\x7B\x01\x41\x9C"
+			  "\x78\xB6\xC9\xBF\xF9\x78\x57\x88"
+			  "\xB1\xA5\xE1\xE0\xD9\x16\xD4\xC8"
+			  "\xEE\xC4\xBE\x7B\x55\x59\x00\x48"
+			  "\x1B\xBC\x14\xFA\x2A\x9D\xC9\x1C"
+			  "\xFB\x28\x3F\x95\xDD\xB7\xD6\xCE"
+			  "\x3A\x7F\x09\x0C\x0E\x69\x30\x7D"
+			  "\xBC\x68\x9C\x91\x2A\x59\x57\x04"
+			  "\xED\x1A\x1E\x00\xB1\x85\x92\x04"
+			  "\x28\x8C\x0C\x3C\xC1\xD5\x12\xF7"
+			  "\x4C\x3E\xB0\xE7\x86\x62\x68\x91"
+			  "\xFC\xC4\xE2\xCE\xA6\xDC\x5E\x93"
+			  "\x5D\x8D\x8C\x68\xB3\xB2\xB9\x64"
+			  "\x16\xB8\xC8\x6F\xD8\xEE\x21\xBD"
+			  "\xAC\x18\x0C\x7D\x0D\x05\xAB\xF1"
+			  "\xFA\xDD\xE2\x48\xDF\x4C\x02\x39"
+			  "\x69\xA1\x62\xBD\x49\x3A\x9D\x91"
+			  "\x30\x70\x56\xA4\x37\xDD\x7C\xC0"
+			  "\x0A\xA3\x30\x10\x26\x25\x41\x2C",
+		.rlen	= 496,
 	},
 };
 
@@ -2966,8 +3290,62 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = {
 			  "\x3D\x32\xDF\xDA\x56\x00\x6E\xEE"
 			  "\x5B\x2A\x72\x9D\xC2\x4D\x19\xBC"
 			  "\x8C\x53\xFA\x87\x6F\xDD\x81\xA3"
-			  "\xB1\xD3\x44\x65\xDF\xE7\x63\x38",
-		.ilen	= 64,
+			  "\xB1\xD3\x44\x65\xDF\xE7\x63\x38"
+			  "\x4A\xFC\xDC\xEC\x3F\x26\x8E\xB8"
+			  "\x43\xFC\xFE\x18\xB5\x11\x6D\x31"
+			  "\x81\x8B\x0D\x75\xF6\x80\xEC\x84"
+			  "\x04\xB9\xE6\x09\x63\xED\x39\xDB"
+			  "\xC3\xF6\x14\xD6\x6E\x5E\x8B\xBD"
+			  "\x3E\xFA\xD7\x98\x50\x6F\xD9\x63"
+			  "\x02\xCD\x0D\x39\x4B\x0D\xEC\x80"
+			  "\xE3\x6A\x17\xF4\xCC\xAD\xFF\x68"
+			  "\x45\xDD\xC8\x83\x1D\x41\x96\x0D"
+			  "\x91\x2E\x05\xD3\x59\x82\xE0\x43"
+			  "\x90\x4F\xB9\xF7\xAD\x6B\x2E\xAF"
+			  "\xA7\x84\x00\x53\xCD\x6F\xD1\x0C"
+			  "\x4E\xF9\x5A\x23\xFB\xCA\xC7\xD3"
+			  "\xA9\xAA\x9D\xB2\x3F\x66\xF1\xAC"
+			  "\x25\x21\x8F\xF7\xEF\xF2\x6A\xDF"
+			  "\xE8\xDA\x75\x1A\x8A\xF1\xDD\x38"
+			  "\x1F\xF9\x3D\x68\x4A\xBB\x9E\x34"
+			  "\x1F\x66\x1F\x9C\x2B\x54\xFF\x60"
+			  "\x7F\x29\x4B\x55\x80\x8F\x4E\xA7"
+			  "\xA6\x9A\x0A\xD9\x0D\x19\x00\xF8"
+			  "\x1F\xBC\x0C\x40\x6B\xEC\x99\x25"
+			  "\x94\x70\x74\x0E\x1D\xC5\xBC\x12"
+			  "\xF3\x42\xBE\x95\xBF\xFB\x4E\x55"
+			  "\x9A\xB9\xCE\x14\x16\x5B\xDC\xD3"
+			  "\x75\x42\x62\x04\x31\x1F\x95\x7C"
+			  "\x66\x1A\x97\xDC\x2F\x40\x5C\x39"
+			  "\x78\xE6\x02\xDB\x49\xE1\xC6\x47"
+			  "\xC2\x78\x9A\xBB\xF3\xBE\xCB\x93"
+			  "\xD8\xB8\xE8\xBB\x8C\xB3\x9B\xA7"
+			  "\xC2\x89\xF3\x91\x88\x83\x3D\xF0"
+			  "\x29\xA2\xCD\xB5\x79\x16\xC2\x40"
+			  "\x11\x03\x8E\x9C\xFD\xC9\x43\xC4"
+			  "\xC2\x19\xF0\x4A\x32\xEF\x0C\x2B"
+			  "\xD3\x2B\xE9\xD4\x4C\xDE\x95\xCF"
+			  "\x04\x03\xD3\x2C\x7F\x82\xC8\xFA"
+			  "\x0F\xD8\x7A\x39\x7B\x01\x41\x9C"
+			  "\x78\xB6\xC9\xBF\xF9\x78\x57\x88"
+			  "\xB1\xA5\xE1\xE0\xD9\x16\xD4\xC8"
+			  "\xEE\xC4\xBE\x7B\x55\x59\x00\x48"
+			  "\x1B\xBC\x14\xFA\x2A\x9D\xC9\x1C"
+			  "\xFB\x28\x3F\x95\xDD\xB7\xD6\xCE"
+			  "\x3A\x7F\x09\x0C\x0E\x69\x30\x7D"
+			  "\xBC\x68\x9C\x91\x2A\x59\x57\x04"
+			  "\xED\x1A\x1E\x00\xB1\x85\x92\x04"
+			  "\x28\x8C\x0C\x3C\xC1\xD5\x12\xF7"
+			  "\x4C\x3E\xB0\xE7\x86\x62\x68\x91"
+			  "\xFC\xC4\xE2\xCE\xA6\xDC\x5E\x93"
+			  "\x5D\x8D\x8C\x68\xB3\xB2\xB9\x64"
+			  "\x16\xB8\xC8\x6F\xD8\xEE\x21\xBD"
+			  "\xAC\x18\x0C\x7D\x0D\x05\xAB\xF1"
+			  "\xFA\xDD\xE2\x48\xDF\x4C\x02\x39"
+			  "\x69\xA1\x62\xBD\x49\x3A\x9D\x91"
+			  "\x30\x70\x56\xA4\x37\xDD\x7C\xC0"
+			  "\x0A\xA3\x30\x10\x26\x25\x41\x2C",
+		.ilen	= 496,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -2975,8 +3353,62 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.rlen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
 	},
 };
 
@@ -2996,8 +3428,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.ilen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.ilen	= 496,
 		.result	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
 			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
 			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
@@ -3005,8 +3491,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = {
 			  "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0"
 			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
 			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
-			  "\x01\x41\x21\x12\x38\xAB\x52\x4F",
-		.rlen	= 64,
+			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
+			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
+			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
+			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
+			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
+			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
+			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
+			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
+			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
+			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
+			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
+			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
+			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
+			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
+			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
+			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
+			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
+			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
+			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
+			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
+			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
+			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
+			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
+			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
+			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
+			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
+			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
+			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
+			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
+			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
+			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
+			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
+			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
+			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
+			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
+			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
+			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
+			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
+			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
+			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
+			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
+			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
+			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
+			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
+			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
+			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
+			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
+			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
+			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
+			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
+			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
+			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
+			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
+			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
+			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF",
+		.rlen	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -3023,8 +3563,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = {
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
 			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE",
-		.ilen	= 67,
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.ilen	= 499,
 		.result	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
 			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
 			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
@@ -3033,8 +3627,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = {
 			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
 			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
 			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
-			  "\xA8\x57\x20",
-		.rlen	= 67,
+			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
+			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
+			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
+			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
+			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
+			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
+			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
+			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
+			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
+			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
+			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
+			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
+			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
+			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
+			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
+			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
+			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
+			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
+			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
+			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
+			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
+			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
+			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
+			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
+			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
+			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
+			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
+			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
+			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
+			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
+			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
+			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
+			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
+			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
+			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
+			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
+			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
+			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
+			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
+			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
+			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
+			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
+			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
+			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
+			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
+			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
+			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
+			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
+			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
+			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
+			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
+			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
+			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
+			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF"
+			  "\x6C\x82\x9D",
+		.rlen	= 499,
 	},
 };
 
@@ -3054,8 +3702,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = {
 			  "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0"
 			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
 			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
-			  "\x01\x41\x21\x12\x38\xAB\x52\x4F",
-		.ilen	= 64,
+			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
+			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
+			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
+			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
+			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
+			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
+			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
+			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
+			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
+			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
+			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
+			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
+			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
+			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
+			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
+			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
+			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
+			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
+			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
+			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
+			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
+			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
+			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
+			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
+			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
+			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
+			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
+			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
+			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
+			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
+			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
+			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
+			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
+			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
+			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
+			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
+			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
+			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
+			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
+			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
+			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
+			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
+			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
+			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
+			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
+			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
+			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
+			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
+			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
+			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
+			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
+			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
+			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
+			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
+			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF",
+		.ilen	= 496,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -3063,8 +3765,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = {
 			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C",
-		.rlen	= 64,
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -3081,8 +3837,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = {
 			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
 			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
 			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
-			  "\xA8\x57\x20",
-		.ilen	= 67,
+			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
+			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
+			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
+			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
+			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
+			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
+			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
+			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
+			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
+			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
+			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
+			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
+			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
+			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
+			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
+			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
+			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
+			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
+			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
+			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
+			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
+			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
+			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
+			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
+			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
+			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
+			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
+			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
+			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
+			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
+			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
+			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
+			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
+			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
+			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
+			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
+			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
+			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
+			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
+			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
+			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
+			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
+			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
+			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
+			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
+			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
+			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
+			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
+			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
+			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
+			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
+			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
+			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
+			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF"
+			  "\x6C\x82\x9D",
+		.ilen	= 499,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -3091,8 +3901,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = {
 			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
 			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
 			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE",
-		.rlen	= 67,
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.rlen	= 499,
 	},
 };
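The Twofish ECB, CBC and CTR templates above keep the existing 64-byte vectors as a prefix and extend them to 496 bytes of data; the second CTR case in each direction grows to 499 bytes, adding a trailing 3-byte partial block that only a stream mode such as CTR may consume. The longer buffers are what force the batched x86-64 implementations past a single pass: 496 bytes is 31 Twofish blocks, which is not a multiple of the 3-block or 8-block batch sizes used by the assembler implementations (batch sizes assumed here, they are not stated in the patch), so both the wide path and the one-block tail path run. A compile-time sketch of that arithmetic follows; twofish_tv_len_check() is illustrative only.

#include <linux/build_bug.h>

/*
 * Length choices for the extended Twofish vectors: a whole number of
 * blocks that leaves a tail for both assumed batch widths, plus a
 * 3-byte partial block for the CTR-only 499-byte cases.
 */
static inline void twofish_tv_len_check(void)
{
	BUILD_BUG_ON(496 % 16 != 0);		/* 31 full blocks */
	BUILD_BUG_ON((496 / 16) % 3 == 0);	/* tail left for a 3-way batch */
	BUILD_BUG_ON((496 / 16) % 8 == 0);	/* tail left for an 8-way batch */
	BUILD_BUG_ON(499 - 496 != 3);		/* partial block, CTR only */
}

The ECB and CBC templates stay at a whole multiple of the block size, since those modes cannot process the partial tail.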
 
@@ -6111,6 +6975,9 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
 #define AES_DEC_TEST_VECTORS 3
 #define AES_CBC_ENC_TEST_VECTORS 4
 #define AES_CBC_DEC_TEST_VECTORS 4
+#define HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS 7
+#define HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS 7
+#define HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS 7
 #define AES_LRW_ENC_TEST_VECTORS 8
 #define AES_LRW_DEC_TEST_VECTORS 8
 #define AES_XTS_ENC_TEST_VECTORS 5
@@ -6368,6 +7235,837 @@ static struct cipher_testvec aes_cbc_dec_tv_template[] = {
 	},
 };
 
+static struct aead_testvec hmac_sha1_aes_cbc_enc_tv_template[] = {
+	{ /* RFC 3602 Case 1 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00"
+			  "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
+			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
+		.klen   = 8 + 20 + 16,
+		.iv     = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
+			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
+		.input  = "Single block msg",
+		.ilen   = 16,
+		.result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
+			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a"
+			  "\x1b\x13\xcb\xaf\x89\x5e\xe1\x2c"
+			  "\x13\xc5\x2e\xa3\xcc\xed\xdc\xb5"
+			  "\x03\x71\xa2\x06",
+		.rlen   = 16 + 20,
+	}, { /* RFC 3602 Case 2 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33"
+			  "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
+			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
+		.klen   = 8 + 20 + 16,
+		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
+			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
+		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.ilen   = 32,
+		.result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
+			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
+			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
+			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1"
+			  "\xad\x9b\x4c\x5c\x85\xe1\xda\xae"
+			  "\xee\x81\x4e\xd7\xdb\x74\xcf\x58"
+			  "\x65\x39\xf8\xde",
+		.rlen   = 32 + 20,
+	}, { /* RFC 3602 Case 3 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55"
+			  "\x6c\x3e\xa0\x47\x76\x30\xce\x21"
+			  "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd",
+		.klen   = 8 + 20 + 16,
+		.iv     = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb"
+			  "\xd9\xcd\x27\xd8\x25\x68\x2c\x81",
+		.input  = "This is a 48-byte message (exactly 3 AES blocks)",
+		.ilen   = 48,
+		.result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53"
+			  "\xd4\x93\x66\x5d\x33\xf0\xe8\x86"
+			  "\x2d\xea\x54\xcd\xb2\x93\xab\xc7"
+			  "\x50\x69\x39\x27\x67\x72\xf8\xd5"
+			  "\x02\x1c\x19\x21\x6b\xad\x52\x5c"
+			  "\x85\x79\x69\x5d\x83\xba\x26\x84"
+			  "\xc2\xec\x0c\xf8\x7f\x05\xba\xca"
+			  "\xff\xee\x4c\xd0\x93\xe6\x36\x7f"
+			  "\x8d\x62\xf2\x1e",
+		.rlen   = 48 + 20,
+	}, { /* RFC 3602 Case 4 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55"
+			  "\x56\xe4\x7a\x38\xc5\x59\x89\x74"
+			  "\xbc\x46\x90\x3d\xba\x29\x03\x49",
+		.klen   = 8 + 20 + 16,
+		.iv     = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c"
+			  "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9",
+		.input  = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf",
+		.ilen   = 64,
+		.result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e"
+			  "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa"
+			  "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6"
+			  "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e"
+			  "\x35\x90\x7a\xa6\x32\xc3\xff\xdf"
+			  "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad"
+			  "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d"
+			  "\x49\xa5\x3e\x87\xf4\xc3\xda\x55"
+			  "\x1c\x45\x57\xa9\x56\xcb\xa9\x2d"
+			  "\x18\xac\xf1\xc7\x5d\xd1\xcd\x0d"
+			  "\x1d\xbe\xc6\xe9",
+		.rlen   = 64 + 20,
+	}, { /* RFC 3602 Case 5 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55"
+			  "\x90\xd3\x82\xb4\x10\xee\xba\x7a"
+			  "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf",
+		.klen   = 8 + 20 + 16,
+		.iv     = "\xe9\x6e\x8c\x08\xab\x46\x57\x63"
+			  "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93",
+		.assoc  = "\x00\x00\x43\x21\x00\x00\x00\x01",
+		.alen   = 8,
+		.input  = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00"
+			  "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01",
+		.ilen   = 80,
+		.result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6"
+			  "\xa9\x45\x3e\x19\x4e\x12\x08\x49"
+			  "\xa4\x87\x0b\x66\xcc\x6b\x99\x65"
+			  "\x33\x00\x13\xb4\x89\x8d\xc8\x56"
+			  "\xa4\x69\x9e\x52\x3a\x55\xdb\x08"
+			  "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52"
+			  "\x77\x5b\x07\xd1\xdb\x34\xed\x9c"
+			  "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a"
+			  "\xa2\x69\xad\xd0\x47\xad\x2d\x59"
+			  "\x13\xac\x19\xb7\xcf\xba\xd4\xa6"
+			  "\x58\xc6\x84\x75\xe4\xe9\x6b\x0c"
+			  "\xe1\xc5\x0b\x73\x4d\x82\x55\xa8"
+			  "\x85\xe1\x59\xf7",
+		.rlen   = 80 + 20,
+	}, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"            /* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x18"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55"
+			  "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen   = 8 + 20 + 24,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
+			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
+			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
+			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
+			  "\x57\x1b\x24\x20\x12\xfb\x7a\xe0"
+			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
+			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
+			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd"
+			  "\x73\xe3\x19\x3f\x8b\xc9\xc6\xf4"
+			  "\x5a\xf1\x5b\xa8\x98\x07\xc5\x36"
+			  "\x47\x4c\xfc\x36",
+		.rlen   = 64 + 20,
+	}, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x20"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55"
+			  "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen   = 8 + 20 + 32,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
+			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
+			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
+			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
+			  "\x39\xf2\x33\x69\xa9\xd9\xba\xcf"
+			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
+			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
+			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b"
+			  "\xa3\xe8\x9b\x17\xe3\xf4\x7f\xde"
+			  "\x1b\x9f\xc6\x81\x26\x43\x4a\x87"
+			  "\x51\xee\xd6\x4e",
+		.rlen   = 64 + 20,
+	},
+};
+
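A note on the .key blobs in these authenc() vectors: they are not raw keys. Each one begins with a struct rtattr header (rta_len = 8, rta_type = CRYPTO_AUTHENC_KEYA_PARAM) written in native byte order, which is why the byte strings differ under __LITTLE_ENDIAN and __BIG_ENDIAN, followed by the encryption key length as a big-endian 32-bit value, then the HMAC key and finally the AES key. That is why every .klen reads 8 + (HMAC key length) + (AES key length). The sketch below shows how such a blob could be assembled; build_authenc_key() is a made-up helper, but the layout is the one the authenc template's setkey in crypto/authenc.c parses.

#include <crypto/authenc.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/rtnetlink.h>
#include <linux/string.h>

/*
 * Pack an authentication key and an encryption key into the
 * rtattr-prefixed blob that authenc() expects, matching the .key/.klen
 * layout of the vectors in this file.
 */
static int build_authenc_key(u8 *buf, unsigned int buflen,
			     const u8 *authkey, unsigned int authkeylen,
			     const u8 *enckey, unsigned int enckeylen)
{
	struct rtattr *rta = (struct rtattr *)buf;
	struct crypto_authenc_key_param *param;

	if (buflen < RTA_SPACE(sizeof(*param)) + authkeylen + enckeylen)
		return -EINVAL;

	rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;	/* 0x0001, native endian */
	rta->rta_len = RTA_LENGTH(sizeof(*param));	/* 8, native endian */
	param = RTA_DATA(rta);
	param->enckeylen = cpu_to_be32(enckeylen);	/* always big endian */

	memcpy(buf + RTA_SPACE(sizeof(*param)), authkey, authkeylen);
	memcpy(buf + RTA_SPACE(sizeof(*param)) + authkeylen,
	       enckey, enckeylen);

	return 0;
}

For RFC 3602 case 1 above this gives the 8-byte header, the 20-byte all-zero HMAC-SHA1 key and the 16-byte AES key, hence .klen = 8 + 20 + 16.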
+static struct aead_testvec hmac_sha256_aes_cbc_enc_tv_template[] = {
+	{ /* RFC 3602 Case 1 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
+			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
+			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
+		.input  = "Single block msg",
+		.ilen   = 16,
+		.result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
+			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a"
+			  "\xcc\xde\x2d\x6a\xae\xf1\x0b\xcc"
+			  "\x38\x06\x38\x51\xb4\xb8\xf3\x5b"
+			  "\x5c\x34\xa6\xa3\x6e\x0b\x05\xe5"
+			  "\x6a\x6d\x44\xaa\x26\xa8\x44\xa5",
+		.rlen   = 16 + 32,
+	}, { /* RFC 3602 Case 2 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
+			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
+			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
+		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.ilen   = 32,
+		.result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
+			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
+			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
+			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1"
+			  "\xf5\x33\x53\xf3\x68\x85\x2a\x99"
+			  "\x0e\x06\x58\x8f\xba\xf6\x06\xda"
+			  "\x49\x69\x0d\x5b\xd4\x36\x06\x62"
+			  "\x35\x5e\x54\x58\x53\x4d\xdf\xbf",
+		.rlen   = 32 + 32,
+	}, { /* RFC 3602 Case 3 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x6c\x3e\xa0\x47\x76\x30\xce\x21"
+			  "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb"
+			  "\xd9\xcd\x27\xd8\x25\x68\x2c\x81",
+		.input  = "This is a 48-byte message (exactly 3 AES blocks)",
+		.ilen   = 48,
+		.result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53"
+			  "\xd4\x93\x66\x5d\x33\xf0\xe8\x86"
+			  "\x2d\xea\x54\xcd\xb2\x93\xab\xc7"
+			  "\x50\x69\x39\x27\x67\x72\xf8\xd5"
+			  "\x02\x1c\x19\x21\x6b\xad\x52\x5c"
+			  "\x85\x79\x69\x5d\x83\xba\x26\x84"
+			  "\x68\xb9\x3e\x90\x38\xa0\x88\x01"
+			  "\xe7\xc6\xce\x10\x31\x2f\x9b\x1d"
+			  "\x24\x78\xfb\xbe\x02\xe0\x4f\x40"
+			  "\x10\xbd\xaa\xc6\xa7\x79\xe0\x1a",
+		.rlen   = 48 + 32,
+	}, { /* RFC 3602 Case 4 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x56\xe4\x7a\x38\xc5\x59\x89\x74"
+			  "\xbc\x46\x90\x3d\xba\x29\x03\x49",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c"
+			  "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9",
+		.input  = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf",
+		.ilen   = 64,
+		.result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e"
+			  "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa"
+			  "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6"
+			  "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e"
+			  "\x35\x90\x7a\xa6\x32\xc3\xff\xdf"
+			  "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad"
+			  "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d"
+			  "\x49\xa5\x3e\x87\xf4\xc3\xda\x55"
+			  "\x7a\x1b\xd4\x3c\xdb\x17\x95\xe2"
+			  "\xe0\x93\xec\xc9\x9f\xf7\xce\xd8"
+			  "\x3f\x54\xe2\x49\x39\xe3\x71\x25"
+			  "\x2b\x6c\xe9\x5d\xec\xec\x2b\x64",
+		.rlen   = 64 + 32,
+	}, { /* RFC 3602 Case 5 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x90\xd3\x82\xb4\x10\xee\xba\x7a"
+			  "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\xe9\x6e\x8c\x08\xab\x46\x57\x63"
+			  "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93",
+		.assoc  = "\x00\x00\x43\x21\x00\x00\x00\x01",
+		.alen   = 8,
+		.input  = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00"
+			  "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01",
+		.ilen   = 80,
+		.result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6"
+			  "\xa9\x45\x3e\x19\x4e\x12\x08\x49"
+			  "\xa4\x87\x0b\x66\xcc\x6b\x99\x65"
+			  "\x33\x00\x13\xb4\x89\x8d\xc8\x56"
+			  "\xa4\x69\x9e\x52\x3a\x55\xdb\x08"
+			  "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52"
+			  "\x77\x5b\x07\xd1\xdb\x34\xed\x9c"
+			  "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a"
+			  "\xa2\x69\xad\xd0\x47\xad\x2d\x59"
+			  "\x13\xac\x19\xb7\xcf\xba\xd4\xa6"
+			  "\xbb\xd4\x0f\xbe\xa3\x3b\x4c\xb8"
+			  "\x3a\xd2\xe1\x03\x86\xa5\x59\xb7"
+			  "\x73\xc3\x46\x20\x2c\xb1\xef\x68"
+			  "\xbb\x8a\x32\x7e\x12\x8c\x69\xcf",
+		.rlen   = 80 + 32,
+	}, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"            /* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x18"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen   = 8 + 32 + 24,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
+			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
+			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
+			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
+			  "\x57\x1b\x24\x20\x12\xfb\x7a\xe0"
+			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
+			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
+			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd"
+			  "\x2f\xee\x5f\xdb\x66\xfe\x79\x09"
+			  "\x61\x81\x31\xea\x5b\x3d\x8e\xfb"
+			  "\xca\x71\x85\x93\xf7\x85\x55\x8b"
+			  "\x7a\xe4\x94\xca\x8b\xba\x19\x33",
+		.rlen   = 64 + 32,
+	}, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x20"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen   = 8 + 32 + 32,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
+			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
+			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
+			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
+			  "\x39\xf2\x33\x69\xa9\xd9\xba\xcf"
+			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
+			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
+			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b"
+			  "\x24\x29\xed\xc2\x31\x49\xdb\xb1"
+			  "\x8f\x74\xbd\x17\x92\x03\xbe\x8f"
+			  "\xf3\x61\xde\x1c\xe9\xdb\xcd\xd0"
+			  "\xcc\xce\xe9\x85\x57\xcf\x6f\x5f",
+		.rlen   = 64 + 32,
+	},
+};
+
+static struct aead_testvec hmac_sha512_aes_cbc_enc_tv_template[] = {
+	{ /* RFC 3602 Case 1 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
+			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
+		.klen   = 8 + 64 + 16,
+		.iv     = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
+			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
+		.input  = "Single block msg",
+		.ilen   = 16,
+		.result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
+			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a"
+			  "\x3f\xdc\xad\x90\x03\x63\x5e\x68"
+			  "\xc3\x13\xdd\xa4\x5c\x4d\x54\xa7"
+			  "\x19\x6e\x03\x75\x2b\xa1\x62\xce"
+			  "\xe0\xc6\x96\x75\xb2\x14\xca\x96"
+			  "\xec\xbd\x50\x08\x07\x64\x1a\x49"
+			  "\xe8\x9a\x7c\x06\x3d\xcb\xff\xb2"
+			  "\xfa\x20\x89\xdd\x9c\xac\x9e\x16"
+			  "\x18\x8a\xa0\x6d\x01\x6c\xa3\x3a",
+		.rlen   = 16 + 64,
+	}, { /* RFC 3602 Case 2 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
+			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
+		.klen   = 8 + 64 + 16,
+		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
+			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
+		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.ilen   = 32,
+		.result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
+			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
+			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
+			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1"
+			  "\xda\xb2\x0c\xb2\x26\xc4\xd5\xef"
+			  "\x60\x38\xa4\x5e\x9a\x8c\x1b\x41"
+			  "\x03\x9f\xc4\x64\x7f\x01\x42\x9b"
+			  "\x0e\x1b\xea\xef\xbc\x88\x19\x5e"
+			  "\x31\x7e\xc2\x95\xfc\x09\x32\x0a"
+			  "\x46\x32\x7c\x41\x9c\x59\x3e\xe9"
+			  "\x8f\x9f\xd4\x31\xd6\x22\xbd\xf8"
+			  "\xf7\x0a\x94\xe5\xa9\xc3\xf6\x9d",
+		.rlen   = 32 + 64,
+	}, { /* RFC 3602 Case 3 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x33\x44\x55\x66\x77\x88\x99\xaa"
+			  "\xbb\xcc\xdd\xee\xff\x11\x22\x33"
+			  "\x44\x55\x66\x77\x88\x99\xaa\xbb"
+			  "\xcc\xdd\xee\xff\x11\x22\x33\x44"
+			  "\x6c\x3e\xa0\x47\x76\x30\xce\x21"
+			  "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd",
+		.klen   = 8 + 64 + 16,
+		.iv     = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb"
+			  "\xd9\xcd\x27\xd8\x25\x68\x2c\x81",
+		.input  = "This is a 48-byte message (exactly 3 AES blocks)",
+		.ilen   = 48,
+		.result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53"
+			  "\xd4\x93\x66\x5d\x33\xf0\xe8\x86"
+			  "\x2d\xea\x54\xcd\xb2\x93\xab\xc7"
+			  "\x50\x69\x39\x27\x67\x72\xf8\xd5"
+			  "\x02\x1c\x19\x21\x6b\xad\x52\x5c"
+			  "\x85\x79\x69\x5d\x83\xba\x26\x84"
+			  "\x64\x19\x17\x5b\x57\xe0\x21\x0f"
+			  "\xca\xdb\xa1\x26\x38\x14\xa2\x69"
+			  "\xdb\x54\x67\x80\xc0\x54\xe0\xfd"
+			  "\x3e\x91\xe7\x91\x7f\x13\x38\x44"
+			  "\xb7\xb1\xd6\xc8\x7d\x48\x8d\x41"
+			  "\x08\xea\x29\x6c\x74\x67\x3f\xb0"
+			  "\xac\x7f\x5c\x1d\xf5\xee\x22\x66"
+			  "\x27\xa6\xb6\x13\xba\xba\xf0\xc2",
+		.rlen   = 48 + 64,
+	}, { /* RFC 3602 Case 4 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x33\x44\x55\x66\x77\x88\x99\xaa"
+			  "\xbb\xcc\xdd\xee\xff\x11\x22\x33"
+			  "\x44\x55\x66\x77\x88\x99\xaa\xbb"
+			  "\xcc\xdd\xee\xff\x11\x22\x33\x44"
+			  "\x56\xe4\x7a\x38\xc5\x59\x89\x74"
+			  "\xbc\x46\x90\x3d\xba\x29\x03\x49",
+		.klen   = 8 + 64 + 16,
+		.iv     = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c"
+			  "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9",
+		.input  = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf",
+		.ilen   = 64,
+		.result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e"
+			  "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa"
+			  "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6"
+			  "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e"
+			  "\x35\x90\x7a\xa6\x32\xc3\xff\xdf"
+			  "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad"
+			  "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d"
+			  "\x49\xa5\x3e\x87\xf4\xc3\xda\x55"
+			  "\x82\xcd\x42\x28\x21\x20\x15\xcc"
+			  "\xb7\xb2\x48\x40\xc7\x64\x41\x3a"
+			  "\x61\x32\x82\x85\xcf\x27\xed\xb4"
+			  "\xe4\x68\xa2\xf5\x79\x26\x27\xb2"
+			  "\x51\x67\x6a\xc4\xf0\x66\x55\x50"
+			  "\xbc\x6f\xed\xd5\x8d\xde\x23\x7c"
+			  "\x62\x98\x14\xd7\x2f\x37\x8d\xdf"
+			  "\xf4\x33\x80\xeb\x8e\xb4\xa4\xda",
+		.rlen   = 64 + 64,
+	}, { /* RFC 3602 Case 5 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x33\x44\x55\x66\x77\x88\x99\xaa"
+			  "\xbb\xcc\xdd\xee\xff\x11\x22\x33"
+			  "\x44\x55\x66\x77\x88\x99\xaa\xbb"
+			  "\xcc\xdd\xee\xff\x11\x22\x33\x44"
+			  "\x90\xd3\x82\xb4\x10\xee\xba\x7a"
+			  "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf",
+		.klen   = 8 + 64 + 16,
+		.iv     = "\xe9\x6e\x8c\x08\xab\x46\x57\x63"
+			  "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93",
+		.assoc  = "\x00\x00\x43\x21\x00\x00\x00\x01",
+		.alen   = 8,
+		.input  = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00"
+			  "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01",
+		.ilen   = 80,
+		.result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6"
+			  "\xa9\x45\x3e\x19\x4e\x12\x08\x49"
+			  "\xa4\x87\x0b\x66\xcc\x6b\x99\x65"
+			  "\x33\x00\x13\xb4\x89\x8d\xc8\x56"
+			  "\xa4\x69\x9e\x52\x3a\x55\xdb\x08"
+			  "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52"
+			  "\x77\x5b\x07\xd1\xdb\x34\xed\x9c"
+			  "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a"
+			  "\xa2\x69\xad\xd0\x47\xad\x2d\x59"
+			  "\x13\xac\x19\xb7\xcf\xba\xd4\xa6"
+			  "\x74\x84\x94\xe2\xd7\x7a\xf9\xbf"
+			  "\x00\x8a\xa2\xd5\xb7\xf3\x60\xcf"
+			  "\xa0\x47\xdf\x4e\x09\xf4\xb1\x7f"
+			  "\x14\xd9\x3d\x53\x8e\x12\xb3\x00"
+			  "\x4c\x0a\x4e\x32\x40\x43\x88\xce"
+			  "\x92\x26\xc1\x76\x20\x11\xeb\xba"
+			  "\x62\x4f\x9a\x62\x25\xc3\x75\x80"
+			  "\xb7\x0a\x17\xf5\xd7\x94\xb4\x14",
+		.rlen   = 80 + 64,
+	}, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"            /* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x18"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x33\x44\x55\x66\x77\x88\x99\xaa"
+			  "\xbb\xcc\xdd\xee\xff\x11\x22\x33"
+			  "\x44\x55\x66\x77\x88\x99\xaa\xbb"
+			  "\xcc\xdd\xee\xff\x11\x22\x33\x44"
+			  "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen   = 8 + 64 + 24,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
+			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
+			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
+			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
+			  "\x57\x1b\x24\x20\x12\xfb\x7a\xe0"
+			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
+			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
+			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd"
+			  "\x77\x4b\x69\x9d\x3a\x0d\xb4\x99"
+			  "\x8f\xc6\x8e\x0e\x72\x58\xe3\x56"
+			  "\xbb\x21\xd2\x7d\x93\x11\x17\x91"
+			  "\xc4\x83\xfd\x0a\xea\x71\xfe\x77"
+			  "\xae\x6f\x0a\xa5\xf0\xcf\xe1\x35"
+			  "\xba\x03\xd5\x32\xfa\x5f\x41\x58"
+			  "\x8d\x43\x98\xa7\x94\x16\x07\x02"
+			  "\x0f\xb6\x81\x50\x28\x95\x2e\x75",
+		.rlen   = 64 + 64,
+	}, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x20"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x33\x44\x55\x66\x77\x88\x99\xaa"
+			  "\xbb\xcc\xdd\xee\xff\x11\x22\x33"
+			  "\x44\x55\x66\x77\x88\x99\xaa\xbb"
+			  "\xcc\xdd\xee\xff\x11\x22\x33\x44"
+			  "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen   = 8 + 64 + 32,
+		.iv     = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.input  = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen   = 64,
+		.result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
+			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
+			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
+			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
+			  "\x39\xf2\x33\x69\xa9\xd9\xba\xcf"
+			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
+			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
+			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b"
+			  "\xb2\x27\x69\x7f\x45\x64\x79\x2b"
+			  "\xb7\xb8\x4c\xd4\x75\x94\x68\x40"
+			  "\x2a\xea\x91\xc7\x3f\x7c\xed\x7b"
+			  "\x95\x2c\x9b\xa8\xf5\xe5\x52\x8d"
+			  "\x6b\xe1\xae\xf1\x74\xfa\x0d\x0c"
+			  "\xe3\x8d\x64\xc3\x8d\xff\x7c\x8c"
+			  "\xdb\xbf\xa0\xb4\x01\xa2\xa8\xa2"
+			  "\x2c\xb1\x62\x2c\x10\xca\xf1\x21",
+		.rlen   = 64 + 64,
+	},
+};
+
 static struct cipher_testvec aes_lrw_enc_tv_template[] = {
 	/* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */
 	{ /* LRW-32-AES 1 */
@@ -14858,4 +16556,94 @@ static struct hash_testvec crc32c_tv_template[] = {
 	},
 };
 
+/*
+ * Blackfin CRC test vectors
+ */
+#define BFIN_CRC_TEST_VECTORS 6
+
+static struct hash_testvec bfin_crc_tv_template[] = {
+	{
+		.psize = 0,
+		.digest = "\x00\x00\x00\x00",
+	},
+	{
+		.key = "\x87\xa9\xcb\xed",
+		.ksize = 4,
+		.psize = 0,
+		.digest = "\x87\xa9\xcb\xed",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28",
+		.psize = 40,
+		.digest = "\x84\x0c\x8d\xa2",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26",
+		.psize = 38,
+		.digest = "\x8c\x58\xec\xb7",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27",
+		.psize = 39,
+		.digest = "\xdc\x50\x28\x7b",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28"
+			     "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50"
+			     "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78"
+			     "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0"
+			     "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8"
+			     "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 240,
+		.digest = "\x10\x19\x4a\x5c",
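+		/* split the 240-byte buffer across two scatterlist chunks (31 + 209 bytes) */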
+		.np = 2,
+		.tap = { 31, 209 }
+	},
+
+};
+
 #endif	/* _CRYPTO_TESTMGR_H */
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index f45dad39a18bee7138f12fe8e6cb28417a77c665..b01d67328243c28dff8291932f16c7bc29b7e888 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -263,3 +263,15 @@ config HW_RANDOM_PSERIES
 	  module will be called pseries-rng.
 
 	  If unsure, say Y.
+
+config HW_RANDOM_EXYNOS
+	tristate "EXYNOS HW random number generator support"
+	depends on HW_RANDOM && HAS_IOMEM && HAVE_CLK
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on EXYNOS SOCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called exynos-rng.
+
+	  If unsure, say Y.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index d901dfa3032104773e735f5fb3c16e23994a4e70..8d6d173b65e6ea51cb806b60400e0d157629da96 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PICOXCELL) += picoxcell-rng.o
 obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
+obj-$(CONFIG_HW_RANDOM_EXYNOS)	+= exynos-rng.o
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
new file mode 100644
index 0000000000000000000000000000000000000000..232ba9ce579cf69ea71844ec3d1bd199d4459852
--- /dev/null
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -0,0 +1,182 @@
+/*
+ * exynos-rng.c - Random Number Generator driver for the exynos
+ *
+ * Copyright (C) 2012 Samsung Electronics
+ * Jonghwa Lee <jonghwa3.lee@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+
+#define EXYNOS_PRNG_STATUS_OFFSET	0x10
+#define EXYNOS_PRNG_SEED_OFFSET		0x140
+#define EXYNOS_PRNG_OUT1_OFFSET		0x160
+#define SEED_SETTING_DONE		BIT(1)
+#define PRNG_START			0x18
+#define PRNG_DONE			BIT(5)
+#define EXYNOS_AUTOSUSPEND_DELAY	100
+
+struct exynos_rng {
+	struct device *dev;
+	struct hwrng rng;
+	void __iomem *mem;
+	struct clk *clk;
+};
+
+static u32 exynos_rng_readl(struct exynos_rng *rng, u32 offset)
+{
+	return	__raw_readl(rng->mem + offset);
+}
+
+static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset)
+{
+	__raw_writel(val, rng->mem + offset);
+}
+
+static int exynos_init(struct hwrng *rng)
+{
+	struct exynos_rng *exynos_rng = container_of(rng,
+						struct exynos_rng, rng);
+	int i;
+	int ret = 0;
+
+	pm_runtime_get_sync(exynos_rng->dev);
+
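+	/* Seed the PRNG: write the current jiffies value into all five seed registers */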
+	for (i = 0 ; i < 5 ; i++)
+		exynos_rng_writel(exynos_rng, jiffies,
+				EXYNOS_PRNG_SEED_OFFSET + 4*i);
+
+	if (!(exynos_rng_readl(exynos_rng, EXYNOS_PRNG_STATUS_OFFSET)
+						 & SEED_SETTING_DONE))
+		ret = -EIO;
+
+	pm_runtime_put_noidle(exynos_rng->dev);
+
+	return ret;
+}
+
+static int exynos_read(struct hwrng *rng, void *buf,
+					size_t max, bool wait)
+{
+	struct exynos_rng *exynos_rng = container_of(rng,
+						struct exynos_rng, rng);
+	u32 *data = buf;
+
+	pm_runtime_get_sync(exynos_rng->dev);
+
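+	/* Kick off the PRNG and busy-wait until the DONE bit reports a fresh 32-bit word */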
+	exynos_rng_writel(exynos_rng, PRNG_START, 0);
+
+	while (!(exynos_rng_readl(exynos_rng,
+			EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE))
+		cpu_relax();
+
+	exynos_rng_writel(exynos_rng, PRNG_DONE, EXYNOS_PRNG_STATUS_OFFSET);
+
+	*data = exynos_rng_readl(exynos_rng, EXYNOS_PRNG_OUT1_OFFSET);
+
+	pm_runtime_mark_last_busy(exynos_rng->dev);
+	pm_runtime_autosuspend(exynos_rng->dev);
+
+	return 4;
+}
+
+static int __devinit exynos_rng_probe(struct platform_device *pdev)
+{
+	struct exynos_rng *exynos_rng;
+
+	exynos_rng = devm_kzalloc(&pdev->dev, sizeof(struct exynos_rng),
+					GFP_KERNEL);
+	if (!exynos_rng)
+		return -ENOMEM;
+
+	exynos_rng->dev = &pdev->dev;
+	exynos_rng->rng.name = "exynos";
+	exynos_rng->rng.init =	exynos_init;
+	exynos_rng->rng.read = exynos_read;
+	exynos_rng->clk = devm_clk_get(&pdev->dev, "secss");
+	if (IS_ERR(exynos_rng->clk)) {
+		dev_err(&pdev->dev, "Couldn't get clock.\n");
+		return -ENOENT;
+	}
+
+	exynos_rng->mem = devm_request_and_ioremap(&pdev->dev,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0));
+	if (!exynos_rng->mem)
+		return -EBUSY;
+
+	platform_set_drvdata(pdev, exynos_rng);
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, EXYNOS_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	return hwrng_register(&exynos_rng->rng);
+}
+
+static int __devexit exynos_rng_remove(struct platform_device *pdev)
+{
+	struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
+
+	hwrng_unregister(&exynos_rng->rng);
+
+	return 0;
+}
+
+static int exynos_rng_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(exynos_rng->clk);
+
+	return 0;
+}
+
+static int exynos_rng_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
+
+	return clk_prepare_enable(exynos_rng->clk);
+}
+
+static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend,
+					exynos_rng_runtime_resume, NULL);
+
+static struct platform_driver exynos_rng_driver = {
+	.driver		= {
+		.name	= "exynos-rng",
+		.owner	= THIS_MODULE,
+		.pm	= &exynos_rng_pm_ops,
+	},
+	.probe		= exynos_rng_probe,
+	.remove		= __devexit_p(exynos_rng_remove),
+};
+
+module_platform_driver(exynos_rng_driver);
+
+MODULE_DESCRIPTION("EXYNOS 4 H/W Random Number Generator driver");
+MODULE_AUTHOR("Jonghwa Lee <jonghwa3.lee@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
index 187c6be80f43e48195339b3b79504982de2947c5..85074de5042eb58c9f39d3fc59bc676a64b943d9 100644
--- a/drivers/char/hw_random/mxc-rnga.c
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -24,6 +24,7 @@
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 #include <linux/hw_random.h>
+#include <linux/delay.h>
 #include <linux/io.h>
 
 /* RNGA Registers */
@@ -60,16 +61,20 @@
 
 static struct platform_device *rng_dev;
 
-static int mxc_rnga_data_present(struct hwrng *rng)
+static int mxc_rnga_data_present(struct hwrng *rng, int wait)
 {
-	int level;
 	void __iomem *rng_base = (void __iomem *)rng->priv;
-
-	/* how many random numbers is in FIFO? [0-16] */
-	level = ((__raw_readl(rng_base + RNGA_STATUS) &
-			RNGA_STATUS_LEVEL_MASK) >> 8);
-
-	return level > 0 ? 1 : 0;
+	int i;
+
+	for (i = 0; i < 20; i++) {
+		/* how many random numbers are in FIFO? [0-16] */
+		int level = (__raw_readl(rng_base + RNGA_STATUS) &
+				RNGA_STATUS_LEVEL_MASK) >> 8;
+		if (level || !wait)
+			return !!level;
+		udelay(10);
+	}
+	return 0;
 }
 
 static int mxc_rnga_data_read(struct hwrng *rng, u32 * data)
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1092a770482e53ce8597b39c9bba9dfa647db572..7d74d092aa8fe1bac90c6ccd8652099d138b6e2e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -298,7 +298,7 @@ config CRYPTO_DEV_TEGRA_AES
 	  will be called tegra-aes.
 
 config CRYPTO_DEV_NX
-	tristate "Support for Power7+ in-Nest cryptographic accleration"
+	tristate "Support for Power7+ in-Nest cryptographic acceleration"
 	depends on PPC64 && IBMVIO
 	select CRYPTO_AES
 	select CRYPTO_CBC
@@ -325,4 +325,58 @@ if CRYPTO_DEV_UX500
 	source "drivers/crypto/ux500/Kconfig"
 endif # if CRYPTO_DEV_UX500
 
+config CRYPTO_DEV_BFIN_CRC
+	tristate "Support for Blackfin CRC hardware"
+	depends on BF60x
+	help
+	  Newer Blackfin processors have CRC hardware. Select this if you
+	  want to use the Blackfin CRC module.
+
+config CRYPTO_DEV_ATMEL_AES
+	tristate "Support for Atmel AES hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	select AT_HDMAC
+	help
+	  Some Atmel processors have an AES hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  AES algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-aes.
+
+config CRYPTO_DEV_ATMEL_TDES
+	tristate "Support for Atmel DES/TDES hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_DES
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	help
+	  Some Atmel processors have a DES/TDES hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  DES/TDES algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-tdes.
+
+config CRYPTO_DEV_ATMEL_SHA
+	tristate "Support for Atmel SHA1/SHA256 hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_ALGAPI
+	help
+	  Some Atmel processors have a SHA1/SHA256 hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  SHA1/SHA256 algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-sha.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 01390325d72dd06fb1fbb6d362d12df4582229e4..880a47b0b02360d649fca47e8cdb27a1a4b4c87c 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -14,4 +14,9 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
-obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
\ No newline at end of file
+obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
+obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
+obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..2786bb1a5aa0012bb6ec7d2e44b0459481867885
--- /dev/null
+++ b/drivers/crypto/atmel-aes-regs.h
@@ -0,0 +1,62 @@
+#ifndef __ATMEL_AES_REGS_H__
+#define __ATMEL_AES_REGS_H__
+
+#define AES_CR			0x00
+#define AES_CR_START		(1 << 0)
+#define AES_CR_SWRST		(1 << 8)
+#define AES_CR_LOADSEED		(1 << 16)
+
+#define	AES_MR			0x04
+#define AES_MR_CYPHER_DEC		(0 << 0)
+#define AES_MR_CYPHER_ENC		(1 << 0)
+#define	AES_MR_DUALBUFF			(1 << 3)
+#define AES_MR_PROCDLY_MASK		(0xF << 4)
+#define AES_MR_PROCDLY_OFFSET	4
+#define AES_MR_SMOD_MASK		(0x3 << 8)
+#define AES_MR_SMOD_MANUAL		(0x0 << 8)
+#define AES_MR_SMOD_AUTO		(0x1 << 8)
+#define AES_MR_SMOD_IDATAR0		(0x2 << 8)
+#define	AES_MR_KEYSIZE_MASK		(0x3 << 10)
+#define	AES_MR_KEYSIZE_128		(0x0 << 10)
+#define	AES_MR_KEYSIZE_192		(0x1 << 10)
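+/* Completion tasklet: read back the PIO result, or unmap DMA and chain the next transfer */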
+#define	AES_MR_KEYSIZE_256		(0x2 << 10)
+#define AES_MR_OPMOD_MASK		(0x7 << 12)
+#define AES_MR_OPMOD_ECB		(0x0 << 12)
+#define AES_MR_OPMOD_CBC		(0x1 << 12)
+#define AES_MR_OPMOD_OFB		(0x2 << 12)
+#define AES_MR_OPMOD_CFB		(0x3 << 12)
+#define AES_MR_OPMOD_CTR		(0x4 << 12)
+#define AES_MR_LOD				(0x1 << 15)
+#define AES_MR_CFBS_MASK		(0x7 << 16)
+#define AES_MR_CFBS_128b		(0x0 << 16)
+#define AES_MR_CFBS_64b			(0x1 << 16)
+#define AES_MR_CFBS_32b			(0x2 << 16)
+#define AES_MR_CFBS_16b			(0x3 << 16)
+#define AES_MR_CFBS_8b			(0x4 << 16)
+#define AES_MR_CKEY_MASK		(0xF << 20)
+#define AES_MR_CKEY_OFFSET		20
+#define AES_MR_CMTYP_MASK		(0x1F << 24)
+#define AES_MR_CMTYP_OFFSET		24
+
+#define	AES_IER		0x10
+#define	AES_IDR		0x14
+#define	AES_IMR		0x18
+#define	AES_ISR		0x1C
+#define AES_INT_DATARDY		(1 << 0)
+#define AES_INT_URAD		(1 << 8)
+#define AES_ISR_URAT_MASK	(0xF << 12)
+#define AES_ISR_URAT_IDR_WR_PROC	(0x0 << 12)
+#define AES_ISR_URAT_ODR_RD_PROC	(0x1 << 12)
+#define AES_ISR_URAT_MR_WR_PROC		(0x2 << 12)
+#define AES_ISR_URAT_ODR_RD_SUBK	(0x3 << 12)
+#define AES_ISR_URAT_MR_WR_SUBK		(0x4 << 12)
+#define AES_ISR_URAT_WOR_RD			(0x5 << 12)
+
+#define AES_KEYWR(x)	(0x20 + ((x) * 0x04))
+#define AES_IDATAR(x)	(0x40 + ((x) * 0x04))
+#define AES_ODATAR(x)	(0x50 + ((x) * 0x04))
+#define AES_IVR(x)		(0x60 + ((x) * 0x04))
+
+#define AES_HW_VERSION	0xFC
+
+#endif /* __ATMEL_AES_REGS_H__ */
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
new file mode 100644
index 0000000000000000000000000000000000000000..6bb20fffbf496e85fdccf2139f2fbe786032ba79
--- /dev/null
+++ b/drivers/crypto/atmel-aes.c
@@ -0,0 +1,1206 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL AES HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from omap-aes.c driver.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <linux/platform_data/atmel-aes.h>
+#include "atmel-aes-regs.h"
+
+#define CFB8_BLOCK_SIZE		1
+#define CFB16_BLOCK_SIZE	2
+#define CFB32_BLOCK_SIZE	4
+#define CFB64_BLOCK_SIZE	8
+
+/* AES flags */
+#define AES_FLAGS_MODE_MASK	0x01ff
+#define AES_FLAGS_ENCRYPT	BIT(0)
+#define AES_FLAGS_CBC		BIT(1)
+#define AES_FLAGS_CFB		BIT(2)
+#define AES_FLAGS_CFB8		BIT(3)
+#define AES_FLAGS_CFB16		BIT(4)
+#define AES_FLAGS_CFB32		BIT(5)
+#define AES_FLAGS_CFB64		BIT(6)
+#define AES_FLAGS_OFB		BIT(7)
+#define AES_FLAGS_CTR		BIT(8)
+
+#define AES_FLAGS_INIT		BIT(16)
+#define AES_FLAGS_DMA		BIT(17)
+#define AES_FLAGS_BUSY		BIT(18)
+
+#define AES_FLAGS_DUALBUFF	BIT(24)
+
+#define ATMEL_AES_QUEUE_LENGTH	1
+#define ATMEL_AES_CACHE_SIZE	0
+
+#define ATMEL_AES_DMA_THRESHOLD		16
+
+
+struct atmel_aes_dev;
+
+struct atmel_aes_ctx {
+	struct atmel_aes_dev *dd;
+
+	int		keylen;
+	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
+};
+
+struct atmel_aes_reqctx {
+	unsigned long mode;
+};
+
+struct atmel_aes_dma {
+	struct dma_chan			*chan;
+	struct dma_slave_config dma_conf;
+};
+
+struct atmel_aes_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+
+	struct atmel_aes_ctx	*ctx;
+	struct device		*dev;
+	struct clk		*iclk;
+	int	irq;
+
+	unsigned long		flags;
+	int	err;
+
+	spinlock_t		lock;
+	struct crypto_queue	queue;
+
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	queue_task;
+
+	struct ablkcipher_request	*req;
+	size_t	total;
+
+	struct scatterlist	*in_sg;
+	unsigned int		nb_in_sg;
+
+	struct scatterlist	*out_sg;
+	unsigned int		nb_out_sg;
+
+	size_t	bufcnt;
+
+	u8	buf_in[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32));
+	int	dma_in;
+	struct atmel_aes_dma	dma_lch_in;
+
+	u8	buf_out[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32));
+	int	dma_out;
+	struct atmel_aes_dma	dma_lch_out;
+
+	u32	hw_version;
+};
+
+struct atmel_aes_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+};
+
+static struct atmel_aes_drv atmel_aes = {
+	.dev_list = LIST_HEAD_INIT(atmel_aes.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_aes.lock),
+};
+
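+/* Count how many scatterlist entries are needed to cover req->nbytes bytes */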
+static int atmel_aes_sg_length(struct ablkcipher_request *req,
+			struct scatterlist *sg)
+{
+	unsigned int total = req->nbytes;
+	int sg_nb = 0;
+	unsigned int len;
+	struct scatterlist *sg_list = sg;
+
+	while (total) {
+		len = min(sg_list->length, total);
+
+		sg_nb++;
+		total -= len;
+
+		sg_list = sg_next(sg_list);
+		if (!sg_list)
+			total = 0;
+	}
+
+	return sg_nb;
+}
+
+static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset)
+{
+	return readl_relaxed(dd->io_base + offset);
+}
+
+static inline void atmel_aes_write(struct atmel_aes_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static void atmel_aes_read_n(struct atmel_aes_dev *dd, u32 offset,
+					u32 *value, int count)
+{
+	for (; count--; value++, offset += 4)
+		*value = atmel_aes_read(dd, offset);
+}
+
+static void atmel_aes_write_n(struct atmel_aes_dev *dd, u32 offset,
+					u32 *value, int count)
+{
+	for (; count--; value++, offset += 4)
+		atmel_aes_write(dd, offset, *value);
+}
+
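+/* Probe for dual-buffer support: set MR_DUALBUFF and check whether the bit sticks */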
+static void atmel_aes_dualbuff_test(struct atmel_aes_dev *dd)
+{
+	atmel_aes_write(dd, AES_MR, AES_MR_DUALBUFF);
+
+	if (atmel_aes_read(dd, AES_MR) & AES_MR_DUALBUFF)
+		dd->flags |= AES_FLAGS_DUALBUFF;
+}
+
+static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx)
+{
+	struct atmel_aes_dev *aes_dd = NULL;
+	struct atmel_aes_dev *tmp;
+
+	spin_lock_bh(&atmel_aes.lock);
+	if (!ctx->dd) {
+		list_for_each_entry(tmp, &atmel_aes.dev_list, list) {
+			aes_dd = tmp;
+			break;
+		}
+		ctx->dd = aes_dd;
+	} else {
+		aes_dd = ctx->dd;
+	}
+
+	spin_unlock_bh(&atmel_aes.lock);
+
+	return aes_dd;
+}
+
+static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	if (!(dd->flags & AES_FLAGS_INIT)) {
+		atmel_aes_write(dd, AES_CR, AES_CR_SWRST);
+		atmel_aes_dualbuff_test(dd);
+		dd->flags |= AES_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
+{
+	atmel_aes_hw_init(dd);
+
+	dd->hw_version = atmel_aes_read(dd, AES_HW_VERSION);
+
+	clk_disable_unprepare(dd->iclk);
+}
+
+static void atmel_aes_finish_req(struct atmel_aes_dev *dd, int err)
+{
+	struct ablkcipher_request *req = dd->req;
+
+	clk_disable_unprepare(dd->iclk);
+	dd->flags &= ~AES_FLAGS_BUSY;
+
+	req->base.complete(&req->base, err);
+}
+
+static void atmel_aes_dma_callback(void *data)
+{
+	struct atmel_aes_dev *dd = data;
+
+	/* dma_lch_out - completed */
+	tasklet_schedule(&dd->done_task);
+}
+
+static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd)
+{
+	struct dma_async_tx_descriptor	*in_desc, *out_desc;
+	int nb_dma_sg_in, nb_dma_sg_out;
+
+	dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
+	if (!dd->nb_in_sg)
+		goto exit_err;
+
+	nb_dma_sg_in = dma_map_sg(dd->dev, dd->in_sg, dd->nb_in_sg,
+			DMA_TO_DEVICE);
+	if (!nb_dma_sg_in)
+		goto exit_err;
+
+	in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, dd->in_sg,
+				nb_dma_sg_in, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT  |  DMA_CTRL_ACK);
+
+	if (!in_desc)
+		goto unmap_in;
+
+	/* callback not needed */
+
+	dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
+	if (!dd->nb_out_sg)
+		goto unmap_in;
+
+	nb_dma_sg_out = dma_map_sg(dd->dev, dd->out_sg, dd->nb_out_sg,
+			DMA_FROM_DEVICE);
+	if (!nb_dma_sg_out)
+		goto unmap_in;
+
+	out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, dd->out_sg,
+				nb_dma_sg_out, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (!out_desc)
+		goto unmap_out;
+
+	out_desc->callback = atmel_aes_dma_callback;
+	out_desc->callback_param = dd;
+
+	dd->total -= dd->req->nbytes;
+
+	dmaengine_submit(out_desc);
+	dma_async_issue_pending(dd->dma_lch_out.chan);
+
+	dmaengine_submit(in_desc);
+	dma_async_issue_pending(dd->dma_lch_in.chan);
+
+	return 0;
+
+unmap_out:
+	dma_unmap_sg(dd->dev, dd->out_sg, dd->nb_out_sg,
+		DMA_FROM_DEVICE);
+unmap_in:
+	dma_unmap_sg(dd->dev, dd->in_sg, dd->nb_in_sg,
+		DMA_TO_DEVICE);
+exit_err:
+	return -EINVAL;
+}
+
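+/* PIO path for small requests: copy into the bounce buffer and feed IDATAR directly */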
+static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
+{
+	dd->flags &= ~AES_FLAGS_DMA;
+
+	/* use cache buffers */
+	dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
+	if (!dd->nb_in_sg)
+		return -EINVAL;
+
+	dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
+	if (!dd->nb_out_sg)
+		return -EINVAL;
+
+	dd->bufcnt = sg_copy_to_buffer(dd->in_sg, dd->nb_in_sg,
+					dd->buf_in, dd->total);
+
+	if (!dd->bufcnt)
+		return -EINVAL;
+
+	dd->total -= dd->bufcnt;
+
+	atmel_aes_write(dd, AES_IER, AES_INT_DATARDY);
+	atmel_aes_write_n(dd, AES_IDATAR(0), (u32 *) dd->buf_in,
+				dd->bufcnt >> 2);
+
+	return 0;
+}
+
+static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
+{
+	int err;
+
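+	/* Match the DMA bus width to the CFB segment size (1, 2 or 4 bytes) */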
+	if (dd->flags & AES_FLAGS_CFB8) {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_1_BYTE;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_1_BYTE;
+	} else if (dd->flags & AES_FLAGS_CFB16) {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_2_BYTES;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_2_BYTES;
+	} else {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_4_BYTES;
+	}
+
+	dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
+	dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
+
+	dd->flags |= AES_FLAGS_DMA;
+	err = atmel_aes_crypt_dma(dd);
+
+	return err;
+}
+
+static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd)
+{
+	int err;
+	u32 valcr = 0, valmr = 0;
+
+	err = atmel_aes_hw_init(dd);
+
+	if (err)
+		return err;
+
+	/* MR register must be set before IV registers */
+	if (dd->ctx->keylen == AES_KEYSIZE_128)
+		valmr |= AES_MR_KEYSIZE_128;
+	else if (dd->ctx->keylen == AES_KEYSIZE_192)
+		valmr |= AES_MR_KEYSIZE_192;
+	else
+		valmr |= AES_MR_KEYSIZE_256;
+
+	if (dd->flags & AES_FLAGS_CBC) {
+		valmr |= AES_MR_OPMOD_CBC;
+	} else if (dd->flags & AES_FLAGS_CFB) {
+		valmr |= AES_MR_OPMOD_CFB;
+		if (dd->flags & AES_FLAGS_CFB8)
+			valmr |= AES_MR_CFBS_8b;
+		else if (dd->flags & AES_FLAGS_CFB16)
+			valmr |= AES_MR_CFBS_16b;
+		else if (dd->flags & AES_FLAGS_CFB32)
+			valmr |= AES_MR_CFBS_32b;
+		else if (dd->flags & AES_FLAGS_CFB64)
+			valmr |= AES_MR_CFBS_64b;
+	} else if (dd->flags & AES_FLAGS_OFB) {
+		valmr |= AES_MR_OPMOD_OFB;
+	} else if (dd->flags & AES_FLAGS_CTR) {
+		valmr |= AES_MR_OPMOD_CTR;
+	} else {
+		valmr |= AES_MR_OPMOD_ECB;
+	}
+
+	if (dd->flags & AES_FLAGS_ENCRYPT)
+		valmr |= AES_MR_CYPHER_ENC;
+
+	if (dd->total > ATMEL_AES_DMA_THRESHOLD) {
+		valmr |= AES_MR_SMOD_IDATAR0;
+		if (dd->flags & AES_FLAGS_DUALBUFF)
+			valmr |= AES_MR_DUALBUFF;
+	} else {
+		valmr |= AES_MR_SMOD_AUTO;
+	}
+
+	atmel_aes_write(dd, AES_CR, valcr);
+	atmel_aes_write(dd, AES_MR, valmr);
+
+	atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key,
+						dd->ctx->keylen >> 2);
+
+	if (((dd->flags & AES_FLAGS_CBC) || (dd->flags & AES_FLAGS_CFB) ||
+	   (dd->flags & AES_FLAGS_OFB) || (dd->flags & AES_FLAGS_CTR)) &&
+	   dd->req->info) {
+		atmel_aes_write_n(dd, AES_IVR(0), dd->req->info, 4);
+	}
+
+	return 0;
+}
+
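+/* Enqueue the request and, if the engine is idle, pull the next one off the queue and start it */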
+static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
+			       struct ablkcipher_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_aes_ctx *ctx;
+	struct atmel_aes_reqctx *rctx;
+	unsigned long flags;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ablkcipher_enqueue_request(&dd->queue, req);
+	if (dd->flags & AES_FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= AES_FLAGS_BUSY;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ablkcipher_request_cast(async_req);
+
+	/* assign new request to device */
+	dd->req = req;
+	dd->total = req->nbytes;
+	dd->in_sg = req->src;
+	dd->out_sg = req->dst;
+
+	rctx = ablkcipher_request_ctx(req);
+	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx->mode &= AES_FLAGS_MODE_MASK;
+	dd->flags = (dd->flags & ~AES_FLAGS_MODE_MASK) | rctx->mode;
+	dd->ctx = ctx;
+	ctx->dd = dd;
+
+	err = atmel_aes_write_ctrl(dd);
+	if (!err) {
+		if (dd->total > ATMEL_AES_DMA_THRESHOLD)
+			err = atmel_aes_crypt_dma_start(dd);
+		else
+			err = atmel_aes_crypt_cpu_start(dd);
+	}
+	if (err) {
+		/* aes_task will not finish it, so do it here */
+		atmel_aes_finish_req(dd, err);
+		tasklet_schedule(&dd->queue_task);
+	}
+
+	return ret;
+}
+
+static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd)
+{
+	int err = -EINVAL;
+
+	if (dd->flags & AES_FLAGS_DMA) {
+		dma_unmap_sg(dd->dev, dd->out_sg,
+			dd->nb_out_sg, DMA_FROM_DEVICE);
+		dma_unmap_sg(dd->dev, dd->in_sg,
+			dd->nb_in_sg, DMA_TO_DEVICE);
+		err = 0;
+	}
+
+	return err;
+}
+
+static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+	struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct atmel_aes_dev *dd;
+
+	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
+		pr_err("request size is not exact amount of AES blocks\n");
+		return -EINVAL;
+	}
+
+	dd = atmel_aes_find_dev(ctx);
+	if (!dd)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return atmel_aes_handle_queue(dd, req);
+}
+
+static bool atmel_aes_filter(struct dma_chan *chan, void *slave)
+{
+	struct at_dma_slave	*sl = slave;
+
+	if (sl && sl->dma_dev == chan->device->dev) {
+		chan->private = sl;
+		return true;
+	} else {
+		return false;
+	}
+}
+
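+/* Request one RX and one TX DMA channel described by the board's platform data */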
+static int atmel_aes_dma_init(struct atmel_aes_dev *dd)
+{
+	int err = -ENOMEM;
+	struct aes_platform_data	*pdata;
+	dma_cap_mask_t mask_in, mask_out;
+
+	pdata = dd->dev->platform_data;
+
+	if (pdata && pdata->dma_slave->txdata.dma_dev &&
+		pdata->dma_slave->rxdata.dma_dev) {
+
+		/* Try to grab 2 DMA channels */
+		dma_cap_zero(mask_in);
+		dma_cap_set(DMA_SLAVE, mask_in);
+
+		dd->dma_lch_in.chan = dma_request_channel(mask_in,
+				atmel_aes_filter, &pdata->dma_slave->rxdata);
+		if (!dd->dma_lch_in.chan)
+			goto err_dma_in;
+
+		dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
+		dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
+			AES_IDATAR(0);
+		dd->dma_lch_in.dma_conf.src_maxburst = 1;
+		dd->dma_lch_in.dma_conf.dst_maxburst = 1;
+		dd->dma_lch_in.dma_conf.device_fc = false;
+
+		dma_cap_zero(mask_out);
+		dma_cap_set(DMA_SLAVE, mask_out);
+		dd->dma_lch_out.chan = dma_request_channel(mask_out,
+				atmel_aes_filter, &pdata->dma_slave->txdata);
+		if (!dd->dma_lch_out.chan)
+			goto err_dma_out;
+
+		dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM;
+		dd->dma_lch_out.dma_conf.src_addr = dd->phys_base +
+			AES_ODATAR(0);
+		dd->dma_lch_out.dma_conf.src_maxburst = 1;
+		dd->dma_lch_out.dma_conf.dst_maxburst = 1;
+		dd->dma_lch_out.dma_conf.device_fc = false;
+
+		return 0;
+	} else {
+		return -ENODEV;
+	}
+
+err_dma_out:
+	dma_release_channel(dd->dma_lch_in.chan);
+err_dma_in:
+	return err;
+}
+
+static void atmel_aes_dma_cleanup(struct atmel_aes_dev *dd)
+{
+	dma_release_channel(dd->dma_lch_in.chan);
+	dma_release_channel(dd->dma_lch_out.chan);
+}
+
+static int atmel_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+		   keylen != AES_KEYSIZE_256) {
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int atmel_aes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT);
+}
+
+static int atmel_aes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		0);
+}
+
+static int atmel_aes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CBC);
+}
+
+static int atmel_aes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CBC);
+}
+
+static int atmel_aes_ofb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_OFB);
+}
+
+static int atmel_aes_ofb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_OFB);
+}
+
+static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CFB);
+}
+
+static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CFB);
+}
+
+static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB64);
+}
+
+static int atmel_aes_cfb64_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CFB | AES_FLAGS_CFB64);
+}
+
+static int atmel_aes_cfb32_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB32);
+}
+
+static int atmel_aes_cfb32_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CFB | AES_FLAGS_CFB32);
+}
+
+static int atmel_aes_cfb16_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB16);
+}
+
+static int atmel_aes_cfb16_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CFB | AES_FLAGS_CFB16);
+}
+
+static int atmel_aes_cfb8_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT |	AES_FLAGS_CFB | AES_FLAGS_CFB8);
+}
+
+static int atmel_aes_cfb8_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CFB | AES_FLAGS_CFB8);
+}
+
+static int atmel_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_ENCRYPT | AES_FLAGS_CTR);
+}
+
+static int atmel_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req,
+		AES_FLAGS_CTR);
+}
+
+static int atmel_aes_cra_init(struct crypto_tfm *tfm)
+{
+	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+
+	return 0;
+}
+
+static void atmel_aes_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct crypto_alg aes_algs[] = {
+{
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "atmel-ecb-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_ecb_encrypt,
+		.decrypt	= atmel_aes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "atmel-cbc-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cbc_encrypt,
+		.decrypt	= atmel_aes_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ofb(aes)",
+	.cra_driver_name	= "atmel-ofb-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_ofb_encrypt,
+		.decrypt	= atmel_aes_ofb_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb(aes)",
+	.cra_driver_name	= "atmel-cfb-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb_encrypt,
+		.decrypt	= atmel_aes_cfb_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb32(aes)",
+	.cra_driver_name	= "atmel-cfb32-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB32_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb32_encrypt,
+		.decrypt	= atmel_aes_cfb32_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb16(aes)",
+	.cra_driver_name	= "atmel-cfb16-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB16_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb16_encrypt,
+		.decrypt	= atmel_aes_cfb16_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb8(aes)",
+	.cra_driver_name	= "atmel-cfb8-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB8_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb8_encrypt,
+		.decrypt	= atmel_aes_cfb8_decrypt,
+	}
+},
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "atmel-ctr-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_ctr_encrypt,
+		.decrypt	= atmel_aes_ctr_decrypt,
+	}
+},
+};
+
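+/*
+ * CFB64 mode is only implemented from AES IP version 0x130 onwards, so it
+ * is kept in a separate array and registered conditionally in
+ * atmel_aes_register_algs().
+ */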
+static struct crypto_alg aes_cfb64_alg[] = {
+{
+	.cra_name		= "cfb64(aes)",
+	.cra_driver_name	= "atmel-cfb64-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB64_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb64_encrypt,
+		.decrypt	= atmel_aes_cfb64_decrypt,
+	}
+},
+};
+
+static void atmel_aes_queue_task(unsigned long data)
+{
+	struct atmel_aes_dev *dd = (struct atmel_aes_dev *)data;
+
+	atmel_aes_handle_queue(dd, NULL);
+}
+
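+/*
+ * Done tasklet: for CPU transfers, read the output data registers back into
+ * the bounce buffer and copy them to the destination scatterlist; for DMA
+ * transfers, stop the current transfer and, if data remains, start the next
+ * chunk.  The request is completed once everything has been processed.
+ */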
+static void atmel_aes_done_task(unsigned long data)
+{
+	struct atmel_aes_dev *dd = (struct atmel_aes_dev *) data;
+	int err;
+
+	if (!(dd->flags & AES_FLAGS_DMA)) {
+		atmel_aes_read_n(dd, AES_ODATAR(0), (u32 *) dd->buf_out,
+				dd->bufcnt >> 2);
+
+		if (sg_copy_from_buffer(dd->out_sg, dd->nb_out_sg,
+			dd->buf_out, dd->bufcnt))
+			err = 0;
+		else
+			err = -EINVAL;
+
+		goto cpu_end;
+	}
+
+	err = atmel_aes_crypt_dma_stop(dd);
+
+	err = dd->err ? : err;
+
+	if (dd->total && !err) {
+		err = atmel_aes_crypt_dma_start(dd);
+		if (!err)
+			return; /* DMA started. Not finishing. */
+	}
+
+cpu_end:
+	atmel_aes_finish_req(dd, err);
+	atmel_aes_handle_queue(dd, NULL);
+}
+
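+/*
+ * Interrupt handler: mask the pending sources and defer the actual
+ * processing to the done tasklet.
+ */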
+static irqreturn_t atmel_aes_irq(int irq, void *dev_id)
+{
+	struct atmel_aes_dev *aes_dd = dev_id;
+	u32 reg;
+
+	reg = atmel_aes_read(aes_dd, AES_ISR);
+	if (reg & atmel_aes_read(aes_dd, AES_IMR)) {
+		atmel_aes_write(aes_dd, AES_IDR, reg);
+		if (AES_FLAGS_BUSY & aes_dd->flags)
+			tasklet_schedule(&aes_dd->done_task);
+		else
+			dev_warn(aes_dd->dev, "AES interrupt when no active requests.\n");
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
+		crypto_unregister_alg(&aes_algs[i]);
+	if (dd->hw_version >= 0x130)
+		crypto_unregister_alg(&aes_cfb64_alg[0]);
+}
+
+static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		INIT_LIST_HEAD(&aes_algs[i].cra_list);
+		err = crypto_register_alg(&aes_algs[i]);
+		if (err)
+			goto err_aes_algs;
+	}
+
+	atmel_aes_hw_version_init(dd);
+
+	if (dd->hw_version >= 0x130) {
+		INIT_LIST_HEAD(&aes_cfb64_alg[0].cra_list);
+		err = crypto_register_alg(&aes_cfb64_alg[0]);
+		if (err)
+			goto err_aes_cfb64_alg;
+	}
+
+	return 0;
+
+err_aes_cfb64_alg:
+	i = ARRAY_SIZE(aes_algs);
+err_aes_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_alg(&aes_algs[j]);
+
+	return err;
+}
+
+static int __devinit atmel_aes_probe(struct platform_device *pdev)
+{
+	struct atmel_aes_dev *aes_dd;
+	struct aes_platform_data	*pdata;
+	struct device *dev = &pdev->dev;
+	struct resource *aes_res;
+	unsigned long aes_phys_size;
+	int err;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		err = -ENXIO;
+		goto aes_dd_err;
+	}
+
+	aes_dd = kzalloc(sizeof(struct atmel_aes_dev), GFP_KERNEL);
+	if (aes_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto aes_dd_err;
+	}
+
+	aes_dd->dev = dev;
+
+	platform_set_drvdata(pdev, aes_dd);
+
+	INIT_LIST_HEAD(&aes_dd->list);
+
+	tasklet_init(&aes_dd->done_task, atmel_aes_done_task,
+					(unsigned long)aes_dd);
+	tasklet_init(&aes_dd->queue_task, atmel_aes_queue_task,
+					(unsigned long)aes_dd);
+
+	crypto_init_queue(&aes_dd->queue, ATMEL_AES_QUEUE_LENGTH);
+
+	aes_dd->irq = -1;
+
+	/* Get the base address */
+	aes_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!aes_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	aes_dd->phys_base = aes_res->start;
+	aes_phys_size = resource_size(aes_res);
+
+	/* Get the IRQ */
+	aes_dd->irq = platform_get_irq(pdev,  0);
+	if (aes_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = aes_dd->irq;
+		goto aes_irq_err;
+	}
+
+	err = request_irq(aes_dd->irq, atmel_aes_irq, IRQF_SHARED, "atmel-aes",
+						aes_dd);
+	if (err) {
+		dev_err(dev, "unable to request aes irq.\n");
+		goto aes_irq_err;
+	}
+
+	/* Initializing the clock */
+	aes_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(aes_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(aes_dd->iclk);
+		goto clk_err;
+	}
+
+	aes_dd->io_base = ioremap(aes_dd->phys_base, aes_phys_size);
+	if (!aes_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto aes_io_err;
+	}
+
+	err = atmel_aes_dma_init(aes_dd);
+	if (err)
+		goto err_aes_dma;
+
+	spin_lock(&atmel_aes.lock);
+	list_add_tail(&aes_dd->list, &atmel_aes.dev_list);
+	spin_unlock(&atmel_aes.lock);
+
+	err = atmel_aes_register_algs(aes_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel AES\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&atmel_aes.lock);
+	list_del(&aes_dd->list);
+	spin_unlock(&atmel_aes.lock);
+	atmel_aes_dma_cleanup(aes_dd);
+err_aes_dma:
+	iounmap(aes_dd->io_base);
+aes_io_err:
+	clk_put(aes_dd->iclk);
+clk_err:
+	free_irq(aes_dd->irq, aes_dd);
+aes_irq_err:
+res_err:
+	tasklet_kill(&aes_dd->done_task);
+	tasklet_kill(&aes_dd->queue_task);
+	kfree(aes_dd);
+	aes_dd = NULL;
+aes_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int __devexit atmel_aes_remove(struct platform_device *pdev)
+{
+	struct atmel_aes_dev *aes_dd;
+
+	aes_dd = platform_get_drvdata(pdev);
+	if (!aes_dd)
+		return -ENODEV;
+	spin_lock(&atmel_aes.lock);
+	list_del(&aes_dd->list);
+	spin_unlock(&atmel_aes.lock);
+
+	atmel_aes_unregister_algs(aes_dd);
+
+	tasklet_kill(&aes_dd->done_task);
+	tasklet_kill(&aes_dd->queue_task);
+
+	atmel_aes_dma_cleanup(aes_dd);
+
+	iounmap(aes_dd->io_base);
+
+	clk_put(aes_dd->iclk);
+
+	if (aes_dd->irq > 0)
+		free_irq(aes_dd->irq, aes_dd);
+
+	kfree(aes_dd);
+	aes_dd = NULL;
+
+	return 0;
+}
+
+static struct platform_driver atmel_aes_driver = {
+	.probe		= atmel_aes_probe,
+	.remove		= __devexit_p(atmel_aes_remove),
+	.driver		= {
+		.name	= "atmel_aes",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(atmel_aes_driver);
+
+MODULE_DESCRIPTION("Atmel AES hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc53a20d7da1869858fdc8e4edd2aec97c0e3d96
--- /dev/null
+++ b/drivers/crypto/atmel-sha-regs.h
@@ -0,0 +1,46 @@
+#ifndef __ATMEL_SHA_REGS_H__
+#define __ATMEL_SHA_REGS_H__
+
+#define SHA_REG_DIGEST(x)		(0x80 + ((x) * 0x04))
+#define SHA_REG_DIN(x)			(0x40 + ((x) * 0x04))
+
+#define SHA_CR				0x00
+#define SHA_CR_START			(1 << 0)
+#define SHA_CR_FIRST			(1 << 4)
+#define SHA_CR_SWRST			(1 << 8)
+
+#define SHA_MR				0x04
+#define SHA_MR_MODE_MASK		(0x3 << 0)
+#define SHA_MR_MODE_MANUAL		0x0
+#define SHA_MR_MODE_AUTO		0x1
+#define SHA_MR_MODE_PDC			0x2
+#define	SHA_MR_DUALBUFF			(1 << 3)
+#define SHA_MR_PROCDLY			(1 << 4)
+#define SHA_MR_ALGO_SHA1		(0 << 8)
+#define SHA_MR_ALGO_SHA256		(1 << 8)
+
+#define SHA_IER				0x10
+#define SHA_IDR				0x14
+#define SHA_IMR				0x18
+#define SHA_ISR				0x1C
+#define SHA_INT_DATARDY			(1 << 0)
+#define SHA_INT_ENDTX			(1 << 1)
+#define SHA_INT_TXBUFE			(1 << 2)
+#define SHA_INT_URAD			(1 << 8)
+#define SHA_ISR_URAT_MASK		(0x7 << 12)
+#define SHA_ISR_URAT_IDR		(0x0 << 12)
+#define SHA_ISR_URAT_ODR		(0x1 << 12)
+#define SHA_ISR_URAT_MR			(0x2 << 12)
+#define SHA_ISR_URAT_WO			(0x5 << 12)
+
+#define SHA_TPR				0x108
+#define SHA_TCR				0x10C
+#define SHA_TNPR			0x118
+#define SHA_TNCR			0x11C
+#define SHA_PTCR			0x120
+#define SHA_PTCR_TXTEN		(1 << 8)
+#define SHA_PTCR_TXTDIS		(1 << 9)
+#define SHA_PTSR			0x124
+#define SHA_PTSR_TXTEN		(1 << 8)
+
+#endif /* __ATMEL_SHA_REGS_H__ */
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
new file mode 100644
index 0000000000000000000000000000000000000000..f938b9d79b662357a3df324ee325bcd124189906
--- /dev/null
+++ b/drivers/crypto/atmel-sha.c
@@ -0,0 +1,1112 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL SHA1/SHA256 HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from the omap-sham.c driver.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include "atmel-sha-regs.h"
+
+/* SHA flags */
+#define SHA_FLAGS_BUSY			BIT(0)
+#define	SHA_FLAGS_FINAL			BIT(1)
+#define SHA_FLAGS_DMA_ACTIVE	BIT(2)
+#define SHA_FLAGS_OUTPUT_READY	BIT(3)
+#define SHA_FLAGS_INIT			BIT(4)
+#define SHA_FLAGS_CPU			BIT(5)
+#define SHA_FLAGS_DMA_READY		BIT(6)
+
+#define SHA_FLAGS_FINUP		BIT(16)
+#define SHA_FLAGS_SG		BIT(17)
+#define SHA_FLAGS_SHA1		BIT(18)
+#define SHA_FLAGS_SHA256	BIT(19)
+#define SHA_FLAGS_ERROR		BIT(20)
+#define SHA_FLAGS_PAD		BIT(21)
+
+#define SHA_FLAGS_DUALBUFF	BIT(24)
+
+#define SHA_OP_UPDATE	1
+#define SHA_OP_FINAL	2
+
+#define SHA_BUFFER_LEN		PAGE_SIZE
+
+#define ATMEL_SHA_DMA_THRESHOLD		56
+
+
+struct atmel_sha_dev;
+
+struct atmel_sha_reqctx {
+	struct atmel_sha_dev	*dd;
+	unsigned long	flags;
+	unsigned long	op;
+
+	u8	digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	size_t	digcnt;
+	size_t	bufcnt;
+	size_t	buflen;
+	dma_addr_t	dma_addr;
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int	offset;	/* offset in current sg */
+	unsigned int	total;	/* total request */
+
+	u8	buffer[0] __aligned(sizeof(u32));
+};
+
+struct atmel_sha_ctx {
+	struct atmel_sha_dev	*dd;
+
+	unsigned long		flags;
+
+	/* fallback stuff */
+	struct crypto_shash	*fallback;
+
+};
+
+#define ATMEL_SHA_QUEUE_LENGTH	1
+
+struct atmel_sha_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	struct device		*dev;
+	struct clk			*iclk;
+	int					irq;
+	void __iomem		*io_base;
+
+	spinlock_t		lock;
+	int			err;
+	struct tasklet_struct	done_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+};
+
+struct atmel_sha_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+};
+
+static struct atmel_sha_drv atmel_sha = {
+	.dev_list = LIST_HEAD_INIT(atmel_sha.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock),
+};
+
+static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset)
+{
+	return readl_relaxed(dd->io_base + offset);
+}
+
+static inline void atmel_sha_write(struct atmel_sha_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
+{
+	atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
+
+	if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF)
+		dd->flags |= SHA_FLAGS_DUALBUFF;
+}
+
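+/*
+ * Copy pending request data from the scatterlist walk into the context
+ * buffer, up to ctx->buflen bytes, updating sg/offset/total on the way.
+ */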
+static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (!count)
+			break;
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * The purpose of this padding is to ensure that the padded message is a
+ * multiple of 512 bits.  The bit "1" is appended at the end of the message,
+ * followed by "padlen-1" zero bits.  Then a 64-bit block equal to the
+ * message length in bits is appended.
+ *
+ * padlen is calculated as follows:
+ *  - if message length < 56 bytes then padlen = 56 - message length
+ *  - else padlen = 64 + 56 - message length
+ *
+ * For example, a 5-byte message gives index = 5 and padlen = 51, so the
+ * padded message is 5 + 51 + 8 = 64 bytes (512 bits).
+ */
+static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
+{
+	unsigned int index, padlen;
+	u64 bits;
+	u64 size;
+
+	bits = (ctx->bufcnt + ctx->digcnt + length) << 3;
+	size = cpu_to_be64(bits);
+
+	index = ctx->bufcnt & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	*(ctx->buffer + ctx->bufcnt) = 0x80;
+	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
+	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8);
+	ctx->bufcnt += padlen + 8;
+	ctx->flags |= SHA_FLAGS_PAD;
+}
+
+static int atmel_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = NULL;
+	struct atmel_sha_dev *tmp;
+
+	spin_lock_bh(&atmel_sha.lock);
+	if (!tctx->dd) {
+		list_for_each_entry(tmp, &atmel_sha.dev_list, list) {
+			dd = tmp;
+			break;
+		}
+		tctx->dd = dd;
+	} else {
+		dd = tctx->dd;
+	}
+
+	spin_unlock_bh(&atmel_sha.lock);
+
+	ctx->dd = dd;
+
+	ctx->flags = 0;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA1;
+	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA256;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+	ctx->buflen = SHA_BUFFER_LEN;
+
+	return 0;
+}
+
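+/*
+ * Configure the mode register: PDC mode driven by the TXBUFE interrupt when
+ * DMA is used, automatic mode with the DATARDY interrupt otherwise.  Also
+ * select SHA1/SHA256 and flag the first block of a message with
+ * SHA_CR_FIRST.
+ */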
+static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
+
+	if (likely(dma)) {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
+		valmr = SHA_MR_MODE_PDC;
+		if (dd->flags & SHA_FLAGS_DUALBUFF)
+			valmr |= SHA_MR_DUALBUFF;
+	} else {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
+	}
+
+	if (ctx->flags & SHA_FLAGS_SHA256)
+		valmr |= SHA_MR_ALGO_SHA256;
+
+	/* Setting CR_FIRST only for the first iteration */
+	if (!ctx->digcnt)
+		valcr = SHA_CR_FIRST;
+
+	atmel_sha_write(dd, SHA_CR, valcr);
+	atmel_sha_write(dd, SHA_MR, valmr);
+}
+
+static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	atmel_sha_write_ctrl(dd, 0);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	dd->flags |= SHA_FLAGS_CPU;
+
+	for (count = 0; count < len32; count++)
+		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
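+/*
+ * Program the PDC with the current buffer (TPR/TCR) and an optional next
+ * buffer (TNPR/TNCR), then enable the transmit channel; completion is
+ * signalled by the TXBUFE interrupt.
+ */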
+static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
+		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int len32;
+
+	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length1, final);
+
+	len32 = DIV_ROUND_UP(length1, sizeof(u32));
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
+	atmel_sha_write(dd, SHA_TPR, dma_addr1);
+	atmel_sha_write(dd, SHA_TCR, len32);
+
+	len32 = DIV_ROUND_UP(length2, sizeof(u32));
+	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
+	atmel_sha_write(dd, SHA_TNCR, len32);
+
+	atmel_sha_write_ctrl(dd, 1);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length1;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |=  SHA_FLAGS_DMA_ACTIVE;
+
+	/* Start DMA transfer */
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int bufcnt;
+
+	atmel_sha_append_sg(ctx);
+	atmel_sha_fill_padding(ctx, 0);
+
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return atmel_sha_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
+					struct atmel_sha_reqctx *ctx,
+					size_t length, int final)
+{
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
+				SHA1_BLOCK_SIZE);
+		return -EINVAL;
+	}
+
+	ctx->flags &= ~SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final);
+}
+
+static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	atmel_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final)
+		atmel_sha_fill_padding(ctx, 0);
+
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return atmel_sha_xmit_dma_map(dd, ctx, count, final);
+	}
+
+	return 0;
+}
+
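+/*
+ * DMA fast path: whole word-aligned scatterlist entries are fed directly to
+ * the PDC; unaligned or partial data falls back to
+ * atmel_sha_update_dma_slow(), which bounces it through the context buffer.
+ */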
+static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length, final, tail;
+	struct scatterlist *sg;
+	unsigned int count;
+
+	if (!ctx->total)
+		return 0;
+
+	if (ctx->bufcnt || ctx->offset)
+		return atmel_sha_update_dma_slow(dd);
+
+	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+			ctx->digcnt, ctx->bufcnt, ctx->total);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return atmel_sha_update_dma_slow(dd);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
+		/* size is not SHA1_BLOCK_SIZE aligned */
+		return atmel_sha_update_dma_slow(dd);
+
+	length = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be SHA1_BLOCK_SIZE aligned */
+			tail = length & (SHA1_BLOCK_SIZE - 1);
+			length -= tail;
+			if (length == 0) {
+				/* offset where to start slow */
+				ctx->offset = length;
+				return atmel_sha_update_dma_slow(dd);
+			}
+		}
+	}
+
+	ctx->total -= length;
+	ctx->offset = length; /* offset where to start slow */
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* Add padding */
+	if (final) {
+		tail = length & (SHA1_BLOCK_SIZE - 1);
+		length -= tail;
+		ctx->total += tail;
+		ctx->offset = length; /* offset where to start slow */
+
+		sg = ctx->sg;
+		atmel_sha_append_sg(ctx);
+
+		atmel_sha_fill_padding(ctx, length);
+
+		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+			dev_err(dd->dev, "dma %u bytes error\n",
+				ctx->buflen + SHA1_BLOCK_SIZE);
+			return -EINVAL;
+		}
+
+		if (length == 0) {
+			ctx->flags &= ~SHA_FLAGS_SG;
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
+					0, final);
+		} else {
+			ctx->sg = sg;
+			if (!dma_map_sg(dd->dev, ctx->sg, 1,
+				DMA_TO_DEVICE)) {
+					dev_err(dd->dev, "dma_map_sg  error\n");
+					return -EINVAL;
+			}
+
+			ctx->flags |= SHA_FLAGS_SG;
+
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
+					length, ctx->dma_addr, count, final);
+		}
+	}
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg  error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags |= SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
+								0, final);
+}
+
+static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (ctx->flags & SHA_FLAGS_SG) {
+		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+		if (ctx->sg->length == ctx->offset) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+		}
+		if (ctx->flags & SHA_FLAGS_PAD)
+			dma_unmap_single(dd->dev, ctx->dma_addr,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	} else {
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
+						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
+
+	if (ctx->flags & SHA_FLAGS_CPU)
+		err = atmel_sha_update_cpu(dd);
+	else
+		err = atmel_sha_update_dma_start(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
+			err, ctx->digcnt);
+
+	return err;
+}
+
+static int atmel_sha_final_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+	int count;
+
+	if (ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD) {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
+	} else {
+		/* it is faster to handle the last block with the cpu */
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
+	}
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
+static void atmel_sha_copy_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)ctx->digest;
+	int i;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+	else
+		for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+}
+
+static void atmel_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+	else
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+}
+
+static int atmel_sha_finish(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+	int err = 0;
+
+	if (ctx->digcnt)
+		atmel_sha_copy_ready_hash(req);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
+		ctx->bufcnt);
+
+	return err;
+}
+
+static void atmel_sha_finish_req(struct ahash_request *req, int err)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+
+	if (!err) {
+		atmel_sha_copy_hash(req);
+		if (SHA_FLAGS_FINAL & dd->flags)
+			err = atmel_sha_finish(req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+
+	/* atomic operation is not needed here */
+	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
+			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
+
+	clk_disable_unprepare(dd->iclk);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->done_task);
+}
+
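+/* Enable the clock and reset the SHA block the first time it is used. */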
+static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	if (!(SHA_FLAGS_INIT & dd->flags)) {
+		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
+		atmel_sha_dualbuff_test(dd);
+		dd->flags |= SHA_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_sha_reqctx *ctx;
+	unsigned long flags;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	dd->req = req;
+	ctx = ahash_request_ctx(req);
+
+	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+						ctx->op, req->nbytes);
+
+	err = atmel_sha_hw_init(dd);
+
+	if (err)
+		goto err1;
+
+	if (ctx->op == SHA_OP_UPDATE) {
+		err = atmel_sha_update_req(dd);
+		if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) {
+			/* no final() after finup() */
+			err = atmel_sha_final_req(dd);
+		}
+	} else if (ctx->op == SHA_OP_FINAL) {
+		err = atmel_sha_final_req(dd);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	return ret;
+}
+
+static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_dev *dd = tctx->dd;
+
+	ctx->op = op;
+
+	return atmel_sha_handle_queue(dd, req);
+}
+
+static int atmel_sha_update(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->nbytes)
+		return 0;
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	if (ctx->flags & SHA_FLAGS_FINUP) {
+		if (ctx->bufcnt + ctx->total < ATMEL_SHA_DMA_THRESHOLD)
+			/* faster to use CPU for short transfers */
+			ctx->flags |= SHA_FLAGS_CPU;
+	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
+		atmel_sha_append_sg(ctx);
+		return 0;
+	}
+	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
+}
+
+static int atmel_sha_final(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_dev *dd = tctx->dd;
+
+	int err = 0;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	if (ctx->flags & SHA_FLAGS_ERROR)
+		return 0; /* uncompleted hash is not needed */
+
+	if (ctx->bufcnt) {
+		return atmel_sha_enqueue(req, SHA_OP_FINAL);
+	} else if (!(ctx->flags & SHA_FLAGS_PAD)) { /* add padding */
+		err = atmel_sha_hw_init(dd);
+		if (err)
+			goto err1;
+
+		dd->flags |= SHA_FLAGS_BUSY;
+		err = atmel_sha_final_req(dd);
+	} else {
+		/* copy ready hash (+ finalize hmac) */
+		return atmel_sha_finish(req);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	return err;
+}
+
+static int atmel_sha_finup(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = atmel_sha_update(req);
+	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+		return err1;
+
+	/*
+	 * final() has to be always called to cleanup resources
+	 * even if update() failed, except EINPROGRESS
+	 */
+	err2 = atmel_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int atmel_sha_digest(struct ahash_request *req)
+{
+	return atmel_sha_init(req) ?: atmel_sha_finup(req);
+}
+
+static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+
+	/* Allocate a fallback and abort if it failed. */
+	tctx->fallback = crypto_alloc_shash(alg_name, 0,
+					    CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(tctx->fallback)) {
+		pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n",
+				alg_name);
+		return PTR_ERR(tctx->fallback);
+	}
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct atmel_sha_reqctx) +
+				 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int atmel_sha_cra_init(struct crypto_tfm *tfm)
+{
+	return atmel_sha_cra_init_alg(tfm, NULL);
+}
+
+static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(tctx->fallback);
+	tctx->fallback = NULL;
+}
+
+static struct ahash_alg sha_algs[] = {
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA1_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha1",
+			.cra_driver_name	= "atmel-sha1",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA1_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA256_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha256",
+			.cra_driver_name	= "atmel-sha256",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA256_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+};
+
+static void atmel_sha_done_task(unsigned long data)
+{
+	struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
+	int err = 0;
+
+	if (!(SHA_FLAGS_BUSY & dd->flags)) {
+		atmel_sha_handle_queue(dd, NULL);
+		return;
+	}
+
+	if (SHA_FLAGS_CPU & dd->flags) {
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (SHA_FLAGS_DMA_READY & dd->flags) {
+		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
+			atmel_sha_update_dma_stop(dd);
+			if (dd->err) {
+				err = dd->err;
+				goto finish;
+			}
+		}
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			/* hash or semi-hash ready */
+			dd->flags &= ~(SHA_FLAGS_DMA_READY |
+						SHA_FLAGS_OUTPUT_READY);
+			err = atmel_sha_update_dma_start(dd);
+			if (err != -EINPROGRESS)
+				goto finish;
+		}
+	}
+	return;
+
+finish:
+	/* finish current request */
+	atmel_sha_finish_req(dd->req, err);
+}
+
+static irqreturn_t atmel_sha_irq(int irq, void *dev_id)
+{
+	struct atmel_sha_dev *sha_dd = dev_id;
+	u32 reg;
+
+	reg = atmel_sha_read(sha_dd, SHA_ISR);
+	if (reg & atmel_sha_read(sha_dd, SHA_IMR)) {
+		atmel_sha_write(sha_dd, SHA_IDR, reg);
+		if (SHA_FLAGS_BUSY & sha_dd->flags) {
+			sha_dd->flags |= SHA_FLAGS_OUTPUT_READY;
+			if (!(SHA_FLAGS_CPU & sha_dd->flags))
+				sha_dd->flags |= SHA_FLAGS_DMA_READY;
+			tasklet_schedule(&sha_dd->done_task);
+		} else {
+			dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n");
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+}
+
+static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+
+static int __devinit atmel_sha_probe(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *sha_res;
+	unsigned long sha_phys_size;
+	int err;
+
+	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
+	if (sha_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto sha_dd_err;
+	}
+
+	sha_dd->dev = dev;
+
+	platform_set_drvdata(pdev, sha_dd);
+
+	INIT_LIST_HEAD(&sha_dd->list);
+
+	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
+					(unsigned long)sha_dd);
+
+	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
+
+	sha_dd->irq = -1;
+
+	/* Get the base address */
+	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!sha_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	sha_dd->phys_base = sha_res->start;
+	sha_phys_size = resource_size(sha_res);
+
+	/* Get the IRQ */
+	sha_dd->irq = platform_get_irq(pdev,  0);
+	if (sha_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = sha_dd->irq;
+		goto res_err;
+	}
+
+	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
+						sha_dd);
+	if (err) {
+		dev_err(dev, "unable to request sha irq.\n");
+		goto res_err;
+	}
+
+	/* Initializing the clock */
+	sha_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sha_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(sha_dd->iclk);
+		goto clk_err;
+	}
+
+	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	if (!sha_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto sha_io_err;
+	}
+
+	spin_lock(&atmel_sha.lock);
+	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
+	spin_unlock(&atmel_sha.lock);
+
+	err = atmel_sha_register_algs(sha_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel SHA1/SHA256\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+	iounmap(sha_dd->io_base);
+sha_io_err:
+	clk_put(sha_dd->iclk);
+clk_err:
+	free_irq(sha_dd->irq, sha_dd);
+res_err:
+	tasklet_kill(&sha_dd->done_task);
+	kfree(sha_dd);
+	sha_dd = NULL;
+sha_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int __devexit atmel_sha_remove(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd;
+
+	sha_dd = platform_get_drvdata(pdev);
+	if (!sha_dd)
+		return -ENODEV;
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+
+	atmel_sha_unregister_algs(sha_dd);
+
+	tasklet_kill(&sha_dd->done_task);
+
+	iounmap(sha_dd->io_base);
+
+	clk_put(sha_dd->iclk);
+
+	if (sha_dd->irq >= 0)
+		free_irq(sha_dd->irq, sha_dd);
+
+	kfree(sha_dd);
+	sha_dd = NULL;
+
+	return 0;
+}
+
+static struct platform_driver atmel_sha_driver = {
+	.probe		= atmel_sha_probe,
+	.remove		= __devexit_p(atmel_sha_remove),
+	.driver		= {
+		.name	= "atmel_sha",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(atmel_sha_driver);
+
+MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
diff --git a/drivers/crypto/atmel-tdes-regs.h b/drivers/crypto/atmel-tdes-regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ac2a900d80c3ac54e3146b2b464968fb889ebb4
--- /dev/null
+++ b/drivers/crypto/atmel-tdes-regs.h
@@ -0,0 +1,89 @@
+#ifndef __ATMEL_TDES_REGS_H__
+#define __ATMEL_TDES_REGS_H__
+
+#define TDES_CR			0x00
+#define TDES_CR_START			(1 << 0)
+#define TDES_CR_SWRST			(1 << 8)
+#define TDES_CR_LOADSEED		(1 << 16)
+
+#define	TDES_MR			0x04
+#define TDES_MR_CYPHER_DEC		(0 << 0)
+#define TDES_MR_CYPHER_ENC		(1 << 0)
+#define TDES_MR_TDESMOD_MASK	(0x3 << 1)
+#define TDES_MR_TDESMOD_DES		(0x0 << 1)
+#define TDES_MR_TDESMOD_TDES	(0x1 << 1)
+#define TDES_MR_TDESMOD_XTEA	(0x2 << 1)
+#define TDES_MR_KEYMOD_3KEY		(0 << 4)
+#define TDES_MR_KEYMOD_2KEY		(1 << 4)
+#define TDES_MR_SMOD_MASK		(0x3 << 8)
+#define TDES_MR_SMOD_MANUAL		(0x0 << 8)
+#define TDES_MR_SMOD_AUTO		(0x1 << 8)
+#define TDES_MR_SMOD_PDC		(0x2 << 8)
+#define TDES_MR_OPMOD_MASK		(0x3 << 12)
+#define TDES_MR_OPMOD_ECB		(0x0 << 12)
+#define TDES_MR_OPMOD_CBC		(0x1 << 12)
+#define TDES_MR_OPMOD_OFB		(0x2 << 12)
+#define TDES_MR_OPMOD_CFB		(0x3 << 12)
+#define TDES_MR_LOD				(0x1 << 15)
+#define TDES_MR_CFBS_MASK		(0x3 << 16)
+#define TDES_MR_CFBS_64b		(0x0 << 16)
+#define TDES_MR_CFBS_32b		(0x1 << 16)
+#define TDES_MR_CFBS_16b		(0x2 << 16)
+#define TDES_MR_CFBS_8b			(0x3 << 16)
+#define TDES_MR_CKEY_MASK		(0xF << 20)
+#define TDES_MR_CKEY_OFFSET		20
+#define TDES_MR_CTYPE_MASK		(0x3F << 24)
+#define TDES_MR_CTYPE_OFFSET	24
+
+#define	TDES_IER		0x10
+#define	TDES_IDR		0x14
+#define	TDES_IMR		0x18
+#define	TDES_ISR		0x1C
+#define TDES_INT_DATARDY		(1 << 0)
+#define TDES_INT_ENDRX			(1 << 1)
+#define TDES_INT_ENDTX			(1 << 2)
+#define TDES_INT_RXBUFF			(1 << 3)
+#define TDES_INT_TXBUFE			(1 << 4)
+#define TDES_INT_URAD			(1 << 8)
+#define TDES_ISR_URAT_MASK		(0x3 << 12)
+#define TDES_ISR_URAT_IDR		(0x0 << 12)
+#define TDES_ISR_URAT_ODR		(0x1 << 12)
+#define TDES_ISR_URAT_MR		(0x2 << 12)
+#define TDES_ISR_URAT_WO		(0x3 << 12)
+
+
+#define	TDES_KEY1W1R	0x20
+#define	TDES_KEY1W2R	0x24
+#define	TDES_KEY2W1R	0x28
+#define	TDES_KEY2W2R	0x2C
+#define	TDES_KEY3W1R	0x30
+#define	TDES_KEY3W2R	0x34
+#define	TDES_IDATA1R	0x40
+#define	TDES_IDATA2R	0x44
+#define	TDES_ODATA1R	0x50
+#define	TDES_ODATA2R	0x54
+#define	TDES_IV1R		0x60
+#define	TDES_IV2R		0x64
+
+#define	TDES_XTEARNDR	0x70
+#define	TDES_XTEARNDR_XTEA_RNDS_MASK	(0x3F << 0)
+#define	TDES_XTEARNDR_XTEA_RNDS_OFFSET	0
+
+#define TDES_RPR		0x100
+#define TDES_RCR		0x104
+#define TDES_TPR		0x108
+#define TDES_TCR		0x10C
+#define TDES_RNPR		0x110
+#define TDES_RNCR		0x114
+#define TDES_TNPR		0x118
+#define TDES_TNCR		0x11C
+#define TDES_PTCR		0x120
+#define TDES_PTCR_RXTEN			(1 << 0)
+#define TDES_PTCR_RXTDIS		(1 << 1)
+#define TDES_PTCR_TXTEN			(1 << 8)
+#define TDES_PTCR_TXTDIS		(1 << 9)
+#define TDES_PTSR		0x124
+#define TDES_PTSR_RXTEN			(1 << 0)
+#define TDES_PTSR_TXTEN			(1 << 8)
+
+#endif /* __ATMEL_TDES_REGS_H__ */
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
new file mode 100644
index 0000000000000000000000000000000000000000..eb2b61e57e2de394ad211dd661f405aac8be2ef3
--- /dev/null
+++ b/drivers/crypto/atmel-tdes.c
@@ -0,0 +1,1215 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL DES/TDES HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from the omap-aes.c driver.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include "atmel-tdes-regs.h"
+
+/* TDES flags  */
+#define TDES_FLAGS_MODE_MASK		0x007f
+#define TDES_FLAGS_ENCRYPT	BIT(0)
+#define TDES_FLAGS_CBC		BIT(1)
+#define TDES_FLAGS_CFB		BIT(2)
+#define TDES_FLAGS_CFB8		BIT(3)
+#define TDES_FLAGS_CFB16	BIT(4)
+#define TDES_FLAGS_CFB32	BIT(5)
+#define TDES_FLAGS_OFB		BIT(6)
+
+#define TDES_FLAGS_INIT		BIT(16)
+#define TDES_FLAGS_FAST		BIT(17)
+#define TDES_FLAGS_BUSY		BIT(18)
+
+#define ATMEL_TDES_QUEUE_LENGTH	1
+
+#define CFB8_BLOCK_SIZE		1
+#define CFB16_BLOCK_SIZE	2
+#define CFB32_BLOCK_SIZE	4
+#define CFB64_BLOCK_SIZE	8
+
+
+struct atmel_tdes_dev;
+
+struct atmel_tdes_ctx {
+	struct atmel_tdes_dev *dd;
+
+	int		keylen;
+	u32		key[3*DES_KEY_SIZE / sizeof(u32)];
+	unsigned long	flags;
+};
+
+struct atmel_tdes_reqctx {
+	unsigned long mode;
+};
+
+struct atmel_tdes_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+
+	struct atmel_tdes_ctx	*ctx;
+	struct device		*dev;
+	struct clk			*iclk;
+	int					irq;
+
+	unsigned long		flags;
+	int			err;
+
+	spinlock_t		lock;
+	struct crypto_queue	queue;
+
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	queue_task;
+
+	struct ablkcipher_request	*req;
+	size_t				total;
+
+	struct scatterlist	*in_sg;
+	size_t				in_offset;
+	struct scatterlist	*out_sg;
+	size_t				out_offset;
+
+	size_t	buflen;
+	size_t	dma_size;
+
+	void	*buf_in;
+	int		dma_in;
+	dma_addr_t	dma_addr_in;
+
+	void	*buf_out;
+	int		dma_out;
+	dma_addr_t	dma_addr_out;
+};
+
+struct atmel_tdes_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+};
+
+static struct atmel_tdes_drv atmel_tdes = {
+	.dev_list = LIST_HEAD_INIT(atmel_tdes.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_tdes.lock),
+};
+
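+/*
+ * Copy up to buflen bytes between a scatterlist and a linear buffer;
+ * 'out' selects the direction (non-zero copies from the buffer into the
+ * scatterlist).  Returns the number of bytes actually copied.
+ */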
+static int atmel_tdes_sg_copy(struct scatterlist **sg, size_t *offset,
+			void *buf, size_t buflen, size_t total, int out)
+{
+	unsigned int count, off = 0;
+
+	while (buflen && total) {
+		count = min((*sg)->length - *offset, total);
+		count = min(count, buflen);
+
+		if (!count)
+			return off;
+
+		scatterwalk_map_and_copy(buf + off, *sg, *offset, count, out);
+
+		off += count;
+		buflen -= count;
+		*offset += count;
+		total -= count;
+
+		if (*offset == (*sg)->length) {
+			*sg = sg_next(*sg);
+			if (*sg)
+				*offset = 0;
+			else
+				total = 0;
+		}
+	}
+
+	return off;
+}
+
+static inline u32 atmel_tdes_read(struct atmel_tdes_dev *dd, u32 offset)
+{
+	return readl_relaxed(dd->io_base + offset);
+}
+
+static inline void atmel_tdes_write(struct atmel_tdes_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
+					u32 *value, int count)
+{
+	for (; count--; value++, offset += 4)
+		atmel_tdes_write(dd, offset, *value);
+}
+
+static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
+{
+	struct atmel_tdes_dev *tdes_dd = NULL;
+	struct atmel_tdes_dev *tmp;
+
+	spin_lock_bh(&atmel_tdes.lock);
+	if (!ctx->dd) {
+		list_for_each_entry(tmp, &atmel_tdes.dev_list, list) {
+			tdes_dd = tmp;
+			break;
+		}
+		ctx->dd = tdes_dd;
+	} else {
+		tdes_dd = ctx->dd;
+	}
+	spin_unlock_bh(&atmel_tdes.lock);
+
+	return tdes_dd;
+}
+
+static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	if (!(dd->flags & TDES_FLAGS_INIT)) {
+		atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST);
+		dd->flags |= TDES_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
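+/*
+ * Program the mode register from the key length (DES vs. 2/3-key TDES) and
+ * the requested chaining mode, then load the key and, when the mode needs
+ * one, the IV.
+ */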
+static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
+{
+	int err;
+	u32 valcr = 0, valmr = TDES_MR_SMOD_PDC;
+
+	err = atmel_tdes_hw_init(dd);
+
+	if (err)
+		return err;
+
+	atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS);
+
+	/* MR register must be set before IV registers */
+	if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) {
+		valmr |= TDES_MR_KEYMOD_3KEY;
+		valmr |= TDES_MR_TDESMOD_TDES;
+	} else if (dd->ctx->keylen > DES_KEY_SIZE) {
+		valmr |= TDES_MR_KEYMOD_2KEY;
+		valmr |= TDES_MR_TDESMOD_TDES;
+	} else {
+		valmr |= TDES_MR_TDESMOD_DES;
+	}
+
+	if (dd->flags & TDES_FLAGS_CBC) {
+		valmr |= TDES_MR_OPMOD_CBC;
+	} else if (dd->flags & TDES_FLAGS_CFB) {
+		valmr |= TDES_MR_OPMOD_CFB;
+
+		if (dd->flags & TDES_FLAGS_CFB8)
+			valmr |= TDES_MR_CFBS_8b;
+		else if (dd->flags & TDES_FLAGS_CFB16)
+			valmr |= TDES_MR_CFBS_16b;
+		else if (dd->flags & TDES_FLAGS_CFB32)
+			valmr |= TDES_MR_CFBS_32b;
+	} else if (dd->flags & TDES_FLAGS_OFB) {
+		valmr |= TDES_MR_OPMOD_OFB;
+	}
+
+	if ((dd->flags & TDES_FLAGS_ENCRYPT) || (dd->flags & TDES_FLAGS_OFB))
+		valmr |= TDES_MR_CYPHER_ENC;
+
+	atmel_tdes_write(dd, TDES_CR, valcr);
+	atmel_tdes_write(dd, TDES_MR, valmr);
+
+	atmel_tdes_write_n(dd, TDES_KEY1W1R, dd->ctx->key,
+						dd->ctx->keylen >> 2);
+
+	if (((dd->flags & TDES_FLAGS_CBC) || (dd->flags & TDES_FLAGS_CFB) ||
+		(dd->flags & TDES_FLAGS_OFB)) && dd->req->info) {
+		atmel_tdes_write_n(dd, TDES_IV1R, dd->req->info, 2);
+	}
+
+	return 0;
+}
+
+static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
+{
+	int err = 0;
+	size_t count;
+
+	atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS);
+
+	if (dd->flags & TDES_FLAGS_FAST) {
+		dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
+		dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+	} else {
+		dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
+					   dd->dma_size, DMA_FROM_DEVICE);
+
+		/* copy data */
+		count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset,
+				dd->buf_out, dd->buflen, dd->dma_size, 1);
+		if (count != dd->dma_size) {
+			err = -EINVAL;
+			pr_err("not all data converted: %u\n", count);
+		}
+	}
+
+	return err;
+}
+
+static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd)
+{
+	int err = -ENOMEM;
+
+	dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, 0);
+	dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, 0);
+	dd->buflen = PAGE_SIZE;
+	dd->buflen &= ~(DES_BLOCK_SIZE - 1);
+
+	if (!dd->buf_in || !dd->buf_out) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		goto err_alloc;
+	}
+
+	/* MAP here */
+	dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
+					dd->buflen, DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
+		dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
+		err = -EINVAL;
+		goto err_map_in;
+	}
+
+	dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
+					dd->buflen, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
+		dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
+		err = -EINVAL;
+		goto err_map_out;
+	}
+
+	return 0;
+
+err_map_out:
+	dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen,
+		DMA_TO_DEVICE);
+err_map_in:
+	free_page((unsigned long)dd->buf_out);
+	free_page((unsigned long)dd->buf_in);
+err_alloc:
+	if (err)
+		pr_err("error: %d\n", err);
+	return err;
+}
+
+static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd)
+{
+	dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
+			 DMA_FROM_DEVICE);
+	dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen,
+		DMA_TO_DEVICE);
+	free_page((unsigned long)dd->buf_out);
+	free_page((unsigned long)dd->buf_in);
+}
+
+static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
+			       dma_addr_t dma_addr_out, int length)
+{
+	struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct atmel_tdes_dev *dd = ctx->dd;
+	int len32;
+
+	dd->dma_size = length;
+
+	if (!(dd->flags & TDES_FLAGS_FAST)) {
+		dma_sync_single_for_device(dd->dev, dma_addr_in, length,
+					   DMA_TO_DEVICE);
+	}
+
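+	/*
+	 * The PDC counters are expressed in transfer units: bytes for CFB8,
+	 * half-words for CFB16 and 32-bit words for the other modes.
+	 */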
+	if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB8))
+		len32 = DIV_ROUND_UP(length, sizeof(u8));
+	else if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB16))
+		len32 = DIV_ROUND_UP(length, sizeof(u16));
+	else
+		len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS);
+	atmel_tdes_write(dd, TDES_TPR, dma_addr_in);
+	atmel_tdes_write(dd, TDES_TCR, len32);
+	atmel_tdes_write(dd, TDES_RPR, dma_addr_out);
+	atmel_tdes_write(dd, TDES_RCR, len32);
+
+	/* Enable Interrupt */
+	atmel_tdes_write(dd, TDES_IER, TDES_INT_ENDRX);
+
+	/* Start DMA transfer */
+	atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTEN | TDES_PTCR_RXTEN);
+
+	return 0;
+}
+
+static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
+					crypto_ablkcipher_reqtfm(dd->req));
+	int err, fast = 0, in, out;
+	size_t count;
+	dma_addr_t addr_in, addr_out;
+
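+	/*
+	 * Fast path: if both source and destination are single, word-aligned
+	 * scatterlist entries, map them directly for DMA.  Otherwise bounce
+	 * the data through the pre-allocated DMA buffers.
+	 */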
+	if (sg_is_last(dd->in_sg) && sg_is_last(dd->out_sg)) {
+		/* check for alignment */
+		in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32));
+		out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32));
+
+		fast = in && out;
+	}
+
+	if (fast)  {
+		count = min(dd->total, sg_dma_len(dd->in_sg));
+		count = min(count, sg_dma_len(dd->out_sg));
+
+		if (count != dd->total) {
+			pr_err("request length != buffer length\n");
+			return -EINVAL;
+		}
+
+		err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+		if (!err) {
+			dev_err(dd->dev, "dma_map_sg() error\n");
+			return -EINVAL;
+		}
+
+		err = dma_map_sg(dd->dev, dd->out_sg, 1,
+				DMA_FROM_DEVICE);
+		if (!err) {
+			dev_err(dd->dev, "dma_map_sg() error\n");
+			dma_unmap_sg(dd->dev, dd->in_sg, 1,
+				DMA_TO_DEVICE);
+			return -EINVAL;
+		}
+
+		addr_in = sg_dma_address(dd->in_sg);
+		addr_out = sg_dma_address(dd->out_sg);
+
+		dd->flags |= TDES_FLAGS_FAST;
+
+	} else {
+		/* use cache buffers */
+		count = atmel_tdes_sg_copy(&dd->in_sg, &dd->in_offset,
+				dd->buf_in, dd->buflen, dd->total, 0);
+
+		addr_in = dd->dma_addr_in;
+		addr_out = dd->dma_addr_out;
+
+		dd->flags &= ~TDES_FLAGS_FAST;
+
+	}
+
+	dd->total -= count;
+
+	err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count);
+	if (err && (dd->flags & TDES_FLAGS_FAST)) {
+		dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+		dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
+	}
+
+	return err;
+}
+
+
+static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err)
+{
+	struct ablkcipher_request *req = dd->req;
+
+	clk_disable_unprepare(dd->iclk);
+
+	dd->flags &= ~TDES_FLAGS_BUSY;
+
+	req->base.complete(&req->base, err);
+}
+
+static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
+			       struct ablkcipher_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_tdes_ctx *ctx;
+	struct atmel_tdes_reqctx *rctx;
+	unsigned long flags;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ablkcipher_enqueue_request(&dd->queue, req);
+	if (dd->flags & TDES_FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= TDES_FLAGS_BUSY;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ablkcipher_request_cast(async_req);
+
+	/* assign new request to device */
+	dd->req = req;
+	dd->total = req->nbytes;
+	dd->in_offset = 0;
+	dd->in_sg = req->src;
+	dd->out_offset = 0;
+	dd->out_sg = req->dst;
+
+	rctx = ablkcipher_request_ctx(req);
+	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx->mode &= TDES_FLAGS_MODE_MASK;
+	dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
+	dd->ctx = ctx;
+	ctx->dd = dd;
+
+	err = atmel_tdes_write_ctrl(dd);
+	if (!err)
+		err = atmel_tdes_crypt_dma_start(dd);
+	if (err) {
+		/* des_task will not finish it, so do it here */
+		atmel_tdes_finish_req(dd, err);
+		tasklet_schedule(&dd->queue_task);
+	}
+
+	return ret;
+}
+
+
+static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct atmel_tdes_dev *dd;
+
+	if (mode & TDES_FLAGS_CFB8) {
+		if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) {
+			pr_err("request size is not exact amount of CFB8 blocks\n");
+			return -EINVAL;
+		}
+	} else if (mode & TDES_FLAGS_CFB16) {
+		if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) {
+			pr_err("request size is not exact amount of CFB16 blocks\n");
+			return -EINVAL;
+		}
+	} else if (mode & TDES_FLAGS_CFB32) {
+		if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) {
+			pr_err("request size is not exact amount of CFB32 blocks\n");
+			return -EINVAL;
+		}
+	} else if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) {
+		pr_err("request size is not exact amount of DES blocks\n");
+		return -EINVAL;
+	}
+
+	dd = atmel_tdes_find_dev(ctx);
+	if (!dd)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return atmel_tdes_handle_queue(dd, req);
+}
+
+static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	u32 tmp[DES_EXPKEY_WORDS];
+	int err;
+	struct crypto_tfm *ctfm = crypto_ablkcipher_tfm(tfm);
+
+	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (keylen != DES_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	err = des_ekey(tmp, key);
+	if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		ctfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	const char *alg_name;
+
+	alg_name = crypto_tfm_alg_name(crypto_ablkcipher_tfm(tfm));
+
+	/*
+	 * HW bug in cfb 3-keys mode.
+	 */
+	if (strstr(alg_name, "cfb") && (keylen != 2*DES_KEY_SIZE)) {
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	} else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) {
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int atmel_tdes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT);
+}
+
+static int atmel_tdes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, 0);
+}
+
+static int atmel_tdes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CBC);
+}
+
+static int atmel_tdes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_CBC);
+}
+
+static int atmel_tdes_cfb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB);
+}
+
+static int atmel_tdes_cfb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB);
+}
+
+static int atmel_tdes_cfb8_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
+						TDES_FLAGS_CFB8);
+}
+
+static int atmel_tdes_cfb8_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB8);
+}
+
+static int atmel_tdes_cfb16_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
+						TDES_FLAGS_CFB16);
+}
+
+static int atmel_tdes_cfb16_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB16);
+}
+
+static int atmel_tdes_cfb32_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
+						TDES_FLAGS_CFB32);
+}
+
+static int atmel_tdes_cfb32_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB32);
+}
+
+static int atmel_tdes_ofb_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_OFB);
+}
+
+static int atmel_tdes_ofb_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_tdes_crypt(req, TDES_FLAGS_OFB);
+}
+
+static int atmel_tdes_cra_init(struct crypto_tfm *tfm)
+{
+	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx);
+
+	return 0;
+}
+
+static void atmel_tdes_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct crypto_alg tdes_algs[] = {
+{
+	.cra_name		= "ecb(des)",
+	.cra_driver_name	= "atmel-ecb-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_ecb_encrypt,
+		.decrypt	= atmel_tdes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des)",
+	.cra_driver_name	= "atmel-cbc-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_cbc_encrypt,
+		.decrypt	= atmel_tdes_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb(des)",
+	.cra_driver_name	= "atmel-cfb-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_cfb_encrypt,
+		.decrypt	= atmel_tdes_cfb_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb8(des)",
+	.cra_driver_name	= "atmel-cfb8-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB8_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_cfb8_encrypt,
+		.decrypt	= atmel_tdes_cfb8_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb16(des)",
+	.cra_driver_name	= "atmel-cfb16-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB16_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_cfb16_encrypt,
+		.decrypt	= atmel_tdes_cfb16_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb32(des)",
+	.cra_driver_name	= "atmel-cfb32-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB32_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_cfb32_encrypt,
+		.decrypt	= atmel_tdes_cfb32_decrypt,
+	}
+},
+{
+	.cra_name		= "ofb(des)",
+	.cra_driver_name	= "atmel-ofb-des",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_des_setkey,
+		.encrypt	= atmel_tdes_ofb_encrypt,
+		.decrypt	= atmel_tdes_ofb_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des3_ede)",
+	.cra_driver_name	= "atmel-ecb-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2 * DES_KEY_SIZE,
+		.max_keysize	= 3 * DES_KEY_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_ecb_encrypt,
+		.decrypt	= atmel_tdes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des3_ede)",
+	.cra_driver_name	= "atmel-cbc-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 3*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_cbc_encrypt,
+		.decrypt	= atmel_tdes_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb(des3_ede)",
+	.cra_driver_name	= "atmel-cfb-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 2*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_cfb_encrypt,
+		.decrypt	= atmel_tdes_cfb_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb8(des3_ede)",
+	.cra_driver_name	= "atmel-cfb8-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB8_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 2*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_cfb8_encrypt,
+		.decrypt	= atmel_tdes_cfb8_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb16(des3_ede)",
+	.cra_driver_name	= "atmel-cfb16-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB16_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 2*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_cfb16_encrypt,
+		.decrypt	= atmel_tdes_cfb16_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb32(des3_ede)",
+	.cra_driver_name	= "atmel-cfb32-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB32_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 2*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_cfb32_encrypt,
+		.decrypt	= atmel_tdes_cfb32_decrypt,
+	}
+},
+{
+	.cra_name		= "ofb(des3_ede)",
+	.cra_driver_name	= "atmel-ofb-tdes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_tdes_cra_init,
+	.cra_exit		= atmel_tdes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2*DES_KEY_SIZE,
+		.max_keysize	= 3*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= atmel_tdes_setkey,
+		.encrypt	= atmel_tdes_ofb_encrypt,
+		.decrypt	= atmel_tdes_ofb_decrypt,
+	}
+},
+};
+
+static void atmel_tdes_queue_task(unsigned long data)
+{
+	struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *)data;
+
+	atmel_tdes_handle_queue(dd, NULL);
+}
+
+static void atmel_tdes_done_task(unsigned long data)
+{
+	struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data;
+	int err;
+
+	err = atmel_tdes_crypt_dma_stop(dd);
+
+	err = dd->err ? : err;
+
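+	/* more data remains for this request: start the next DMA transfer */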
+	if (dd->total && !err) {
+		err = atmel_tdes_crypt_dma_start(dd);
+		if (!err)
+			return;
+	}
+
+	atmel_tdes_finish_req(dd, err);
+	atmel_tdes_handle_queue(dd, NULL);
+}
+
+static irqreturn_t atmel_tdes_irq(int irq, void *dev_id)
+{
+	struct atmel_tdes_dev *tdes_dd = dev_id;
+	u32 reg;
+
+	reg = atmel_tdes_read(tdes_dd, TDES_ISR);
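+	/* handle only the interrupt sources that are currently enabled */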
+	if (reg & atmel_tdes_read(tdes_dd, TDES_IMR)) {
+		atmel_tdes_write(tdes_dd, TDES_IDR, reg);
+		if (TDES_FLAGS_BUSY & tdes_dd->flags)
+			tasklet_schedule(&tdes_dd->done_task);
+		else
+			dev_warn(tdes_dd->dev, "TDES interrupt when no active requests.\n");
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void atmel_tdes_unregister_algs(struct atmel_tdes_dev *dd)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tdes_algs); i++)
+		crypto_unregister_alg(&tdes_algs[i]);
+}
+
+static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(tdes_algs); i++) {
+		INIT_LIST_HEAD(&tdes_algs[i].cra_list);
+		err = crypto_register_alg(&tdes_algs[i]);
+		if (err)
+			goto err_tdes_algs;
+	}
+
+	return 0;
+
+err_tdes_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_alg(&tdes_algs[j]);
+
+	return err;
+}
+
+static int __devinit atmel_tdes_probe(struct platform_device *pdev)
+{
+	struct atmel_tdes_dev *tdes_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *tdes_res;
+	unsigned long tdes_phys_size;
+	int err;
+
+	tdes_dd = kzalloc(sizeof(struct atmel_tdes_dev), GFP_KERNEL);
+	if (tdes_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto tdes_dd_err;
+	}
+
+	tdes_dd->dev = dev;
+
+	platform_set_drvdata(pdev, tdes_dd);
+
+	INIT_LIST_HEAD(&tdes_dd->list);
+
+	tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task,
+					(unsigned long)tdes_dd);
+	tasklet_init(&tdes_dd->queue_task, atmel_tdes_queue_task,
+					(unsigned long)tdes_dd);
+
+	crypto_init_queue(&tdes_dd->queue, ATMEL_TDES_QUEUE_LENGTH);
+
+	tdes_dd->irq = -1;
+
+	/* Get the base address */
+	tdes_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!tdes_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	tdes_dd->phys_base = tdes_res->start;
+	tdes_phys_size = resource_size(tdes_res);
+
+	/* Get the IRQ */
+	tdes_dd->irq = platform_get_irq(pdev,  0);
+	if (tdes_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = tdes_dd->irq;
+		goto res_err;
+	}
+
+	err = request_irq(tdes_dd->irq, atmel_tdes_irq, IRQF_SHARED,
+			"atmel-tdes", tdes_dd);
+	if (err) {
+		dev_err(dev, "unable to request tdes irq.\n");
+		goto tdes_irq_err;
+	}
+
+	/* Initializing the clock */
+	tdes_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(tdes_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(tdes_dd->iclk);
+		goto clk_err;
+	}
+
+	tdes_dd->io_base = ioremap(tdes_dd->phys_base, tdes_phys_size);
+	if (!tdes_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto tdes_io_err;
+	}
+
+	err = atmel_tdes_dma_init(tdes_dd);
+	if (err)
+		goto err_tdes_dma;
+
+	spin_lock(&atmel_tdes.lock);
+	list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list);
+	spin_unlock(&atmel_tdes.lock);
+
+	err = atmel_tdes_register_algs(tdes_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel DES/TDES\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&atmel_tdes.lock);
+	list_del(&tdes_dd->list);
+	spin_unlock(&atmel_tdes.lock);
+	atmel_tdes_dma_cleanup(tdes_dd);
+err_tdes_dma:
+	iounmap(tdes_dd->io_base);
+tdes_io_err:
+	clk_put(tdes_dd->iclk);
+clk_err:
+	free_irq(tdes_dd->irq, tdes_dd);
+tdes_irq_err:
+res_err:
+	tasklet_kill(&tdes_dd->done_task);
+	tasklet_kill(&tdes_dd->queue_task);
+	kfree(tdes_dd);
+	tdes_dd = NULL;
+tdes_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int __devexit atmel_tdes_remove(struct platform_device *pdev)
+{
+	struct atmel_tdes_dev *tdes_dd;
+
+	tdes_dd = platform_get_drvdata(pdev);
+	if (!tdes_dd)
+		return -ENODEV;
+	spin_lock(&atmel_tdes.lock);
+	list_del(&tdes_dd->list);
+	spin_unlock(&atmel_tdes.lock);
+
+	atmel_tdes_unregister_algs(tdes_dd);
+
+	tasklet_kill(&tdes_dd->done_task);
+	tasklet_kill(&tdes_dd->queue_task);
+
+	atmel_tdes_dma_cleanup(tdes_dd);
+
+	iounmap(tdes_dd->io_base);
+
+	clk_put(tdes_dd->iclk);
+
+	if (tdes_dd->irq >= 0)
+		free_irq(tdes_dd->irq, tdes_dd);
+
+	kfree(tdes_dd);
+	tdes_dd = NULL;
+
+	return 0;
+}
+
+static struct platform_driver atmel_tdes_driver = {
+	.probe		= atmel_tdes_probe,
+	.remove		= __devexit_p(atmel_tdes_remove),
+	.driver		= {
+		.name	= "atmel_tdes",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(atmel_tdes_driver);
+
+MODULE_DESCRIPTION("Atmel DES/TDES hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
new file mode 100644
index 0000000000000000000000000000000000000000..5398580b431340eb85210cd80393872a730a15c3
--- /dev/null
+++ b/drivers/crypto/bfin_crc.c
@@ -0,0 +1,780 @@
+/*
+ * Cryptographic API.
+ *
+ * Support Blackfin CRC HW acceleration.
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/blackfin.h>
+#include <asm/bfin_crc.h>
+#include <asm/dma.h>
+#include <asm/portmux.h>
+
+#define CRC_CCRYPTO_QUEUE_LENGTH	5
+
+#define DRIVER_NAME "bfin-hmac-crc"
+#define CHKSUM_DIGEST_SIZE      4
+#define CHKSUM_BLOCK_SIZE       1
+
+#define CRC_MAX_DMA_DESC	100
+
+#define CRC_CRYPTO_STATE_UPDATE		1
+#define CRC_CRYPTO_STATE_FINALUPDATE	2
+#define CRC_CRYPTO_STATE_FINISH		3
+
+struct bfin_crypto_crc {
+	struct list_head	list;
+	struct device		*dev;
+	spinlock_t		lock;
+
+	int			irq;
+	int			dma_ch;
+	u32			poly;
+	volatile struct crc_register *regs;
+
+	struct ahash_request	*req; /* current request in operation */
+	struct dma_desc_array	*sg_cpu; /* virt addr of sg dma descriptors */
+	dma_addr_t		sg_dma; /* phy addr of sg dma descriptors */
+	u8			*sg_mid_buf;
+
+	struct tasklet_struct	done_task;
+	struct crypto_queue	queue; /* waiting requests */
+
+	u8			busy:1; /* crc device in operation flag */
+};
+
+static struct bfin_crypto_crc_list {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+} crc_list;
+
+struct bfin_crypto_crc_reqctx {
+	struct bfin_crypto_crc	*crc;
+
+	unsigned int		total;	/* total request bytes */
+	size_t			sg_buflen; /* bytes for this update */
+	unsigned int		sg_nents;
+	struct scatterlist	*sg; /* sg list head for this update*/
+	struct scatterlist	bufsl[2]; /* chained sg list */
+
+	size_t			bufnext_len;
+	size_t			buflast_len;
+	u8			bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next update */
+	u8			buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last update */
+
+	u8			flag;
+};
+
+struct bfin_crypto_crc_ctx {
+	struct bfin_crypto_crc	*crc;
+	u32			key;
+};
+
+
+/*
+ * derive number of elements in scatterlist
+ */
+static int sg_count(struct scatterlist *sg_list)
+{
+	struct scatterlist *sg = sg_list;
+	int sg_nents = 1;
+
+	if (sg_list == NULL)
+		return 0;
+
+	while (!sg_is_last(sg)) {
+		sg_nents++;
+		sg = scatterwalk_sg_next(sg);
+	}
+
+	return sg_nents;
+}
+
+/*
+ * get element in scatter list by given index
+ */
+static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents,
+				unsigned int index)
+{
+	struct scatterlist *sg = NULL;
+	int i;
+
+	for_each_sg(sg_list, sg, nents, i)
+		if (i == index)
+			break;
+
+	return sg;
+}
+
+static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key)
+{
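+	/* select CRC compute mode and seed the result register with the key */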
+	crc->regs->datacntrld = 0;
+	crc->regs->control = MODE_CALC_CRC << OPMODE_OFFSET;
+	crc->regs->curresult = key;
+
+	/* setup CRC interrupts */
+	crc->regs->status = CMPERRI | DCNTEXPI;
+	crc->regs->intrenset = CMPERRI | DCNTEXPI;
+	SSYNC();
+
+	return 0;
+}
+
+static int bfin_crypto_crc_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
+	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
+	struct bfin_crypto_crc *crc;
+
+	dev_dbg(crc->dev, "crc_init\n");
+	spin_lock_bh(&crc_list.lock);
+	list_for_each_entry(crc, &crc_list.dev_list, list) {
+		crc_ctx->crc = crc;
+		break;
+	}
+	spin_unlock_bh(&crc_list.lock);
+
+	if (sg_count(req->src) > CRC_MAX_DMA_DESC) {
+		dev_dbg(crc->dev, "init: requested sg list is too big > %d\n",
+			CRC_MAX_DMA_DESC);
+		return -EINVAL;
+	}
+
+	ctx->crc = crc;
+	ctx->bufnext_len = 0;
+	ctx->buflast_len = 0;
+	ctx->sg_buflen = 0;
+	ctx->total = 0;
+	ctx->flag = 0;
+
+	/* init crc results */
+	put_unaligned_le32(crc_ctx->key, req->result);
+
+	dev_dbg(crc->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
+}
+
+static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc)
+{
+	struct scatterlist *sg;
+	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req);
+	int i = 0, j = 0;
+	unsigned long dma_config;
+	unsigned int dma_count;
+	unsigned int dma_addr;
+	unsigned int mid_dma_count = 0;
+	int dma_mod;
+
+	dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE);
+
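+	/*
+	 * Build one DMA descriptor per sg entry.  Trailing bytes that do not
+	 * fill a 32-bit word are staged in sg_mid_buf, completed with the
+	 * first bytes of the next sg entry and sent by an extra descriptor.
+	 */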
+	for_each_sg(ctx->sg, sg, ctx->sg_nents, j) {
+		dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32;
+		dma_addr = sg_dma_address(sg);
+		/* exclude the bytes reserved for the next update from the last sg */
+		if (sg_is_last(sg))
+			dma_count = sg_dma_len(sg) - ctx->bufnext_len;
+		else
+			dma_count = sg_dma_len(sg);
+
+		if (mid_dma_count) {
+			/*
+			 * Pad the previous middle dma buffer up to 4 bytes
+			 * with the first bytes of the current sg buffer, then
+			 * advance the address and shrink the length of the
+			 * current sg accordingly.
+			 */
+			memcpy(crc->sg_mid_buf +((i-1) << 2) + mid_dma_count,
+				(void *)dma_addr,
+				CHKSUM_DIGEST_SIZE - mid_dma_count);
+			dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count;
+			dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count;
+		}
+		/* chop current sg dma len to multiple of 32 bits */
+		mid_dma_count = dma_count % 4;
+		dma_count &= ~0x3;
+
+		if (dma_addr % 4 == 0) {
+			dma_config |= WDSIZE_32;
+			dma_count >>= 2;
+			dma_mod = 4;
+		} else if (dma_addr % 2 == 0) {
+			dma_config |= WDSIZE_16;
+			dma_count >>= 1;
+			dma_mod = 2;
+		} else {
+			dma_config |= WDSIZE_8;
+			dma_mod = 1;
+		}
+
+		crc->sg_cpu[i].start_addr = dma_addr;
+		crc->sg_cpu[i].cfg = dma_config;
+		crc->sg_cpu[i].x_count = dma_count;
+		crc->sg_cpu[i].x_modify = dma_mod;
+		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
+			"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
+			i, crc->sg_cpu[i].start_addr,
+			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
+			crc->sg_cpu[i].x_modify);
+		i++;
+
+		if (mid_dma_count) {
+			/* copy extra bytes to next middle dma buffer */
+			dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 |
+				DMAEN | PSIZE_32 | WDSIZE_32;
+			memcpy(crc->sg_mid_buf + (i << 2),
+				(void *)(dma_addr + (dma_count << 2)),
+				mid_dma_count);
+			/* setup new dma descriptor for next middle dma */
+			crc->sg_cpu[i].start_addr = dma_map_single(crc->dev,
+					crc->sg_mid_buf + (i << 2),
+					CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
+			crc->sg_cpu[i].cfg = dma_config;
+			crc->sg_cpu[i].x_count = 1;
+			crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
+			dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
+				"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
+				i, crc->sg_cpu[i].start_addr,
+				crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
+				crc->sg_cpu[i].x_modify);
+			i++;
+		}
+	}
+
+	dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32;
+	/* For a final update request, append the next-update buffer as well */
+	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
+		ctx->flag == CRC_CRYPTO_STATE_FINISH)) {
+		crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext,
+						CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
+		crc->sg_cpu[i].cfg = dma_config;
+		crc->sg_cpu[i].x_count = 1;
+		crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
+		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
+			"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
+			i, crc->sg_cpu[i].start_addr,
+			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
+			crc->sg_cpu[i].x_modify);
+		i++;
+	}
+
+	if (i == 0)
+		return;
+
+	flush_dcache_range((unsigned int)crc->sg_cpu,
+			(unsigned int)crc->sg_cpu +
+			i * sizeof(struct dma_desc_array));
+
+	/* Set the last descriptor to stop mode */
+	crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
+	crc->sg_cpu[i - 1].cfg |= DI_EN;
+	set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
+	set_dma_x_count(crc->dma_ch, 0);
+	set_dma_x_modify(crc->dma_ch, 0);
+	SSYNC();
+	set_dma_config(crc->dma_ch, dma_config);
+}
+
+static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct bfin_crypto_crc_reqctx *ctx;
+	struct scatterlist *sg;
+	int ret = 0;
+	int nsg, i, j;
+	unsigned int nextlen;
+	unsigned long flags;
+
+	spin_lock_irqsave(&crc->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&crc->queue, req);
+	if (crc->busy) {
+		spin_unlock_irqrestore(&crc->lock, flags);
+		return ret;
+	}
+	backlog = crypto_get_backlog(&crc->queue);
+	async_req = crypto_dequeue_request(&crc->queue);
+	if (async_req)
+		crc->busy = 1;
+	spin_unlock_irqrestore(&crc->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	crc->req = req;
+	ctx = ahash_request_ctx(req);
+	ctx->sg = NULL;
+	ctx->sg_buflen = 0;
+	ctx->sg_nents = 0;
+
+	dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n",
+						ctx->flag, req->nbytes);
+
+	if (ctx->flag == CRC_CRYPTO_STATE_FINISH) {
+		if (ctx->bufnext_len == 0) {
+			crc->busy = 0;
+			return 0;
+		}
+
+		/* Zero-pad the last crc update buffer out to 32 bits */
+		memset(ctx->bufnext + ctx->bufnext_len, 0,
+				CHKSUM_DIGEST_SIZE - ctx->bufnext_len);
+	} else {
+		/* Data shorter than 32 bits is stashed in the buffer for the next update. */
+		if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) {
+			memcpy(ctx->bufnext + ctx->bufnext_len,
+				sg_virt(req->src), req->nbytes);
+			ctx->bufnext_len += req->nbytes;
+			if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE &&
+				ctx->bufnext_len) {
+				goto finish_update;
+			} else {
+				crc->busy = 0;
+				return 0;
+			}
+		}
+
+		if (ctx->bufnext_len) {
+			/* Chain in extra bytes of last update */
+			ctx->buflast_len = ctx->bufnext_len;
+			memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len);
+
+			nsg = ctx->sg_buflen ? 2 : 1;
+			sg_init_table(ctx->bufsl, nsg);
+			sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len);
+			if (nsg > 1)
+				scatterwalk_sg_chain(ctx->bufsl, nsg,
+						req->src);
+			ctx->sg = ctx->bufsl;
+		} else
+			ctx->sg = req->src;
+
+		/* Chop crc buffer size to multiple of 32 bit */
+		nsg = ctx->sg_nents = sg_count(ctx->sg);
+		ctx->sg_buflen = ctx->buflast_len + req->nbytes;
+		ctx->bufnext_len = ctx->sg_buflen % 4;
+		ctx->sg_buflen &= ~0x3;
+
+		if (ctx->bufnext_len) {
+			/* copy extra bytes to buffer for next update */
+			memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE);
+			nextlen = ctx->bufnext_len;
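+			/* walk the sg list backwards to collect the trailing bytes */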
+			for (i = nsg - 1; i >= 0; i--) {
+				sg = sg_get(ctx->sg, nsg, i);
+				j = min(nextlen, sg_dma_len(sg));
+				memcpy(ctx->bufnext + nextlen - j,
+					sg_virt(sg) + sg_dma_len(sg) - j, j);
+				if (j == sg_dma_len(sg))
+					ctx->sg_nents--;
+				nextlen -= j;
+				if (nextlen == 0)
+					break;
+			}
+		}
+	}
+
+finish_update:
+	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
+		ctx->flag == CRC_CRYPTO_STATE_FINISH))
+		ctx->sg_buflen += CHKSUM_DIGEST_SIZE;
+
+	/* set the CRC data count (in 32-bit words) before starting DMA */
+	crc->regs->datacnt = ctx->sg_buflen >> 2;
+
+	/* setup and enable CRC DMA */
+	bfin_crypto_crc_config_dma(crc);
+
+	/* finally kick off CRC operation */
+	crc->regs->control |= BLKEN;
+	SSYNC();
+
+	return -EINPROGRESS;
+}
+
+static int bfin_crypto_crc_update(struct ahash_request *req)
+{
+	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->nbytes)
+		return 0;
+
+	dev_dbg(ctx->crc->dev, "crc_update\n");
+	ctx->total += req->nbytes;
+	ctx->flag = CRC_CRYPTO_STATE_UPDATE;
+
+	return bfin_crypto_crc_handle_queue(ctx->crc, req);
+}
+
+static int bfin_crypto_crc_final(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
+	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
+
+	dev_dbg(ctx->crc->dev, "crc_final\n");
+	ctx->flag = CRC_CRYPTO_STATE_FINISH;
+	crc_ctx->key = 0;
+
+	return bfin_crypto_crc_handle_queue(ctx->crc, req);
+}
+
+static int bfin_crypto_crc_finup(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
+	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
+
+	dev_dbg(ctx->crc->dev, "crc_finishupdate\n");
+	ctx->total += req->nbytes;
+	ctx->flag = CRC_CRYPTO_STATE_FINALUPDATE;
+	crc_ctx->key = 0;
+
+	return bfin_crypto_crc_handle_queue(ctx->crc, req);
+}
+
+static int bfin_crypto_crc_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = bfin_crypto_crc_init(req);
+	if (ret)
+		return ret;
+
+	return bfin_crypto_crc_finup(req);
+}
+
+static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
+
+	dev_dbg(crc_ctx->crc->dev, "crc_setkey\n");
+	if (keylen != CHKSUM_DIGEST_SIZE) {
+		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	crc_ctx->key = get_unaligned_le32(key);
+
+	return 0;
+}
+
+static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm)
+{
+	struct bfin_crypto_crc_ctx *crc_ctx = crypto_tfm_ctx(tfm);
+
+	crc_ctx->key = 0;
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct bfin_crypto_crc_reqctx));
+
+	return 0;
+}
+
+static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct ahash_alg algs = {
+	.init		= bfin_crypto_crc_init,
+	.update		= bfin_crypto_crc_update,
+	.final		= bfin_crypto_crc_final,
+	.finup		= bfin_crypto_crc_finup,
+	.digest		= bfin_crypto_crc_digest,
+	.setkey		= bfin_crypto_crc_setkey,
+	.halg.digestsize	= CHKSUM_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "hmac(crc32)",
+		.cra_driver_name	= DRIVER_NAME,
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
+						CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct bfin_crypto_crc_ctx),
+		.cra_alignmask		= 3,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= bfin_crypto_crc_cra_init,
+		.cra_exit		= bfin_crypto_crc_cra_exit,
+	}
+};
+
+static void bfin_crypto_crc_done_task(unsigned long data)
+{
+	struct bfin_crypto_crc *crc = (struct bfin_crypto_crc *)data;
+
+	bfin_crypto_crc_handle_queue(crc, NULL);
+}
+
+static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id)
+{
+	struct bfin_crypto_crc *crc = dev_id;
+
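+	/* the DCNTEXP (data count expired) interrupt means the CRC result is ready */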
+	if (crc->regs->status & DCNTEXP) {
+		crc->regs->status = DCNTEXP;
+		SSYNC();
+
+		/* prepare results */
+		put_unaligned_le32(crc->regs->result, crc->req->result);
+
+		crc->regs->control &= ~BLKEN;
+		crc->busy = 0;
+
+		if (crc->req->base.complete)
+			crc->req->base.complete(&crc->req->base, 0);
+
+		tasklet_schedule(&crc->done_task);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+#ifdef CONFIG_PM
+/**
+ *	bfin_crypto_crc_suspend - suspend crc device
+ *	@pdev: device being suspended
+ *	@state: requested suspend state
+ */
+static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
+	int i = 100000;
+
+	while ((crc->regs->control & BLKEN) && --i)
+		cpu_relax();
+
+	if (i == 0)
+		return -EBUSY;
+
+	return 0;
+}
+#else
+# define bfin_crypto_crc_suspend NULL
+#endif
+
+#define bfin_crypto_crc_resume NULL
+
+/**
+ *	bfin_crypto_crc_probe - Initialize crc device
+ *	@pdev: platform device being probed
+ */
+static int __devinit bfin_crypto_crc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct bfin_crypto_crc *crc;
+	unsigned int timeout = 100000;
+	int ret;
+
+	crc = kzalloc(sizeof(*crc), GFP_KERNEL);
+	if (!crc) {
+		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
+		return -ENOMEM;
+	}
+
+	crc->dev = dev;
+
+	INIT_LIST_HEAD(&crc->list);
+	spin_lock_init(&crc->lock);
+	tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc);
+	crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n");
+		ret = -ENOENT;
+		goto out_error_free_mem;
+	}
+
+	crc->regs = ioremap(res->start, resource_size(res));
+	if (!crc->regs) {
+		dev_err(&pdev->dev, "Cannot map CRC IO\n");
+		ret = -ENXIO;
+		goto out_error_free_mem;
+	}
+
+	crc->irq = platform_get_irq(pdev, 0);
+	if (crc->irq < 0) {
+		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
+		ret = -ENOENT;
+		goto out_error_unmap;
+	}
+
+	ret = request_irq(crc->irq, bfin_crypto_crc_handler, IRQF_SHARED, dev_name(dev), crc);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
+		goto out_error_unmap;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
+		ret = -ENOENT;
+		goto out_error_irq;
+	}
+	crc->dma_ch = res->start;
+
+	ret = request_dma(crc->dma_ch, dev_name(dev));
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
+		goto out_error_irq;
+	}
+
+	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
+	if (crc->sg_cpu == NULL) {
+		ret = -ENOMEM;
+		goto out_error_dma;
+	}
+	/*
+	 * need at most CRC_MAX_DMA_DESC sg + CRC_MAX_DMA_DESC middle  +
+	 * 1 last + 1 next dma descriptors
+	 */
+	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));
+
+	crc->regs->control = 0;
+	SSYNC();
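+	/* the CRC polynomial is supplied through platform data */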
+	crc->regs->poly = crc->poly = (u32)pdev->dev.platform_data;
+	SSYNC();
+
+	while (!(crc->regs->status & LUTDONE) && (--timeout) > 0)
+		cpu_relax();
+
+	if (timeout == 0)
+		dev_info(&pdev->dev, "init crc poly timeout\n");
+
+	spin_lock(&crc_list.lock);
+	list_add(&crc->list, &crc_list.dev_list);
+	spin_unlock(&crc_list.lock);
+
+	platform_set_drvdata(pdev, crc);
+
+	ret = crypto_register_ahash(&algs);
+	if (ret) {
+		spin_lock(&crc_list.lock);
+		list_del(&crc->list);
+		spin_unlock(&crc_list.lock);
+		dev_err(&pdev->dev, "Cann't register crypto ahash device\n");
+		goto out_error_dma;
+	}
+
+	dev_info(&pdev->dev, "initialized\n");
+
+	return 0;
+
+out_error_dma:
+	if (crc->sg_cpu)
+		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
+	free_dma(crc->dma_ch);
+out_error_irq:
+	free_irq(crc->irq, crc->dev);
+out_error_unmap:
+	iounmap((void *)crc->regs);
+out_error_free_mem:
+	kfree(crc);
+
+	return ret;
+}
+
+/**
+ *	bfin_crypto_crc_remove - Remove crc device
+ *	@pdev: platform device being removed
+ */
+static int __devexit bfin_crypto_crc_remove(struct platform_device *pdev)
+{
+	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
+
+	if (!crc)
+		return -ENODEV;
+
+	spin_lock(&crc_list.lock);
+	list_del(&crc->list);
+	spin_unlock(&crc_list.lock);
+
+	crypto_unregister_ahash(&algs);
+	tasklet_kill(&crc->done_task);
+	iounmap((void *)crc->regs);
+	free_dma(crc->dma_ch);
+	if (crc->irq > 0)
+		free_irq(crc->irq, crc->dev);
+	kfree(crc);
+
+	return 0;
+}
+
+static struct platform_driver bfin_crypto_crc_driver = {
+	.probe     = bfin_crypto_crc_probe,
+	.remove    = __devexit_p(bfin_crypto_crc_remove),
+	.suspend   = bfin_crypto_crc_suspend,
+	.resume    = bfin_crypto_crc_resume,
+	.driver    = {
+		.name  = DRIVER_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+/**
+ *	bfin_crypto_crc_mod_init - Initialize module
+ *
+ *	Checks the module params and registers the platform driver.
+ *	Real work is in the platform probe function.
+ */
+static int __init bfin_crypto_crc_mod_init(void)
+{
+	int ret;
+
+	pr_info("Blackfin hardware CRC crypto driver\n");
+
+	INIT_LIST_HEAD(&crc_list.dev_list);
+	spin_lock_init(&crc_list.lock);
+
+	ret = platform_driver_register(&bfin_crypto_crc_driver);
+	if (ret) {
+		pr_info(KERN_ERR "unable to register driver\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ *	bfin_crypto_crc_mod_exit - Deinitialize module
+ */
+static void __exit bfin_crypto_crc_mod_exit(void)
+{
+	platform_driver_unregister(&bfin_crypto_crc_driver);
+}
+
+module_init(bfin_crypto_crc_mod_init);
+module_exit(bfin_crypto_crc_mod_exit);
+
+MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
+MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 2d876bb98ff4844016e7eed7f2c0b1e6cc653337..65c7668614ab4faf7880b9f34d9cbfc23351ce68 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -32,10 +32,13 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE
 config CRYPTO_DEV_FSL_CAAM_INTC
 	bool "Job Ring interrupt coalescing"
 	depends on CRYPTO_DEV_FSL_CAAM
-	default y
+	default n
 	help
 	  Enable the Job Ring's interrupt coalescing feature.
 
+	  Note: the driver already provides adequate
+	  interrupt coalescing in software.
+
 config CRYPTO_DEV_FSL_CAAM_INTC_COUNT_THLD
 	int "Job Ring interrupt coalescing count threshold"
 	depends on CRYPTO_DEV_FSL_CAAM_INTC
@@ -70,3 +73,28 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 
 	  To compile this as a module, choose M here: the module
 	  will be called caamalg.
+
+config CRYPTO_DEV_FSL_CAAM_AHASH_API
+	tristate "Register hash algorithm implementations with Crypto API"
+	depends on CRYPTO_DEV_FSL_CAAM
+	default y
+	select CRYPTO_AHASH
+	help
+	  Selecting this will offload ahash for users of the
+	  scatterlist crypto API to the SEC4 via job ring.
+
+	  To compile this as a module, choose M here: the module
+	  will be called caamhash.
+
+config CRYPTO_DEV_FSL_CAAM_RNG_API
+	tristate "Register caam device for hwrng API"
+	depends on CRYPTO_DEV_FSL_CAAM
+	default y
+	select CRYPTO_RNG
+	select HW_RANDOM
+	help
+	  Selecting this will register the SEC4 hardware rng with
+	  the hw_random API for supplying the kernel entropy pool.
+
+	  To compile this as a module, choose M here: the module
+	  will be called caamrng.
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index ef39011b4505cd27078f089a8d26c7c8d9abc67d..b1eb44838db5644bc51dc5c2eff85929ff125aa6 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -4,5 +4,7 @@
 
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
 
-caam-objs := ctrl.o jr.o error.o
+caam-objs := ctrl.o jr.o error.o key_gen.o
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 4eec389184d3f06d3fb59d161d485226b9a3ef38..0c1ea8492eff632401a5750e36bc09b88eaed6fc 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -37,9 +37,10 @@
  * | ShareDesc Pointer |
  * | SEQ_OUT_PTR       |
  * | (output buffer)   |
+ * | (output length)   |
  * | SEQ_IN_PTR        |
  * | (input buffer)    |
- * | LOAD (to DECO)    |
+ * | (input length)    |
  * ---------------------
  */
 
@@ -50,6 +51,8 @@
 #include "desc_constr.h"
 #include "jr.h"
 #include "error.h"
+#include "sg_sw_sec4.h"
+#include "key_gen.h"
 
 /*
  * crypto alg
@@ -62,7 +65,7 @@
 #define CAAM_MAX_IV_LENGTH		16
 
 /* length of descriptors text */
-#define DESC_JOB_IO_LEN			(CAAM_CMD_SZ * 3 + CAAM_PTR_SZ * 3)
+#define DESC_JOB_IO_LEN			(CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3)
 
 #define DESC_AEAD_BASE			(4 * CAAM_CMD_SZ)
 #define DESC_AEAD_ENC_LEN		(DESC_AEAD_BASE + 16 * CAAM_CMD_SZ)
@@ -143,11 +146,11 @@ static inline void aead_append_ld_iv(u32 *desc, int ivsize)
  */
 static inline void ablkcipher_append_src_dst(u32 *desc)
 {
-	append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \
-	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | \
-			     KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); \
-	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); \
+	append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
+			     KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
 }
 
 /*
@@ -452,121 +455,12 @@ static int aead_setauthsize(struct crypto_aead *authenc,
 	return 0;
 }
 
-struct split_key_result {
-	struct completion completion;
-	int err;
-};
-
-static void split_key_done(struct device *dev, u32 *desc, u32 err,
-			   void *context)
+static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in,
+			      u32 authkeylen)
 {
-	struct split_key_result *res = context;
-
-#ifdef DEBUG
-	dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
-
-	if (err) {
-		char tmp[CAAM_ERROR_STR_MAX];
-
-		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
-	}
-
-	res->err = err;
-
-	complete(&res->completion);
-}
-
-/*
-get a split ipad/opad key
-
-Split key generation-----------------------------------------------
-
-[00] 0xb0810008    jobdesc: stidx=1 share=never len=8
-[01] 0x04000014        key: class2->keyreg len=20
-			@0xffe01000
-[03] 0x84410014  operation: cls2-op sha1 hmac init dec
-[04] 0x24940000     fifold: class2 msgdata-last2 len=0 imm
-[05] 0xa4000001       jump: class2 local all ->1 [06]
-[06] 0x64260028    fifostr: class2 mdsplit-jdk len=40
-			@0xffe04000
-*/
-static u32 gen_split_key(struct caam_ctx *ctx, const u8 *key_in, u32 authkeylen)
-{
-	struct device *jrdev = ctx->jrdev;
-	u32 *desc;
-	struct split_key_result result;
-	dma_addr_t dma_addr_in, dma_addr_out;
-	int ret = 0;
-
-	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
-
-	init_job_desc(desc, 0);
-
-	dma_addr_in = dma_map_single(jrdev, (void *)key_in, authkeylen,
-				     DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, dma_addr_in)) {
-		dev_err(jrdev, "unable to map key input memory\n");
-		kfree(desc);
-		return -ENOMEM;
-	}
-	append_key(desc, dma_addr_in, authkeylen, CLASS_2 |
-		       KEY_DEST_CLASS_REG);
-
-	/* Sets MDHA up into an HMAC-INIT */
-	append_operation(desc, ctx->alg_op | OP_ALG_DECRYPT |
-			     OP_ALG_AS_INIT);
-
-	/*
-	 * do a FIFO_LOAD of zero, this will trigger the internal key expansion
-	   into both pads inside MDHA
-	 */
-	append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB |
-				FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2);
-
-	/*
-	 * FIFO_STORE with the explicit split-key content store
-	 * (0x26 output type)
-	 */
-	dma_addr_out = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
-				      DMA_FROM_DEVICE);
-	if (dma_mapping_error(jrdev, dma_addr_out)) {
-		dev_err(jrdev, "unable to map key output memory\n");
-		kfree(desc);
-		return -ENOMEM;
-	}
-	append_fifo_store(desc, dma_addr_out, ctx->split_key_len,
-			  LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
-
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, authkeylen, 1);
-	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
-
-	result.err = 0;
-	init_completion(&result.completion);
-
-	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
-	if (!ret) {
-		/* in progress */
-		wait_for_completion_interruptible(&result.completion);
-		ret = result.err;
-#ifdef DEBUG
-		print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
-			       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
-			       ctx->split_key_pad_len, 1);
-#endif
-	}
-
-	dma_unmap_single(jrdev, dma_addr_out, ctx->split_key_pad_len,
-			 DMA_FROM_DEVICE);
-	dma_unmap_single(jrdev, dma_addr_in, authkeylen, DMA_TO_DEVICE);
-
-	kfree(desc);
-
-	return ret;
+	return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
+			       ctx->split_key_pad_len, key_in, authkeylen,
+			       ctx->alg_op);
 }
 
 static int aead_setkey(struct crypto_aead *aead,
@@ -610,7 +504,7 @@ static int aead_setkey(struct crypto_aead *aead,
 		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 #endif
 
-	ret = gen_split_key(ctx, key, authkeylen);
+	ret = gen_split_aead_key(ctx, key, authkeylen);
 	if (ret) {
 		goto badkey;
 	}
@@ -757,72 +651,78 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	return ret;
 }
 
-struct link_tbl_entry {
-	u64 ptr;
-	u32 len;
-	u8 reserved;
-	u8 buf_pool_id;
-	u16 offset;
-};
-
 /*
  * aead_edesc - s/w-extended aead descriptor
  * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
+ * @assoc_chained: if associated data is chained
  * @src_nents: number of segments in input scatterlist
+ * @src_chained: if source is chained
  * @dst_nents: number of segments in output scatterlist
+ * @dst_chained: if destination is chained
  * @iv_dma: dma address of iv for checking continuity and link table
  * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
- * @link_tbl_bytes: length of dma mapped link_tbl space
- * @link_tbl_dma: bus physical mapped address of h/w link table
+ * @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @sec4_sg_dma: bus physical mapped address of h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
  */
 struct aead_edesc {
 	int assoc_nents;
+	bool assoc_chained;
 	int src_nents;
+	bool src_chained;
 	int dst_nents;
+	bool dst_chained;
 	dma_addr_t iv_dma;
-	int link_tbl_bytes;
-	dma_addr_t link_tbl_dma;
-	struct link_tbl_entry *link_tbl;
+	int sec4_sg_bytes;
+	dma_addr_t sec4_sg_dma;
+	struct sec4_sg_entry *sec4_sg;
 	u32 hw_desc[0];
 };
 
 /*
  * ablkcipher_edesc - s/w-extended ablkcipher descriptor
  * @src_nents: number of segments in input scatterlist
+ * @src_chained: if source is chained
  * @dst_nents: number of segments in output scatterlist
+ * @dst_chained: if destination is chained
  * @iv_dma: dma address of iv for checking continuity and link table
  * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
- * @link_tbl_bytes: length of dma mapped link_tbl space
- * @link_tbl_dma: bus physical mapped address of h/w link table
+ * @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @sec4_sg_dma: bus physical mapped address of h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
  */
 struct ablkcipher_edesc {
 	int src_nents;
+	bool src_chained;
 	int dst_nents;
+	bool dst_chained;
 	dma_addr_t iv_dma;
-	int link_tbl_bytes;
-	dma_addr_t link_tbl_dma;
-	struct link_tbl_entry *link_tbl;
+	int sec4_sg_bytes;
+	dma_addr_t sec4_sg_dma;
+	struct sec4_sg_entry *sec4_sg;
 	u32 hw_desc[0];
 };
 
 static void caam_unmap(struct device *dev, struct scatterlist *src,
-		       struct scatterlist *dst, int src_nents, int dst_nents,
-		       dma_addr_t iv_dma, int ivsize, dma_addr_t link_tbl_dma,
-		       int link_tbl_bytes)
+		       struct scatterlist *dst, int src_nents,
+		       bool src_chained, int dst_nents, bool dst_chained,
+		       dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
+		       int sec4_sg_bytes)
 {
-	if (unlikely(dst != src)) {
-		dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
-		dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
+	if (dst != src) {
+		dma_unmap_sg_chained(dev, src, src_nents ? : 1, DMA_TO_DEVICE,
+				     src_chained);
+		dma_unmap_sg_chained(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE,
+				     dst_chained);
 	} else {
-		dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
+		dma_unmap_sg_chained(dev, src, src_nents ? : 1,
+				     DMA_BIDIRECTIONAL, src_chained);
 	}
 
 	if (iv_dma)
 		dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
-	if (link_tbl_bytes)
-		dma_unmap_single(dev, link_tbl_dma, link_tbl_bytes,
+	if (sec4_sg_bytes)
+		dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
 				 DMA_TO_DEVICE);
 }
 
@@ -833,12 +733,13 @@ static void aead_unmap(struct device *dev,
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	int ivsize = crypto_aead_ivsize(aead);
 
-	dma_unmap_sg(dev, req->assoc, edesc->assoc_nents, DMA_TO_DEVICE);
+	dma_unmap_sg_chained(dev, req->assoc, edesc->assoc_nents,
+			     DMA_TO_DEVICE, edesc->assoc_chained);
 
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize, edesc->link_tbl_dma,
-		   edesc->link_tbl_bytes);
+		   edesc->src_nents, edesc->src_chained, edesc->dst_nents,
+		   edesc->dst_chained, edesc->iv_dma, ivsize,
+		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
 static void ablkcipher_unmap(struct device *dev,
@@ -849,9 +750,9 @@ static void ablkcipher_unmap(struct device *dev,
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize, edesc->link_tbl_dma,
-		   edesc->link_tbl_bytes);
+		   edesc->src_nents, edesc->src_chained, edesc->dst_nents,
+		   edesc->dst_chained, edesc->iv_dma, ivsize,
+		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
 static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
@@ -942,7 +843,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 		       sizeof(struct iphdr) + req->assoclen +
 		       ((req->cryptlen > 1500) ? 1500 : req->cryptlen) +
 		       ctx->authsize + 36, 1);
-	if (!err && edesc->link_tbl_bytes) {
+	if (!err && edesc->sec4_sg_bytes) {
 		struct scatterlist *sg = sg_last(req->src, edesc->src_nents);
 		print_hex_dump(KERN_ERR, "sglastout@"xstr(__LINE__)": ",
 			       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(sg),
@@ -1026,50 +927,6 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	ablkcipher_request_complete(req, err);
 }
 
-static void sg_to_link_tbl_one(struct link_tbl_entry *link_tbl_ptr,
-			       dma_addr_t dma, u32 len, u32 offset)
-{
-	link_tbl_ptr->ptr = dma;
-	link_tbl_ptr->len = len;
-	link_tbl_ptr->reserved = 0;
-	link_tbl_ptr->buf_pool_id = 0;
-	link_tbl_ptr->offset = offset;
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "link_tbl_ptr@"xstr(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, link_tbl_ptr,
-		       sizeof(struct link_tbl_entry), 1);
-#endif
-}
-
-/*
- * convert scatterlist to h/w link table format
- * but does not have final bit; instead, returns last entry
- */
-static struct link_tbl_entry *sg_to_link_tbl(struct scatterlist *sg,
-					     int sg_count, struct link_tbl_entry
-					     *link_tbl_ptr, u32 offset)
-{
-	while (sg_count) {
-		sg_to_link_tbl_one(link_tbl_ptr, sg_dma_address(sg),
-				   sg_dma_len(sg), offset);
-		link_tbl_ptr++;
-		sg = sg_next(sg);
-		sg_count--;
-	}
-	return link_tbl_ptr - 1;
-}
-
-/*
- * convert scatterlist to h/w link table format
- * scatterlist must have been previously dma mapped
- */
-static void sg_to_link_tbl_last(struct scatterlist *sg, int sg_count,
-				struct link_tbl_entry *link_tbl_ptr, u32 offset)
-{
-	link_tbl_ptr = sg_to_link_tbl(sg, sg_count, link_tbl_ptr, offset);
-	link_tbl_ptr->len |= 0x40000000;
-}
-
 /*
  * Fill in aead job descriptor
  */
@@ -1085,7 +942,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
 	u32 *desc = edesc->hw_desc;
 	u32 out_options = 0, in_options;
 	dma_addr_t dst_dma, src_dma;
-	int len, link_tbl_index = 0;
+	int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
 	debug("assoclen %d cryptlen %d authsize %d\n",
@@ -1111,9 +968,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
 		src_dma = sg_dma_address(req->assoc);
 		in_options = 0;
 	} else {
-		src_dma = edesc->link_tbl_dma;
-		link_tbl_index += (edesc->assoc_nents ? : 1) + 1 +
-				  (edesc->src_nents ? : 1);
+		src_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += (edesc->assoc_nents ? : 1) + 1 +
+				 (edesc->src_nents ? : 1);
 		in_options = LDST_SGF;
 	}
 	if (encrypt)
@@ -1127,7 +984,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
 		if (all_contig) {
 			dst_dma = sg_dma_address(req->src);
 		} else {
-			dst_dma = src_dma + sizeof(struct link_tbl_entry) *
+			dst_dma = src_dma + sizeof(struct sec4_sg_entry) *
 				  ((edesc->assoc_nents ? : 1) + 1);
 			out_options = LDST_SGF;
 		}
@@ -1135,9 +992,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
 		if (!edesc->dst_nents) {
 			dst_dma = sg_dma_address(req->dst);
 		} else {
-			dst_dma = edesc->link_tbl_dma +
-				  link_tbl_index *
-				  sizeof(struct link_tbl_entry);
+			dst_dma = edesc->sec4_sg_dma +
+				  sec4_sg_index *
+				  sizeof(struct sec4_sg_entry);
 			out_options = LDST_SGF;
 		}
 	}
@@ -1163,7 +1020,7 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
 	u32 *desc = edesc->hw_desc;
 	u32 out_options = 0, in_options;
 	dma_addr_t dst_dma, src_dma;
-	int len, link_tbl_index = 0;
+	int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
 	debug("assoclen %d cryptlen %d authsize %d\n",
@@ -1188,8 +1045,8 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
 		src_dma = sg_dma_address(req->assoc);
 		in_options = 0;
 	} else {
-		src_dma = edesc->link_tbl_dma;
-		link_tbl_index += edesc->assoc_nents + 1 + edesc->src_nents;
+		src_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->assoc_nents + 1 + edesc->src_nents;
 		in_options = LDST_SGF;
 	}
 	append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize +
@@ -1199,13 +1056,13 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
 		dst_dma = edesc->iv_dma;
 	} else {
 		if (likely(req->src == req->dst)) {
-			dst_dma = src_dma + sizeof(struct link_tbl_entry) *
+			dst_dma = src_dma + sizeof(struct sec4_sg_entry) *
 				  edesc->assoc_nents;
 			out_options = LDST_SGF;
 		} else {
-			dst_dma = edesc->link_tbl_dma +
-				  link_tbl_index *
-				  sizeof(struct link_tbl_entry);
+			dst_dma = edesc->sec4_sg_dma +
+				  sec4_sg_index *
+				  sizeof(struct sec4_sg_entry);
 			out_options = LDST_SGF;
 		}
 	}
@@ -1226,7 +1083,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 	u32 *desc = edesc->hw_desc;
 	u32 out_options = 0, in_options;
 	dma_addr_t dst_dma, src_dma;
-	int len, link_tbl_index = 0;
+	int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ",
@@ -1244,8 +1101,8 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 		src_dma = edesc->iv_dma;
 		in_options = 0;
 	} else {
-		src_dma = edesc->link_tbl_dma;
-		link_tbl_index += (iv_contig ? 0 : 1) + edesc->src_nents;
+		src_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += (iv_contig ? 0 : 1) + edesc->src_nents;
 		in_options = LDST_SGF;
 	}
 	append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options);
@@ -1254,44 +1111,22 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 		if (!edesc->src_nents && iv_contig) {
 			dst_dma = sg_dma_address(req->src);
 		} else {
-			dst_dma = edesc->link_tbl_dma +
-				sizeof(struct link_tbl_entry);
+			dst_dma = edesc->sec4_sg_dma +
+				sizeof(struct sec4_sg_entry);
 			out_options = LDST_SGF;
 		}
 	} else {
 		if (!edesc->dst_nents) {
 			dst_dma = sg_dma_address(req->dst);
 		} else {
-			dst_dma = edesc->link_tbl_dma +
-				link_tbl_index * sizeof(struct link_tbl_entry);
+			dst_dma = edesc->sec4_sg_dma +
+				sec4_sg_index * sizeof(struct sec4_sg_entry);
 			out_options = LDST_SGF;
 		}
 	}
 	append_seq_out_ptr(desc, dst_dma, req->nbytes, out_options);
 }
 
-/*
- * derive number of elements in scatterlist
- */
-static int sg_count(struct scatterlist *sg_list, int nbytes)
-{
-	struct scatterlist *sg = sg_list;
-	int sg_nents = 0;
-
-	while (nbytes > 0) {
-		sg_nents++;
-		nbytes -= sg->length;
-		if (!sg_is_last(sg) && (sg + 1)->length == 0)
-			BUG(); /* Not support chaining */
-		sg = scatterwalk_sg_next(sg);
-	}
-
-	if (likely(sg_nents == 1))
-		return 0;
-
-	return sg_nents;
-}
-
 /*
  * allocate and map the aead extended descriptor
  */
@@ -1308,25 +1143,26 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	dma_addr_t iv_dma = 0;
 	int sgc;
 	bool all_contig = true;
+	bool assoc_chained = false, src_chained = false, dst_chained = false;
 	int ivsize = crypto_aead_ivsize(aead);
-	int link_tbl_index, link_tbl_len = 0, link_tbl_bytes;
+	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 
-	assoc_nents = sg_count(req->assoc, req->assoclen);
-	src_nents = sg_count(req->src, req->cryptlen);
+	assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained);
+	src_nents = sg_count(req->src, req->cryptlen, &src_chained);
 
 	if (unlikely(req->dst != req->src))
-		dst_nents = sg_count(req->dst, req->cryptlen);
+		dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained);
 
-	sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1,
-			 DMA_BIDIRECTIONAL);
+	sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1,
+				 DMA_BIDIRECTIONAL, assoc_chained);
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_BIDIRECTIONAL);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_BIDIRECTIONAL, src_chained);
 	} else {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_TO_DEVICE);
-		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
-				 DMA_FROM_DEVICE);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_TO_DEVICE, src_chained);
+		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
+					 DMA_FROM_DEVICE, dst_chained);
 	}
 
 	/* Check if data are contiguous */
@@ -1337,50 +1173,53 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 		all_contig = false;
 		assoc_nents = assoc_nents ? : 1;
 		src_nents = src_nents ? : 1;
-		link_tbl_len = assoc_nents + 1 + src_nents;
+		sec4_sg_len = assoc_nents + 1 + src_nents;
 	}
-	link_tbl_len += dst_nents;
+	sec4_sg_len += dst_nents;
 
-	link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry);
+	sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
-			link_tbl_bytes, GFP_DMA | flags);
+			sec4_sg_bytes, GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->assoc_nents = assoc_nents;
+	edesc->assoc_chained = assoc_chained;
 	edesc->src_nents = src_nents;
+	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
+	edesc->dst_chained = dst_chained;
 	edesc->iv_dma = iv_dma;
-	edesc->link_tbl_bytes = link_tbl_bytes;
-	edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) +
-			  desc_bytes;
-	edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
-					     link_tbl_bytes, DMA_TO_DEVICE);
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
+			 desc_bytes;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
 	*all_contig_ptr = all_contig;
 
-	link_tbl_index = 0;
+	sec4_sg_index = 0;
 	if (!all_contig) {
-		sg_to_link_tbl(req->assoc,
-			       (assoc_nents ? : 1),
-			       edesc->link_tbl +
-			       link_tbl_index, 0);
-		link_tbl_index += assoc_nents ? : 1;
-		sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
+		sg_to_sec4_sg(req->assoc,
+			      (assoc_nents ? : 1),
+			      edesc->sec4_sg +
+			      sec4_sg_index, 0);
+		sec4_sg_index += assoc_nents ? : 1;
+		dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
 				   iv_dma, ivsize, 0);
-		link_tbl_index += 1;
-		sg_to_link_tbl_last(req->src,
-				    (src_nents ? : 1),
-				    edesc->link_tbl +
-				    link_tbl_index, 0);
-		link_tbl_index += src_nents ? : 1;
+		sec4_sg_index += 1;
+		sg_to_sec4_sg_last(req->src,
+				   (src_nents ? : 1),
+				   edesc->sec4_sg +
+				   sec4_sg_index, 0);
+		sec4_sg_index += src_nents ? : 1;
 	}
 	if (dst_nents) {
-		sg_to_link_tbl_last(req->dst, dst_nents,
-				    edesc->link_tbl + link_tbl_index, 0);
+		sg_to_sec4_sg_last(req->dst, dst_nents,
+				   edesc->sec4_sg + sec4_sg_index, 0);
 	}
 
 	return edesc;
@@ -1487,24 +1326,25 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 	int sgc;
 	u32 contig = GIV_SRC_CONTIG | GIV_DST_CONTIG;
 	int ivsize = crypto_aead_ivsize(aead);
-	int link_tbl_index, link_tbl_len = 0, link_tbl_bytes;
+	bool assoc_chained = false, src_chained = false, dst_chained = false;
+	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 
-	assoc_nents = sg_count(req->assoc, req->assoclen);
-	src_nents = sg_count(req->src, req->cryptlen);
+	assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained);
+	src_nents = sg_count(req->src, req->cryptlen, &src_chained);
 
 	if (unlikely(req->dst != req->src))
-		dst_nents = sg_count(req->dst, req->cryptlen);
+		dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained);
 
-	sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1,
-			 DMA_BIDIRECTIONAL);
+	sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1,
+				 DMA_BIDIRECTIONAL, assoc_chained);
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_BIDIRECTIONAL);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_BIDIRECTIONAL, src_chained);
 	} else {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_TO_DEVICE);
-		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
-				 DMA_FROM_DEVICE);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_TO_DEVICE, src_chained);
+		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
+					 DMA_FROM_DEVICE, dst_chained);
 	}
 
 	/* Check if data are contiguous */
@@ -1516,58 +1356,61 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 		contig &= ~GIV_DST_CONTIG;
 		if (unlikely(req->src != req->dst)) {
 			dst_nents = dst_nents ? : 1;
-			link_tbl_len += 1;
+			sec4_sg_len += 1;
 		}
 	if (!(contig & GIV_SRC_CONTIG)) {
 		assoc_nents = assoc_nents ? : 1;
 		src_nents = src_nents ? : 1;
-		link_tbl_len += assoc_nents + 1 + src_nents;
+		sec4_sg_len += assoc_nents + 1 + src_nents;
 		if (likely(req->src == req->dst))
 			contig &= ~GIV_DST_CONTIG;
 	}
-	link_tbl_len += dst_nents;
+	sec4_sg_len += dst_nents;
 
-	link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry);
+	sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
-			link_tbl_bytes, GFP_DMA | flags);
+			sec4_sg_bytes, GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->assoc_nents = assoc_nents;
+	edesc->assoc_chained = assoc_chained;
 	edesc->src_nents = src_nents;
+	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
+	edesc->dst_chained = dst_chained;
 	edesc->iv_dma = iv_dma;
-	edesc->link_tbl_bytes = link_tbl_bytes;
-	edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) +
-			  desc_bytes;
-	edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
-					     link_tbl_bytes, DMA_TO_DEVICE);
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
+			 desc_bytes;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
 	*contig_ptr = contig;
 
-	link_tbl_index = 0;
+	sec4_sg_index = 0;
 	if (!(contig & GIV_SRC_CONTIG)) {
-		sg_to_link_tbl(req->assoc, assoc_nents,
-			       edesc->link_tbl +
-			       link_tbl_index, 0);
-		link_tbl_index += assoc_nents;
-		sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
+		sg_to_sec4_sg(req->assoc, assoc_nents,
+			      edesc->sec4_sg +
+			      sec4_sg_index, 0);
+		sec4_sg_index += assoc_nents;
+		dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
 				   iv_dma, ivsize, 0);
-		link_tbl_index += 1;
-		sg_to_link_tbl_last(req->src, src_nents,
-				    edesc->link_tbl +
-				    link_tbl_index, 0);
-		link_tbl_index += src_nents;
+		sec4_sg_index += 1;
+		sg_to_sec4_sg_last(req->src, src_nents,
+				   edesc->sec4_sg +
+				   sec4_sg_index, 0);
+		sec4_sg_index += src_nents;
 	}
 	if (unlikely(req->src != req->dst && !(contig & GIV_DST_CONTIG))) {
-		sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
+		dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
 				   iv_dma, ivsize, 0);
-		link_tbl_index += 1;
-		sg_to_link_tbl_last(req->dst, dst_nents,
-				    edesc->link_tbl + link_tbl_index, 0);
+		sec4_sg_index += 1;
+		sg_to_sec4_sg_last(req->dst, dst_nents,
+				   edesc->sec4_sg + sec4_sg_index, 0);
 	}
 
 	return edesc;
@@ -1633,27 +1476,28 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
 					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	int src_nents, dst_nents = 0, link_tbl_bytes;
+	int src_nents, dst_nents = 0, sec4_sg_bytes;
 	struct ablkcipher_edesc *edesc;
 	dma_addr_t iv_dma = 0;
 	bool iv_contig = false;
 	int sgc;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	int link_tbl_index;
+	bool src_chained = false, dst_chained = false;
+	int sec4_sg_index;
 
-	src_nents = sg_count(req->src, req->nbytes);
+	src_nents = sg_count(req->src, req->nbytes, &src_chained);
 
-	if (unlikely(req->dst != req->src))
-		dst_nents = sg_count(req->dst, req->nbytes);
+	if (req->dst != req->src)
+		dst_nents = sg_count(req->dst, req->nbytes, &dst_chained);
 
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_BIDIRECTIONAL);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_BIDIRECTIONAL, src_chained);
 	} else {
-		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
-				 DMA_TO_DEVICE);
-		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
-				 DMA_FROM_DEVICE);
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_TO_DEVICE, src_chained);
+		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
+					 DMA_FROM_DEVICE, dst_chained);
 	}
 
 	/*
@@ -1665,44 +1509,46 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 		iv_contig = true;
 	else
 		src_nents = src_nents ? : 1;
-	link_tbl_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) *
-			 sizeof(struct link_tbl_entry);
+	sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) *
+			sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	edesc = kmalloc(sizeof(struct ablkcipher_edesc) + desc_bytes +
-			link_tbl_bytes, GFP_DMA | flags);
+			sec4_sg_bytes, GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->src_nents = src_nents;
+	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
-	edesc->link_tbl_bytes = link_tbl_bytes;
-	edesc->link_tbl = (void *)edesc + sizeof(struct ablkcipher_edesc) +
-			  desc_bytes;
+	edesc->dst_chained = dst_chained;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
+			 desc_bytes;
 
-	link_tbl_index = 0;
+	sec4_sg_index = 0;
 	if (!iv_contig) {
-		sg_to_link_tbl_one(edesc->link_tbl, iv_dma, ivsize, 0);
-		sg_to_link_tbl_last(req->src, src_nents,
-				    edesc->link_tbl + 1, 0);
-		link_tbl_index += 1 + src_nents;
+		dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
+		sg_to_sec4_sg_last(req->src, src_nents,
+				   edesc->sec4_sg + 1, 0);
+		sec4_sg_index += 1 + src_nents;
 	}
 
-	if (unlikely(dst_nents)) {
-		sg_to_link_tbl_last(req->dst, dst_nents,
-			edesc->link_tbl + link_tbl_index, 0);
+	if (dst_nents) {
+		sg_to_sec4_sg_last(req->dst, dst_nents,
+			edesc->sec4_sg + sec4_sg_index, 0);
 	}
 
-	edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
-					     link_tbl_bytes, DMA_TO_DEVICE);
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
 	edesc->iv_dma = iv_dma;
 
 #ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ablkcipher link_tbl@"xstr(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->link_tbl,
-		       link_tbl_bytes, 1);
+	print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
+		       sec4_sg_bytes, 1);
 #endif
 
 	*iv_contig_out = iv_contig;
@@ -2227,7 +2073,7 @@ static int caam_cra_init(struct crypto_tfm *tfm)
 	 * distribute tfms across job rings to ensure in-order
 	 * crypto request processing per tfm
 	 */
-	ctx->jrdev = priv->algapi_jr[(tgt_jr / 2) % priv->num_jrs_for_algapi];
+	ctx->jrdev = priv->jrdev[(tgt_jr / 2) % priv->total_jobrs];
 
 	/* copy descriptor header template value */
 	ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type;
@@ -2264,7 +2110,6 @@ static void __exit caam_algapi_exit(void)
 	struct device *ctrldev;
 	struct caam_drv_private *priv;
 	struct caam_crypto_alg *t_alg, *n;
-	int i, err;
 
 	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
 	if (!dev_node) {
@@ -2289,13 +2134,6 @@ static void __exit caam_algapi_exit(void)
 		list_del(&t_alg->entry);
 		kfree(t_alg);
 	}
-
-	for (i = 0; i < priv->total_jobrs; i++) {
-		err = caam_jr_deregister(priv->algapi_jr[i]);
-		if (err < 0)
-			break;
-	}
-	kfree(priv->algapi_jr);
 }
 
 static struct caam_crypto_alg *caam_alg_alloc(struct device *ctrldev,
@@ -2348,7 +2186,7 @@ static int __init caam_algapi_init(void)
 {
 	struct device_node *dev_node;
 	struct platform_device *pdev;
-	struct device *ctrldev, **jrdev;
+	struct device *ctrldev;
 	struct caam_drv_private *priv;
 	int i = 0, err = 0;
 
@@ -2369,24 +2207,6 @@ static int __init caam_algapi_init(void)
 
 	INIT_LIST_HEAD(&priv->alg_list);
 
-	jrdev = kmalloc(sizeof(*jrdev) * priv->total_jobrs, GFP_KERNEL);
-	if (!jrdev)
-		return -ENOMEM;
-
-	for (i = 0; i < priv->total_jobrs; i++) {
-		err = caam_jr_register(ctrldev, &jrdev[i]);
-		if (err < 0)
-			break;
-	}
-	if (err < 0 && i == 0) {
-		dev_err(ctrldev, "algapi error in job ring registration: %d\n",
-			err);
-		kfree(jrdev);
-		return err;
-	}
-
-	priv->num_jrs_for_algapi = i;
-	priv->algapi_jr = jrdev;
 	atomic_set(&priv->tfm_count, -1);
 
 	/* register crypto algorithms the device supports */
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
new file mode 100644
index 0000000000000000000000000000000000000000..895aaf2bca92e85bb7bca724b9ef73afeb51af4a
--- /dev/null
+++ b/drivers/crypto/caam/caamhash.c
@@ -0,0 +1,1878 @@
+/*
+ * caam - Freescale FSL CAAM support for ahash functions of crypto API
+ *
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Based on caamalg.c crypto API driver.
+ *
+ * relationship of digest job descriptor or first job descriptor after init to
+ * shared descriptors:
+ *
+ * ---------------                     ---------------
+ * | JobDesc #1  |-------------------->|  ShareDesc  |
+ * | *(packet 1) |                     |  (hashKey)  |
+ * ---------------                     | (operation) |
+ *                                     ---------------
+ *
+ * relationship of subsequent job descriptors to shared descriptors:
+ *
+ * ---------------                     ---------------
+ * | JobDesc #2  |-------------------->|  ShareDesc  |
+ * | *(packet 2) |      |------------->|  (hashKey)  |
+ * ---------------      |    |-------->| (operation) |
+ *       .              |    |         | (load ctx2) |
+ *       .              |    |         ---------------
+ * ---------------      |    |
+ * | JobDesc #3  |------|    |
+ * | *(packet 3) |           |
+ * ---------------           |
+ *       .                   |
+ *       .                   |
+ * ---------------           |
+ * | JobDesc #4  |------------
+ * | *(packet 4) |
+ * ---------------
+ *
+ * The SharedDesc never changes for a connection unless rekeyed, but
+ * each packet will likely be in a different place. So all we need
+ * to know to process the packet is where the input is, where the
+ * output goes, and what context we want to process with. Context is
+ * in the SharedDesc, packet references in the JobDesc.
+ *
+ * So, a job desc looks like:
+ *
+ * ---------------------
+ * | Header            |
+ * | ShareDesc Pointer |
+ * | SEQ_OUT_PTR       |
+ * | (output buffer)   |
+ * | (output length)   |
+ * | SEQ_IN_PTR        |
+ * | (input buffer)    |
+ * | (input length)    |
+ * ---------------------
+ */
+
+#include "compat.h"
+
+#include "regs.h"
+#include "intern.h"
+#include "desc_constr.h"
+#include "jr.h"
+#include "error.h"
+#include "sg_sw_sec4.h"
+#include "key_gen.h"
+
+#define CAAM_CRA_PRIORITY		3000
+
+/* max hash key is max split key size */
+#define CAAM_MAX_HASH_KEY_SIZE		(SHA512_DIGEST_SIZE * 2)
+
+#define CAAM_MAX_HASH_BLOCK_SIZE	SHA512_BLOCK_SIZE
+#define CAAM_MAX_HASH_DIGEST_SIZE	SHA512_DIGEST_SIZE
+
+/* lengths of the descriptor text, in bytes */
+#define DESC_JOB_IO_LEN			(CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3)
+
+#define DESC_AHASH_BASE			(4 * CAAM_CMD_SZ)
+#define DESC_AHASH_UPDATE_LEN		(6 * CAAM_CMD_SZ)
+#define DESC_AHASH_UPDATE_FIRST_LEN	(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
+#define DESC_AHASH_FINAL_LEN		(DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
+#define DESC_AHASH_FINUP_LEN		(DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
+#define DESC_AHASH_DIGEST_LEN		(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
+
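+/*
+ * Each per-session shared descriptor buffer below is sized to hold the
+ * largest descriptor text plus an immediate (padded) split key.
+ */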
+#define DESC_HASH_MAX_USED_BYTES	(DESC_AHASH_FINAL_LEN + \
+					 CAAM_MAX_HASH_KEY_SIZE)
+#define DESC_HASH_MAX_USED_LEN		(DESC_HASH_MAX_USED_BYTES / CAAM_CMD_SZ)
+
+/* caam context sizes for hashes: running digest + 8 */
+#define HASH_MSG_LEN			8
+#define MAX_CTX_LEN			(HASH_MSG_LEN + SHA512_DIGEST_SIZE)
+
+#ifdef DEBUG
+/* for print_hex_dumps with line references */
+#define xstr(s) str(s)
+#define str(s) #s
+#define debug(format, arg...) printk(format, arg)
+#else
+#define debug(format, arg...)
+#endif
+
+/* ahash per-session context */
+struct caam_hash_ctx {
+	struct device *jrdev;
+	u32 sh_desc_update[DESC_HASH_MAX_USED_LEN];
+	u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN];
+	u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN];
+	u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN];
+	u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN];
+	dma_addr_t sh_desc_update_dma;
+	dma_addr_t sh_desc_update_first_dma;
+	dma_addr_t sh_desc_fin_dma;
+	dma_addr_t sh_desc_digest_dma;
+	dma_addr_t sh_desc_finup_dma;
+	u32 alg_type;
+	u32 alg_op;
+	u8 key[CAAM_MAX_HASH_KEY_SIZE];
+	dma_addr_t key_dma;
+	int ctx_len;
+	unsigned int split_key_len;
+	unsigned int split_key_pad_len;
+};
+
+/* ahash state */
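+/*
+ * buf_0/buflen_0 and buf_1/buflen_1 form a ping-pong pair selected by
+ * current_buf: one holds the partial block carried into this request,
+ * the other collects the bytes left over for the next one.
+ */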
+struct caam_hash_state {
+	dma_addr_t buf_dma;
+	dma_addr_t ctx_dma;
+	u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen_0;
+	u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen_1;
+	u8 caam_ctx[MAX_CTX_LEN];
+	int (*update)(struct ahash_request *req);
+	int (*final)(struct ahash_request *req);
+	int (*finup)(struct ahash_request *req);
+	int current_buf;
+};
+
+/* Common job descriptor seq in/out ptr routines */
+
+/* Map state->caam_ctx, and append seq_out_ptr command that points to it */
+static inline void map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev,
+				       struct caam_hash_state *state,
+				       int ctx_len)
+{
+	state->ctx_dma = dma_map_single(jrdev, state->caam_ctx,
+					ctx_len, DMA_FROM_DEVICE);
+	append_seq_out_ptr(desc, state->ctx_dma, ctx_len, 0);
+}
+
+/* Map req->result, and append seq_out_ptr command that points to it */
+static inline dma_addr_t map_seq_out_ptr_result(u32 *desc, struct device *jrdev,
+						u8 *result, int digestsize)
+{
+	dma_addr_t dst_dma;
+
+	dst_dma = dma_map_single(jrdev, result, digestsize, DMA_FROM_DEVICE);
+	append_seq_out_ptr(desc, dst_dma, digestsize, 0);
+
+	return dst_dma;
+}
+
+/* Map current buffer in state and put it in link table */
+static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev,
+					    struct sec4_sg_entry *sec4_sg,
+					    u8 *buf, int buflen)
+{
+	dma_addr_t buf_dma;
+
+	buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
+	dma_to_sec4_sg_one(sec4_sg, buf_dma, buflen, 0);
+
+	return buf_dma;
+}
+
+/* Map req->src and put it in link table */
+static inline void src_map_to_sec4_sg(struct device *jrdev,
+				      struct scatterlist *src, int src_nents,
+				      struct sec4_sg_entry *sec4_sg,
+				      bool chained)
+{
+	dma_map_sg_chained(jrdev, src, src_nents, DMA_TO_DEVICE, chained);
+	sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
+}
+
+/*
+ * Only put the buffer in the link table if it contains data; either way,
+ * unmap any previously used buffer mapping that is still outstanding.
+ */
+static inline dma_addr_t
+try_buf_map_to_sec4_sg(struct device *jrdev, struct sec4_sg_entry *sec4_sg,
+		       u8 *buf, dma_addr_t buf_dma, int buflen,
+		       int last_buflen)
+{
+	if (buf_dma && !dma_mapping_error(jrdev, buf_dma))
+		dma_unmap_single(jrdev, buf_dma, last_buflen, DMA_TO_DEVICE);
+	if (buflen)
+		buf_dma = buf_map_to_sec4_sg(jrdev, sec4_sg, buf, buflen);
+	else
+		buf_dma = 0;
+
+	return buf_dma;
+}
+
+/* Map state->caam_ctx, and add it to link table */
+static inline void ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev,
+				      struct caam_hash_state *state,
+				      int ctx_len,
+				      struct sec4_sg_entry *sec4_sg,
+				      u32 flag)
+{
+	state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, ctx_len, flag);
+	dma_to_sec4_sg_one(sec4_sg, state->ctx_dma, ctx_len, 0);
+}
+
+/* Common shared descriptor commands */
+static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
+{
+	append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
+			  ctx->split_key_len, CLASS_2 |
+			  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+}
+
+/* Append key if it has been set */
+static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
+{
+	u32 *key_jump_cmd;
+
+	init_sh_desc(desc, HDR_SHARE_WAIT);
+
+	if (ctx->split_key_len) {
+		/* Skip if already shared */
+		key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					   JUMP_COND_SHRD);
+
+		append_key_ahash(desc, ctx);
+
+		set_jump_tgt_here(desc, key_jump_cmd);
+	}
+
+	/* Propagate errors from shared to job descriptor */
+	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
+}
+
+/*
+ * For ahash read data from seqin following state->caam_ctx,
+ * and write resulting class2 context to seqout, which may be state->caam_ctx
+ * or req->result
+ */
+static inline void ahash_append_load_str(u32 *desc, int digestsize)
+{
+	/* Calculate remaining bytes to read */
+	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+	/* Read remaining bytes */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
+			     FIFOLD_TYPE_MSG | KEY_VLF);
+
+	/* Store class2 context bytes */
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+}
+
+/*
+ * For ahash update, final and finup, import context, read and write to seqout
+ */
+static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state,
+					 int digestsize,
+					 struct caam_hash_ctx *ctx)
+{
+	init_sh_desc_key_ahash(desc, ctx);
+
+	/* Import context from software */
+	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_2_CCB | ctx->ctx_len);
+
+	/* Class 2 operation */
+	append_operation(desc, op | state | OP_ALG_ENCRYPT);
+
+	/*
+	 * Load from buf and/or src and write to req->result or state->context
+	 */
+	ahash_append_load_str(desc, digestsize);
+}
+
+/* For ahash first update and digest, read data and write to seqout */
+static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state,
+				     int digestsize, struct caam_hash_ctx *ctx)
+{
+	init_sh_desc_key_ahash(desc, ctx);
+
+	/* Class 2 operation */
+	append_operation(desc, op | state | OP_ALG_ENCRYPT);
+
+	/*
+	 * Load from buf and/or src and write to req->result or state->context
+	 */
+	ahash_append_load_str(desc, digestsize);
+}
+
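+/*
+ * (Re)build the update, update_first, final, finup and digest shared
+ * descriptors and DMA-map each one so job descriptors can refer to them
+ * by bus address.
+ */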
+static int ahash_set_sh_desc(struct crypto_ahash *ahash)
+{
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct device *jrdev = ctx->jrdev;
+	u32 have_key = 0;
+	u32 *desc;
+
+	if (ctx->split_key_len)
+		have_key = OP_ALG_AAI_HMAC_PRECOMP;
+
+	/* ahash_update shared descriptor */
+	desc = ctx->sh_desc_update;
+
+	init_sh_desc(desc, HDR_SHARE_WAIT);
+
+	/* Import context from software */
+	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_2_CCB | ctx->ctx_len);
+
+	/* Class 2 operation */
+	append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE |
+			 OP_ALG_ENCRYPT);
+
+	/* Load data and write to result or context */
+	ahash_append_load_str(desc, ctx->ctx_len);
+
+	ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+						 DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash update shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	/* ahash_update_first shared descriptor */
+	desc = ctx->sh_desc_update_first;
+
+	ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT,
+			  ctx->ctx_len, ctx);
+
+	ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc,
+						       desc_bytes(desc),
+						       DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_update_first_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash update first shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	/* ahash_final shared descriptor */
+	desc = ctx->sh_desc_fin;
+
+	ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
+			      OP_ALG_AS_FINALIZE, digestsize, ctx);
+
+	ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash final shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	/* ahash_finup shared descriptor */
+	desc = ctx->sh_desc_finup;
+
+	ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
+			      OP_ALG_AS_FINALIZE, digestsize, ctx);
+
+	ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+						DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash finup shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	/* ahash_digest shared descriptor */
+	desc = ctx->sh_desc_digest;
+
+	ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL,
+			  digestsize, ctx);
+
+	ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc,
+						 desc_bytes(desc),
+						 DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_digest_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash digest shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	return 0;
+}
+
+static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in,
+			      u32 keylen)
+{
+	return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
+			       ctx->split_key_pad_len, key_in, keylen,
+			       ctx->alg_op);
+}
+
+/* Digest the key down to digestsize if it is too large */
+static u32 hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
+			   u32 *keylen, u8 *key_out, u32 digestsize)
+{
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc;
+	struct split_key_result result;
+	dma_addr_t src_dma, dst_dma;
+	int ret = 0;
+
+	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+
+	init_job_desc(desc, 0);
+
+	src_dma = dma_map_single(jrdev, (void *)key_in, *keylen,
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, src_dma)) {
+		dev_err(jrdev, "unable to map key input memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	dst_dma = dma_map_single(jrdev, (void *)key_out, digestsize,
+				 DMA_FROM_DEVICE);
+	if (dma_mapping_error(jrdev, dst_dma)) {
+		dev_err(jrdev, "unable to map key output memory\n");
+		dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE);
+		kfree(desc);
+		return -ENOMEM;
+	}
+
+	/* Job descriptor to perform unkeyed hash on key_in */
+	append_operation(desc, ctx->alg_type | OP_ALG_ENCRYPT |
+			 OP_ALG_AS_INITFINAL);
+	append_seq_in_ptr(desc, src_dma, *keylen, 0);
+	append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 |
+			     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_MSG);
+	append_seq_out_ptr(desc, dst_dma, digestsize, 0);
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key_in@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1);
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	result.err = 0;
+	init_completion(&result.completion);
+
+	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+	if (!ret) {
+		/* in progress */
+		wait_for_completion_interruptible(&result.completion);
+		ret = result.err;
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "digested key@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, key_in,
+			       digestsize, 1);
+#endif
+	}
+	dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE);
+	dma_unmap_single(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE);
+
+	*keylen = digestsize;
+
+	kfree(desc);
+
+	return ret;
+}
+
+static int ahash_setkey(struct crypto_ahash *ahash,
+			const u8 *key, unsigned int keylen)
+{
+	/* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *jrdev = ctx->jrdev;
+	int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int ret = 0;
+	u8 *hashed_key = NULL;
+
+#ifdef DEBUG
+	printk(KERN_ERR "keylen %d\n", keylen);
+#endif
+
+	if (keylen > blocksize) {
+		hashed_key = kmalloc(sizeof(u8) * digestsize, GFP_KERNEL |
+				     GFP_DMA);
+		if (!hashed_key)
+			return -ENOMEM;
+		ret = hash_digest_key(ctx, key, &keylen, hashed_key,
+				      digestsize);
+		if (ret)
+			goto badkey;
+		key = hashed_key;
+	}
+
+	/* Pick class 2 key length from algorithm submask */
+	ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
+				      OP_ALG_ALGSEL_SHIFT] * 2;
+	ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
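+	/* e.g. hmac(sha256): split_key_len = 2 * 32 = 64, padded to 64 */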
+
+#ifdef DEBUG
+	printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
+	       ctx->split_key_len, ctx->split_key_pad_len);
+	print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	ret = gen_split_hash_key(ctx, key, keylen);
+	if (ret)
+		goto badkey;
+
+	ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
+				      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->key_dma)) {
+		dev_err(jrdev, "unable to map key i/o memory\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+		       ctx->split_key_pad_len, 1);
+#endif
+
+	ret = ahash_set_sh_desc(ahash);
+	if (ret) {
+		dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
+				 DMA_TO_DEVICE);
+	}
+
+	kfree(hashed_key);
+	return ret;
+badkey:
+	kfree(hashed_key);
+	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+/*
+ * ahash_edesc - s/w-extended ahash descriptor
+ * @dst_dma: physical mapped address of req->result
+ * @sec4_sg_dma: physical mapped address of h/w link table
+ * @chained: if source is chained
+ * @src_nents: number of segments in input scatterlist
+ * @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @sec4_sg: pointer to h/w link table
+ * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ */
+struct ahash_edesc {
+	dma_addr_t dst_dma;
+	dma_addr_t sec4_sg_dma;
+	bool chained;
+	int src_nents;
+	int sec4_sg_bytes;
+	struct sec4_sg_entry *sec4_sg;
+	u32 hw_desc[0];
+};
+
+static inline void ahash_unmap(struct device *dev,
+			struct ahash_edesc *edesc,
+			struct ahash_request *req, int dst_len)
+{
+	if (edesc->src_nents)
+		dma_unmap_sg_chained(dev, req->src, edesc->src_nents,
+				     DMA_TO_DEVICE, edesc->chained);
+	if (edesc->dst_dma)
+		dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE);
+
+	if (edesc->sec4_sg_bytes)
+		dma_unmap_single(dev, edesc->sec4_sg_dma,
+				 edesc->sec4_sg_bytes, DMA_TO_DEVICE);
+}
+
+static inline void ahash_unmap_ctx(struct device *dev,
+			struct ahash_edesc *edesc,
+			struct ahash_request *req, int dst_len, u32 flag)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	if (state->ctx_dma)
+		dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag);
+	ahash_unmap(dev, edesc, req, dst_len);
+}
+
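+/*
+ * Job completion callbacks: ahash_done() is used when state->caam_ctx was
+ * not mapped for this job, while the _bi/_ctx_src/_ctx_dst variants also
+ * unmap the context with the DMA direction used by the submitting path.
+ */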
+static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
+		       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+#ifdef DEBUG
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap(jrdev, edesc, req, digestsize);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
+			    void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+#ifdef DEBUG
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
+			       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+#ifdef DEBUG
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
+			       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+#ifdef DEBUG
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+/* submit update job descriptor */
+static int ahash_update_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0;
+	u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1;
+	int *next_buflen = state->current_buf ? &state->buflen_0 :
+			   &state->buflen_1, last_buflen;
+	int in_len = *buflen + req->nbytes, to_hash;
+	u32 *sh_desc = ctx->sh_desc_update, *desc;
+	dma_addr_t ptr = ctx->sh_desc_update_dma;
+	int src_nents, sec4_sg_bytes, sec4_sg_src_index;
+	struct ahash_edesc *edesc;
+	bool chained = false;
+	int ret = 0;
+	int sh_len;
+
+	last_buflen = *next_buflen;
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
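+	/* only whole blocks are hashed now; the remainder stays buffered */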
+
+	if (to_hash) {
+		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen),
+				       &chained);
+		sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
+		sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+				 sizeof(struct sec4_sg_entry);
+
+		/*
+		 * allocate space for base edesc and hw desc commands,
+		 * link tables
+		 */
+		edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+				sec4_sg_bytes, GFP_DMA | flags);
+		if (!edesc) {
+			dev_err(jrdev,
+				"could not allocate extended descriptor\n");
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		edesc->chained = chained;
+		edesc->sec4_sg_bytes = sec4_sg_bytes;
+		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+				 DESC_JOB_IO_LEN;
+		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+						     sec4_sg_bytes,
+						     DMA_TO_DEVICE);
+
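+		/*
+		 * link table layout: [0] the running context, [1] previously
+		 * buffered bytes (only when *buflen != 0), then req->src
+		 */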
+		ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
+				   edesc->sec4_sg, DMA_BIDIRECTIONAL);
+
+		state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
+							edesc->sec4_sg + 1,
+							buf, state->buf_dma,
+							*buflen, last_buflen);
+
+		if (src_nents) {
+			src_map_to_sec4_sg(jrdev, req->src, src_nents,
+					   edesc->sec4_sg + sec4_sg_src_index,
+					   chained);
+			if (*next_buflen) {
+				sg_copy_part(next_buf, req->src, to_hash -
+					     *buflen, req->nbytes);
+				state->current_buf = !state->current_buf;
+			}
+		} else {
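+			/*
+			 * no new src data: flag the previous entry as the
+			 * last one in the link table
+			 */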
+			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
+							SEC4_SG_LEN_FIN;
+		}
+
+		sh_len = desc_len(sh_desc);
+		desc = edesc->hw_desc;
+		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
+				     HDR_REVERSE);
+
+		append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
+				       to_hash, LDST_SGF);
+
+		append_seq_out_ptr(desc, state->ctx_dma, ctx->ctx_len, 0);
+
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+			       desc_bytes(desc), 1);
+#endif
+
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
+		if (!ret) {
+			ret = -EINPROGRESS;
+		} else {
+			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
+					   DMA_BIDIRECTIONAL);
+			kfree(edesc);
+		}
+	} else if (*next_buflen) {
+		sg_copy(buf + *buflen, req->src, req->nbytes);
+		*buflen = *next_buflen;
+		*next_buflen = last_buflen;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+		       *next_buflen, 1);
+#endif
+
+	return ret;
+}
+
+static int ahash_final_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_fin, *desc;
+	dma_addr_t ptr = ctx->sh_desc_fin_dma;
+	int sec4_sg_bytes;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	sec4_sg_bytes = (1 + (buflen ? 1 : 0)) * sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+	edesc->src_nents = 0;
+
+	ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg,
+			   DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
+						buf, state->buf_dma, buflen,
+						last_buflen);
+	(edesc->sec4_sg + (buflen ? 1 : 0))->len |= SEC4_SG_LEN_FIN;
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
+			  LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+static int ahash_finup_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_finup, *desc;
+	dma_addr_t ptr = ctx->sh_desc_finup_dma;
+	int sec4_sg_bytes, sec4_sg_src_index;
+	int src_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	bool chained = false;
+	int ret = 0;
+	int sh_len;
+
+	src_nents = __sg_count(req->src, req->nbytes, &chained);
+	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
+	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+			 sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->src_nents = src_nents;
+	edesc->chained = chained;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+
+	ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg,
+			   DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
+						buf, state->buf_dma, buflen,
+						last_buflen);
+
+	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
+			   sec4_sg_src_index, chained);
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
+			       buflen + req->nbytes, LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+static int ahash_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int src_nents, sec4_sg_bytes;
+	dma_addr_t src_dma;
+	struct ahash_edesc *edesc;
+	bool chained = false;
+	int ret = 0;
+	u32 options;
+	int sh_len;
+
+	src_nents = sg_count(req->src, req->nbytes, &chained);
+	dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE,
+			   chained);
+	sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
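+	/* src_nents == 0 means a single contiguous segment, no link table */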
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes +
+			DESC_JOB_IO_LEN, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			  DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+	edesc->src_nents = src_nents;
+	edesc->chained = chained;
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	if (src_nents) {
+		sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
+		src_dma = edesc->sec4_sg_dma;
+		options = LDST_SGF;
+	} else {
+		src_dma = sg_dma_address(req->src);
+		options = 0;
+	}
+	append_seq_in_ptr(desc, src_dma, req->nbytes, options);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/* submit ahash final if it is the first job descriptor */
+static int ahash_final_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN,
+			GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
+
+	append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+	edesc->src_nents = 0;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/* submit ahash update if it is the first job descriptor (no hw context yet) */
+static int ahash_update_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0;
+	u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1;
+	int *next_buflen = state->current_buf ? &state->buflen_0 :
+			   &state->buflen_1;
+	int in_len = *buflen + req->nbytes, to_hash;
+	int sec4_sg_bytes, src_nents;
+	struct ahash_edesc *edesc;
+	u32 *desc, *sh_desc = ctx->sh_desc_update_first;
+	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+	bool chained = false;
+	int ret = 0;
+	int sh_len;
+
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
+
+	if (to_hash) {
+		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen),
+				       &chained);
+		sec4_sg_bytes = (1 + src_nents) *
+				sizeof(struct sec4_sg_entry);
+
+		/*
+		 * allocate space for base edesc and hw desc commands,
+		 * link tables
+		 */
+		edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+				sec4_sg_bytes, GFP_DMA | flags);
+		if (!edesc) {
+			dev_err(jrdev,
+				"could not allocate extended descriptor\n");
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		edesc->chained = chained;
+		edesc->sec4_sg_bytes = sec4_sg_bytes;
+		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+				 DESC_JOB_IO_LEN;
+		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+						    sec4_sg_bytes,
+						    DMA_TO_DEVICE);
+
+		state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
+						    buf, *buflen);
+		src_map_to_sec4_sg(jrdev, req->src, src_nents,
+				   edesc->sec4_sg + 1, chained);
+		if (*next_buflen) {
+			sg_copy_part(next_buf, req->src, to_hash - *buflen,
+				    req->nbytes);
+			state->current_buf = !state->current_buf;
+		}
+
+		sh_len = desc_len(sh_desc);
+		desc = edesc->hw_desc;
+		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
+				     HDR_REVERSE);
+
+		append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF);
+
+		map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
+
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+			       desc_bytes(desc), 1);
+#endif
+
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
+		if (!ret) {
+			ret = -EINPROGRESS;
+			state->update = ahash_update_ctx;
+			state->finup = ahash_finup_ctx;
+			state->final = ahash_final_ctx;
+		} else {
+			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
+					DMA_TO_DEVICE);
+			kfree(edesc);
+		}
+	} else if (*next_buflen) {
+		sg_copy(buf + *buflen, req->src, req->nbytes);
+		*buflen = *next_buflen;
+		*next_buflen = 0;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+		       *next_buflen, 1);
+#endif
+
+	return ret;
+}
+
+/* submit ahash finup if it is the first job descriptor (no hw context yet) */
+static int ahash_finup_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int sec4_sg_bytes, sec4_sg_src_index, src_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	bool chained = false;
+	int sh_len;
+	int ret = 0;
+
+	src_nents = __sg_count(req->src, req->nbytes, &chained);
+	sec4_sg_src_index = 2;
+	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+			 sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->src_nents = src_nents;
+	edesc->chained = chained;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf,
+						state->buf_dma, buflen,
+						last_buflen);
+
+	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1,
+			   chained);
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen +
+			       req->nbytes, LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/* submit first update job descriptor after init */
+static int ahash_update_first(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *next_buf = state->buf_0 + state->current_buf *
+		       CAAM_MAX_HASH_BLOCK_SIZE;
+	int *next_buflen = &state->buflen_0 + state->current_buf;
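+	/* current_buf is still 0 here; it is only toggled by later updates */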
+	int to_hash;
+	u32 *sh_desc = ctx->sh_desc_update_first, *desc;
+	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+	int sec4_sg_bytes, src_nents;
+	dma_addr_t src_dma;
+	u32 options;
+	struct ahash_edesc *edesc;
+	bool chained = false;
+	int ret = 0;
+	int sh_len;
+
+	*next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) -
+				      1);
+	to_hash = req->nbytes - *next_buflen;
+
+	if (to_hash) {
+		src_nents = sg_count(req->src, req->nbytes - (*next_buflen),
+				     &chained);
+		dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+				   DMA_TO_DEVICE, chained);
+		sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+		/*
+		 * allocate space for base edesc and hw desc commands,
+		 * link tables
+		 */
+		edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+				sec4_sg_bytes, GFP_DMA | flags);
+		if (!edesc) {
+			dev_err(jrdev,
+				"could not allocate extended descriptor\n");
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		edesc->chained = chained;
+		edesc->sec4_sg_bytes = sec4_sg_bytes;
+		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+				 DESC_JOB_IO_LEN;
+		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+						    sec4_sg_bytes,
+						    DMA_TO_DEVICE);
+
+		if (src_nents) {
+			sg_to_sec4_sg_last(req->src, src_nents,
+					   edesc->sec4_sg, 0);
+			src_dma = edesc->sec4_sg_dma;
+			options = LDST_SGF;
+		} else {
+			src_dma = sg_dma_address(req->src);
+			options = 0;
+		}
+
+		if (*next_buflen)
+			sg_copy_part(next_buf, req->src, to_hash, req->nbytes);
+
+		sh_len = desc_len(sh_desc);
+		desc = edesc->hw_desc;
+		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
+				     HDR_REVERSE);
+
+		append_seq_in_ptr(desc, src_dma, to_hash, options);
+
+		map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
+
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+			       desc_bytes(desc), 1);
+#endif
+
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst,
+				      req);
+		if (!ret) {
+			ret = -EINPROGRESS;
+			state->update = ahash_update_ctx;
+			state->finup = ahash_finup_ctx;
+			state->final = ahash_final_ctx;
+		} else {
+			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
+					DMA_TO_DEVICE);
+			kfree(edesc);
+		}
+	} else if (*next_buflen) {
+		state->update = ahash_update_no_ctx;
+		state->finup = ahash_finup_no_ctx;
+		state->final = ahash_final_no_ctx;
+		sg_copy(next_buf, req->src, req->nbytes);
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+		       *next_buflen, 1);
+#endif
+
+	return ret;
+}
+
+static int ahash_finup_first(struct ahash_request *req)
+{
+	return ahash_digest(req);
+}
+
+static int ahash_init(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	state->update = ahash_update_first;
+	state->finup = ahash_finup_first;
+	state->final = ahash_final_no_ctx;
+
+	state->current_buf = 0;
+
+	return 0;
+}
+
+static int ahash_update(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->update(req);
+}
+
+static int ahash_finup(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->finup(req);
+}
+
+static int ahash_final(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->final(req);
+}
+
+static int ahash_export(struct ahash_request *req, void *out)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	memcpy(out, ctx, sizeof(struct caam_hash_ctx));
+	memcpy(out + sizeof(struct caam_hash_ctx), state,
+	       sizeof(struct caam_hash_state));
+	return 0;
+}
+
+static int ahash_import(struct ahash_request *req, const void *in)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
+	memcpy(state, in + sizeof(struct caam_hash_ctx),
+	       sizeof(struct caam_hash_state));
+	return 0;
+}
+
+struct caam_hash_template {
+	char name[CRYPTO_MAX_ALG_NAME];
+	char driver_name[CRYPTO_MAX_ALG_NAME];
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+	char hmac_driver_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	struct ahash_alg template_ahash;
+	u32 alg_type;
+	u32 alg_op;
+};
+
+/* ahash descriptors */
+static struct caam_hash_template driver_hash[] = {
+	{
+		.name = "sha1",
+		.driver_name = "sha1-caam",
+		.hmac_name = "hmac(sha1)",
+		.hmac_driver_name = "hmac-sha1-caam",
+		.blocksize = SHA1_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA1_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_SHA1,
+		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
+	}, {
+		.name = "sha224",
+		.driver_name = "sha224-caam",
+		.hmac_name = "hmac(sha224)",
+		.hmac_driver_name = "hmac-sha224-caam",
+		.blocksize = SHA224_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA224_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_SHA224,
+		.alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
+	}, {
+		.name = "sha256",
+		.driver_name = "sha256-caam",
+		.hmac_name = "hmac(sha256)",
+		.hmac_driver_name = "hmac-sha256-caam",
+		.blocksize = SHA256_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA256_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_SHA256,
+		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
+	}, {
+		.name = "sha384",
+		.driver_name = "sha384-caam",
+		.hmac_name = "hmac(sha384)",
+		.hmac_driver_name = "hmac-sha384-caam",
+		.blocksize = SHA384_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA384_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_SHA384,
+		.alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
+	}, {
+		.name = "sha512",
+		.driver_name = "sha512-caam",
+		.hmac_name = "hmac(sha512)",
+		.hmac_driver_name = "hmac-sha512-caam",
+		.blocksize = SHA512_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA512_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_SHA512,
+		.alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
+	}, {
+		.name = "md5",
+		.driver_name = "md5-caam",
+		.hmac_name = "hmac(md5)",
+		.hmac_driver_name = "hmac-md5-caam",
+		.blocksize = MD5_BLOCK_WORDS * 4,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = MD5_DIGEST_SIZE,
+				},
+			},
+		.alg_type = OP_ALG_ALGSEL_MD5,
+		.alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
+	},
+};
+
+struct caam_hash_alg {
+	struct list_head entry;
+	struct device *ctrldev;
+	int alg_type;
+	int alg_op;
+	struct ahash_alg ahash_alg;
+};
+
+static int caam_hash_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct crypto_alg *base = tfm->__crt_alg;
+	struct hash_alg_common *halg =
+		 container_of(base, struct hash_alg_common, base);
+	struct ahash_alg *alg =
+		 container_of(halg, struct ahash_alg, halg);
+	struct caam_hash_alg *caam_hash =
+		 container_of(alg, struct caam_hash_alg, ahash_alg);
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_drv_private *priv = dev_get_drvdata(caam_hash->ctrldev);
+	/* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE,
+					 HASH_MSG_LEN + SHA1_DIGEST_SIZE,
+					 HASH_MSG_LEN + 32,
+					 HASH_MSG_LEN + SHA256_DIGEST_SIZE,
+					 HASH_MSG_LEN + 64,
+					 HASH_MSG_LEN + SHA512_DIGEST_SIZE };
+	int tgt_jr = atomic_inc_return(&priv->tfm_count);
+	int ret = 0;
+
+	/*
+	 * distribute tfms across job rings to ensure in-order
+	 * crypto request processing per tfm
+	 */
+	ctx->jrdev = priv->jrdev[tgt_jr % priv->total_jobrs];
+
+	/* copy descriptor header template value */
+	ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type;
+	ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op;
+
+	ctx->ctx_len = runninglen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
+				  OP_ALG_ALGSEL_SHIFT];
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct caam_hash_state));
+
+	ret = ahash_set_sh_desc(ahash);
+
+	return ret;
+}
+
+static void caam_hash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->sh_desc_update_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_dma))
+		dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_dma,
+				 desc_bytes(ctx->sh_desc_update),
+				 DMA_TO_DEVICE);
+	if (ctx->sh_desc_update_first_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_first_dma))
+		dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_first_dma,
+				 desc_bytes(ctx->sh_desc_update_first),
+				 DMA_TO_DEVICE);
+	if (ctx->sh_desc_fin_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_fin_dma))
+		dma_unmap_single(ctx->jrdev, ctx->sh_desc_fin_dma,
+				 desc_bytes(ctx->sh_desc_fin), DMA_TO_DEVICE);
+	if (ctx->sh_desc_digest_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_digest_dma))
+		dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma,
+				 desc_bytes(ctx->sh_desc_digest),
+				 DMA_TO_DEVICE);
+	if (ctx->sh_desc_finup_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma))
+		dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma,
+				 desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE);
+}
+
+static void __exit caam_algapi_hash_exit(void)
+{
+	struct device_node *dev_node;
+	struct platform_device *pdev;
+	struct device *ctrldev;
+	struct caam_drv_private *priv;
+	struct caam_hash_alg *t_alg, *n;
+
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
+	if (!dev_node)
+		return;
+
+	pdev = of_find_device_by_node(dev_node);
+	if (!pdev)
+		return;
+
+	ctrldev = &pdev->dev;
+	of_node_put(dev_node);
+	priv = dev_get_drvdata(ctrldev);
+
+	if (!priv->hash_list.next)
+		return;
+
+	list_for_each_entry_safe(t_alg, n, &priv->hash_list, entry) {
+		crypto_unregister_ahash(&t_alg->ahash_alg);
+		list_del(&t_alg->entry);
+		kfree(t_alg);
+	}
+}
+
+static struct caam_hash_alg *
+caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template,
+		bool keyed)
+{
+	struct caam_hash_alg *t_alg;
+	struct ahash_alg *halg;
+	struct crypto_alg *alg;
+
+	t_alg = kzalloc(sizeof(struct caam_hash_alg), GFP_KERNEL);
+	if (!t_alg) {
+		dev_err(ctrldev, "failed to allocate t_alg\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	t_alg->ahash_alg = template->template_ahash;
+	halg = &t_alg->ahash_alg;
+	alg = &halg->halg.base;
+
+	if (keyed) {
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->hmac_name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->hmac_driver_name);
+	} else {
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->driver_name);
+	}
+	alg->cra_module = THIS_MODULE;
+	alg->cra_init = caam_hash_cra_init;
+	alg->cra_exit = caam_hash_cra_exit;
+	alg->cra_ctxsize = sizeof(struct caam_hash_ctx);
+	alg->cra_priority = CAAM_CRA_PRIORITY;
+	alg->cra_blocksize = template->blocksize;
+	alg->cra_alignmask = 0;
+	alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_TYPE_AHASH;
+	alg->cra_type = &crypto_ahash_type;
+
+	t_alg->alg_type = template->alg_type;
+	t_alg->alg_op = template->alg_op;
+	t_alg->ctrldev = ctrldev;
+
+	return t_alg;
+}
+
+static int __init caam_algapi_hash_init(void)
+{
+	struct device_node *dev_node;
+	struct platform_device *pdev;
+	struct device *ctrldev;
+	struct caam_drv_private *priv;
+	int i = 0, err = 0;
+
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
+	if (!dev_node)
+		return -ENODEV;
+
+	pdev = of_find_device_by_node(dev_node);
+	if (!pdev)
+		return -ENODEV;
+
+	ctrldev = &pdev->dev;
+	priv = dev_get_drvdata(ctrldev);
+	of_node_put(dev_node);
+
+	INIT_LIST_HEAD(&priv->hash_list);
+
+	atomic_set(&priv->tfm_count, -1);
+
+	/* register crypto algorithms the device supports */
+	for (i = 0; i < ARRAY_SIZE(driver_hash); i++) {
+		/* TODO: check if h/w supports alg */
+		struct caam_hash_alg *t_alg;
+
+		/* register hmac version */
+		t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], true);
+		if (IS_ERR(t_alg)) {
+			err = PTR_ERR(t_alg);
+			dev_warn(ctrldev, "%s alg allocation failed\n",
+				 driver_hash[i].driver_name);
+			continue;
+		}
+
+		err = crypto_register_ahash(&t_alg->ahash_alg);
+		if (err) {
+			dev_warn(ctrldev, "%s alg registration failed\n",
+				t_alg->ahash_alg.halg.base.cra_driver_name);
+			kfree(t_alg);
+		} else
+			list_add_tail(&t_alg->entry, &priv->hash_list);
+
+		/* register unkeyed version */
+		t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], false);
+		if (IS_ERR(t_alg)) {
+			err = PTR_ERR(t_alg);
+			dev_warn(ctrldev, "%s alg allocation failed\n",
+				 driver_hash[i].driver_name);
+			continue;
+		}
+
+		err = crypto_register_ahash(&t_alg->ahash_alg);
+		if (err) {
+			dev_warn(ctrldev, "%s alg registration failed\n",
+				t_alg->ahash_alg.halg.base.cra_driver_name);
+			kfree(t_alg);
+		} else
+			list_add_tail(&t_alg->entry, &priv->hash_list);
+	}
+
+	return err;
+}
+
+module_init(caam_algapi_hash_init);
+module_exit(caam_algapi_hash_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM support for ahash functions of crypto API");
+MODULE_AUTHOR("Freescale Semiconductor - NMG");
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
new file mode 100644
index 0000000000000000000000000000000000000000..e2bfe161dece0fd433f457659c12871eff4fc706
--- /dev/null
+++ b/drivers/crypto/caam/caamrng.c
@@ -0,0 +1,309 @@
+/*
+ * caam - Freescale CAAM support for hw_random
+ *
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Based on caamalg.c crypto API driver.
+ *
+ * relationship between job descriptors and shared descriptors:
+ *
+ * ---------------                     --------------
+ * | JobDesc #0  |-------------------->| ShareDesc  |
+ * | *(buffer 0) |      |------------->| (generate) |
+ * ---------------      |              | (move)     |
+ *                      |              | (store)    |
+ * ---------------      |              --------------
+ * | JobDesc #1  |------|
+ * | *(buffer 1) |
+ * ---------------
+ *
+ * A job desc looks like this:
+ *
+ * ---------------------
+ * | Header            |
+ * | ShareDesc Pointer |
+ * | SEQ_OUT_PTR       |
+ * | (output buffer)   |
+ * ---------------------
+ *
+ * The SharedDesc never changes, and each job descriptor points to one of two
+ * buffers for each device, from which the data will be copied into the
+ * requested destination.
+ */
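For orientation, here is a minimal, illustrative sketch (not part of the patch; the helper name example_build_rng_job_desc is hypothetical) of how the job-descriptor layout drawn above maps onto the desc_constr.h helpers. It mirrors what rng_create_job_desc() does further down for one of the two buffers:

static void example_build_rng_job_desc(u32 *desc, dma_addr_t sh_desc_dma,
				       int sh_len, dma_addr_t out_buf_dma)
{
	/* Header plus pointer to the (deferred, reverse-order) shared desc */
	init_job_desc_shared(desc, sh_desc_dma, sh_len,
			     HDR_SHARE_DEFER | HDR_REVERSE);

	/* SEQ_OUT_PTR: destination buffer for the generated random bytes */
	append_seq_out_ptr_intlen(desc, out_buf_dma, RN_BUF_SIZE, 0);
}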
+
+#include <linux/hw_random.h>
+#include <linux/completion.h>
+#include <linux/atomic.h>
+
+#include "compat.h"
+
+#include "regs.h"
+#include "intern.h"
+#include "desc_constr.h"
+#include "jr.h"
+#include "error.h"
+
+/*
+ * Maximum buffer size: maximum number of random, cache-aligned bytes that
+ * will be generated and moved to seq out ptr (extlen not allowed)
+ */
+#define RN_BUF_SIZE			(0xffff / L1_CACHE_BYTES * \
+					 L1_CACHE_BYTES)
+
+/* length of descriptors */
+#define DESC_JOB_O_LEN			(CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
+#define DESC_RNG_LEN			(10 * CAAM_CMD_SZ)
+
+/* Buffer, its dma address, job descriptor and completion/state tracking */
+struct buf_data {
+	u8 buf[RN_BUF_SIZE];
+	dma_addr_t addr;
+	struct completion filled;
+	u32 hw_desc[DESC_JOB_O_LEN];
+#define BUF_NOT_EMPTY 0
+#define BUF_EMPTY 1
+#define BUF_PENDING 2  /* Empty, but with job pending -- don't submit another */
+	atomic_t empty;
+};
+
+/* rng per-device context */
+struct caam_rng_ctx {
+	struct device *jrdev;
+	dma_addr_t sh_desc_dma;
+	u32 sh_desc[DESC_RNG_LEN];
+	unsigned int cur_buf_idx;
+	int current_buf;
+	struct buf_data bufs[2];
+};
+
+static struct caam_rng_ctx rng_ctx;
+
+static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
+{
+	if (bd->addr)
+		dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE,
+				 DMA_FROM_DEVICE);
+}
+
+static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
+{
+	struct device *jrdev = ctx->jrdev;
+
+	if (ctx->sh_desc_dma)
+		dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN,
+				 DMA_TO_DEVICE);
+	rng_unmap_buf(jrdev, &ctx->bufs[0]);
+	rng_unmap_buf(jrdev, &ctx->bufs[1]);
+}
+
+static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
+{
+	struct buf_data *bd;
+
+	bd = (struct buf_data *)((char *)desc -
+	      offsetof(struct buf_data, hw_desc));
+
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	atomic_set(&bd->empty, BUF_NOT_EMPTY);
+	complete(&bd->filled);
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "rng refreshed buf@: ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1);
+#endif
+}
+
+static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
+{
+	struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)];
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc = bd->hw_desc;
+	int err;
+
+	dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf));
+	init_completion(&bd->filled);
+	err = caam_jr_enqueue(jrdev, desc, rng_done, ctx);
+	if (err)
+		complete(&bd->filled); /* don't wait on failed job */
+	else
+		atomic_inc(&bd->empty); /* note if pending */
+
+	return err;
+}
+
+static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct caam_rng_ctx *ctx = &rng_ctx;
+	struct buf_data *bd = &ctx->bufs[ctx->current_buf];
+	int next_buf_idx, copied_idx;
+	int err;
+
+	if (atomic_read(&bd->empty)) {
+		/* try to submit job if there wasn't one */
+		if (atomic_read(&bd->empty) == BUF_EMPTY) {
+			err = submit_job(ctx, 1);
+			/* if can't submit job, can't even wait */
+			if (err)
+				return 0;
+		}
+		/* no immediate data, so exit if not waiting */
+		if (!wait)
+			return 0;
+
+		/* waiting for pending job */
+		if (atomic_read(&bd->empty))
+			wait_for_completion(&bd->filled);
+	}
+
+	next_buf_idx = ctx->cur_buf_idx + max;
+	dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n",
+		 __func__, ctx->current_buf, ctx->cur_buf_idx);
+
+	/* if enough data in current buffer */
+	if (next_buf_idx < RN_BUF_SIZE) {
+		memcpy(data, bd->buf + ctx->cur_buf_idx, max);
+		ctx->cur_buf_idx = next_buf_idx;
+		return max;
+	}
+
+	/* else, copy what's left... */
+	copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx;
+	memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx);
+	ctx->cur_buf_idx = 0;
+	atomic_set(&bd->empty, BUF_EMPTY);
+
+	/* ...refill... */
+	submit_job(ctx, 1);
+
+	/* and use next buffer */
+	ctx->current_buf = !ctx->current_buf;
+	dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf);
+
+	/* since there already is some data read, don't wait */
+	return copied_idx + caam_read(rng, data + copied_idx,
+				      max - copied_idx, false);
+}
+
+static inline void rng_create_sh_desc(struct caam_rng_ctx *ctx)
+{
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc = ctx->sh_desc;
+
+	init_sh_desc(desc, HDR_SHARE_WAIT);
+
+	/* Propagate errors from shared to job descriptor */
+	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
+
+	/* Generate random bytes */
+	append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
+
+	/* Store bytes */
+	append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE);
+
+	ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+					  DMA_TO_DEVICE);
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+		       desc, desc_bytes(desc), 1);
+#endif
+}
+
+static inline void rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
+{
+	struct device *jrdev = ctx->jrdev;
+	struct buf_data *bd = &ctx->bufs[buf_id];
+	u32 *desc = bd->hw_desc;
+	int sh_len = desc_len(ctx->sh_desc);
+
+	init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER |
+			     HDR_REVERSE);
+
+	bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE);
+
+	append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0);
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+		       desc, desc_bytes(desc), 1);
+#endif
+}
+
+static void caam_cleanup(struct hwrng *rng)
+{
+	int i;
+	struct buf_data *bd;
+
+	for (i = 0; i < 2; i++) {
+		bd = &rng_ctx.bufs[i];
+		if (atomic_read(&bd->empty) == BUF_PENDING)
+			wait_for_completion(&bd->filled);
+	}
+
+	rng_unmap_ctx(&rng_ctx);
+}
+
+static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
+{
+	struct buf_data *bd = &ctx->bufs[buf_id];
+
+	rng_create_job_desc(ctx, buf_id);
+	atomic_set(&bd->empty, BUF_EMPTY);
+	submit_job(ctx, buf_id == ctx->current_buf);
+	wait_for_completion(&bd->filled);
+}
+
+static void caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
+{
+	ctx->jrdev = jrdev;
+	rng_create_sh_desc(ctx);
+	ctx->current_buf = 0;
+	ctx->cur_buf_idx = 0;
+	caam_init_buf(ctx, 0);
+	caam_init_buf(ctx, 1);
+}
+
+static struct hwrng caam_rng = {
+	.name		= "rng-caam",
+	.cleanup	= caam_cleanup,
+	.read		= caam_read,
+};
+
+static void __exit caam_rng_exit(void)
+{
+	hwrng_unregister(&caam_rng);
+}
+
+static int __init caam_rng_init(void)
+{
+	struct device_node *dev_node;
+	struct platform_device *pdev;
+	struct device *ctrldev;
+	struct caam_drv_private *priv;
+
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
+	if (!dev_node)
+		return -ENODEV;
+
+	pdev = of_find_device_by_node(dev_node);
+	if (!pdev)
+		return -ENODEV;
+
+	ctrldev = &pdev->dev;
+	priv = dev_get_drvdata(ctrldev);
+	of_node_put(dev_node);
+
+	caam_init_rng(&rng_ctx, priv->jrdev[0]);
+
+	dev_info(priv->jrdev[0], "registering rng-caam\n");
+	return hwrng_register(&caam_rng);
+}
+
+module_init(caam_rng_init);
+module_exit(caam_rng_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM support for hw_random API");
+MODULE_AUTHOR("Freescale Semiconductor - NMG");
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
index a63bc65fae862e1e8eb6c3aafce736afc02926a5..762aeff626ac6f7a980c8fe3019fcd72252ecd7c 100644
--- a/drivers/crypto/caam/compat.h
+++ b/drivers/crypto/caam/compat.h
@@ -11,6 +11,7 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/crypto.h>
+#include <linux/hash.h>
 #include <linux/hw_random.h>
 #include <linux/of_platform.h>
 #include <linux/dma-mapping.h>
@@ -33,5 +34,6 @@
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/hash.h>
 
 #endif /* !defined(CAAM_COMPAT_H) */
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 77557ebcd337f71d8ec2876d4b5e6d4e017393a0..414ba20c05a13f5265944a2ada5d0da97c6e447f 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -2,13 +2,16 @@
  * CAAM control-plane driver backend
  * Controller-level driver, kernel property detection, initialization
  *
- * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ * Copyright 2008-2012 Freescale Semiconductor, Inc.
  */
 
 #include "compat.h"
 #include "regs.h"
 #include "intern.h"
 #include "jr.h"
+#include "desc_constr.h"
+#include "error.h"
+#include "ctrl.h"
 
 static int caam_remove(struct platform_device *pdev)
 {
@@ -43,10 +46,154 @@ static int caam_remove(struct platform_device *pdev)
 	return ret;
 }
 
+/*
+ * Descriptor to instantiate RNG State Handle 0 in normal mode and
+ * load the JDKEK, TDKEK and TDSK registers
+ */
+static void build_instantiation_desc(u32 *desc)
+{
+	u32 *jump_cmd;
+
+	init_job_desc(desc, 0);
+
+	/* INIT RNG in non-test mode */
+	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
+			 OP_ALG_AS_INIT);
+
+	/* wait for done */
+	jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1);
+	set_jump_tgt_here(desc, jump_cmd);
+
+	/*
+	 * load 1 to clear written reg:
+	 * resets the done interrupt and returns the RNG to idle.
+	 */
+	append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW);
+
+	/* generate secure keys (non-test) */
+	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
+			 OP_ALG_RNG4_SK);
+}
+
+struct instantiate_result {
+	struct completion completion;
+	int err;
+};
+
+static void rng4_init_done(struct device *dev, u32 *desc, u32 err,
+			   void *context)
+{
+	struct instantiate_result *instantiation = context;
+
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	instantiation->err = err;
+	complete(&instantiation->completion);
+}
+
+static int instantiate_rng(struct device *jrdev)
+{
+	struct instantiate_result instantiation;
+	struct instantiate_result instantiation;
+	dma_addr_t desc_dma;
+	int ret;
+
+	desc = kmalloc(CAAM_CMD_SZ * 6, GFP_KERNEL | GFP_DMA);
+	if (!desc) {
+		dev_err(jrdev, "cannot allocate RNG init descriptor memory\n");
+		return -ENOMEM;
+	}
+
+	build_instantiation_desc(desc);
+	desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE);
+	init_completion(&instantiation.completion);
+	ret = caam_jr_enqueue(jrdev, desc, rng4_init_done, &instantiation);
+	if (!ret) {
+		wait_for_completion_interruptible(&instantiation.completion);
+		ret = instantiation.err;
+		if (ret)
+			dev_err(jrdev, "unable to instantiate RNG\n");
+	}
+
+	dma_unmap_single(jrdev, desc_dma, desc_bytes(desc), DMA_TO_DEVICE);
+
+	kfree(desc);
+
+	return ret;
+}
+
+/*
+ * By default, the TRNG runs for 200 clocks per sample;
+ * 800 clocks per sample generates better entropy.
+ */
+static void kick_trng(struct platform_device *pdev)
+{
+	struct device *ctrldev = &pdev->dev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
+	struct caam_full __iomem *topregs;
+	struct rng4tst __iomem *r4tst;
+	u32 val;
+
+	topregs = (struct caam_full __iomem *)ctrlpriv->ctrl;
+	r4tst = &topregs->ctrl.r4tst[0];
+
+	/* put RNG4 into program mode */
+	setbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+	/* 800 clocks per sample */
+	val = rd_reg32(&r4tst->rtsdctl);
+	val = (val & ~RTSDCTL_ENT_DLY_MASK) | (800 << RTSDCTL_ENT_DLY_SHIFT);
+	wr_reg32(&r4tst->rtsdctl, val);
+	/* min. freq. count */
+	wr_reg32(&r4tst->rtfrqmin, 400);
+	/* max. freq. count */
+	wr_reg32(&r4tst->rtfrqmax, 6400);
+	/* put RNG4 into run mode */
+	clrbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+}
+
+/**
+ * caam_get_era() - Return the ERA of the SEC on the SoC, based
+ * on the SEC_VID register.
+ * @caam_id: the value of the SEC_VID register
+ *
+ * Returns the ERA number (1..4) or -ENOTSUPP if the ERA is unknown.
+ */
+int caam_get_era(u64 caam_id)
+{
+	struct sec_vid *sec_vid = (struct sec_vid *)&caam_id;
+	static const struct {
+		u16 ip_id;
+		u8 maj_rev;
+		u8 era;
+	} caam_eras[] = {
+		{0x0A10, 1, 1},
+		{0x0A10, 2, 2},
+		{0x0A12, 1, 3},
+		{0x0A14, 1, 3},
+		{0x0A14, 2, 4},
+		{0x0A16, 1, 4},
+		{0x0A11, 1, 4}
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(caam_eras); i++)
+		if (caam_eras[i].ip_id == sec_vid->ip_id &&
+			caam_eras[i].maj_rev == sec_vid->maj_rev)
+				return caam_eras[i].era;
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(caam_get_era);
+
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
 {
-	int ring, rspec;
+	int ret, ring, rspec;
+	u64 caam_id;
 	struct device *dev;
 	struct device_node *nprop, *np;
 	struct caam_ctrl __iomem *ctrl;
@@ -82,13 +229,18 @@ static int caam_probe(struct platform_device *pdev)
 
 	/*
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
-	 * 36-bit pointers in master configuration register
+	 * long pointers in master configuration register
 	 */
 	setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE |
 		  (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
 
 	if (sizeof(dma_addr_t) == sizeof(u64))
-		dma_set_mask(dev, DMA_BIT_MASK(36));
+		if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
+			dma_set_mask(dev, DMA_BIT_MASK(40));
+		else
+			dma_set_mask(dev, DMA_BIT_MASK(36));
+	else
+		dma_set_mask(dev, DMA_BIT_MASK(32));
 
 	/*
 	 * Detect and enable JobRs
@@ -141,14 +293,29 @@ static int caam_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	/*
+	 * RNG4 based SECs (v5+) need special initialization prior
+	 * to executing any descriptors
+	 */
+	if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) {
+		kick_trng(pdev);
+		ret = instantiate_rng(ctrlpriv->jrdev[0]);
+		if (ret) {
+			caam_remove(pdev);
+			return ret;
+		}
+	}
+
 	/* NOTE: RTIC detection ought to go here, around Si time */
 
 	/* Initialize queue allocator lock */
 	spin_lock_init(&ctrlpriv->jr_alloc_lock);
 
+	caam_id = rd_reg64(&topregs->ctrl.perfmon.caam_id);
+
 	/* Report "alive" for developer to see */
-	dev_info(dev, "device ID = 0x%016llx\n",
-		 rd_reg64(&topregs->ctrl.perfmon.caam_id));
+	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
+		 caam_get_era(caam_id));
 	dev_info(dev, "job rings = %d, qi = %d\n",
 		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
 
diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h
new file mode 100644
index 0000000000000000000000000000000000000000..980d44eaaf4065691eadd2538f530ed4ec25fd57
--- /dev/null
+++ b/drivers/crypto/caam/ctrl.h
@@ -0,0 +1,13 @@
+/*
+ * CAAM control-plane driver backend public-level include definitions
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ */
+
+#ifndef CTRL_H
+#define CTRL_H
+
+/* Prototypes for backend-level services exposed to APIs */
+int caam_get_era(u64 caam_id);
+
+#endif /* CTRL_H */
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index a17c2958dab1eed5c2cd3d7a5a715085ce39f398..f7f833be8c6708792fbe47b9bca901d73939bcad 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -8,6 +8,16 @@
 #ifndef DESC_H
 #define DESC_H
 
+struct sec4_sg_entry {
+	u64 ptr;
+#define SEC4_SG_LEN_FIN 0x40000000
+#define SEC4_SG_LEN_EXT 0x80000000
+	u32 len;
+	u8 reserved;
+	u8 buf_pool_id;
+	u16 offset;
+};
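As an illustration of how these entries are consumed, here is a sketch (hypothetical helper, assuming a zero-initialized table and direct CPU-endian stores, in the spirit of the sg_to_sec4_sg helpers used by the hash code above) that describes two DMA segments and flags the last one with SEC4_SG_LEN_FIN:

static void example_fill_sec4_sg(struct sec4_sg_entry *sg,
				 dma_addr_t seg0_dma, u32 seg0_len,
				 dma_addr_t seg1_dma, u32 seg1_len)
{
	sg[0].ptr = seg0_dma;
	sg[0].len = seg0_len;
	sg[0].offset = 0;

	sg[1].ptr = seg1_dma;
	sg[1].len = seg1_len | SEC4_SG_LEN_FIN;	/* final entry in the table */
	sg[1].offset = 0;
}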
+
 /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
 #define MAX_CAAM_DESCSIZE	64
 
@@ -1162,6 +1172,11 @@
 #define OP_ALG_AAI_GSM		(0x10 << OP_ALG_AAI_SHIFT)
 #define OP_ALG_AAI_EDGE		(0x20 << OP_ALG_AAI_SHIFT)
 
+/* RNG4 set */
+#define OP_ALG_RNG4_SHIFT	4
+#define OP_ALG_RNG4_MASK	(0x1f3 << OP_ALG_RNG4_SHIFT)
+
+#define OP_ALG_RNG4_SK		(0x100 << OP_ALG_RNG4_SHIFT)
 
 #define OP_ALG_AS_SHIFT		2
 #define OP_ALG_AS_MASK		(0x3 << OP_ALG_AS_SHIFT)
@@ -1585,20 +1600,4 @@
 #define NFIFOENTRY_PLEN_SHIFT	0
 #define NFIFOENTRY_PLEN_MASK	(0xFF << NFIFOENTRY_PLEN_SHIFT)
 
-/*
- * PDB internal definitions
- */
-
-/* IPSec ESP CBC Encap/Decap Options */
-#define PDBOPTS_ESPCBC_ARSNONE	0x00	/* no antireplay window	*/
-#define PDBOPTS_ESPCBC_ARS32	0x40	/* 32-entry antireplay window */
-#define PDBOPTS_ESPCBC_ARS64	0xc0	/* 64-entry antireplay window */
-#define PDBOPTS_ESPCBC_IVSRC	0x20	/* IV comes from internal random gen */
-#define PDBOPTS_ESPCBC_ESN	0x10	/* extended sequence included */
-#define PDBOPTS_ESPCBC_OUTFMT	0x08	/* output only decapsulation (decap) */
-#define PDBOPTS_ESPCBC_IPHDRSRC 0x08	/* IP header comes from PDB (encap) */
-#define PDBOPTS_ESPCBC_INCIPHDR 0x04	/* Prepend IP header to output frame */
-#define PDBOPTS_ESPCBC_IPVSN	0x02	/* process IPv6 header */
-#define PDBOPTS_ESPCBC_TUNNEL	0x01	/* tunnel mode next-header byte */
-
 #endif /* DESC_H */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index 348b882275f059ede78df5d3ecf82b447c8e847e..c85c1f0584012cfc35c3ba7784e056e6d64ef855 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -1,7 +1,7 @@
 /*
  * caam descriptor construction helper functions
  *
- * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ * Copyright 2008-2012 Freescale Semiconductor, Inc.
  */
 
 #include "desc.h"
@@ -51,7 +51,7 @@ static inline void *sh_desc_pdb(u32 *desc)
 
 static inline void init_desc(u32 *desc, u32 options)
 {
-	*desc = options | HDR_ONE | 1;
+	*desc = (options | HDR_ONE) + 1;
 }
 
 static inline void init_sh_desc(u32 *desc, u32 options)
@@ -62,9 +62,9 @@ static inline void init_sh_desc(u32 *desc, u32 options)
 
 static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
 {
-	u32 pdb_len = pdb_bytes / CAAM_CMD_SZ + 1;
+	u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
 
-	init_sh_desc(desc, ((pdb_len << HDR_START_IDX_SHIFT) + pdb_len) |
+	init_sh_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT) + pdb_len) |
 		     options);
 }
 
@@ -117,6 +117,15 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
 	append_ptr(desc, ptr);
 }
 
+/* Write length after pointer, rather than inside command */
+static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
+					 unsigned int len, u32 command)
+{
+	append_cmd(desc, command);
+	append_ptr(desc, ptr);
+	append_cmd(desc, len);
+}
+
 static inline void append_cmd_data(u32 *desc, void *data, int len,
 				   u32 command)
 {
@@ -166,13 +175,22 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
 	append_cmd_ptr(desc, ptr, len, CMD_##op | options); \
 }
 APPEND_CMD_PTR(key, KEY)
-APPEND_CMD_PTR(seq_in_ptr, SEQ_IN_PTR)
-APPEND_CMD_PTR(seq_out_ptr, SEQ_OUT_PTR)
 APPEND_CMD_PTR(load, LOAD)
 APPEND_CMD_PTR(store, STORE)
 APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
 APPEND_CMD_PTR(fifo_store, FIFO_STORE)
 
+#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
+static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
+						 unsigned int len, \
+						 u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
+}
+APPEND_SEQ_PTR_INTLEN(in, IN)
+APPEND_SEQ_PTR_INTLEN(out, OUT)
+
 #define APPEND_CMD_PTR_TO_IMM(cmd, op) \
 static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
 					 unsigned int len, u32 options) \
@@ -183,6 +201,33 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
 APPEND_CMD_PTR_TO_IMM(load, LOAD);
 APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD);
 
+#define APPEND_CMD_PTR_EXTLEN(cmd, op) \
+static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \
+					 unsigned int len, u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd_ptr_extlen(desc, ptr, len, CMD_##op | SQIN_EXT | options); \
+}
+APPEND_CMD_PTR_EXTLEN(seq_in_ptr, SEQ_IN_PTR)
+APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR)
+
+/*
+ * Determine whether to store length internally or externally depending on
+ * the size of its type
+ */
+#define APPEND_CMD_PTR_LEN(cmd, op, type) \
+static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \
+				type len, u32 options) \
+{ \
+	PRINT_POS; \
+	if (sizeof(type) > sizeof(u16)) \
+		append_##cmd##_extlen(desc, ptr, len, options); \
+	else \
+		append_##cmd##_intlen(desc, ptr, len, options); \
+}
+APPEND_CMD_PTR_LEN(seq_in_ptr, SEQ_IN_PTR, u32)
+APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
+
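For clarity, with a u32 length argument the macro above effectively expands to the inline below (illustrative reconstruction; PRINT_POS omitted). Because sizeof(u32) > sizeof(u16), the compiler keeps only the extended-length branch, which emits SQIN_EXT and writes the length in a separate word after the pointer:

static inline void append_seq_in_ptr(u32 *desc, dma_addr_t ptr,
				     u32 len, u32 options)
{
	if (sizeof(u32) > sizeof(u16))	/* always true for a u32 length */
		append_seq_in_ptr_extlen(desc, ptr, len, options);
	else
		append_seq_in_ptr_intlen(desc, ptr, len, options);
}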
 /*
  * 2nd variant for commands whose specified immediate length differs
  * from length of immediate data provided, e.g., split keys
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 7e2d54bffad691448e6ab5daf60a07f596cc6ac5..9955ed9643e6dc7d37cae84bab63563ef866bff6 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -39,18 +39,20 @@ static void report_ccb_status(u32 status, char *outstr)
 	char *cha_id_list[] = {
 		"",
 		"AES",
-		"DES, 3DES",
+		"DES",
 		"ARC4",
-		"MD5, SHA-1, SH-224, SHA-256, SHA-384, SHA-512",
+		"MDHA",
 		"RNG",
 		"SNOW f8",
-		"Kasumi f8, f9",
-		"All Public Key Algorithms",
-		"CRC",
+		"Kasumi f8/9",
+		"PKHA",
+		"CRCA",
 		"SNOW f9",
+		"ZUCE",
+		"ZUCA",
 	};
 	char *err_id_list[] = {
-		"None. No error.",
+		"No error.",
 		"Mode error.",
 		"Data size error.",
 		"Key size error.",
@@ -67,6 +69,20 @@ static void report_ccb_status(u32 status, char *outstr)
 		"Invalid CHA combination was selected",
 		"Invalid CHA selected.",
 	};
+	char *rng_err_id_list[] = {
+		"",
+		"",
+		"",
+		"Instantiate",
+		"Not instantiated",
+		"Test instantiate",
+		"Prediction resistance",
+		"",
+		"Prediction resistance and test request",
+		"Uninstantiate",
+		"",
+		"Secure key generation",
+	};
 	u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >>
 		    JRSTA_CCBERR_CHAID_SHIFT;
 	u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
@@ -81,7 +97,13 @@ static void report_ccb_status(u32 status, char *outstr)
 			   cha_id, sizeof("ff"));
 	}
 
-	if (err_id < ARRAY_SIZE(err_id_list)) {
+	if ((cha_id << JRSTA_CCBERR_CHAID_SHIFT) == JRSTA_CCBERR_CHAID_RNG &&
+	    err_id < ARRAY_SIZE(rng_err_id_list) &&
+	    strlen(rng_err_id_list[err_id])) {
+		/* RNG-only error */
+		SPRINTFCAT(outstr, "%s", rng_err_id_list[err_id],
+			   strlen(rng_err_id_list[err_id]));
+	} else if (err_id < ARRAY_SIZE(err_id_list)) {
 		SPRINTFCAT(outstr, "%s", err_id_list[err_id],
 			   strlen(err_id_list[err_id]));
 	} else {
@@ -101,10 +123,10 @@ static void report_deco_status(u32 status, char *outstr)
 		u8 value;
 		char *error_text;
 	} desc_error_list[] = {
-		{ 0x00, "None. No error." },
+		{ 0x00, "No error." },
 		{ 0x01, "SGT Length Error. The descriptor is trying to read "
 			"more data than is contained in the SGT table." },
-		{ 0x02, "Reserved." },
+		{ 0x02, "SGT Null Entry Error." },
 		{ 0x03, "Job Ring Control Error. There is a bad value in the "
 			"Job Ring Control register." },
 		{ 0x04, "Invalid Descriptor Command. The Descriptor Command "
@@ -116,7 +138,7 @@ static void report_deco_status(u32 status, char *outstr)
 		{ 0x09, "Invalid OPERATION Command" },
 		{ 0x0A, "Invalid FIFO LOAD Command" },
 		{ 0x0B, "Invalid FIFO STORE Command" },
-		{ 0x0C, "Invalid MOVE Command" },
+		{ 0x0C, "Invalid MOVE/MOVE_LEN Command" },
 		{ 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is "
 			"invalid because the target is not a Job Header "
 			"Command, or the jump is from a Trusted Descriptor to "
@@ -166,6 +188,8 @@ static void report_deco_status(u32 status, char *outstr)
 			"(input frame; block ciphers) and IPsec decap (output "
 			"frame, when doing the next header byte update) and "
 			"DCRC (output frame)." },
+		{ 0x23, "Read Input Frame error" },
+		{ 0x24, "JDKEK, TDKEK or TDSK not loaded error" },
 		{ 0x80, "DNR (do not run) error" },
 		{ 0x81, "undefined protocol command" },
 		{ 0x82, "invalid setting in PDB" },
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index a34be01b0b293941b3e5cae998664d615dee92ef..5cd4c1b268a1ec18c4d81d3eee0004a59eba5a36 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -43,7 +43,7 @@ struct caam_drv_private_jr {
 	struct device *parentdev;	/* points back to controller dev */
 	int ridx;
 	struct caam_job_ring __iomem *rregs;	/* JobR's register space */
-	struct tasklet_struct irqtask[NR_CPUS];
+	struct tasklet_struct irqtask;
 	int irq;			/* One per queue */
 	int assign;			/* busy/free */
 
@@ -86,10 +86,10 @@ struct caam_drv_private {
 
 	/* which jr allocated to scatterlist crypto */
 	atomic_t tfm_count ____cacheline_aligned;
-	int num_jrs_for_algapi;
-	struct device **algapi_jr;
 	/* list of registered crypto algorithms (mk generic context handle?) */
 	struct list_head alg_list;
+	/* list of registered hash algorithms (mk generic context handle?) */
+	struct list_head hash_list;
 
 	/*
 	 * debugfs entries for developer view into driver/device
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 340fa322c0f0185be546fddffee9ada6f75ed333..53c8c51d58817cb62b4641d5e923bb4f72b39f2f 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -2,7 +2,7 @@
  * CAAM/SEC 4.x transport/backend driver
  * JobR backend functionality
  *
- * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ * Copyright 2008-2012 Freescale Semiconductor, Inc.
  */
 
 #include "compat.h"
@@ -43,7 +43,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 	wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
 	preempt_disable();
-	tasklet_schedule(&jrp->irqtask[smp_processor_id()]);
+	tasklet_schedule(&jrp->irqtask);
 	preempt_enable();
 
 	return IRQ_HANDLED;
@@ -58,17 +58,16 @@ static void caam_jr_dequeue(unsigned long devarg)
 	void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
 	u32 *userdesc, userstatus;
 	void *userarg;
-	unsigned long flags;
 
-	spin_lock_irqsave(&jrp->outlock, flags);
+	while (rd_reg32(&jrp->rregs->outring_used)) {
 
-	head = ACCESS_ONCE(jrp->head);
-	sw_idx = tail = jrp->tail;
+		head = ACCESS_ONCE(jrp->head);
 
-	while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 &&
-	       rd_reg32(&jrp->rregs->outring_used)) {
+		spin_lock_bh(&jrp->outlock);
 
+		sw_idx = tail = jrp->tail;
 		hw_idx = jrp->out_ring_read_index;
+
 		for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) {
 			sw_idx = (tail + i) & (JOBR_DEPTH - 1);
 
@@ -95,7 +94,8 @@ static void caam_jr_dequeue(unsigned long devarg)
 		userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
 		userstatus = jrp->outring[hw_idx].jrstatus;
 
-		smp_mb();
+		/* set done */
+		wr_reg32(&jrp->rregs->outring_rmvd, 1);
 
 		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
 					   (JOBR_DEPTH - 1);
@@ -115,22 +115,12 @@ static void caam_jr_dequeue(unsigned long devarg)
 			jrp->tail = tail;
 		}
 
-		/* set done */
-		wr_reg32(&jrp->rregs->outring_rmvd, 1);
-
-		spin_unlock_irqrestore(&jrp->outlock, flags);
+		spin_unlock_bh(&jrp->outlock);
 
 		/* Finally, execute user's callback */
 		usercall(dev, userdesc, userstatus, userarg);
-
-		spin_lock_irqsave(&jrp->outlock, flags);
-
-		head = ACCESS_ONCE(jrp->head);
-		sw_idx = tail = jrp->tail;
 	}
 
-	spin_unlock_irqrestore(&jrp->outlock, flags);
-
 	/* reenable / unmask IRQs */
 	clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK);
 }
@@ -148,23 +138,22 @@ int caam_jr_register(struct device *ctrldev, struct device **rdev)
 {
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
 	struct caam_drv_private_jr *jrpriv = NULL;
-	unsigned long flags;
 	int ring;
 
 	/* Lock, if free ring - assign, unlock */
-	spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
+	spin_lock(&ctrlpriv->jr_alloc_lock);
 	for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) {
 		jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]);
 		if (jrpriv->assign == JOBR_UNASSIGNED) {
 			jrpriv->assign = JOBR_ASSIGNED;
 			*rdev = ctrlpriv->jrdev[ring];
-			spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+			spin_unlock(&ctrlpriv->jr_alloc_lock);
 			return ring;
 		}
 	}
 
 	/* If assigned, write dev where caller needs it */
-	spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+	spin_unlock(&ctrlpriv->jr_alloc_lock);
 	*rdev = NULL;
 
 	return -ENODEV;
@@ -182,7 +171,6 @@ int caam_jr_deregister(struct device *rdev)
 {
 	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(rdev);
 	struct caam_drv_private *ctrlpriv;
-	unsigned long flags;
 
 	/* Get the owning controller's private space */
 	ctrlpriv = dev_get_drvdata(jrpriv->parentdev);
@@ -195,9 +183,9 @@ int caam_jr_deregister(struct device *rdev)
 		return -EBUSY;
 
 	/* Release ring */
-	spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
+	spin_lock(&ctrlpriv->jr_alloc_lock);
 	jrpriv->assign = JOBR_UNASSIGNED;
-	spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+	spin_unlock(&ctrlpriv->jr_alloc_lock);
 
 	return 0;
 }
@@ -238,7 +226,6 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 {
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	struct caam_jrentry_info *head_entry;
-	unsigned long flags;
 	int head, tail, desc_size;
 	dma_addr_t desc_dma;
 
@@ -249,14 +236,14 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 		return -EIO;
 	}
 
-	spin_lock_irqsave(&jrp->inplock, flags);
+	spin_lock(&jrp->inplock);
 
 	head = jrp->head;
 	tail = ACCESS_ONCE(jrp->tail);
 
 	if (!rd_reg32(&jrp->rregs->inpring_avail) ||
 	    CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
-		spin_unlock_irqrestore(&jrp->inplock, flags);
+		spin_unlock(&jrp->inplock);
 		dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE);
 		return -EBUSY;
 	}
@@ -276,11 +263,9 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 				    (JOBR_DEPTH - 1);
 	jrp->head = (head + 1) & (JOBR_DEPTH - 1);
 
-	wmb();
-
 	wr_reg32(&jrp->rregs->inpring_jobadd, 1);
 
-	spin_unlock_irqrestore(&jrp->inplock, flags);
+	spin_unlock(&jrp->inplock);
 
 	return 0;
 }
@@ -337,11 +322,9 @@ static int caam_jr_init(struct device *dev)
 
 	jrp = dev_get_drvdata(dev);
 
-	/* Connect job ring interrupt handler. */
-	for_each_possible_cpu(i)
-		tasklet_init(&jrp->irqtask[i], caam_jr_dequeue,
-			     (unsigned long)dev);
+	tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
 
+	/* Connect job ring interrupt handler. */
 	error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
 			    "caam-jobr", dev);
 	if (error) {
@@ -356,10 +339,11 @@ static int caam_jr_init(struct device *dev)
 	if (error)
 		return error;
 
-	jrp->inpring = kzalloc(sizeof(dma_addr_t) * JOBR_DEPTH,
-			       GFP_KERNEL | GFP_DMA);
-	jrp->outring = kzalloc(sizeof(struct jr_outentry) *
-			       JOBR_DEPTH, GFP_KERNEL | GFP_DMA);
+	jrp->inpring = dma_alloc_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
+					  &inpbusaddr, GFP_KERNEL);
+
+	jrp->outring = dma_alloc_coherent(dev, sizeof(struct jr_outentry) *
+					  JOBR_DEPTH, &outbusaddr, GFP_KERNEL);
 
 	jrp->entinfo = kzalloc(sizeof(struct caam_jrentry_info) * JOBR_DEPTH,
 			       GFP_KERNEL);
@@ -375,31 +359,6 @@ static int caam_jr_init(struct device *dev)
 		jrp->entinfo[i].desc_addr_dma = !0;
 
 	/* Setup rings */
-	inpbusaddr = dma_map_single(dev, jrp->inpring,
-				    sizeof(u32 *) * JOBR_DEPTH,
-				    DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, inpbusaddr)) {
-		dev_err(dev, "caam_jr_init(): can't map input ring\n");
-		kfree(jrp->inpring);
-		kfree(jrp->outring);
-		kfree(jrp->entinfo);
-		return -EIO;
-	}
-
-	outbusaddr = dma_map_single(dev, jrp->outring,
-				    sizeof(struct jr_outentry) * JOBR_DEPTH,
-				    DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, outbusaddr)) {
-		dev_err(dev, "caam_jr_init(): can't map output ring\n");
-			dma_unmap_single(dev, inpbusaddr,
-					 sizeof(u32 *) * JOBR_DEPTH,
-					 DMA_BIDIRECTIONAL);
-		kfree(jrp->inpring);
-		kfree(jrp->outring);
-		kfree(jrp->entinfo);
-		return -EIO;
-	}
-
 	jrp->inp_ring_write_index = 0;
 	jrp->out_ring_read_index = 0;
 	jrp->head = 0;
@@ -431,12 +390,11 @@ int caam_jr_shutdown(struct device *dev)
 {
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	dma_addr_t inpbusaddr, outbusaddr;
-	int ret, i;
+	int ret;
 
 	ret = caam_reset_hw_jr(dev);
 
-	for_each_possible_cpu(i)
-		tasklet_kill(&jrp->irqtask[i]);
+	tasklet_kill(&jrp->irqtask);
 
 	/* Release interrupt */
 	free_irq(jrp->irq, dev);
@@ -444,13 +402,10 @@ int caam_jr_shutdown(struct device *dev)
 	/* Free rings */
 	inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
 	outbusaddr = rd_reg64(&jrp->rregs->outring_base);
-	dma_unmap_single(dev, outbusaddr,
-			 sizeof(struct jr_outentry) * JOBR_DEPTH,
-			 DMA_BIDIRECTIONAL);
-	dma_unmap_single(dev, inpbusaddr, sizeof(u32 *) * JOBR_DEPTH,
-			 DMA_BIDIRECTIONAL);
-	kfree(jrp->outring);
-	kfree(jrp->inpring);
+	dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
+			  jrp->inpring, inpbusaddr);
+	dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH,
+			  jrp->outring, outbusaddr);
 	kfree(jrp->entinfo);
 
 	return ret;
@@ -503,6 +458,14 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
 	dev_set_drvdata(jrdev, jrpriv);
 	ctrlpriv->jrdev[ring] = jrdev;
 
+	if (sizeof(dma_addr_t) == sizeof(u64))
+		if (of_device_is_compatible(np, "fsl,sec-v5.0-job-ring"))
+			dma_set_mask(jrdev, DMA_BIT_MASK(40));
+		else
+			dma_set_mask(jrdev, DMA_BIT_MASK(36));
+	else
+		dma_set_mask(jrdev, DMA_BIT_MASK(32));
+
 	/* Identify the interrupt */
 	jrpriv->irq = of_irq_to_resource(np, 0, NULL);
 
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
new file mode 100644
index 0000000000000000000000000000000000000000..002888185f170e92fa798233a40c64ef2258d42c
--- /dev/null
+++ b/drivers/crypto/caam/key_gen.c
@@ -0,0 +1,122 @@
+/*
+ * CAAM/SEC 4.x functions for handling key-generation jobs
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ */
+#include "compat.h"
+#include "jr.h"
+#include "error.h"
+#include "desc_constr.h"
+#include "key_gen.h"
+
+void split_key_done(struct device *dev, u32 *desc, u32 err,
+			   void *context)
+{
+	struct split_key_result *res = context;
+
+#ifdef DEBUG
+	dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	res->err = err;
+
+	complete(&res->completion);
+}
+EXPORT_SYMBOL(split_key_done);
+
+/*
+ * get a split ipad/opad key
+ *
+ * Split key generation-----------------------------------------------
+ *
+ * [00] 0xb0810008    jobdesc: stidx=1 share=never len=8
+ * [01] 0x04000014        key: class2->keyreg len=20
+ *			@0xffe01000
+ * [03] 0x84410014  operation: cls2-op sha1 hmac init dec
+ * [04] 0x24940000     fifold: class2 msgdata-last2 len=0 imm
+ * [05] 0xa4000001       jump: class2 local all ->1 [06]
+ * [06] 0x64260028    fifostr: class2 mdsplit-jdk len=40
+ *			@0xffe04000
+ */
+u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
+		  int split_key_pad_len, const u8 *key_in, u32 keylen,
+		  u32 alg_op)
+{
+	u32 *desc;
+	struct split_key_result result;
+	dma_addr_t dma_addr_in, dma_addr_out;
+	int ret = 0;
+
+	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	if (!desc) {
+		dev_err(jrdev, "unable to allocate key descriptor memory\n");
+		return -ENOMEM;
+	}
+
+	init_job_desc(desc, 0);
+
+	dma_addr_in = dma_map_single(jrdev, (void *)key_in, keylen,
+				     DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, dma_addr_in)) {
+		dev_err(jrdev, "unable to map key input memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG);
+
+	/* Sets MDHA up into an HMAC-INIT */
+	append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT);
+
+	/*
+	 * do a FIFO_LOAD of zero; this will trigger the internal key expansion
+	 * into both pads inside MDHA
+	 */
+	append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB |
+				FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2);
+
+	/*
+	 * FIFO_STORE with the explicit split-key content store
+	 * (0x26 output type)
+	 */
+	dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len,
+				      DMA_FROM_DEVICE);
+	if (dma_mapping_error(jrdev, dma_addr_out)) {
+		dev_err(jrdev, "unable to map key output memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	append_fifo_store(desc, dma_addr_out, split_key_len,
+			  LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	result.err = 0;
+	init_completion(&result.completion);
+
+	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+	if (!ret) {
+		/* in progress */
+		wait_for_completion_interruptible(&result.completion);
+		ret = result.err;
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, key_out,
+			       split_key_pad_len, 1);
+#endif
+	}
+
+	dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len,
+			 DMA_FROM_DEVICE);
+	dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE);
+
+	kfree(desc);
+
+	return ret;
+}
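A hedged usage sketch of the service above (the caller and its sizing are hypothetical; the 2 x digest-size length and pad-to-16 follow the len=40 SHA-1 example in the comment block, and key_out must come from a DMA-able allocation):

/* Hypothetical caller: derive an HMAC-SHA1 ipad/opad split key */
static int example_gen_sha1_split_key(struct device *jrdev, u8 *key_out,
				      const u8 *key_in, u32 keylen)
{
	int split_key_len = SHA1_DIGEST_SIZE * 2;	  /* ipad + opad */
	int split_key_pad_len = ALIGN(split_key_len, 16); /* KEK padding */

	return gen_split_key(jrdev, key_out, split_key_len, split_key_pad_len,
			     key_in, keylen,
			     OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC);
}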
diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h
new file mode 100644
index 0000000000000000000000000000000000000000..d95d290c6e8bd0016111bbe5e2ab97ed3dd87970
--- /dev/null
+++ b/drivers/crypto/caam/key_gen.h
@@ -0,0 +1,17 @@
+/*
+ * CAAM/SEC 4.x definitions for handling key-generation jobs
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ */
+
+struct split_key_result {
+	struct completion completion;
+	int err;
+};
+
+void split_key_done(struct device *dev, u32 *desc, u32 err, void *context);
+
+u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
+		    int split_key_pad_len, const u8 *key_in, u32 keylen,
+		    u32 alg_op);
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h
new file mode 100644
index 0000000000000000000000000000000000000000..62950d22ac13d65862b197b3e1959f3ba55ff05c
--- /dev/null
+++ b/drivers/crypto/caam/pdb.h
@@ -0,0 +1,401 @@
+/*
+ * CAAM Protocol Data Block (PDB) definition header file
+ *
+ * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef CAAM_PDB_H
+#define CAAM_PDB_H
+
+/*
+ * PDB - IPSec ESP Header Modification Options
+ */
+#define PDBHMO_ESP_DECAP_SHIFT	12
+#define PDBHMO_ESP_ENCAP_SHIFT	4
+/*
+ * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the
+ * Options Byte IP version (IPvsn) field:
+ * if IPv4, decrement the inner IP header TTL field (byte 8);
+ * if IPv6, decrement the inner IP header Hop Limit field (byte 7).
+ */
+#define PDBHMO_ESP_DECAP_DEC_TTL	(0x02 << PDBHMO_ESP_DECAP_SHIFT)
+#define PDBHMO_ESP_ENCAP_DEC_TTL	(0x02 << PDBHMO_ESP_ENCAP_SHIFT)
+/*
+ * Decap - DiffServ Copy - Copy the IPv4 TOS or IPv6 Traffic Class byte
+ * from the outer IP header to the inner IP header.
+ */
+#define PDBHMO_ESP_DIFFSERV		(0x01 << PDBHMO_ESP_DECAP_SHIFT)
+/*
+ * Encap - Copy DF bit - if an IPv4 tunnel mode outer IP header is coming from
+ * the PDB, copy the DF bit from the inner IP header to the outer IP header.
+ */
+#define PDBHMO_ESP_DFBIT		(0x04 << PDBHMO_ESP_ENCAP_SHIFT)
+
+/*
+ * PDB - IPSec ESP Encap/Decap Options
+ */
+#define PDBOPTS_ESP_ARSNONE	0x00 /* no antireplay window */
+#define PDBOPTS_ESP_ARS32	0x40 /* 32-entry antireplay window */
+#define PDBOPTS_ESP_ARS64	0xc0 /* 64-entry antireplay window */
+#define PDBOPTS_ESP_IVSRC	0x20 /* IV comes from internal random gen */
+#define PDBOPTS_ESP_ESN		0x10 /* extended sequence included */
+#define PDBOPTS_ESP_OUTFMT	0x08 /* output only decapsulation (decap) */
+#define PDBOPTS_ESP_IPHDRSRC	0x08 /* IP header comes from PDB (encap) */
+#define PDBOPTS_ESP_INCIPHDR	0x04 /* Prepend IP header to output frame */
+#define PDBOPTS_ESP_IPVSN	0x02 /* process IPv6 header */
+#define PDBOPTS_ESP_TUNNEL	0x01 /* tunnel mode next-header byte */
+#define PDBOPTS_ESP_IPV6	0x02 /* ip header version is V6 */
+#define PDBOPTS_ESP_DIFFSERV	0x40 /* copy TOS/TC from inner iphdr */
+#define PDBOPTS_ESP_UPDATE_CSUM 0x80 /* encap-update ip header checksum */
+#define PDBOPTS_ESP_VERIFY_CSUM 0x20 /* decap-validate ip header checksum */
+
+/*
+ * General IPSec encap/decap PDB definitions
+ */
+struct ipsec_encap_cbc {
+	u32 iv[4];
+};
+
+struct ipsec_encap_ctr {
+	u32 ctr_nonce;
+	u32 ctr_initial;
+	u32 iv[2];
+};
+
+struct ipsec_encap_ccm {
+	u32 salt; /* lower 24 bits */
+	u8 b0_flags;
+	u8 ctr_flags;
+	u16 ctr_initial;
+	u32 iv[2];
+};
+
+struct ipsec_encap_gcm {
+	u32 salt; /* lower 24 bits */
+	u32 rsvd1;
+	u32 iv[2];
+};
+
+struct ipsec_encap_pdb {
+	u8 hmo_rsvd;
+	u8 ip_nh;
+	u8 ip_nh_offset;
+	u8 options;
+	u32 seq_num_ext_hi;
+	u32 seq_num;
+	union {
+		struct ipsec_encap_cbc cbc;
+		struct ipsec_encap_ctr ctr;
+		struct ipsec_encap_ccm ccm;
+		struct ipsec_encap_gcm gcm;
+	};
+	u32 spi;
+	u16 rsvd1;
+	u16 ip_hdr_len;
+	u32 ip_hdr[0]; /* optional IP Header content */
+};
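Because ip_hdr[] is a flexible array whose byte count is carried in ip_hdr_len, a PDB carrying an optional outer IP header would be sized at allocation time, roughly as in this sketch (hypothetical helper, not part of the patch; the option bits shown are just the encap "header from PDB" tunnel case):

static struct ipsec_encap_pdb *example_alloc_encap_pdb(const void *ip_hdr,
						       u16 ip_hdr_len)
{
	struct ipsec_encap_pdb *pdb;

	/* one allocation covers the fixed PDB plus the trailing IP header */
	pdb = kzalloc(sizeof(*pdb) + ip_hdr_len, GFP_KERNEL);
	if (!pdb)
		return NULL;

	pdb->ip_hdr_len = ip_hdr_len;
	pdb->options = PDBOPTS_ESP_IPHDRSRC | PDBOPTS_ESP_TUNNEL;
	memcpy(pdb->ip_hdr, ip_hdr, ip_hdr_len);

	return pdb;
}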
+
+struct ipsec_decap_cbc {
+	u32 rsvd[2];
+};
+
+struct ipsec_decap_ctr {
+	u32 salt;
+	u32 ctr_initial;
+};
+
+struct ipsec_decap_ccm {
+	u32 salt;
+	u8 iv_flags;
+	u8 ctr_flags;
+	u16 ctr_initial;
+};
+
+struct ipsec_decap_gcm {
+	u32 salt;
+	u32 resvd;
+};
+
+struct ipsec_decap_pdb {
+	u16 hmo_ip_hdr_len;
+	u8 ip_nh_offset;
+	u8 options;
+	union {
+		struct ipsec_decap_cbc cbc;
+		struct ipsec_decap_ctr ctr;
+		struct ipsec_decap_ccm ccm;
+		struct ipsec_decap_gcm gcm;
+	};
+	u32 seq_num_ext_hi;
+	u32 seq_num;
+	u32 anti_replay[2];
+	u32 end_index[0];
+};
+
+/*
+ * IPSec ESP Datapath Protocol Override Register (DPOVRD)
+ */
+struct ipsec_deco_dpovrd {
+#define IPSEC_ENCAP_DECO_DPOVRD_USE 0x80
+	u8 ovrd_ecn;
+	u8 ip_hdr_len;
+	u8 nh_offset;
+	u8 next_header; /* reserved if decap */
+};
+
+/*
+ * IEEE 802.11i WiFi Protocol Data Block
+ */
+#define WIFI_PDBOPTS_FCS	0x01
+#define WIFI_PDBOPTS_AR		0x40
+
+struct wifi_encap_pdb {
+	u16 mac_hdr_len;
+	u8 rsvd;
+	u8 options;
+	u8 iv_flags;
+	u8 pri;
+	u16 pn1;
+	u32 pn2;
+	u16 frm_ctrl_mask;
+	u16 seq_ctrl_mask;
+	u8 rsvd1[2];
+	u8 cnst;
+	u8 key_id;
+	u8 ctr_flags;
+	u8 rsvd2;
+	u16 ctr_init;
+};
+
+struct wifi_decap_pdb {
+	u16 mac_hdr_len;
+	u8 rsvd;
+	u8 options;
+	u8 iv_flags;
+	u8 pri;
+	u16 pn1;
+	u32 pn2;
+	u16 frm_ctrl_mask;
+	u16 seq_ctrl_mask;
+	u8 rsvd1[4];
+	u8 ctr_flags;
+	u8 rsvd2;
+	u16 ctr_init;
+};
+
+/*
+ * IEEE 802.16 WiMAX Protocol Data Block
+ */
+#define WIMAX_PDBOPTS_FCS	0x01
+#define WIMAX_PDBOPTS_AR	0x40 /* decap only */
+
+struct wimax_encap_pdb {
+	u8 rsvd[3];
+	u8 options;
+	u32 nonce;
+	u8 b0_flags;
+	u8 ctr_flags;
+	u16 ctr_init;
+	/* begin DECO writeback region */
+	u32 pn;
+	/* end DECO writeback region */
+};
+
+struct wimax_decap_pdb {
+	u8 rsvd[3];
+	u8 options;
+	u32 nonce;
+	u8 iv_flags;
+	u8 ctr_flags;
+	u16 ctr_init;
+	/* begin DECO writeback region */
+	u32 pn;
+	u8 rsvd1[2];
+	u16 antireplay_len;
+	u64 antireplay_scorecard;
+	/* end DECO writeback region */
+};
+
+/*
+ * IEEE 802.1AE MACsec Protocol Data Block
+ */
+#define MACSEC_PDBOPTS_FCS	0x01
+#define MACSEC_PDBOPTS_AR	0x40 /* used in decap only */
+
+struct macsec_encap_pdb {
+	u16 aad_len;
+	u8 rsvd;
+	u8 options;
+	u64 sci;
+	u16 ethertype;
+	u8 tci_an;
+	u8 rsvd1;
+	/* begin DECO writeback region */
+	u32 pn;
+	/* end DECO writeback region */
+};
+
+struct macsec_decap_pdb {
+	u16 aad_len;
+	u8 rsvd;
+	u8 options;
+	u64 sci;
+	u8 rsvd1[3];
+	/* begin DECO writeback region */
+	u8 antireplay_len;
+	u32 pn;
+	u64 antireplay_scorecard;
+	/* end DECO writeback region */
+};
+
+/*
+ * SSL/TLS/DTLS Protocol Data Blocks
+ */
+
+#define TLS_PDBOPTS_ARS32	0x40
+#define TLS_PDBOPTS_ARS64	0xc0
+#define TLS_PDBOPTS_OUTFMT	0x08
+#define TLS_PDBOPTS_IV_WRTBK	0x02 /* 1.1/1.2/DTLS only */
+#define TLS_PDBOPTS_EXP_RND_IV	0x01 /* 1.1/1.2/DTLS only */
+
+struct tls_block_encap_pdb {
+	u8 type;
+	u8 version[2];
+	u8 options;
+	u64 seq_num;
+	u32 iv[4];
+};
+
+struct tls_stream_encap_pdb {
+	u8 type;
+	u8 version[2];
+	u8 options;
+	u64 seq_num;
+	u8 i;
+	u8 j;
+	u8 rsvd1[2];
+};
+
+struct dtls_block_encap_pdb {
+	u8 type;
+	u8 version[2];
+	u8 options;
+	u16 epoch;
+	u16 seq_num[3];
+	u32 iv[4];
+};
+
+struct tls_block_decap_pdb {
+	u8 rsvd[3];
+	u8 options;
+	u64 seq_num;
+	u32 iv[4];
+};
+
+struct tls_stream_decap_pdb {
+	u8 rsvd[3];
+	u8 options;
+	u64 seq_num;
+	u8 i;
+	u8 j;
+	u8 rsvd1[2];
+};
+
+struct dtls_block_decap_pdb {
+	u8 rsvd[3];
+	u8 options;
+	u16 epoch;
+	u16 seq_num[3];
+	u32 iv[4];
+	u64 antireplay_scorecard;
+};
+
+/*
+ * SRTP Protocol Data Blocks
+ */
+#define SRTP_PDBOPTS_MKI	0x08
+#define SRTP_PDBOPTS_AR		0x40
+
+struct srtp_encap_pdb {
+	u8 x_len;
+	u8 mki_len;
+	u8 n_tag;
+	u8 options;
+	u32 cnst0;
+	u8 rsvd[2];
+	u16 cnst1;
+	u16 salt[7];
+	u16 cnst2;
+	u32 rsvd1;
+	u32 roc;
+	u32 opt_mki;
+};
+
+struct srtp_decap_pdb {
+	u8 x_len;
+	u8 mki_len;
+	u8 n_tag;
+	u8 options;
+	u32 cnst0;
+	u8 rsvd[2];
+	u16 cnst1;
+	u16 salt[7];
+	u16 cnst2;
+	u16 rsvd1;
+	u16 seq_num;
+	u32 roc;
+	u64 antireplay_scorecard;
+};
+
+/*
+ * DSA/ECDSA Protocol Data Blocks
+ * Two of these exist: DSA-SIGN and DSA-VERIFY. They are similar
+ * except for the treatment of "w" for verify, "s" for sign,
+ * and the placement of "a,b".
+ */
+#define DSA_PDB_SGF_SHIFT	24
+#define DSA_PDB_SGF_MASK	(0xff << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_Q		(0x80 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_R		(0x40 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_G		(0x20 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_W		(0x10 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_S		(0x10 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_F		(0x08 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_C		(0x04 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_D		(0x02 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_AB_SIGN	(0x02 << DSA_PDB_SGF_SHIFT)
+#define DSA_PDB_SGF_AB_VERIFY	(0x01 << DSA_PDB_SGF_SHIFT)
+
+#define DSA_PDB_L_SHIFT		7
+#define DSA_PDB_L_MASK		(0x3ff << DSA_PDB_L_SHIFT)
+
+#define DSA_PDB_N_MASK		0x7f
+
+struct dsa_sign_pdb {
+	u32 sgf_ln; /* Use DSA_PDB_ definitions above */
+	u8 *q;
+	u8 *r;
+	u8 *g;	/* or Gx,y */
+	u8 *s;
+	u8 *f;
+	u8 *c;
+	u8 *d;
+	u8 *ab; /* ECC only */
+	u8 *u;
+};
+
+struct dsa_verify_pdb {
+	u32 sgf_ln;
+	u8 *q;
+	u8 *r;
+	u8 *g;	/* or Gx,y */
+	u8 *w; /* or Wx,y */
+	u8 *f;
+	u8 *c;
+	u8 *d;
+	u8 *tmp; /* temporary data block */
+	u8 *ab; /* only used if ECC processing */
+};
+
+#endif
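Since the SGF flags, the L field and the N field all share the single sgf_ln word of the DSA PDBs above, a short sketch of how a caller might pack that word may help. It is illustrative only: the helper name and the exact meaning of the two length operands are assumptions, not code carried by this patch.

	/* Sketch (not part of the patch): pack sgf_ln for a dsa_sign_pdb.
	 * sgf_flags selects which operands come via s/g tables; l and n are
	 * the two operand lengths the PDB encodes. */
	static inline u32 example_dsa_pack_sgf_ln(u32 sgf_flags, u32 l, u32 n)
	{
		return (sgf_flags & DSA_PDB_SGF_MASK) |
		       ((l << DSA_PDB_L_SHIFT) & DSA_PDB_L_MASK) |
		       (n & DSA_PDB_N_MASK);
	}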
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index e9f7a70cdd5efa443a9bd4d3d928d0a562002a0d..3223fc6d647cd55c0367b5ee1f6b003bc8029080 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -117,6 +117,12 @@ struct jr_outentry {
 #define CHA_NUM_DECONUM_SHIFT	56
 #define CHA_NUM_DECONUM_MASK	(0xfull << CHA_NUM_DECONUM_SHIFT)
 
+struct sec_vid {
+	u16 ip_id;
+	u8 maj_rev;
+	u8 min_rev;
+};
+
 struct caam_perfmon {
 	/* Performance Monitor Registers			f00-f9f */
 	u64 req_dequeued;	/* PC_REQ_DEQ - Dequeued Requests	     */
@@ -167,7 +173,7 @@ struct partid {
 	u32 pidr;	/* partition ID, DECO */
 };
 
-/* RNG test mode (replicated twice in some configurations) */
+/* RNGB test mode (replicated twice in some configurations) */
 /* Padded out to 0x100 */
 struct rngtst {
 	u32 mode;		/* RTSTMODEx - Test mode */
@@ -200,6 +206,31 @@ struct rngtst {
 	u32 rsvd14[15];
 };
 
+/* RNG4 TRNG test registers */
+struct rng4tst {
+#define RTMCTL_PRGM 0x00010000	/* 1 -> program mode, 0 -> run mode */
+	u32 rtmctl;		/* misc. control register */
+	u32 rtscmisc;		/* statistical check misc. register */
+	u32 rtpkrrng;		/* poker range register */
+	union {
+		u32 rtpkrmax;	/* PRGM=1: poker max. limit register */
+		u32 rtpkrsq;	/* PRGM=0: poker square calc. result register */
+	};
+#define RTSDCTL_ENT_DLY_SHIFT 16
+#define RTSDCTL_ENT_DLY_MASK (0xffff << RTSDCTL_ENT_DLY_SHIFT)
+	u32 rtsdctl;		/* seed control register */
+	union {
+		u32 rtsblim;	/* PRGM=1: sparse bit limit register */
+		u32 rttotsam;	/* PRGM=0: total samples register */
+	};
+	u32 rtfrqmin;		/* frequency count min. limit register */
+	union {
+		u32 rtfrqmax;	/* PRGM=1: freq. count max. limit register */
+		u32 rtfrqcnt;	/* PRGM=0: freq. count register */
+	};
+	u32 rsvd1[56];
+};
+
 /*
  * caam_ctrl - basic core configuration
  * starts base + 0x0000 padded out to 0x1000
@@ -249,7 +280,10 @@ struct caam_ctrl {
 
 	/* RNG Test/Verification/Debug Access                   600-7ff */
 	/* (Useful in Test/Debug modes only...)                         */
-	struct rngtst rtst[2];
+	union {
+		struct rngtst rtst[2];
+		struct rng4tst r4tst[2];
+	};
 
 	u32 rsvd9[448];
 
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0037c8ee24386e941e5eed6189e9c2cd9747a3a
--- /dev/null
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -0,0 +1,156 @@
+/*
+ * CAAM/SEC 4.x functions for using scatterlists in caam driver
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ */
+
+struct sec4_sg_entry;
+
+/*
+ * convert single dma address to h/w link table format
+ */
+static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
+				      dma_addr_t dma, u32 len, u32 offset)
+{
+	sec4_sg_ptr->ptr = dma;
+	sec4_sg_ptr->len = len;
+	sec4_sg_ptr->reserved = 0;
+	sec4_sg_ptr->buf_pool_id = 0;
+	sec4_sg_ptr->offset = offset;
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
+		       sizeof(struct sec4_sg_entry), 1);
+#endif
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * but does not have final bit; instead, returns last entry
+ */
+static inline struct sec4_sg_entry *
+sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
+	      struct sec4_sg_entry *sec4_sg_ptr, u32 offset)
+{
+	while (sg_count) {
+		dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg),
+				   sg_dma_len(sg), offset);
+		sec4_sg_ptr++;
+		sg = scatterwalk_sg_next(sg);
+		sg_count--;
+	}
+	return sec4_sg_ptr - 1;
+}
+
+/*
+ * convert scatterlist to h/w link table format and set the final bit
+ * on the last entry; scatterlist must have been previously dma mapped
+ */
+static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
+				      struct sec4_sg_entry *sec4_sg_ptr,
+				      u32 offset)
+{
+	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
+	sec4_sg_ptr->len |= SEC4_SG_LEN_FIN;
+}
+
+/* count number of elements in scatterlist */
+static inline int __sg_count(struct scatterlist *sg_list, int nbytes,
+			     bool *chained)
+{
+	struct scatterlist *sg = sg_list;
+	int sg_nents = 0;
+
+	while (nbytes > 0) {
+		sg_nents++;
+		nbytes -= sg->length;
+		if (!sg_is_last(sg) && (sg + 1)->length == 0)
+			*chained = true;
+		sg = scatterwalk_sg_next(sg);
+	}
+
+	return sg_nents;
+}
+
+/* derive number of elements in scatterlist, but return 0 for a single entry */
+static inline int sg_count(struct scatterlist *sg_list, int nbytes,
+			     bool *chained)
+{
+	int sg_nents = __sg_count(sg_list, nbytes, chained);
+
+	if (likely(sg_nents == 1))
+		return 0;
+
+	return sg_nents;
+}
+
+static int dma_map_sg_chained(struct device *dev, struct scatterlist *sg,
+			      unsigned int nents, enum dma_data_direction dir,
+			      bool chained)
+{
+	if (unlikely(chained)) {
+		int i;
+		for (i = 0; i < nents; i++) {
+			dma_map_sg(dev, sg, 1, dir);
+			sg = scatterwalk_sg_next(sg);
+		}
+	} else {
+		dma_map_sg(dev, sg, nents, dir);
+	}
+	return nents;
+}
+
+static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg,
+				unsigned int nents, enum dma_data_direction dir,
+				bool chained)
+{
+	if (unlikely(chained)) {
+		int i;
+		for (i = 0; i < nents; i++) {
+			dma_unmap_sg(dev, sg, 1, dir);
+			sg = scatterwalk_sg_next(sg);
+		}
+	} else {
+		dma_unmap_sg(dev, sg, nents, dir);
+	}
+	return nents;
+}
+
+/* Copy the first len bytes of sg to dest, starting from the beginning */
+static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len)
+{
+	struct scatterlist *current_sg = sg;
+	int cpy_index = 0, next_cpy_index = current_sg->length;
+
+	while (next_cpy_index < len) {
+		memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg),
+		       current_sg->length);
+		current_sg = scatterwalk_sg_next(current_sg);
+		cpy_index = next_cpy_index;
+		next_cpy_index += current_sg->length;
+	}
+	if (cpy_index < len)
+		memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg),
+		       len - cpy_index);
+}
+
+/* Copy sg data, from to_skip to end, to dest */
+static inline void sg_copy_part(u8 *dest, struct scatterlist *sg,
+				      int to_skip, unsigned int end)
+{
+	struct scatterlist *current_sg = sg;
+	int sg_index, cpy_index;
+
+	sg_index = current_sg->length;
+	while (sg_index <= to_skip) {
+		current_sg = scatterwalk_sg_next(current_sg);
+		sg_index += current_sg->length;
+	}
+	cpy_index = sg_index - to_skip;
+	memcpy(dest, (u8 *) sg_virt(current_sg) +
+	       current_sg->length - cpy_index, cpy_index);
+	current_sg = scatterwalk_sg_next(current_sg);
+	if (end - sg_index)
+		sg_copy(dest + cpy_index, current_sg, end - sg_index);
+}
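These helpers are meant to be used as a sequence: count the scatterlist entries (flagging chained lists), DMA-map them, then emit the hardware link table with the final bit set on the last entry. A rough usage sketch follows; the function name, the pre-allocated sec4_sg buffer and the DMA direction are assumptions for illustration, not code from this patch.

	/* Sketch (not part of the patch): map a source scatterlist and
	 * build its SEC4 link table. */
	static int example_src_to_link_tbl(struct device *dev,
					   struct scatterlist *src, int nbytes,
					   struct sec4_sg_entry *sec4_sg)
	{
		bool chained = false;
		int src_nents = sg_count(src, nbytes, &chained);

		if (src_nents) {
			/* several entries: map them all, then build the table */
			dma_map_sg_chained(dev, src, src_nents, DMA_TO_DEVICE,
					   chained);
			sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
		} else {
			/* sg_count() returned 0: single entry, no table needed */
			dma_map_sg_chained(dev, src, 1, DMA_TO_DEVICE, chained);
		}
		return src_nents;
	}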
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 1cc6b3f3e262ac72ad958ea8e34a59c00663ed71..0d4071754352f54a36ff74875619ed95ffdaa246 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -24,6 +24,7 @@
 
 #define MV_CESA	"MV-CESA:"
 #define MAX_HW_HASH_SIZE	0xFFFF
+#define MV_CESA_EXPIRE		500 /* msec */
 
 /*
  * STM:
@@ -87,6 +88,7 @@ struct crypto_priv {
 	spinlock_t lock;
 	struct crypto_queue queue;
 	enum engine_status eng_st;
+	struct timer_list completion_timer;
 	struct crypto_async_request *cur_req;
 	struct req_progress p;
 	int max_req_size;
@@ -138,6 +140,29 @@ struct mv_req_hash_ctx {
 	int count_add;
 };
 
+static void mv_completion_timer_callback(unsigned long unused)
+{
+	int active = readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_EN_SEC_ACCL0;
+
+	printk(KERN_ERR MV_CESA
+	       "completion timer expired (CESA %sactive), cleaning up.\n",
+	       active ? "" : "in");
+
+	del_timer(&cpg->completion_timer);
+	writel(SEC_CMD_DISABLE_SEC, cpg->reg + SEC_ACCEL_CMD);
+	while (readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_DISABLE_SEC)
+		printk(KERN_INFO MV_CESA "%s: waiting for engine to finish\n", __func__);
+	cpg->eng_st = ENGINE_W_DEQUEUE;
+	wake_up_process(cpg->queue_th);
+}
+
+static void mv_setup_timer(void)
+{
+	setup_timer(&cpg->completion_timer, &mv_completion_timer_callback, 0);
+	mod_timer(&cpg->completion_timer,
+			jiffies + msecs_to_jiffies(MV_CESA_EXPIRE));
+}
+
 static void compute_aes_dec_key(struct mv_ctx *ctx)
 {
 	struct crypto_aes_ctx gen_aes_key;
@@ -273,12 +298,8 @@ static void mv_process_current_q(int first_block)
 			sizeof(struct sec_accel_config));
 
 	/* GO */
+	mv_setup_timer();
 	writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
-
-	/*
-	 * XXX: add timer if the interrupt does not occur for some mystery
-	 * reason
-	 */
 }
 
 static void mv_crypto_algo_completion(void)
@@ -357,12 +378,8 @@ static void mv_process_hash_current(int first_block)
 	memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
 
 	/* GO */
+	mv_setup_timer();
 	writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
-
-	/*
-	* XXX: add timer if the interrupt does not occur for some mystery
-	* reason
-	*/
 }
 
 static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
@@ -406,6 +423,15 @@ static int mv_hash_final_fallback(struct ahash_request *req)
 	return rc;
 }
 
+static void mv_save_digest_state(struct mv_req_hash_ctx *ctx)
+{
+	ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
+	ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
+	ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
+	ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
+	ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+}
+
 static void mv_hash_algo_completion(void)
 {
 	struct ahash_request *req = ahash_request_cast(cpg->cur_req);
@@ -420,14 +446,12 @@ static void mv_hash_algo_completion(void)
 			memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
 			       crypto_ahash_digestsize(crypto_ahash_reqtfm
 						       (req)));
-		} else
+		} else {
+			mv_save_digest_state(ctx);
 			mv_hash_final_fallback(req);
+		}
 	} else {
-		ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
-		ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
-		ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
-		ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
-		ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+		mv_save_digest_state(ctx);
 	}
 }
 
@@ -888,6 +912,10 @@ irqreturn_t crypto_int(int irq, void *priv)
 	if (!(val & SEC_INT_ACCEL0_DONE))
 		return IRQ_NONE;
 
+	if (!del_timer(&cpg->completion_timer)) {
+		printk(KERN_WARNING MV_CESA
+		       "got an interrupt but no pending timer?\n");
+	}
 	val &= ~SEC_INT_ACCEL0_DONE;
 	writel(val, cpg->reg + FPGA_INT_STATUS);
 	writel(val, cpg->reg + SEC_ACCEL_INT_STATUS);
@@ -1061,6 +1089,7 @@ static int mv_probe(struct platform_device *pdev)
 	if (!IS_ERR(cp->clk))
 		clk_prepare_enable(cp->clk);
 
+	writel(0, cpg->reg + SEC_ACCEL_INT_STATUS);
 	writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
 	writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
 	writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);
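The recovery logic added above follows a simple arm/disarm pairing: the watchdog is armed immediately before each "GO" write and disarmed in the interrupt handler, where a zero return from del_timer() flags an interrupt that arrived with no request outstanding. A generic sketch of that pairing, with every name and the 500 ms expiry below being illustrative rather than taken from the driver:

	/* Sketch (not part of the patch): generic completion-watchdog pairing. */
	static struct timer_list example_watchdog;

	static void example_watchdog_cb(unsigned long data)
	{
		pr_err("example: no completion interrupt, recovering\n");
		/* engine reset / queue cleanup would go here */
	}

	static void example_init(void)
	{
		setup_timer(&example_watchdog, example_watchdog_cb, 0);
	}

	static void example_start_hw(void __iomem *cmd_reg, u32 go_bit)
	{
		/* arm the watchdog just before kicking the engine */
		mod_timer(&example_watchdog, jiffies + msecs_to_jiffies(500));
		writel(go_bit, cmd_reg);
	}

	static irqreturn_t example_isr(int irq, void *priv)
	{
		/* disarm; del_timer() returns 0 if the timer was not pending */
		if (!del_timer(&example_watchdog))
			pr_warn("example: completion with no pending watchdog\n");
		return IRQ_HANDLED;
	}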
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 921039e56f87f9360156074233c9e38828bc1612..efff788d2f1d057f34ee956930213032724c1881 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -53,117 +53,6 @@
 
 #include "talitos.h"
 
-#define TALITOS_TIMEOUT 100000
-#define TALITOS_MAX_DATA_LEN 65535
-
-#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
-#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
-#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf)
-
-/* descriptor pointer entry */
-struct talitos_ptr {
-	__be16 len;	/* length */
-	u8 j_extent;	/* jump to sg link table and/or extent */
-	u8 eptr;	/* extended address */
-	__be32 ptr;	/* address */
-};
-
-static const struct talitos_ptr zero_entry = {
-	.len = 0,
-	.j_extent = 0,
-	.eptr = 0,
-	.ptr = 0
-};
-
-/* descriptor */
-struct talitos_desc {
-	__be32 hdr;			/* header high bits */
-	__be32 hdr_lo;			/* header low bits */
-	struct talitos_ptr ptr[7];	/* ptr/len pair array */
-};
-
-/**
- * talitos_request - descriptor submission request
- * @desc: descriptor pointer (kernel virtual)
- * @dma_desc: descriptor's physical bus address
- * @callback: whom to call when descriptor processing is done
- * @context: caller context (optional)
- */
-struct talitos_request {
-	struct talitos_desc *desc;
-	dma_addr_t dma_desc;
-	void (*callback) (struct device *dev, struct talitos_desc *desc,
-	                  void *context, int error);
-	void *context;
-};
-
-/* per-channel fifo management */
-struct talitos_channel {
-	void __iomem *reg;
-
-	/* request fifo */
-	struct talitos_request *fifo;
-
-	/* number of requests pending in channel h/w fifo */
-	atomic_t submit_count ____cacheline_aligned;
-
-	/* request submission (head) lock */
-	spinlock_t head_lock ____cacheline_aligned;
-	/* index to next free descriptor request */
-	int head;
-
-	/* request release (tail) lock */
-	spinlock_t tail_lock ____cacheline_aligned;
-	/* index to next in-progress/done descriptor request */
-	int tail;
-};
-
-struct talitos_private {
-	struct device *dev;
-	struct platform_device *ofdev;
-	void __iomem *reg;
-	int irq[2];
-
-	/* SEC global registers lock  */
-	spinlock_t reg_lock ____cacheline_aligned;
-
-	/* SEC version geometry (from device tree node) */
-	unsigned int num_channels;
-	unsigned int chfifo_len;
-	unsigned int exec_units;
-	unsigned int desc_types;
-
-	/* SEC Compatibility info */
-	unsigned long features;
-
-	/*
-	 * length of the request fifo
-	 * fifo_len is chfifo_len rounded up to next power of 2
-	 * so we can use bitwise ops to wrap
-	 */
-	unsigned int fifo_len;
-
-	struct talitos_channel *chan;
-
-	/* next channel to be assigned next incoming descriptor */
-	atomic_t last_chan ____cacheline_aligned;
-
-	/* request callback tasklet */
-	struct tasklet_struct done_task[2];
-
-	/* list of registered algorithms */
-	struct list_head alg_list;
-
-	/* hwrng device */
-	struct hwrng rng;
-};
-
-/* .features flag */
-#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
-#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
-#define TALITOS_FTR_SHA224_HWINIT 0x00000004
-#define TALITOS_FTR_HMAC_OK 0x00000008
-
 static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr)
 {
 	talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
@@ -303,11 +192,11 @@ static int init_device(struct device *dev)
  * callback must check err and feedback in descriptor header
  * for device processing status.
  */
-static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
-			  void (*callback)(struct device *dev,
-					   struct talitos_desc *desc,
-					   void *context, int error),
-			  void *context)
+int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
+		   void (*callback)(struct device *dev,
+				    struct talitos_desc *desc,
+				    void *context, int error),
+		   void *context)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	struct talitos_request *request;
@@ -348,6 +237,7 @@ static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 
 	return -EINPROGRESS;
 }
+EXPORT_SYMBOL(talitos_submit);
 
 /*
  * process what was done, notify callback of error if not
@@ -733,7 +623,7 @@ static void talitos_unregister_rng(struct device *dev)
  * crypto alg
  */
 #define TALITOS_CRA_PRIORITY		3000
-#define TALITOS_MAX_KEY_SIZE		64
+#define TALITOS_MAX_KEY_SIZE		96
 #define TALITOS_MAX_IV_LENGTH		16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
 
 #define MD5_BLOCK_SIZE    64
@@ -2066,6 +1956,59 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_PAD |
 		                     DESC_HDR_MODE1_MDEU_SHA1_HMAC,
 	},
+	{       .type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha224),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha224-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA224_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+				     DESC_HDR_SEL0_AESU |
+				     DESC_HDR_MODE0_AESU_CBC |
+				     DESC_HDR_SEL1_MDEUA |
+				     DESC_HDR_MODE1_MDEU_INIT |
+				     DESC_HDR_MODE1_MDEU_PAD |
+				     DESC_HDR_MODE1_MDEU_SHA224_HMAC,
+	},
+	{	.type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha224),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha224-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA224_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+			             DESC_HDR_SEL0_DEU |
+		                     DESC_HDR_MODE0_DEU_CBC |
+		                     DESC_HDR_MODE0_DEU_3DES |
+		                     DESC_HDR_SEL1_MDEUA |
+		                     DESC_HDR_MODE1_MDEU_INIT |
+		                     DESC_HDR_MODE1_MDEU_PAD |
+		                     DESC_HDR_MODE1_MDEU_SHA224_HMAC,
+	},
 	{	.type = CRYPTO_ALG_TYPE_AEAD,
 		.alg.crypto = {
 			.cra_name = "authenc(hmac(sha256),cbc(aes))",
@@ -2119,6 +2062,112 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_PAD |
 		                     DESC_HDR_MODE1_MDEU_SHA256_HMAC,
 	},
+	{	.type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha384),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha384-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA384_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+			             DESC_HDR_SEL0_AESU |
+		                     DESC_HDR_MODE0_AESU_CBC |
+		                     DESC_HDR_SEL1_MDEUB |
+		                     DESC_HDR_MODE1_MDEU_INIT |
+		                     DESC_HDR_MODE1_MDEU_PAD |
+		                     DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
+	},
+	{	.type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha384),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha384-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA384_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+			             DESC_HDR_SEL0_DEU |
+		                     DESC_HDR_MODE0_DEU_CBC |
+		                     DESC_HDR_MODE0_DEU_3DES |
+		                     DESC_HDR_SEL1_MDEUB |
+		                     DESC_HDR_MODE1_MDEU_INIT |
+		                     DESC_HDR_MODE1_MDEU_PAD |
+		                     DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
+	},
+	{	.type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha512),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha512-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA512_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+			             DESC_HDR_SEL0_AESU |
+		                     DESC_HDR_MODE0_AESU_CBC |
+		                     DESC_HDR_SEL1_MDEUB |
+		                     DESC_HDR_MODE1_MDEU_INIT |
+		                     DESC_HDR_MODE1_MDEU_PAD |
+		                     DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
+	},
+	{	.type = CRYPTO_ALG_TYPE_AEAD,
+		.alg.crypto = {
+			.cra_name = "authenc(hmac(sha512),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha512-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA512_DIGEST_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
+			             DESC_HDR_SEL0_DEU |
+		                     DESC_HDR_MODE0_DEU_CBC |
+		                     DESC_HDR_MODE0_DEU_3DES |
+		                     DESC_HDR_SEL1_MDEUB |
+		                     DESC_HDR_MODE1_MDEU_INIT |
+		                     DESC_HDR_MODE1_MDEU_PAD |
+		                     DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
+	},
 	{	.type = CRYPTO_ALG_TYPE_AEAD,
 		.alg.crypto = {
 			.cra_name = "authenc(hmac(md5),cbc(aes))",
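With talitos_submit() now exported and its prototype moved into talitos.h (see below), another module can hand a pre-built descriptor to a channel directly. A hedged sketch of such a caller follows; the descriptor construction is assumed to happen elsewhere, and the completion-based wait is just one possible way to consume the callback.

	/* Sketch (not part of the patch): submit a descriptor from outside
	 * talitos.c and wait for it synchronously. */
	static void example_done(struct device *dev, struct talitos_desc *desc,
				 void *context, int error)
	{
		if (error)
			dev_err(dev, "descriptor failed: %d\n", error);
		complete(context);	/* context is a struct completion * */
	}

	static int example_submit(struct device *dev, int ch,
				  struct talitos_desc *desc)
	{
		DECLARE_COMPLETION_ONSTACK(done);
		int ret;

		ret = talitos_submit(dev, ch, desc, example_done, &done);
		if (ret != -EINPROGRESS)	/* -EINPROGRESS means queued */
			return ret;
		wait_for_completion(&done);
		return 0;
	}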
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 3c173954ef295d618da2084e5082b940708536b5..61a14054aa39414664f637e10089ad4170a0651b 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -28,6 +28,123 @@
  *
  */
 
+#define TALITOS_TIMEOUT 100000
+#define TALITOS_MAX_DATA_LEN 65535
+
+#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
+#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
+#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf)
+
+/* descriptor pointer entry */
+struct talitos_ptr {
+	__be16 len;     /* length */
+	u8 j_extent;    /* jump to sg link table and/or extent */
+	u8 eptr;        /* extended address */
+	__be32 ptr;     /* address */
+};
+
+static const struct talitos_ptr zero_entry = {
+	.len = 0,
+	.j_extent = 0,
+	.eptr = 0,
+	.ptr = 0
+};
+
+/* descriptor */
+struct talitos_desc {
+	__be32 hdr;                     /* header high bits */
+	__be32 hdr_lo;                  /* header low bits */
+	struct talitos_ptr ptr[7];      /* ptr/len pair array */
+};
+
+/**
+ * talitos_request - descriptor submission request
+ * @desc: descriptor pointer (kernel virtual)
+ * @dma_desc: descriptor's physical bus address
+ * @callback: whom to call when descriptor processing is done
+ * @context: caller context (optional)
+ */
+struct talitos_request {
+	struct talitos_desc *desc;
+	dma_addr_t dma_desc;
+	void (*callback) (struct device *dev, struct talitos_desc *desc,
+			  void *context, int error);
+	void *context;
+};
+
+/* per-channel fifo management */
+struct talitos_channel {
+	void __iomem *reg;
+
+	/* request fifo */
+	struct talitos_request *fifo;
+
+	/* number of requests pending in channel h/w fifo */
+	atomic_t submit_count ____cacheline_aligned;
+
+	/* request submission (head) lock */
+	spinlock_t head_lock ____cacheline_aligned;
+	/* index to next free descriptor request */
+	int head;
+
+	/* request release (tail) lock */
+	spinlock_t tail_lock ____cacheline_aligned;
+	/* index to next in-progress/done descriptor request */
+	int tail;
+};
+
+struct talitos_private {
+	struct device *dev;
+	struct platform_device *ofdev;
+	void __iomem *reg;
+	int irq[2];
+
+	/* SEC global registers lock  */
+	spinlock_t reg_lock ____cacheline_aligned;
+
+	/* SEC version geometry (from device tree node) */
+	unsigned int num_channels;
+	unsigned int chfifo_len;
+	unsigned int exec_units;
+	unsigned int desc_types;
+
+	/* SEC Compatibility info */
+	unsigned long features;
+
+	/*
+	 * length of the request fifo
+	 * fifo_len is chfifo_len rounded up to next power of 2
+	 * so we can use bitwise ops to wrap
+	 */
+	unsigned int fifo_len;
+
+	struct talitos_channel *chan;
+
+	/* next channel to be assigned next incoming descriptor */
+	atomic_t last_chan ____cacheline_aligned;
+
+	/* request callback tasklet */
+	struct tasklet_struct done_task[2];
+
+	/* list of registered algorithms */
+	struct list_head alg_list;
+
+	/* hwrng device */
+	struct hwrng rng;
+};
+
+extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
+			  void (*callback)(struct device *dev,
+					   struct talitos_desc *desc,
+					   void *context, int error),
+			  void *context);
+
+/* .features flag */
+#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
+#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
+#define TALITOS_FTR_SHA224_HWINIT 0x00000004
+#define TALITOS_FTR_HMAC_OK 0x00000008
+
 /*
  * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register
  */
@@ -209,6 +326,12 @@
 					 DESC_HDR_MODE1_MDEU_HMAC)
 #define	DESC_HDR_MODE1_MDEU_SHA1_HMAC	(DESC_HDR_MODE1_MDEU_SHA1 | \
 					 DESC_HDR_MODE1_MDEU_HMAC)
+#define DESC_HDR_MODE1_MDEU_SHA224_HMAC	(DESC_HDR_MODE1_MDEU_SHA224 | \
+					 DESC_HDR_MODE1_MDEU_HMAC)
+#define DESC_HDR_MODE1_MDEUB_SHA384_HMAC	(DESC_HDR_MODE1_MDEUB_SHA384 | \
+						 DESC_HDR_MODE1_MDEU_HMAC)
+#define DESC_HDR_MODE1_MDEUB_SHA512_HMAC	(DESC_HDR_MODE1_MDEUB_SHA512 | \
+						 DESC_HDR_MODE1_MDEU_HMAC)
 
 /* direction of overall data flow (DIR) */
 #define	DESC_HDR_DIR_INBOUND		cpu_to_be32(0x00000002)
diff --git a/include/linux/platform_data/atmel-aes.h b/include/linux/platform_data/atmel-aes.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7a1949bad2670b277ee95661cd7176621b0843b
--- /dev/null
+++ b/include/linux/platform_data/atmel-aes.h
@@ -0,0 +1,22 @@
+#ifndef __LINUX_ATMEL_AES_H
+#define __LINUX_ATMEL_AES_H
+
+#include <mach/at_hdmac.h>
+
+/**
+ * struct aes_dma_data - DMA data for AES
+ */
+struct aes_dma_data {
+	struct at_dma_slave	txdata;
+	struct at_dma_slave	rxdata;
+};
+
+/**
+ * struct aes_platform_data - board-specific AES configuration
+ * @dma_slave: DMA slave interface to use in data transfers.
+ */
+struct aes_platform_data {
+	struct aes_dma_data	*dma_slave;
+};
+
+#endif /* __LINUX_ATMEL_AES_H */
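A board file would typically hand this structure to the AES platform device as platform data. A minimal sketch, assuming the at_dma_slave channels are filled in by the board's usual AT91 DMA setup and that the variable names are illustrative:

	/* Sketch (not part of the patch): board-side AES platform data. */
	static struct aes_dma_data example_aes_dma_data;	/* txdata/rxdata set
								 * by board DMA setup */

	static struct aes_platform_data example_aes_pdata = {
		.dma_slave	= &example_aes_dma_data,
	};

	/* attached to the "atmel_aes" platform device before registration,
	 * e.g.: pdev->dev.platform_data = &example_aes_pdata; */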