Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (125 commits) [CRYPTO] twofish: Merge common glue code [CRYPTO] hifn_795x: Fixup container_of() usage [CRYPTO] cast6: inline bloat-- [CRYPTO] api: Set default CRYPTO_MINALIGN to unsigned long long [CRYPTO] tcrypt: Make xcbc available as a standalone test [CRYPTO] xcbc: Remove bogus hash/cipher test [CRYPTO] xcbc: Fix algorithm leak when block size check fails [CRYPTO] tcrypt: Zero axbuf in the right function [CRYPTO] padlock: Only reset the key once for each CBC and ECB operation [CRYPTO] api: Include sched.h for cond_resched in scatterwalk.h [CRYPTO] salsa20-asm: Remove unnecessary dependency on CRYPTO_SALSA20 [CRYPTO] tcrypt: Add select of AEAD [CRYPTO] salsa20: Add x86-64 assembly version [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version) [CRYPTO] gcm: Introduce rfc4106 [CRYPTO] api: Show async type [CRYPTO] chainiv: Avoid lock spinning where possible [CRYPTO] seqiv: Add select AEAD in Kconfig [CRYPTO] scatterwalk: Handle zero nbytes in scatterwalk_map_and_copy [CRYPTO] null: Allow setkey on digest_null ...

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (125 commits) [CRYPTO] twofish: Merge common glue code [CRYPTO] hifn_795x: Fixup container_of() usage [CRYPTO] cast6: inline bloat-- [CRYPTO] api: Set default CRYPTO_MINALIGN to unsigned long long [CRYPTO] tcrypt: Make xcbc available as a standalone test [CRYPTO] xcbc: Remove bogus hash/cipher test [CRYPTO] xcbc: Fix algorithm leak when block size check fails [CRYPTO] tcrypt: Zero axbuf in the right function [CRYPTO] padlock: Only reset the key once for each CBC and ECB operation [CRYPTO] api: Include sched.h for cond_resched in scatterwalk.h [CRYPTO] salsa20-asm: Remove unnecessary dependency on CRYPTO_SALSA20 [CRYPTO] tcrypt: Add select of AEAD [CRYPTO] salsa20: Add x86-64 assembly version [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version) [CRYPTO] gcm: Introduce rfc4106 [CRYPTO] api: Show async type [CRYPTO] chainiv: Avoid lock spinning where possible [CRYPTO] seqiv: Add select AEAD in Kconfig [CRYPTO] scatterwalk: Handle zero nbytes in scatterwalk_map_and_copy [CRYPTO] null: Allow setkey on digest_null ...
eba0e319 · Linus Torvalds · df8dc74e · 15e7b445 · eba0e319 · eba0e319
72 changed file
--- a/Documentation/crypto/api-intro.txt
+++ b/Documentation/crypto/api-intro.txt
@@ -33,9 +33,16 @@ The idea is to make the user interface and algorithm registration API
 very simple, while hiding the core logic from both.  Many good ideas
 from existing APIs such as Cryptoapi and Nettle have been adapted for this.

-The API currently supports three types of transforms: Ciphers, Digests and
-Compressors.  The compression algorithms especially seem to be performing
-very well so far.
+The API currently supports five main types of transforms: AEAD (Authenticated
+Encryption with Associated Data), Block Ciphers, Ciphers, Compressors and
+Hashes.
+
+Please note that Block Ciphers is somewhat of a misnomer.  It is in fact
+meant to support all ciphers including stream ciphers.  The difference
+between Block Ciphers and Ciphers is that the latter operates on exactly
+one block while the former can operate on an arbitrary amount of data,
+subject to block size requirements (i.e., non-stream ciphers can only
+process multiples of blocks).

 Support for hardware crypto devices via an asynchronous interface is
 under development.
@@ -69,29 +76,12 @@ Here's an example of how to use the API:
 Many real examples are available in the regression test module (tcrypt.c).


-CONFIGURATION NOTES
-
-As Triple DES is part of the DES module, for those using modular builds,
-add the following line to /etc/modprobe.conf:
-
-  alias des3_ede des
-
-The Null algorithms reside in the crypto_null module, so these lines
-should also be added:
-
-  alias cipher_null crypto_null
-  alias digest_null crypto_null
-  alias compress_null crypto_null
-
-The SHA384 algorithm shares code within the SHA512 module, so you'll
-also need:
-  alias sha384 sha512
-
-
 DEVELOPER NOTES

 Transforms may only be allocated in user context, and cryptographic
-methods may only be called from softirq and user contexts.
+methods may only be called from softirq and user contexts.  For
+transforms with a setkey method it too should only be called from
+user context.

 When using the API for ciphers, performance will be optimal if each
 scatterlist contains data which is a multiple of the cipher's block
@@ -130,8 +120,9 @@ might already be working on.
 BUGS

 Send bug reports to:
-Herbert Xu <herbert@gondor.apana.org.au>
-Cc: David S. Miller <davem@redhat.com>
+linux-crypto@vger.kernel.org
+Cc: Herbert Xu <herbert@gondor.apana.org.au>,
+    David S. Miller <davem@redhat.com>


 FURTHER INFORMATION

--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -6,6 +6,7 @@
 * s390 Version:
 *   Copyright IBM Corp. 2005,2007
 *   Author(s): Jan Glauber (jang@de.ibm.com)
+ *		Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback
 *
 * Derived from "crypto/aes_generic.c"
 *
@@ -16,17 +17,13 @@
 *
 */

+#include <crypto/aes.h>
 #include <crypto/algapi.h>
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include "crypt_s390.h"

-#define AES_MIN_KEY_SIZE	16
-#define AES_MAX_KEY_SIZE	32
-
-/* data block size for all key lengths */
-#define AES_BLOCK_SIZE		16
-
 #define AES_KEYLEN_128		1
 #define AES_KEYLEN_192		2
 #define AES_KEYLEN_256		4
@@ -39,45 +36,89 @@ struct s390_aes_ctx {
 	long enc;
 	long dec;
 	int key_len;
+	union {
+		struct crypto_blkcipher *blk;
+		struct crypto_cipher *cip;
+	} fallback;
 };

-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len)
+/*
+ * Check if the key_len is supported by the HW.
+ * Returns 0 if it is, a positive number if it is not and software fallback is
+ * required or a negative number in case the key size is not valid
+ */
+static int need_fallback(unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-
 	switch (key_len) {
 	case 16:
 		if (!(keylen_flag & AES_KEYLEN_128))
-			goto fail;
+			return 1;
 		break;
 	case 24:
 		if (!(keylen_flag & AES_KEYLEN_192))
-			goto fail;
-
+			return 1;
 		break;
 	case 32:
 		if (!(keylen_flag & AES_KEYLEN_256))
-			goto fail;
+			return 1;
 		break;
 	default:
-		goto fail;
+		return -1;
 		break;
 	}
+	return 0;
+}
+
+static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int ret;
+
+	ret = need_fallback(key_len);
+	if (ret < 0) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}

 	sctx->key_len = key_len;
-	memcpy(sctx->key, in_key, key_len);
-	return 0;
-fail:
-	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-	return -EINVAL;
+	if (!ret) {
+		memcpy(sctx->key, in_key, key_len);
+		return 0;
+	}
+
+	return setkey_fallback_cip(tfm, in_key, key_len);
 }

 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);

+	if (unlikely(need_fallback(sctx->key_len))) {
+		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+
 	switch (sctx->key_len) {
 	case 16:
 		crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in,
@@ -98,6 +139,11 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);

+	if (unlikely(need_fallback(sctx->key_len))) {
+		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+
 	switch (sctx->key_len) {
 	case 16:
 		crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in,
@@ -114,6 +160,29 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	}
 }

+static int fallback_init_cip(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	sctx->fallback.cip = crypto_alloc_cipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(sctx->fallback.cip)) {
+		printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+		return PTR_ERR(sctx->fallback.blk);
+	}
+
+	return 0;
+}
+
+static void fallback_exit_cip(struct crypto_tfm *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(sctx->fallback.cip);
+	sctx->fallback.cip = NULL;
+}

 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
@@ -125,6 +194,8 @@ static struct crypto_alg aes_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_init               =       fallback_init_cip,
+	.cra_exit               =       fallback_exit_cip,
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
@@ -136,10 +207,70 @@ static struct crypto_alg aes_alg = {
 	}
 };

+static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
+		unsigned int len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	unsigned int ret;
+
+	sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+static int fallback_blk_dec(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	unsigned int ret;
+	struct crypto_blkcipher *tfm;
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	tfm = desc->tfm;
+	desc->tfm = sctx->fallback.blk;
+
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+static int fallback_blk_enc(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	unsigned int ret;
+	struct crypto_blkcipher *tfm;
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	tfm = desc->tfm;
+	desc->tfm = sctx->fallback.blk;
+
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
 static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = need_fallback(key_len);
+	if (ret > 0) {
+		sctx->key_len = key_len;
+		return setkey_fallback_blk(tfm, in_key, key_len);
+	}

 	switch (key_len) {
 	case 16:
@@ -188,6 +319,9 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;

+	if (unlikely(need_fallback(sctx->key_len)))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
 }
@@ -199,10 +333,37 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;

+	if (unlikely(need_fallback(sctx->key_len)))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
 }

+static int fallback_init_blk(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	sctx->fallback.blk = crypto_alloc_blkcipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(sctx->fallback.blk)) {
+		printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+		return PTR_ERR(sctx->fallback.blk);
+	}
+
+	return 0;
+}
+
+static void fallback_exit_blk(struct crypto_tfm *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_blkcipher(sctx->fallback.blk);
+	sctx->fallback.blk = NULL;
+}
+
 static struct crypto_alg ecb_aes_alg = {
 	.cra_name		=	"ecb(aes)",
 	.cra_driver_name	=	"ecb-aes-s390",
@@ -214,6 +375,8 @@ static struct crypto_alg ecb_aes_alg = {
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+	.cra_init		=	fallback_init_blk,
+	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -229,6 +392,13 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = need_fallback(key_len);
+	if (ret > 0) {
+		sctx->key_len = key_len;
+		return setkey_fallback_blk(tfm, in_key, key_len);
+	}

 	switch (key_len) {
 	case 16:
@@ -283,6 +453,9 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;

+	if (unlikely(need_fallback(sctx->key_len)))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
 }
@@ -294,6 +467,9 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;

+	if (unlikely(need_fallback(sctx->key_len)))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
 }
@@ -309,6 +485,8 @@ static struct crypto_alg cbc_aes_alg = {
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+	.cra_init		=	fallback_init_blk,
+	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -336,14 +514,10 @@ static int __init aes_init(void)
 		return -EOPNOTSUPP;

 	/* z9 109 and z9 BC/EC only support 128 bit key length */
-	if (keylen_flag == AES_KEYLEN_128) {
-		aes_alg.cra_u.cipher.cia_max_keysize = AES_MIN_KEY_SIZE;
-		ecb_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE;
-		cbc_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE;
+	if (keylen_flag == AES_KEYLEN_128)
 		printk(KERN_INFO
 		       "aes_s390: hardware acceleration only available for"
 		       "128 bit keys\n");
-	}

 	ret = crypto_register_alg(&aes_alg);
 	if (ret)
@@ -382,4 +556,3 @@ MODULE_ALIAS("aes");

 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
 MODULE_LICENSE("GPL");
-
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,12 +4,16 @@

 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
+obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o

 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
+obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o

-aes-i586-y := aes-i586-asm_32.o aes_32.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
+aes-i586-y := aes-i586-asm_32.o aes_glue.o
+twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
+salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o

-aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o
-twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o
+aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
+salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -46,9 +46,9 @@
 #define in_blk 16

 /* offsets in crypto_tfm structure */
-#define ekey (crypto_tfm_ctx_offset + 0)
-#define nrnd (crypto_tfm_ctx_offset + 256)
-#define dkey (crypto_tfm_ctx_offset + 260)
+#define klen (crypto_tfm_ctx_offset + 0)
+#define ekey (crypto_tfm_ctx_offset + 4)
+#define dkey (crypto_tfm_ctx_offset + 244)

 // register mapping for encrypt and decrypt subroutines

@@ -221,8 +221,8 @@

 .global  aes_enc_blk

-.extern  ft_tab
-.extern  fl_tab
+.extern  crypto_ft_tab
+.extern  crypto_fl_tab

 .align 4

@@ -236,7 +236,7 @@ aes_enc_blk:
 1:	push    %ebx
 	mov     in_blk+4(%esp),%r2
 	push    %esi
-	mov     nrnd(%ebp),%r3   // number of rounds
+	mov     klen(%ebp),%r3   // key size
 	push    %edi
 #if ekey != 0
 	lea     ekey(%ebp),%ebp  // key pointer
@@ -255,26 +255,26 @@ aes_enc_blk:

 	sub     $8,%esp		// space for register saves on stack
 	add     $16,%ebp	// increment to next round key
-	cmp     $12,%r3
+	cmp     $24,%r3
 	jb      4f		// 10 rounds for 128-bit key
 	lea     32(%ebp),%ebp
 	je      3f		// 12 rounds for 192-bit key
 	lea     32(%ebp),%ebp

-2:	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
-	fwd_rnd2( -48(%ebp) ,ft_tab)
-3:	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 192-bit key
-	fwd_rnd2( -16(%ebp) ,ft_tab)
-4:	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
-	fwd_rnd2( +16(%ebp) ,ft_tab)
-	fwd_rnd1( +32(%ebp) ,ft_tab)
-	fwd_rnd2( +48(%ebp) ,ft_tab)
-	fwd_rnd1( +64(%ebp) ,ft_tab)
-	fwd_rnd2( +80(%ebp) ,ft_tab)
-	fwd_rnd1( +96(%ebp) ,ft_tab)
-	fwd_rnd2(+112(%ebp) ,ft_tab)
-	fwd_rnd1(+128(%ebp) ,ft_tab)
-	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table
+2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
+	fwd_rnd2( -48(%ebp), crypto_ft_tab)
+3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
+	fwd_rnd2( -16(%ebp), crypto_ft_tab)
+4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
+	fwd_rnd2( +16(%ebp), crypto_ft_tab)
+	fwd_rnd1( +32(%ebp), crypto_ft_tab)
+	fwd_rnd2( +48(%ebp), crypto_ft_tab)
+	fwd_rnd1( +64(%ebp), crypto_ft_tab)
+	fwd_rnd2( +80(%ebp), crypto_ft_tab)
+	fwd_rnd1( +96(%ebp), crypto_ft_tab)
+	fwd_rnd2(+112(%ebp), crypto_ft_tab)
+	fwd_rnd1(+128(%ebp), crypto_ft_tab)
+	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table

 // move final values to the output array.  CAUTION: the 
 // order of these assigns rely on the register mappings
@@ -297,8 +297,8 @@ aes_enc_blk:

 .global  aes_dec_blk

-.extern  it_tab
-.extern  il_tab
+.extern  crypto_it_tab
+.extern  crypto_il_tab

 .align 4

@@ -312,14 +312,11 @@ aes_dec_blk:
 1:	push    %ebx
 	mov     in_blk+4(%esp),%r2
 	push    %esi
-	mov     nrnd(%ebp),%r3   // number of rounds
+	mov     klen(%ebp),%r3   // key size
 	push    %edi
 #if dkey != 0
 	lea     dkey(%ebp),%ebp  // key pointer
 #endif
-	mov     %r3,%r0
-	shl     $4,%r0
-	add     %r0,%ebp
 	
 // input four columns and xor in first round key

@@ -333,27 +330,27 @@ aes_dec_blk:
 	xor     12(%ebp),%r5

 	sub     $8,%esp		// space for register saves on stack
-	sub     $16,%ebp	// increment to next round key
-	cmp     $12,%r3
+	add     $16,%ebp	// increment to next round key
+	cmp     $24,%r3
 	jb      4f		// 10 rounds for 128-bit key
-	lea     -32(%ebp),%ebp
+	lea     32(%ebp),%ebp
 	je      3f		// 12 rounds for 192-bit key
-	lea     -32(%ebp),%ebp
-
-2:	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
-	inv_rnd2( +48(%ebp), it_tab)
-3:	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 192-bit key
-	inv_rnd2( +16(%ebp), it_tab)
-4:	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
-	inv_rnd2( -16(%ebp), it_tab)
-	inv_rnd1( -32(%ebp), it_tab)
-	inv_rnd2( -48(%ebp), it_tab)
-	inv_rnd1( -64(%ebp), it_tab)
-	inv_rnd2( -80(%ebp), it_tab)
-	inv_rnd1( -96(%ebp), it_tab)
-	inv_rnd2(-112(%ebp), it_tab)
-	inv_rnd1(-128(%ebp), it_tab)
-	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table
+	lea     32(%ebp),%ebp
+
+2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
+	inv_rnd2( -48(%ebp), crypto_it_tab)
+3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
+	inv_rnd2( -16(%ebp), crypto_it_tab)
+4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
+	inv_rnd2( +16(%ebp), crypto_it_tab)
+	inv_rnd1( +32(%ebp), crypto_it_tab)
+	inv_rnd2( +48(%ebp), crypto_it_tab)
+	inv_rnd1( +64(%ebp), crypto_it_tab)
+	inv_rnd2( +80(%ebp), crypto_it_tab)
+	inv_rnd1( +96(%ebp), crypto_it_tab)
+	inv_rnd2(+112(%ebp), crypto_it_tab)
+	inv_rnd1(+128(%ebp), crypto_it_tab)
+	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table

 // move final values to the output array.  CAUTION: the 
 // order of these assigns rely on the register mappings

--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -8,10 +8,10 @@
 * including this sentence is retained in full.
 */

-.extern aes_ft_tab
-.extern aes_it_tab
-.extern aes_fl_tab
-.extern aes_il_tab
+.extern crypto_ft_tab
+.extern crypto_it_tab
+.extern crypto_fl_tab
+.extern crypto_il_tab

 .text

@@ -56,13 +56,13 @@
 	.align	8;			\
 FUNC:	movq	r1,r2;			\
 	movq	r3,r4;			\
-	leaq	BASE+KEY+52(r8),r9;	\
+	leaq	BASE+KEY+48+4(r8),r9;	\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	4(r7),r1 ## E;		\
 	movl	8(r7),r6 ## E;		\
 	movl	12(r7),r7 ## E;		\
-	movl	BASE(r8),r10 ## E;	\
+	movl	BASE+0(r8),r10 ## E;	\
 	xorl	-48(r9),r5 ## E;	\
 	xorl	-44(r9),r1 ## E;	\
 	xorl	-40(r9),r6 ## E;	\
@@ -154,37 +154,37 @@ FUNC:	movq	r1,r2;			\
 /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */

 	entry(aes_enc_blk,0,enc128,enc192)
-	encrypt_round(aes_ft_tab,-96)
-	encrypt_round(aes_ft_tab,-80)
-enc192:	encrypt_round(aes_ft_tab,-64)
-	encrypt_round(aes_ft_tab,-48)
-enc128:	encrypt_round(aes_ft_tab,-32)
-	encrypt_round(aes_ft_tab,-16)
-	encrypt_round(aes_ft_tab,  0)
-	encrypt_round(aes_ft_tab, 16)
-	encrypt_round(aes_ft_tab, 32)
-	encrypt_round(aes_ft_tab, 48)
-	encrypt_round(aes_ft_tab, 64)
-	encrypt_round(aes_ft_tab, 80)
-	encrypt_round(aes_ft_tab, 96)
-	encrypt_final(aes_fl_tab,112)
+	encrypt_round(crypto_ft_tab,-96)
+	encrypt_round(crypto_ft_tab,-80)
+enc192:	encrypt_round(crypto_ft_tab,-64)
+	encrypt_round(crypto_ft_tab,-48)
+enc128:	encrypt_round(crypto_ft_tab,-32)
+	encrypt_round(crypto_ft_tab,-16)
+	encrypt_round(crypto_ft_tab,  0)
+	encrypt_round(crypto_ft_tab, 16)
+	encrypt_round(crypto_ft_tab, 32)
+	encrypt_round(crypto_ft_tab, 48)
+	encrypt_round(crypto_ft_tab, 64)
+	encrypt_round(crypto_ft_tab, 80)
+	encrypt_round(crypto_ft_tab, 96)
+	encrypt_final(crypto_fl_tab,112)
 	return

 /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */

 	entry(aes_dec_blk,240,dec128,dec192)
-	decrypt_round(aes_it_tab,-96)
-	decrypt_round(aes_it_tab,-80)
-dec192:	decrypt_round(aes_it_tab,-64)
-	decrypt_round(aes_it_tab,-48)
-dec128:	decrypt_round(aes_it_tab,-32)
-	decrypt_round(aes_it_tab,-16)
-	decrypt_round(aes_it_tab,  0)
-	decrypt_round(aes_it_tab, 16)
-	decrypt_round(aes_it_tab, 32)
-	decrypt_round(aes_it_tab, 48)
-	decrypt_round(aes_it_tab, 64)
-	decrypt_round(aes_it_tab, 80)
-	decrypt_round(aes_it_tab, 96)
-	decrypt_final(aes_il_tab,112)
+	decrypt_round(crypto_it_tab,-96)
+	decrypt_round(crypto_it_tab,-80)
+dec192:	decrypt_round(crypto_it_tab,-64)
+	decrypt_round(crypto_it_tab,-48)
+dec128:	decrypt_round(crypto_it_tab,-32)
+	decrypt_round(crypto_it_tab,-16)
+	decrypt_round(crypto_it_tab,  0)
+	decrypt_round(crypto_it_tab, 16)
+	decrypt_round(crypto_it_tab, 32)
+	decrypt_round(crypto_it_tab, 48)
+	decrypt_round(crypto_it_tab, 64)
+	decrypt_round(crypto_it_tab, 80)
+	decrypt_round(crypto_it_tab, 96)
+	decrypt_final(crypto_il_tab,112)
 	return
--- a/arch/x86/crypto/aes_32.c
+++ b/arch/x86/crypto/aes_32.c
-/* 
- * 
- * Glue Code for optimized 586 assembler version of AES
- *
- * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- *
- * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
- * 2.5 API).
- * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- */
-
-#include <asm/byteorder.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/linkage.h>
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-#define AES_MIN_KEY_SIZE	16
-#define AES_MAX_KEY_SIZE	32
-#define AES_BLOCK_SIZE		16
-#define AES_KS_LENGTH		4 * AES_BLOCK_SIZE
-#define RC_LENGTH		29
-
-struct aes_ctx {
-	u32 ekey[AES_KS_LENGTH];
-	u32 rounds;
-	u32 dkey[AES_KS_LENGTH];
-};
-
-#define WPOLY 0x011b
-#define bytes2word(b0, b1, b2, b3)  \
-	(((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
-
-/* define the finite field multiplies required for Rijndael */
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ?   pow[255 - log[x]]: 0)
-
-static inline u32 upr(u32 x, int n)
-{
-	return (x << 8 * n) | (x >> (32 - 8 * n));
-}
-
-static inline u8 bval(u32 x, int n)
-{
-	return x >> 8 * n;
-}
-
-/* The forward and inverse affine transformations used in the S-box */
-#define fwd_affine(x) \
-	(w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
-
-#define inv_affine(x) \
-	(w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
-
-static u32 rcon_tab[RC_LENGTH];
-
-u32 ft_tab[4][256];
-u32 fl_tab[4][256];
-static u32 im_tab[4][256];
-u32 il_tab[4][256];
-u32 it_tab[4][256];
-
-static void gen_tabs(void)
-{
-	u32 i, w;
-	u8 pow[512], log[256];
-
-	/*
-	 * log and power tables for GF(2^8) finite field with
-	 * WPOLY as modular polynomial - the simplest primitive
-	 * root is 0x03, used here to generate the tables.
-	 */
-	i = 0; w = 1; 
-	
-	do {
-		pow[i] = (u8)w;
-		pow[i + 255] = (u8)w;
-		log[w] = (u8)i++;
-		w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-	} while (w != 1);
-	
-	for(i = 0, w = 1; i < RC_LENGTH; ++i) {
-		rcon_tab[i] = bytes2word(w, 0, 0, 0);
-		w = f2(w);
-	}
-
-	for(i = 0; i < 256; ++i) {
-		u8 b;
-		
-		b = fwd_affine(fi((u8)i));
-		w = bytes2word(f2(b), b, b, f3(b));
-
-		/* tables for a normal encryption round */
-		ft_tab[0][i] = w;
-		ft_tab[1][i] = upr(w, 1);
-		ft_tab[2][i] = upr(w, 2);
-		ft_tab[3][i] = upr(w, 3);
-		w = bytes2word(b, 0, 0, 0);
-		
-		/*
-		 * tables for last encryption round
-		 * (may also be used in the key schedule)
-		 */
-		fl_tab[0][i] = w;
-		fl_tab[1][i] = upr(w, 1);
-		fl_tab[2][i] = upr(w, 2);
-		fl_tab[3][i] = upr(w, 3);
-		
-		b = fi(inv_affine((u8)i));
-		w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-		/* tables for the inverse mix column operation  */
-		im_tab[0][b] = w;
-		im_tab[1][b] = upr(w, 1);
-		im_tab[2][b] = upr(w, 2);
-		im_tab[3][b] = upr(w, 3);
-
-		/* tables for a normal decryption round */
-		it_tab[0][i] = w;
-		it_tab[1][i] = upr(w,1);
-		it_tab[2][i] = upr(w,2);
-		it_tab[3][i] = upr(w,3);
-
-		w = bytes2word(b, 0, 0, 0);
-		
-		/* tables for last decryption round */
-		il_tab[0][i] = w;
-		il_tab[1][i] = upr(w,1);
-		il_tab[2][i] = upr(w,2);
-		il_tab[3][i] = upr(w,3);
-    }
-}
-
-#define four_tables(x,tab,vf,rf,c)		\
-(	tab[0][bval(vf(x,0,c),rf(0,c))]	^	\
-	tab[1][bval(vf(x,1,c),rf(1,c))] ^	\
-	tab[2][bval(vf(x,2,c),rf(2,c))] ^	\
-	tab[3][bval(vf(x,3,c),rf(3,c))]		\
-)
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((r-c)&3)
-
-#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
-#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
-
-#define ff(x) inv_mcol(x)
-
-#define ke4(k,i)							\
-{									\
-	k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];		\
-	k[4*(i)+5] = ss[1] ^= ss[0];					\
-	k[4*(i)+6] = ss[2] ^= ss[1];					\
-	k[4*(i)+7] = ss[3] ^= ss[2];					\
-}
-
-#define kel4(k,i)							\
-{									\
-	k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];		\
-	k[4*(i)+5] = ss[1] ^= ss[0];					\
-	k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];	\
-}
-
-#define ke6(k,i)							\
-{									\
-	k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];		\
-	k[6*(i)+ 7] = ss[1] ^= ss[0];					\
-	k[6*(i)+ 8] = ss[2] ^= ss[1];					\
-	k[6*(i)+ 9] = ss[3] ^= ss[2];					\
-	k[6*(i)+10] = ss[4] ^= ss[3];					\
-	k[6*(i)+11] = ss[5] ^= ss[4];					\
-}
-
-#define kel6(k,i)							\
-{									\
-	k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];		\
-	k[6*(i)+ 7] = ss[1] ^= ss[0];					\
-	k[6*(i)+ 8] = ss[2] ^= ss[1];					\
-	k[6*(i)+ 9] = ss[3] ^= ss[2];					\
-}
-
-#define ke8(k,i)							\
-{									\
-	k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];		\
-	k[8*(i)+ 9] = ss[1] ^= ss[0];					\
-	k[8*(i)+10] = ss[2] ^= ss[1];					\
-	k[8*(i)+11] = ss[3] ^= ss[2];					\
-	k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0);				\
-	k[8*(i)+13] = ss[5] ^= ss[4];					\
-	k[8*(i)+14] = ss[6] ^= ss[5];					\
-	k[8*(i)+15] = ss[7] ^= ss[6];					\
-}
-
-#define kel8(k,i)							\
-{									\
-	k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];		\
-	k[8*(i)+ 9] = ss[1] ^= ss[0];					\
-	k[8*(i)+10] = ss[2] ^= ss[1];					\
-	k[8*(i)+11] = ss[3] ^= ss[2];					\
-}
-
-#define kdf4(k,i)							\
-{									\
-	ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];				\
-	ss[1] = ss[1] ^ ss[3];						\
-	ss[2] = ss[2] ^ ss[3];						\
-	ss[3] = ss[3];							\
-	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
-	ss[i % 4] ^= ss[4];						\
-	ss[4] ^= k[4*(i)];						\
-	k[4*(i)+4] = ff(ss[4]);						\
-	ss[4] ^= k[4*(i)+1];						\
-	k[4*(i)+5] = ff(ss[4]);						\
-	ss[4] ^= k[4*(i)+2];						\
-	k[4*(i)+6] = ff(ss[4]);						\
-	ss[4] ^= k[4*(i)+3];						\
-	k[4*(i)+7] = ff(ss[4]);						\
-}
-
-#define kd4(k,i)							\
-{									\
-	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
-	ss[i % 4] ^= ss[4];						\
-	ss[4] = ff(ss[4]);						\
-	k[4*(i)+4] = ss[4] ^= k[4*(i)];					\
-	k[4*(i)+5] = ss[4] ^= k[4*(i)+1];				\
-	k[4*(i)+6] = ss[4] ^= k[4*(i)+2];				\
-	k[4*(i)+7] = ss[4] ^= k[4*(i)+3];				\
-}
-
-#define kdl4(k,i)							\
-{									\
-	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
-	ss[i % 4] ^= ss[4];						\
-	k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];			\
-	k[4*(i)+5] = ss[1] ^ ss[3];					\
-	k[4*(i)+6] = ss[0];						\
-	k[4*(i)+7] = ss[1];						\
-}
-
-#define kdf6(k,i)							\
-{									\
-	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];				\
-	k[6*(i)+ 6] = ff(ss[0]);					\
-	ss[1] ^= ss[0];							\
-	k[6*(i)+ 7] = ff(ss[1]);					\
-	ss[2] ^= ss[1];							\
-	k[6*(i)+ 8] = ff(ss[2]);					\
-	ss[3] ^= ss[2];							\
-	k[6*(i)+ 9] = ff(ss[3]);					\
-	ss[4] ^= ss[3];							\
-	k[6*(i)+10] = ff(ss[4]);					\
-	ss[5] ^= ss[4];							\
-	k[6*(i)+11] = ff(ss[5]);					\
-}
-
-#define kd6(k,i)							\
-{									\
-	ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];				\
-	ss[0] ^= ss[6]; ss[6] = ff(ss[6]);				\
-	k[6*(i)+ 6] = ss[6] ^= k[6*(i)];				\
-	ss[1] ^= ss[0];							\
-	k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];				\
-	ss[2] ^= ss[1];							\
-	k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];				\
-	ss[3] ^= ss[2];							\
-	k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];				\
-	ss[4] ^= ss[3];							\
-	k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];				\
-	ss[5] ^= ss[4];							\
-	k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];				\
-}
-
-#define kdl6(k,i)							\
-{									\
-	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];				\
-	k[6*(i)+ 6] = ss[0];						\
-	ss[1] ^= ss[0];							\
-	k[6*(i)+ 7] = ss[1];						\
-	ss[2] ^= ss[1];							\
-	k[6*(i)+ 8] = ss[2];						\
-	ss[3] ^= ss[2];							\
-	k[6*(i)+ 9] = ss[3];						\
-}
-
-#define kdf8(k,i)							\
-{									\
-	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];				\
-	k[8*(i)+ 8] = ff(ss[0]);					\
-	ss[1] ^= ss[0];							\
-	k[8*(i)+ 9] = ff(ss[1]);					\
-	ss[2] ^= ss[1];							\
-	k[8*(i)+10] = ff(ss[2]);					\
-	ss[3] ^= ss[2];							\
-	k[8*(i)+11] = ff(ss[3]);					\
-	ss[4] ^= ls_box(ss[3],0);					\
-	k[8*(i)+12] = ff(ss[4]);					\
-	ss[5] ^= ss[4];							\
-	k[8*(i)+13] = ff(ss[5]);					\
-	ss[6] ^= ss[5];							\
-	k[8*(i)+14] = ff(ss[6]);					\
-	ss[7] ^= ss[6];							\
-	k[8*(i)+15] = ff(ss[7]);					\
-}
-
-#define kd8(k,i)							\
-{									\
-	u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];			\
-	ss[0] ^= __g;							\
-	__g = ff(__g);							\
-	k[8*(i)+ 8] = __g ^= k[8*(i)];					\
-	ss[1] ^= ss[0];							\
-	k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];				\
-	ss[2] ^= ss[1];							\
-	k[8*(i)+10] = __g ^= k[8*(i)+ 2];				\
-	ss[3] ^= ss[2];							\
-	k[8*(i)+11] = __g ^= k[8*(i)+ 3];				\
-	__g = ls_box(ss[3],0);						\
-	ss[4] ^= __g;							\
-	__g = ff(__g);							\
-	k[8*(i)+12] = __g ^= k[8*(i)+ 4];				\
-	ss[5] ^= ss[4];							\
-	k[8*(i)+13] = __g ^= k[8*(i)+ 5];				\
-	ss[6] ^= ss[5];							\
-	k[8*(i)+14] = __g ^= k[8*(i)+ 6];				\
-	ss[7] ^= ss[6];							\
-	k[8*(i)+15] = __g ^= k[8*(i)+ 7];				\
-}
-
-#define kdl8(k,i)							\
-{									\
-	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];				\
-	k[8*(i)+ 8] = ss[0];						\
-	ss[1] ^= ss[0];							\
-	k[8*(i)+ 9] = ss[1];						\
-	ss[2] ^= ss[1];							\
-	k[8*(i)+10] = ss[2];						\
-	ss[3] ^= ss[2];							\
-	k[8*(i)+11] = ss[3];						\
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len)
-{
-	int i;
-	u32 ss[8];
-	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const __le32 *key = (const __le32 *)in_key;
-	u32 *flags = &tfm->crt_flags;
-
-	/* encryption schedule */
-	
-	ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
-	ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
-	ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
-	ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
-
-	switch(key_len) {
-	case 16:
-		for (i = 0; i < 9; i++)
-			ke4(ctx->ekey, i);
-		kel4(ctx->ekey, 9);
-		ctx->rounds = 10;
-		break;
-		
-	case 24:
-		ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-		ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-		for (i = 0; i < 7; i++)
-			ke6(ctx->ekey, i);
-		kel6(ctx->ekey, 7); 
-		ctx->rounds = 12;
-		break;
-
-	case 32:
-		ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-		ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-		ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
-		ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
-		for (i = 0; i < 6; i++)
-			ke8(ctx->ekey, i);
-		kel8(ctx->ekey, 6);
-		ctx->rounds = 14;
-		break;
-
-	default:
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	
-	/* decryption schedule */
-	
-	ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
-	ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
-	ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
-	ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
-
-	switch (key_len) {
-	case 16:
-		kdf4(ctx->dkey, 0);
-		for (i = 1; i < 9; i++)
-			kd4(ctx->dkey, i);
-		kdl4(ctx->dkey, 9);
-		break;
-		
-	case 24:
-		ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-		ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-		kdf6(ctx->dkey, 0);
-		for (i = 1; i < 7; i++)
-			kd6(ctx->dkey, i);
-		kdl6(ctx->dkey, 7);
-		break;
-
-	case 32:
-		ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-		ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-		ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
-		ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
-		kdf8(ctx->dkey, 0);
-		for (i = 1; i < 6; i++)
-			kd8(ctx->dkey, i);
-		kdl8(ctx->dkey, 6);
-		break;
-	}
-	return 0;
-}
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-	.cra_name		=	"aes",
-	.cra_driver_name	=	"aes-i586",
-	.cra_priority		=	200,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct aes_ctx),
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
-	.cra_u			=	{
-		.cipher = {
-			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
-			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
-			.cia_setkey	   	= 	aes_set_key,
-			.cia_encrypt	 	=	aes_encrypt,
-			.cia_decrypt	  	=	aes_decrypt
-		}
-	}
-};
-
-static int __init aes_init(void)
-{
-	gen_tabs();
-	return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-	crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
-MODULE_ALIAS("aes");
--- a/arch/x86/crypto/aes_64.c
+++ b/arch/x86/crypto/aes_64.c
-/*
- * Cryptographic API.
- *
- * AES Cipher Algorithm.
- *
- * Based on Brian Gladman's code.
- *
- * Linux developers:
- *  Alexander Kjeldaas <astor@fast.no>
- *  Herbert Valerio Riedel <hvr@hvrlab.org>
- *  Kyle McMartin <kyle@debian.org>
- *  Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
- *  Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * ---------------------------------------------------------------------------
- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- * ---------------------------------------------------------------------------
- */
-
-/* Some changes from the Gladman version:
-    s/RIJNDAEL(e_key)/E_KEY/g
-    s/RIJNDAEL(d_key)/D_KEY/g
-*/
-
-#include <asm/byteorder.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-#define AES_MIN_KEY_SIZE	16
-#define AES_MAX_KEY_SIZE	32
-
-#define AES_BLOCK_SIZE		16
-
-/*
- * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
- */
-static inline u8 byte(const u32 x, const unsigned n)
-{
-	return x >> (n << 3);
-}
-
-struct aes_ctx
-{
-	u32 key_length;
-	u32 buf[120];
-};
-
-#define E_KEY (&ctx->buf[0])
-#define D_KEY (&ctx->buf[60])
-
-static u8 pow_tab[256] __initdata;
-static u8 log_tab[256] __initdata;
-static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
-static u32 rco_tab[10];
-u32 aes_ft_tab[4][256];
-u32 aes_it_tab[4][256];
-
-u32 aes_fl_tab[4][256];
-u32 aes_il_tab[4][256];
-
-static inline u8 f_mult(u8 a, u8 b)
-{
-	u8 aa = log_tab[a], cc = aa + log_tab[b];
-
-	return pow_tab[cc + (cc < aa ? 1 : 0)];
-}
-
-#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
-
-#define ls_box(x)				\
-	(aes_fl_tab[0][byte(x, 0)] ^		\
-	 aes_fl_tab[1][byte(x, 1)] ^		\
-	 aes_fl_tab[2][byte(x, 2)] ^		\
-	 aes_fl_tab[3][byte(x, 3)])
-
-static void __init gen_tabs(void)
-{
-	u32 i, t;
-	u8 p, q;
-
-	/* log and power tables for GF(2**8) finite field with
-	   0x011b as modular polynomial - the simplest primitive
-	   root is 0x03, used here to generate the tables */
-
-	for (i = 0, p = 1; i < 256; ++i) {
-		pow_tab[i] = (u8)p;
-		log_tab[p] = (u8)i;
-
-		p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
-	}
-
-	log_tab[1] = 0;
-
-	for (i = 0, p = 1; i < 10; ++i) {
-		rco_tab[i] = p;
-
-		p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
-	}
-
-	for (i = 0; i < 256; ++i) {
-		p = (i ? pow_tab[255 - log_tab[i]] : 0);
-		q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
-		p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
-		sbx_tab[i] = p;
-		isb_tab[p] = (u8)i;
-	}
-
-	for (i = 0; i < 256; ++i) {
-		p = sbx_tab[i];
-
-		t = p;
-		aes_fl_tab[0][i] = t;
-		aes_fl_tab[1][i] = rol32(t, 8);
-		aes_fl_tab[2][i] = rol32(t, 16);
-		aes_fl_tab[3][i] = rol32(t, 24);
-
-		t = ((u32)ff_mult(2, p)) |
-		    ((u32)p << 8) |
-		    ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
-
-		aes_ft_tab[0][i] = t;
-		aes_ft_tab[1][i] = rol32(t, 8);
-		aes_ft_tab[2][i] = rol32(t, 16);
-		aes_ft_tab[3][i] = rol32(t, 24);
-
-		p = isb_tab[i];
-
-		t = p;
-		aes_il_tab[0][i] = t;
-		aes_il_tab[1][i] = rol32(t, 8);
-		aes_il_tab[2][i] = rol32(t, 16);
-		aes_il_tab[3][i] = rol32(t, 24);
-
-		t = ((u32)ff_mult(14, p)) |
-		    ((u32)ff_mult(9, p) << 8) |
-		    ((u32)ff_mult(13, p) << 16) |
-		    ((u32)ff_mult(11, p) << 24);
-
-		aes_it_tab[0][i] = t;
-		aes_it_tab[1][i] = rol32(t, 8);
-		aes_it_tab[2][i] = rol32(t, 16);
-		aes_it_tab[3][i] = rol32(t, 24);
-	}
-}
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x)			\
-	u    = star_x(x);		\
-	v    = star_x(u);		\
-	w    = star_x(v);		\
-	t    = w ^ (x);			\
-	(y)  = u ^ v ^ w;		\
-	(y) ^= ror32(u ^ t,  8) ^	\
-	       ror32(v ^ t, 16) ^	\
-	       ror32(t, 24)
-
-/* initialise the key schedule from the user supplied key */
-
-#define loop4(i)					\
-{							\
-	t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];	\
-	t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;	\
-	t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;	\
-	t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;	\
-	t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;	\
-}
-
-#define loop6(i)					\
-{							\
-	t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];	\
-	t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;	\
-	t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;	\
-	t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;	\
-	t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;	\
-	t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;	\
-	t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;	\
-}
-
-#define loop8(i)					\
-{							\
-	t = ror32(t,  8); ; t = ls_box(t) ^ rco_tab[i];	\
-	t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;	\
-	t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;	\
-	t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;	\
-	t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;	\
-	t  = E_KEY[8 * i + 4] ^ ls_box(t);		\
-	E_KEY[8 * i + 12] = t;				\
-	t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;	\
-	t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;	\
-	t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;	\
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len)
-{
-	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const __le32 *key = (const __le32 *)in_key;
-	u32 *flags = &tfm->crt_flags;
-	u32 i, j, t, u, v, w;
-
-	if (key_len % 8) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
-	ctx->key_length = key_len;
-
-	D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]);
-	D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]);
-	D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]);
-	D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]);
-
-	switch (key_len) {
-	case 16:
-		t = E_KEY[3];
-		for (i = 0; i < 10; ++i)
-			loop4(i);
-		break;
-
-	case 24:
-		E_KEY[4] = le32_to_cpu(key[4]);
-		t = E_KEY[5] = le32_to_cpu(key[5]);
-		for (i = 0; i < 8; ++i)
-			loop6 (i);
-		break;
-
-	case 32:
-		E_KEY[4] = le32_to_cpu(key[4]);
-		E_KEY[5] = le32_to_cpu(key[5]);
-		E_KEY[6] = le32_to_cpu(key[6]);
-		t = E_KEY[7] = le32_to_cpu(key[7]);
-		for (i = 0; i < 7; ++i)
-			loop8(i);
-		break;
-	}
-
-	D_KEY[0] = E_KEY[key_len + 24];
-	D_KEY[1] = E_KEY[key_len + 25];
-	D_KEY[2] = E_KEY[key_len + 26];
-	D_KEY[3] = E_KEY[key_len + 27];
-
-	for (i = 4; i < key_len + 24; ++i) {
-		j = key_len + 24 - (i & ~3) + (i & 3);
-		imix_col(D_KEY[j], E_KEY[i]);
-	}
-
-	return 0;
-}
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-	.cra_name		=	"aes",
-	.cra_driver_name	=	"aes-x86_64",
-	.cra_priority		=	200,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct aes_ctx),
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
-	.cra_u			=	{
-		.cipher = {
-			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
-			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
-			.cia_setkey	   	= 	aes_set_key,
-			.cia_encrypt	 	=	aes_encrypt,
-			.cia_decrypt	  	=	aes_decrypt
-		}
-	}
-};
-
-static int __init aes_init(void)
-{
-	gen_tabs();
-	return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-	crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ *
+ */
+
+#include <crypto/aes.h>
+
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(tfm, dst, src);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-asm",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey		= crypto_aes_set_key,
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
+		}
+	}
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-asm");
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+	mov	%rsp,%r11
+	and	$31,%r11
+	add	$256,%r11
+	sub	%r11,%rsp
+	# x = arg1
+	mov	%rdi,%r8
+	# m = arg2
+	mov	%rsi,%rsi
+	# out = arg3
+	mov	%rdx,%rdi
+	# bytes = arg4
+	mov	%rcx,%rdx
+	#               unsigned>? bytes - 0
+	cmp	$0,%rdx
+	# comment:fp stack unchanged by jump
+	# goto done if !unsigned>
+	jbe	._done
+	# comment:fp stack unchanged by fallthrough
+# start:
+._start:
+	# r11_stack = r11
+	movq	%r11,0(%rsp)
+	# r12_stack = r12
+	movq	%r12,8(%rsp)
+	# r13_stack = r13
+	movq	%r13,16(%rsp)
+	# r14_stack = r14
+	movq	%r14,24(%rsp)
+	# r15_stack = r15
+	movq	%r15,32(%rsp)
+	# rbx_stack = rbx
+	movq	%rbx,40(%rsp)
+	# rbp_stack = rbp
+	movq	%rbp,48(%rsp)
+	# in0 = *(uint64 *) (x + 0)
+	movq	0(%r8),%rcx
+	# in2 = *(uint64 *) (x + 8)
+	movq	8(%r8),%r9
+	# in4 = *(uint64 *) (x + 16)
+	movq	16(%r8),%rax
+	# in6 = *(uint64 *) (x + 24)
+	movq	24(%r8),%r10
+	# in8 = *(uint64 *) (x + 32)
+	movq	32(%r8),%r11
+	# in10 = *(uint64 *) (x + 40)
+	movq	40(%r8),%r12
+	# in12 = *(uint64 *) (x + 48)
+	movq	48(%r8),%r13
+	# in14 = *(uint64 *) (x + 56)
+	movq	56(%r8),%r14
+	# j0 = in0
+	movq	%rcx,56(%rsp)
+	# j2 = in2
+	movq	%r9,64(%rsp)
+	# j4 = in4
+	movq	%rax,72(%rsp)
+	# j6 = in6
+	movq	%r10,80(%rsp)
+	# j8 = in8
+	movq	%r11,88(%rsp)
+	# j10 = in10
+	movq	%r12,96(%rsp)
+	# j12 = in12
+	movq	%r13,104(%rsp)
+	# j14 = in14
+	movq	%r14,112(%rsp)
+	# x_backup = x
+	movq	%r8,120(%rsp)
+# bytesatleast1:
+._bytesatleast1:
+	#                   unsigned<? bytes - 64
+	cmp	$64,%rdx
+	# comment:fp stack unchanged by jump
+	#   goto nocopy if !unsigned<
+	jae	._nocopy
+	#     ctarget = out
+	movq	%rdi,128(%rsp)
+	#     out = &tmp
+	leaq	192(%rsp),%rdi
+	#     i = bytes
+	mov	%rdx,%rcx
+	#     while (i) { *out++ = *m++; --i }
+	rep	movsb
+	#     out = &tmp
+	leaq	192(%rsp),%rdi
+	#     m = &tmp
+	leaq	192(%rsp),%rsi
+	# comment:fp stack unchanged by fallthrough
+#   nocopy:
+._nocopy:
+	#   out_backup = out
+	movq	%rdi,136(%rsp)
+	#   m_backup = m
+	movq	%rsi,144(%rsp)
+	#   bytes_backup = bytes
+	movq	%rdx,152(%rsp)
+	#   x1 = j0
+	movq	56(%rsp),%rdi
+	#   x0 = x1
+	mov	%rdi,%rdx
+	#   (uint64) x1 >>= 32
+	shr	$32,%rdi
+	#   		x3 = j2
+	movq	64(%rsp),%rsi
+	#   		x2 = x3
+	mov	%rsi,%rcx
+	#   		(uint64) x3 >>= 32
+	shr	$32,%rsi
+	#   x5 = j4
+	movq	72(%rsp),%r8
+	#   x4 = x5
+	mov	%r8,%r9
+	#   (uint64) x5 >>= 32
+	shr	$32,%r8
+	#   x5_stack = x5
+	movq	%r8,160(%rsp)
+	#   		x7 = j6
+	movq	80(%rsp),%r8
+	#   		x6 = x7
+	mov	%r8,%rax
+	#   		(uint64) x7 >>= 32
+	shr	$32,%r8
+	#   x9 = j8
+	movq	88(%rsp),%r10
+	#   x8 = x9
+	mov	%r10,%r11
+	#   (uint64) x9 >>= 32
+	shr	$32,%r10
+	#   		x11 = j10
+	movq	96(%rsp),%r12
+	#   		x10 = x11
+	mov	%r12,%r13
+	#   		x10_stack = x10
+	movq	%r13,168(%rsp)
+	#   		(uint64) x11 >>= 32
+	shr	$32,%r12
+	#   x13 = j12
+	movq	104(%rsp),%r13
+	#   x12 = x13
+	mov	%r13,%r14
+	#   (uint64) x13 >>= 32
+	shr	$32,%r13
+	#   		x15 = j14
+	movq	112(%rsp),%r15
+	#   		x14 = x15
+	mov	%r15,%rbx
+	#   		(uint64) x15 >>= 32
+	shr	$32,%r15
+	#   		x15_stack = x15
+	movq	%r15,176(%rsp)
+	#   i = 20
+	mov	$20,%r15
+#   mainloop:
+._mainloop:
+	#   i_backup = i
+	movq	%r15,184(%rsp)
+	# 		x5 = x5_stack
+	movq	160(%rsp),%r15
+	# a = x12 + x0
+	lea	(%r14,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol	$7,%ebp
+	# x4 ^= a
+	xor	%rbp,%r9
+	# 		b = x1 + x5
+	lea	(%rdi,%r15),%rbp
+	# 		(uint32) b <<<= 7
+	rol	$7,%ebp
+	# 		x9 ^= b
+	xor	%rbp,%r10
+	# a = x0 + x4
+	lea	(%rdx,%r9),%rbp
+	# (uint32) a <<<= 9
+	rol	$9,%ebp
+	# x8 ^= a
+	xor	%rbp,%r11
+	# 		b = x5 + x9
+	lea	(%r15,%r10),%rbp
+	# 		(uint32) b <<<= 9
+	rol	$9,%ebp
+	# 		x13 ^= b
+	xor	%rbp,%r13
+	# a = x4 + x8
+	lea	(%r9,%r11),%rbp
+	# (uint32) a <<<= 13
+	rol	$13,%ebp
+	# x12 ^= a
+	xor	%rbp,%r14
+	# 		b = x9 + x13
+	lea	(%r10,%r13),%rbp
+	# 		(uint32) b <<<= 13
+	rol	$13,%ebp
+	# 		x1 ^= b
+	xor	%rbp,%rdi
+	# a = x8 + x12
+	lea	(%r11,%r14),%rbp
+	# (uint32) a <<<= 18
+	rol	$18,%ebp
+	# x0 ^= a
+	xor	%rbp,%rdx
+	# 		b = x13 + x1
+	lea	(%r13,%rdi),%rbp
+	# 		(uint32) b <<<= 18
+	rol	$18,%ebp
+	# 		x5 ^= b
+	xor	%rbp,%r15
+	# 				x10 = x10_stack
+	movq	168(%rsp),%rbp
+	# 		x5_stack = x5
+	movq	%r15,160(%rsp)
+	# 				c = x6 + x10
+	lea	(%rax,%rbp),%r15
+	# 				(uint32) c <<<= 7
+	rol	$7,%r15d
+	# 				x14 ^= c
+	xor	%r15,%rbx
+	# 				c = x10 + x14
+	lea	(%rbp,%rbx),%r15
+	# 				(uint32) c <<<= 9
+	rol	$9,%r15d
+	# 				x2 ^= c
+	xor	%r15,%rcx
+	# 				c = x14 + x2
+	lea	(%rbx,%rcx),%r15
+	# 				(uint32) c <<<= 13
+	rol	$13,%r15d
+	# 				x6 ^= c
+	xor	%r15,%rax
+	# 				c = x2 + x6
+	lea	(%rcx,%rax),%r15
+	# 				(uint32) c <<<= 18
+	rol	$18,%r15d
+	# 				x10 ^= c
+	xor	%r15,%rbp
+	# 						x15 = x15_stack
+	movq	176(%rsp),%r15
+	# 				x10_stack = x10
+	movq	%rbp,168(%rsp)
+	# 						d = x11 + x15
+	lea	(%r12,%r15),%rbp
+	# 						(uint32) d <<<= 7
+	rol	$7,%ebp
+	# 						x3 ^= d
+	xor	%rbp,%rsi
+	# 						d = x15 + x3
+	lea	(%r15,%rsi),%rbp
+	# 						(uint32) d <<<= 9
+	rol	$9,%ebp
+	# 						x7 ^= d
+	xor	%rbp,%r8
+	# 						d = x3 + x7
+	lea	(%rsi,%r8),%rbp
+	# 						(uint32) d <<<= 13
+	rol	$13,%ebp
+	# 						x11 ^= d
+	xor	%rbp,%r12
+	# 						d = x7 + x11
+	lea	(%r8,%r12),%rbp
+	# 						(uint32) d <<<= 18
+	rol	$18,%ebp
+	# 						x15 ^= d
+	xor	%rbp,%r15
+	# 						x15_stack = x15
+	movq	%r15,176(%rsp)
+	# 		x5 = x5_stack
+	movq	160(%rsp),%r15
+	# a = x3 + x0
+	lea	(%rsi,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol	$7,%ebp
+	# x1 ^= a
+	xor	%rbp,%rdi
+	# 		b = x4 + x5
+	lea	(%r9,%r15),%rbp
+	# 		(uint32) b <<<= 7
+	rol	$7,%ebp
+	# 		x6 ^= b
+	xor	%rbp,%rax
+	# a = x0 + x1
+	lea	(%rdx,%rdi),%rbp
+	# (uint32) a <<<= 9
+	rol	$9,%ebp
+	# x2 ^= a
+	xor	%rbp,%rcx
+	# 		b = x5 + x6
+	lea	(%r15,%rax),%rbp
+	# 		(uint32) b <<<= 9
+	rol	$9,%ebp
+	# 		x7 ^= b
+	xor	%rbp,%r8
+	# a = x1 + x2
+	lea	(%rdi,%rcx),%rbp
+	# (uint32) a <<<= 13
+	rol	$13,%ebp
+	# x3 ^= a
+	xor	%rbp,%rsi
+	# 		b = x6 + x7
+	lea	(%rax,%r8),%rbp
+	# 		(uint32) b <<<= 13
+	rol	$13,%ebp
+	# 		x4 ^= b
+	xor	%rbp,%r9
+	# a = x2 + x3
+	lea	(%rcx,%rsi),%rbp
+	# (uint32) a <<<= 18
+	rol	$18,%ebp
+	# x0 ^= a
+	xor	%rbp,%rdx
+	# 		b = x7 + x4
+	lea	(%r8,%r9),%rbp
+	# 		(uint32) b <<<= 18
+	rol	$18,%ebp
+	# 		x5 ^= b
+	xor	%rbp,%r15
+	# 				x10 = x10_stack
+	movq	168(%rsp),%rbp
+	# 		x5_stack = x5
+	movq	%r15,160(%rsp)
+	# 				c = x9 + x10
+	lea	(%r10,%rbp),%r15
+	# 				(uint32) c <<<= 7
+	rol	$7,%r15d
+	# 				x11 ^= c
+	xor	%r15,%r12
+	# 				c = x10 + x11
+	lea	(%rbp,%r12),%r15
+	# 				(uint32) c <<<= 9
+	rol	$9,%r15d
+	# 				x8 ^= c
+	xor	%r15,%r11
+	# 				c = x11 + x8
+	lea	(%r12,%r11),%r15
+	# 				(uint32) c <<<= 13
+	rol	$13,%r15d
+	# 				x9 ^= c
+	xor	%r15,%r10
+	# 				c = x8 + x9
+	lea	(%r11,%r10),%r15
+	# 				(uint32) c <<<= 18
+	rol	$18,%r15d
+	# 				x10 ^= c
+	xor	%r15,%rbp
+	# 						x15 = x15_stack
+	movq	176(%rsp),%r15
+	# 				x10_stack = x10
+	movq	%rbp,168(%rsp)
+	# 						d = x14 + x15
+	lea	(%rbx,%r15),%rbp
+	# 						(uint32) d <<<= 7
+	rol	$7,%ebp
+	# 						x12 ^= d
+	xor	%rbp,%r14
+	# 						d = x15 + x12
+	lea	(%r15,%r14),%rbp
+	# 						(uint32) d <<<= 9
+	rol	$9,%ebp
+	# 						x13 ^= d
+	xor	%rbp,%r13
+	# 						d = x12 + x13
+	lea	(%r14,%r13),%rbp
+	# 						(uint32) d <<<= 13
+	rol	$13,%ebp
+	# 						x14 ^= d
+	xor	%rbp,%rbx
+	# 						d = x13 + x14
+	lea	(%r13,%rbx),%rbp
+	# 						(uint32) d <<<= 18
+	rol	$18,%ebp
+	# 						x15 ^= d
+	xor	%rbp,%r15
+	# 						x15_stack = x15
+	movq	%r15,176(%rsp)
+	# 		x5 = x5_stack
+	movq	160(%rsp),%r15
+	# a = x12 + x0
+	lea	(%r14,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol	$7,%ebp
+	# x4 ^= a
+	xor	%rbp,%r9
+	# 		b = x1 + x5
+	lea	(%rdi,%r15),%rbp
+	# 		(uint32) b <<<= 7
+	rol	$7,%ebp
+	# 		x9 ^= b
+	xor	%rbp,%r10
+	# a = x0 + x4
+	lea	(%rdx,%r9),%rbp
+	# (uint32) a <<<= 9
+	rol	$9,%ebp
+	# x8 ^= a
+	xor	%rbp,%r11
+	# 		b = x5 + x9
+	lea	(%r15,%r10),%rbp
+	# 		(uint32) b <<<= 9
+	rol	$9,%ebp
+	# 		x13 ^= b
+	xor	%rbp,%r13
+	# a = x4 + x8
+	lea	(%r9,%r11),%rbp
+	# (uint32) a <<<= 13
+	rol	$13,%ebp
+	# x12 ^= a
+	xor	%rbp,%r14
+	# 		b = x9 + x13
+	lea	(%r10,%r13),%rbp
+	# 		(uint32) b <<<= 13
+	rol	$13,%ebp
+	# 		x1 ^= b
+	xor	%rbp,%rdi
+	# a = x8 + x12
+	lea	(%r11,%r14),%rbp
+	# (uint32) a <<<= 18
+	rol	$18,%ebp
+	# x0 ^= a
+	xor	%rbp,%rdx
+	# 		b = x13 + x1
+	lea	(%r13,%rdi),%rbp
+	# 		(uint32) b <<<= 18
+	rol	$18,%ebp
+	# 		x5 ^= b
+	xor	%rbp,%r15
+	# 				x10 = x10_stack
+	movq	168(%rsp),%rbp
+	# 		x5_stack = x5
+	movq	%r15,160(%rsp)
+	# 				c = x6 + x10
+	lea	(%rax,%rbp),%r15
+	# 				(uint32) c <<<= 7
+	rol	$7,%r15d
+	# 				x14 ^= c
+	xor	%r15,%rbx
+	# 				c = x10 + x14
+	lea	(%rbp,%rbx),%r15
+	# 				(uint32) c <<<= 9
+	rol	$9,%r15d
+	# 				x2 ^= c
+	xor	%r15,%rcx
+	# 				c = x14 + x2
+	lea	(%rbx,%rcx),%r15
+	# 				(uint32) c <<<= 13
+	rol	$13,%r15d
+	# 				x6 ^= c
+	xor	%r15,%rax
+	# 				c = x2 + x6
+	lea	(%rcx,%rax),%r15
+	# 				(uint32) c <<<= 18
+	rol	$18,%r15d
+	# 				x10 ^= c
+	xor	%r15,%rbp
+	# 						x15 = x15_stack
+	movq	176(%rsp),%r15
+	# 				x10_stack = x10
+	movq	%rbp,168(%rsp)
+	# 						d = x11 + x15
+	lea	(%r12,%r15),%rbp
+	# 						(uint32) d <<<= 7
+	rol	$7,%ebp
+	# 						x3 ^= d
+	xor	%rbp,%rsi
+	# 						d = x15 + x3
+	lea	(%r15,%rsi),%rbp
+	# 						(uint32) d <<<= 9
+	rol	$9,%ebp
+	# 						x7 ^= d
+	xor	%rbp,%r8
+	# 						d = x3 + x7
+	lea	(%rsi,%r8),%rbp
+	# 						(uint32) d <<<= 13
+	rol	$13,%ebp
+	# 						x11 ^= d
+	xor	%rbp,%r12
+	# 						d = x7 + x11
+	lea	(%r8,%r12),%rbp
+	# 						(uint32) d <<<= 18
+	rol	$18,%ebp
+	# 						x15 ^= d
+	xor	%rbp,%r15
+	# 						x15_stack = x15
+	movq	%r15,176(%rsp)
+	# 		x5 = x5_stack
+	movq	160(%rsp),%r15
+	# a = x3 + x0
+	lea	(%rsi,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol	$7,%ebp
+	# x1 ^= a
+	xor	%rbp,%rdi
+	# 		b = x4 + x5
+	lea	(%r9,%r15),%rbp
+	# 		(uint32) b <<<= 7
+	rol	$7,%ebp
+	# 		x6 ^= b
+	xor	%rbp,%rax
+	# a = x0 + x1
+	lea	(%rdx,%rdi),%rbp
+	# (uint32) a <<<= 9
+	rol	$9,%ebp
+	# x2 ^= a
+	xor	%rbp,%rcx
+	# 		b = x5 + x6
+	lea	(%r15,%rax),%rbp
+	# 		(uint32) b <<<= 9
+	rol	$9,%ebp
+	# 		x7 ^= b
+	xor	%rbp,%r8
+	# a = x1 + x2
+	lea	(%rdi,%rcx),%rbp
+	# (uint32) a <<<= 13
+	rol	$13,%ebp
+	# x3 ^= a
+	xor	%rbp,%rsi
+	# 		b = x6 + x7
+	lea	(%rax,%r8),%rbp
+	# 		(uint32) b <<<= 13
+	rol	$13,%ebp
+	# 		x4 ^= b
+	xor	%rbp,%r9
+	# a = x2 + x3
+	lea	(%rcx,%rsi),%rbp
+	# (uint32) a <<<= 18
+	rol	$18,%ebp
+	# x0 ^= a
+	xor	%rbp,%rdx
+	# 		b = x7 + x4
+	lea	(%r8,%r9),%rbp
+	# 		(uint32) b <<<= 18
+	rol	$18,%ebp
+	# 		x5 ^= b
+	xor	%rbp,%r15
+	# 				x10 = x10_stack
+	movq	168(%rsp),%rbp
+	# 		x5_stack = x5
+	movq	%r15,160(%rsp)
+	# 				c = x9 + x10
+	lea	(%r10,%rbp),%r15
+	# 				(uint32) c <<<= 7
+	rol	$7,%r15d
+	# 				x11 ^= c
+	xor	%r15,%r12
+	# 				c = x10 + x11
+	lea	(%rbp,%r12),%r15
+	# 				(uint32) c <<<= 9
+	rol	$9,%r15d
+	# 				x8 ^= c
+	xor	%r15,%r11
+	# 				c = x11 + x8
+	lea	(%r12,%r11),%r15
+	# 				(uint32) c <<<= 13
+	rol	$13,%r15d
+	# 				x9 ^= c
+	xor	%r15,%r10
+	# 				c = x8 + x9
+	lea	(%r11,%r10),%r15
+	# 				(uint32) c <<<= 18
+	rol	$18,%r15d
+	# 				x10 ^= c
+	xor	%r15,%rbp
+	# 						x15 = x15_stack
+	movq	176(%rsp),%r15
+	# 				x10_stack = x10
+	movq	%rbp,168(%rsp)
+	# 						d = x14 + x15
+	lea	(%rbx,%r15),%rbp
+	# 						(uint32) d <<<= 7
+	rol	$7,%ebp
+	# 						x12 ^= d
+	xor	%rbp,%r14
+	# 						d = x15 + x12
+	lea	(%r15,%r14),%rbp
+	# 						(uint32) d <<<= 9
+	rol	$9,%ebp
+	# 						x13 ^= d
+	xor	%rbp,%r13
+	# 						d = x12 + x13
+	lea	(%r14,%r13),%rbp
+	# 						(uint32) d <<<= 13
+	rol	$13,%ebp
+	# 						x14 ^= d
+	xor	%rbp,%rbx
+	# 						d = x13 + x14
+	lea	(%r13,%rbx),%rbp
+	# 						(uint32) d <<<= 18
+	rol	$18,%ebp
+	# 						x15 ^= d
+	xor	%rbp,%r15
+	# 						x15_stack = x15
+	movq	%r15,176(%rsp)
+	#   i = i_backup
+	movq	184(%rsp),%r15
+	#                  unsigned>? i -= 4
+	sub	$4,%r15
+	# comment:fp stack unchanged by jump
+	# goto mainloop if unsigned>
+	ja	._mainloop
+	#   (uint32) x2 += j2
+	addl	64(%rsp),%ecx
+	#   x3 <<= 32
+	shl	$32,%rsi
+	#   x3 += j2
+	addq	64(%rsp),%rsi
+	#   (uint64) x3 >>= 32
+	shr	$32,%rsi
+	#   x3 <<= 32
+	shl	$32,%rsi
+	#   x2 += x3
+	add	%rsi,%rcx
+	#   (uint32) x6 += j6
+	addl	80(%rsp),%eax
+	#   x7 <<= 32
+	shl	$32,%r8
+	#   x7 += j6
+	addq	80(%rsp),%r8
+	#   (uint64) x7 >>= 32
+	shr	$32,%r8
+	#   x7 <<= 32
+	shl	$32,%r8
+	#   x6 += x7
+	add	%r8,%rax
+	#   (uint32) x8 += j8
+	addl	88(%rsp),%r11d
+	#   x9 <<= 32
+	shl	$32,%r10
+	#   x9 += j8
+	addq	88(%rsp),%r10
+	#   (uint64) x9 >>= 32
+	shr	$32,%r10
+	#   x9 <<= 32
+	shl	$32,%r10
+	#   x8 += x9
+	add	%r10,%r11
+	#   (uint32) x12 += j12
+	addl	104(%rsp),%r14d
+	#   x13 <<= 32
+	shl	$32,%r13
+	#   x13 += j12
+	addq	104(%rsp),%r13
+	#   (uint64) x13 >>= 32
+	shr	$32,%r13
+	#   x13 <<= 32
+	shl	$32,%r13
+	#   x12 += x13
+	add	%r13,%r14
+	#   (uint32) x0 += j0
+	addl	56(%rsp),%edx
+	#   x1 <<= 32
+	shl	$32,%rdi
+	#   x1 += j0
+	addq	56(%rsp),%rdi
+	#   (uint64) x1 >>= 32
+	shr	$32,%rdi
+	#   x1 <<= 32
+	shl	$32,%rdi
+	#   x0 += x1
+	add	%rdi,%rdx
+	#   x5 = x5_stack
+	movq	160(%rsp),%rdi
+	#   (uint32) x4 += j4
+	addl	72(%rsp),%r9d
+	#   x5 <<= 32
+	shl	$32,%rdi
+	#   x5 += j4
+	addq	72(%rsp),%rdi
+	#   (uint64) x5 >>= 32
+	shr	$32,%rdi
+	#   x5 <<= 32
+	shl	$32,%rdi
+	#   x4 += x5
+	add	%rdi,%r9
+	#   x10 = x10_stack
+	movq	168(%rsp),%r8
+	#   (uint32) x10 += j10
+	addl	96(%rsp),%r8d
+	#   x11 <<= 32
+	shl	$32,%r12
+	#   x11 += j10
+	addq	96(%rsp),%r12
+	#   (uint64) x11 >>= 32
+	shr	$32,%r12
+	#   x11 <<= 32
+	shl	$32,%r12
+	#   x10 += x11
+	add	%r12,%r8
+	#   x15 = x15_stack
+	movq	176(%rsp),%rdi
+	#   (uint32) x14 += j14
+	addl	112(%rsp),%ebx
+	#   x15 <<= 32
+	shl	$32,%rdi
+	#   x15 += j14
+	addq	112(%rsp),%rdi
+	#   (uint64) x15 >>= 32
+	shr	$32,%rdi
+	#   x15 <<= 32
+	shl	$32,%rdi
+	#   x14 += x15
+	add	%rdi,%rbx
+	#   out = out_backup
+	movq	136(%rsp),%rdi
+	#   m = m_backup
+	movq	144(%rsp),%rsi
+	#   x0 ^= *(uint64 *) (m + 0)
+	xorq	0(%rsi),%rdx
+	#   *(uint64 *) (out + 0) = x0
+	movq	%rdx,0(%rdi)
+	#   x2 ^= *(uint64 *) (m + 8)
+	xorq	8(%rsi),%rcx
+	#   *(uint64 *) (out + 8) = x2
+	movq	%rcx,8(%rdi)
+	#   x4 ^= *(uint64 *) (m + 16)
+	xorq	16(%rsi),%r9
+	#   *(uint64 *) (out + 16) = x4
+	movq	%r9,16(%rdi)
+	#   x6 ^= *(uint64 *) (m + 24)
+	xorq	24(%rsi),%rax
+	#   *(uint64 *) (out + 24) = x6
+	movq	%rax,24(%rdi)
+	#   x8 ^= *(uint64 *) (m + 32)
+	xorq	32(%rsi),%r11
+	#   *(uint64 *) (out + 32) = x8
+	movq	%r11,32(%rdi)
+	#   x10 ^= *(uint64 *) (m + 40)
+	xorq	40(%rsi),%r8
+	#   *(uint64 *) (out + 40) = x10
+	movq	%r8,40(%rdi)
+	#   x12 ^= *(uint64 *) (m + 48)
+	xorq	48(%rsi),%r14
+	#   *(uint64 *) (out + 48) = x12
+	movq	%r14,48(%rdi)
+	#   x14 ^= *(uint64 *) (m + 56)
+	xorq	56(%rsi),%rbx
+	#   *(uint64 *) (out + 56) = x14
+	movq	%rbx,56(%rdi)
+	#   bytes = bytes_backup
+	movq	152(%rsp),%rdx
+	#   in8 = j8
+	movq	88(%rsp),%rcx
+	#   in8 += 1
+	add	$1,%rcx
+	#   j8 = in8
+	movq	%rcx,88(%rsp)
+	#                          unsigned>? unsigned<? bytes - 64
+	cmp	$64,%rdx
+	# comment:fp stack unchanged by jump
+	#   goto bytesatleast65 if unsigned>
+	ja	._bytesatleast65
+	# comment:fp stack unchanged by jump
+	#     goto bytesatleast64 if !unsigned<
+	jae	._bytesatleast64
+	#       m = out
+	mov	%rdi,%rsi
+	#       out = ctarget
+	movq	128(%rsp),%rdi
+	#       i = bytes
+	mov	%rdx,%rcx
+	#       while (i) { *out++ = *m++; --i }
+	rep	movsb
+	# comment:fp stack unchanged by fallthrough
+#     bytesatleast64:
+._bytesatleast64:
+	#     x = x_backup
+	movq	120(%rsp),%rdi
+	#     in8 = j8
+	movq	88(%rsp),%rsi
+	#     *(uint64 *) (x + 32) = in8
+	movq	%rsi,32(%rdi)
+	#     r11 = r11_stack
+	movq	0(%rsp),%r11
+	#     r12 = r12_stack
+	movq	8(%rsp),%r12
+	#     r13 = r13_stack
+	movq	16(%rsp),%r13
+	#     r14 = r14_stack
+	movq	24(%rsp),%r14
+	#     r15 = r15_stack
+	movq	32(%rsp),%r15
+	#     rbx = rbx_stack
+	movq	40(%rsp),%rbx
+	#     rbp = rbp_stack
+	movq	48(%rsp),%rbp
+	# comment:fp stack unchanged by fallthrough
+#     done:
+._done:
+	#     leave
+	add	%r11,%rsp
+	mov	%rdi,%rax
+	mov	%rsi,%rdx
+	ret
+#   bytesatleast65:
+._bytesatleast65:
+	#   bytes -= 64
+	sub	$64,%rdx
+	#   out += 64
+	add	$64,%rdi
+	#   m += 64
+	add	$64,%rsi
+	# comment:fp stack unchanged by jump
+	# goto bytesatleast1
+	jmp	._bytesatleast1
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+	mov	%rsp,%r11
+	and	$31,%r11
+	add	$256,%r11
+	sub	%r11,%rsp
+	#   k = arg2
+	mov	%rsi,%rsi
+	#   kbits = arg3
+	mov	%rdx,%rdx
+	#   x = arg1
+	mov	%rdi,%rdi
+	#   in0 = *(uint64 *) (k + 0)
+	movq	0(%rsi),%r8
+	#   in2 = *(uint64 *) (k + 8)
+	movq	8(%rsi),%r9
+	#   *(uint64 *) (x + 4) = in0
+	movq	%r8,4(%rdi)
+	#   *(uint64 *) (x + 12) = in2
+	movq	%r9,12(%rdi)
+	#                    unsigned<? kbits - 256
+	cmp	$256,%rdx
+	# comment:fp stack unchanged by jump
+	#   goto kbits128 if unsigned<
+	jb	._kbits128
+#   kbits256:
+._kbits256:
+	#     in10 = *(uint64 *) (k + 16)
+	movq	16(%rsi),%rdx
+	#     in12 = *(uint64 *) (k + 24)
+	movq	24(%rsi),%rsi
+	#     *(uint64 *) (x + 44) = in10
+	movq	%rdx,44(%rdi)
+	#     *(uint64 *) (x + 52) = in12
+	movq	%rsi,52(%rdi)
+	#     in0 = 1634760805
+	mov	$1634760805,%rsi
+	#     in4 = 857760878
+	mov	$857760878,%rdx
+	#     in10 = 2036477234
+	mov	$2036477234,%rcx
+	#     in14 = 1797285236
+	mov	$1797285236,%r8
+	#     *(uint32 *) (x + 0) = in0
+	movl	%esi,0(%rdi)
+	#     *(uint32 *) (x + 20) = in4
+	movl	%edx,20(%rdi)
+	#     *(uint32 *) (x + 40) = in10
+	movl	%ecx,40(%rdi)
+	#     *(uint32 *) (x + 60) = in14
+	movl	%r8d,60(%rdi)
+	# comment:fp stack unchanged by jump
+	#   goto keysetupdone
+	jmp	._keysetupdone
+#   kbits128:
+._kbits128:
+	#     in10 = *(uint64 *) (k + 0)
+	movq	0(%rsi),%rdx
+	#     in12 = *(uint64 *) (k + 8)
+	movq	8(%rsi),%rsi
+	#     *(uint64 *) (x + 44) = in10
+	movq	%rdx,44(%rdi)
+	#     *(uint64 *) (x + 52) = in12
+	movq	%rsi,52(%rdi)
+	#     in0 = 1634760805
+	mov	$1634760805,%rsi
+	#     in4 = 824206446
+	mov	$824206446,%rdx
+	#     in10 = 2036477238
+	mov	$2036477238,%rcx
+	#     in14 = 1797285236
+	mov	$1797285236,%r8
+	#     *(uint32 *) (x + 0) = in0
+	movl	%esi,0(%rdi)
+	#     *(uint32 *) (x + 20) = in4
+	movl	%edx,20(%rdi)
+	#     *(uint32 *) (x + 40) = in10
+	movl	%ecx,40(%rdi)
+	#     *(uint32 *) (x + 60) = in14
+	movl	%r8d,60(%rdi)
+#   keysetupdone:
+._keysetupdone:
+	# leave
+	add	%r11,%rsp
+	mov	%rdi,%rax
+	mov	%rsi,%rdx
+	ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+	mov	%rsp,%r11
+	and	$31,%r11
+	add	$256,%r11
+	sub	%r11,%rsp
+	#   iv = arg2
+	mov	%rsi,%rsi
+	#   x = arg1
+	mov	%rdi,%rdi
+	#   in6 = *(uint64 *) (iv + 0)
+	movq	0(%rsi),%rsi
+	#   in8 = 0
+	mov	$0,%r8
+	#   *(uint64 *) (x + 24) = in6
+	movq	%rsi,24(%rdi)
+	#   *(uint64 *) (x + 32) = in8
+	movq	%r8,32(%rdi)
+	# leave
+	add	%r11,%rsp
+	mov	%rdi,%rax
+	mov	%rsi,%rdx
+	ret
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
+/*
+ * Glue code for optimized assembly version of  Salsa20.
+ *
+ * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
+ *
+ * The assembly codes are public domain assembly codes written by Daniel. J.
+ * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
+ * and to remove extraneous comments and functions that are not needed.
+ * - i586 version, renamed as salsa20-i586-asm_32.S
+ *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
+ * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
+ *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+
+#define SALSA20_IV_SIZE        8U
+#define SALSA20_MIN_KEY_SIZE  16U
+#define SALSA20_MAX_KEY_SIZE  32U
+
+// use the ECRYPT_* function names
+#define salsa20_keysetup        ECRYPT_keysetup
+#define salsa20_ivsetup         ECRYPT_ivsetup
+#define salsa20_encrypt_bytes   ECRYPT_encrypt_bytes
+
+struct salsa20_ctx
+{
+	u32 input[16];
+};
+
+asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
+				 u32 keysize, u32 ivsize);
+asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
+asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
+				      const u8 *src, u8 *dst, u32 bytes);
+
+static int setkey(struct crypto_tfm *tfm, const u8 *key,
+		  unsigned int keysize)
+{
+	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
+	salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
+	return 0;
+}
+
+static int encrypt(struct blkcipher_desc *desc,
+		   struct scatterlist *dst, struct scatterlist *src,
+		   unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, 64);
+
+	salsa20_ivsetup(ctx, walk.iv);
+
+	if (likely(walk.nbytes == nbytes))
+	{
+		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+				      walk.dst.virt.addr, nbytes);
+		return blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	while (walk.nbytes >= 64) {
+		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+				      walk.dst.virt.addr,
+				      walk.nbytes - (walk.nbytes % 64));
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
+	}
+
+	if (walk.nbytes) {
+		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+				      walk.dst.virt.addr, walk.nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+static struct crypto_alg alg = {
+	.cra_name           =   "salsa20",
+	.cra_driver_name    =   "salsa20-asm",
+	.cra_priority       =   200,
+	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_type           =   &crypto_blkcipher_type,
+	.cra_blocksize      =   1,
+	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
+	.cra_alignmask      =	3,
+	.cra_module         =   THIS_MODULE,
+	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
+	.cra_u              =   {
+		.blkcipher = {
+			.setkey         =   setkey,
+			.encrypt        =   encrypt,
+			.decrypt        =   encrypt,
+			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
+			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
+			.ivsize         =   SALSA20_IV_SIZE,
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
+MODULE_ALIAS("salsa20");
+MODULE_ALIAS("salsa20-asm");
--- a/arch/x86/crypto/twofish_64.c
+++ b/arch/x86/crypto/twofish_64.c
-/*
- * Glue Code for optimized x86_64 assembler version of TWOFISH
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	twofish_enc_blk(tfm, dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	twofish_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg alg = {
-	.cra_name		=	"twofish",
-	.cra_driver_name	=	"twofish-x86_64",
-	.cra_priority		=	200,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	TF_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct twofish_ctx),
-	.cra_alignmask		=	3,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
-	.cra_u			=	{
-		.cipher = {
-			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
-			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
-			.cia_setkey		=	twofish_setkey,
-			.cia_encrypt		=	twofish_encrypt,
-			.cia_decrypt		=	twofish_decrypt
-		}
-	}
-};
-
-static int __init init(void)
-{
-	return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
-	crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
-MODULE_ALIAS("twofish");
--- a/arch/x86/crypto/twofish_32.c
+++ b/arch/x86/crypto/twofish_32.c
 /*
- *  Glue Code for optimized 586 assembler version of TWOFISH
+ * Glue Code for assembler optimized version of TWOFISH
 *
 * Originally Twofish for GPG
 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/types.h>

-
 asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);

@@ -60,7 +59,7 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)

 static struct crypto_alg alg = {
 	.cra_name		=	"twofish",
-	.cra_driver_name	=	"twofish-i586",
+	.cra_driver_name	=	"twofish-asm",
 	.cra_priority		=	200,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	TF_BLOCK_SIZE,
@@ -93,5 +92,6 @@ module_init(init);
 module_exit(fini);

 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
 MODULE_ALIAS("twofish");
+MODULE_ALIAS("twofish-asm");
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -24,10 +24,6 @@ config CRYPTO_ALGAPI
 	help
 	  This option provides the API for cryptographic algorithms.

-config CRYPTO_ABLKCIPHER
-	tristate
-	select CRYPTO_BLKCIPHER
-
 config CRYPTO_AEAD
 	tristate
 	select CRYPTO_ALGAPI
@@ -36,6 +32,15 @@ config CRYPTO_BLKCIPHER
 	tristate
 	select CRYPTO_ALGAPI

+config CRYPTO_SEQIV
+	tristate "Sequence Number IV Generator"
+	select CRYPTO_AEAD
+	select CRYPTO_BLKCIPHER
+	help
+	  This IV generator generates an IV based on a sequence number by
+	  xoring it with a salt.  This algorithm is mainly useful for CTR
+	  and similar modes.
+
 config CRYPTO_HASH
 	tristate
 	select CRYPTO_ALGAPI
@@ -91,7 +96,7 @@ config CRYPTO_SHA1
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).

 config CRYPTO_SHA256
-	tristate "SHA256 digest algorithm"
+	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_ALGAPI
 	help
 	  SHA256 secure hash standard (DFIPS 180-2).
@@ -99,6 +104,9 @@ config CRYPTO_SHA256
 	  This version of SHA implements a 256 bit hash with 128 bits of
 	  security against collision attacks.

+          This code also includes SHA-224, a 224 bit hash with 112 bits
+          of security against collision attacks.
+
 config CRYPTO_SHA512
 	tristate "SHA384 and SHA512 digest algorithms"
 	select CRYPTO_ALGAPI
@@ -195,9 +203,34 @@ config CRYPTO_XTS
 	  key size 256, 384 or 512 bits. This implementation currently
 	  can't handle a sectorsize which is not a multiple of 16 bytes.

+config CRYPTO_CTR
+	tristate "CTR support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SEQIV
+	select CRYPTO_MANAGER
+	help
+	  CTR: Counter mode
+	  This block cipher algorithm is required for IPSec.
+
+config CRYPTO_GCM
+	tristate "GCM/GMAC support"
+	select CRYPTO_CTR
+	select CRYPTO_AEAD
+	select CRYPTO_GF128MUL
+	help
+	  Support for Galois/Counter Mode (GCM) and Galois Message
+	  Authentication Code (GMAC). Required for IPSec.
+
+config CRYPTO_CCM
+	tristate "CCM support"
+	select CRYPTO_CTR
+	select CRYPTO_AEAD
+	help
+	  Support for Counter with CBC MAC. Required for IPsec.
+
 config CRYPTO_CRYPTD
 	tristate "Software async crypto daemon"
-	select CRYPTO_ABLKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  This is a generic software asynchronous crypto daemon that
@@ -320,6 +353,7 @@ config CRYPTO_AES_586
 	tristate "AES cipher algorithms (i586)"
 	depends on (X86 || UML_X86) && !64BIT
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -341,6 +375,7 @@ config CRYPTO_AES_X86_64
 	tristate "AES cipher algorithms (x86_64)"
 	depends on (X86 || UML_X86) && 64BIT
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -441,6 +476,46 @@ config CRYPTO_SEED
 	  See also:
 	  <http://www.kisa.or.kr/kisa/seed/jsp/seed_eng.jsp>

+config CRYPTO_SALSA20
+	tristate "Salsa20 stream cipher algorithm (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	select CRYPTO_BLKCIPHER
+	help
+	  Salsa20 stream cipher algorithm.
+
+	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+	  The Salsa20 stream cipher algorithm is designed by Daniel J.
+	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
+
+config CRYPTO_SALSA20_586
+	tristate "Salsa20 stream cipher algorithm (i586) (EXPERIMENTAL)"
+	depends on (X86 || UML_X86) && !64BIT
+	depends on EXPERIMENTAL
+	select CRYPTO_BLKCIPHER
+	help
+	  Salsa20 stream cipher algorithm.
+
+	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+	  The Salsa20 stream cipher algorithm is designed by Daniel J.
+	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
+
+config CRYPTO_SALSA20_X86_64
+	tristate "Salsa20 stream cipher algorithm (x86_64) (EXPERIMENTAL)"
+	depends on (X86 || UML_X86) && 64BIT
+	depends on EXPERIMENTAL
+	select CRYPTO_BLKCIPHER
+	help
+	  Salsa20 stream cipher algorithm.
+
+	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+	  The Salsa20 stream cipher algorithm is designed by Daniel J.
+	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>

 config CRYPTO_DEFLATE
 	tristate "Deflate compression algorithm"
@@ -491,6 +566,7 @@ config CRYPTO_TEST
 	tristate "Testing module"
 	depends on m
 	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
 	help
 	  Quick & dirty crypto test module.

@@ -498,10 +574,19 @@ config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
 	select CRYPTO_MANAGER
+	select CRYPTO_HASH
 	help
 	  Authenc: Combined mode wrapper for IPsec.
 	  This is required for IPSec.

+config CRYPTO_LZO
+	tristate "LZO compression algorithm"
+	select CRYPTO_ALGAPI
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	help
+	  This is the LZO algorithm.
+
 source "drivers/crypto/Kconfig"

 endif	# if CRYPTO
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -8,9 +8,14 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o
 crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y)
 obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o

-obj-$(CONFIG_CRYPTO_ABLKCIPHER) += ablkcipher.o
 obj-$(CONFIG_CRYPTO_AEAD) += aead.o
-obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o
+
+crypto_blkcipher-objs := ablkcipher.o
+crypto_blkcipher-objs += blkcipher.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += crypto_blkcipher.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += chainiv.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += eseqiv.o
+obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o

 crypto_hash-objs := hash.o
 obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o
@@ -32,6 +37,9 @@ obj-$(CONFIG_CRYPTO_CBC) += cbc.o
 obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
+obj-$(CONFIG_CRYPTO_CTR) += ctr.o
+obj-$(CONFIG_CRYPTO_GCM) += gcm.o
+obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
@@ -48,10 +56,12 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
+obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
+obj-$(CONFIG_CRYPTO_LZO) += lzo.o

 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o


--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -13,14 +13,18 @@
 *
 */

-#include <crypto/algapi.h>
-#include <linux/errno.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>

+#include "internal.h"
+
 static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
@@ -66,6 +70,16 @@ static unsigned int crypto_ablkcipher_ctxsize(struct crypto_alg *alg, u32 type,
 	return alg->cra_ctxsize;
 }

+int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req)
+{
+	return crypto_ablkcipher_encrypt(&req->creq);
+}
+
+int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req)
+{
+	return crypto_ablkcipher_decrypt(&req->creq);
+}
+
 static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type,
 				      u32 mask)
 {
@@ -78,6 +92,11 @@ static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type,
 	crt->setkey = setkey;
 	crt->encrypt = alg->encrypt;
 	crt->decrypt = alg->decrypt;
+	if (!alg->ivsize) {
+		crt->givencrypt = skcipher_null_givencrypt;
+		crt->givdecrypt = skcipher_null_givdecrypt;
+	}
+	crt->base = __crypto_ablkcipher_cast(tfm);
 	crt->ivsize = alg->ivsize;

 	return 0;
@@ -90,10 +109,13 @@ static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
 	struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;

 	seq_printf(m, "type         : ablkcipher\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
 	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 	seq_printf(m, "min keysize  : %u\n", ablkcipher->min_keysize);
 	seq_printf(m, "max keysize  : %u\n", ablkcipher->max_keysize);
 	seq_printf(m, "ivsize       : %u\n", ablkcipher->ivsize);
+	seq_printf(m, "geniv        : %s\n", ablkcipher->geniv ?: "<default>");
 }

 const struct crypto_type crypto_ablkcipher_type = {
@@ -105,5 +127,220 @@ const struct crypto_type crypto_ablkcipher_type = {
 };
 EXPORT_SYMBOL_GPL(crypto_ablkcipher_type);

+static int no_givdecrypt(struct skcipher_givcrypt_request *req)
+{
+	return -ENOSYS;
+}
+
+static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type,
+				      u32 mask)
+{
+	struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
+	struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher;
+
+	if (alg->ivsize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
+		      alg->setkey : setkey;
+	crt->encrypt = alg->encrypt;
+	crt->decrypt = alg->decrypt;
+	crt->givencrypt = alg->givencrypt;
+	crt->givdecrypt = alg->givdecrypt ?: no_givdecrypt;
+	crt->base = __crypto_ablkcipher_cast(tfm);
+	crt->ivsize = alg->ivsize;
+
+	return 0;
+}
+
+static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
+
+	seq_printf(m, "type         : givcipher\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "min keysize  : %u\n", ablkcipher->min_keysize);
+	seq_printf(m, "max keysize  : %u\n", ablkcipher->max_keysize);
+	seq_printf(m, "ivsize       : %u\n", ablkcipher->ivsize);
+	seq_printf(m, "geniv        : %s\n", ablkcipher->geniv ?: "<built-in>");
+}
+
+const struct crypto_type crypto_givcipher_type = {
+	.ctxsize = crypto_ablkcipher_ctxsize,
+	.init = crypto_init_givcipher_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_givcipher_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_givcipher_type);
+
+const char *crypto_default_geniv(const struct crypto_alg *alg)
+{
+	return alg->cra_flags & CRYPTO_ALG_ASYNC ? "eseqiv" : "chainiv";
+}
+
+static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
+{
+	struct rtattr *tb[3];
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_type data;
+	} ptype;
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_alg data;
+	} palg;
+	struct crypto_template *tmpl;
+	struct crypto_instance *inst;
+	struct crypto_alg *larval;
+	const char *geniv;
+	int err;
+
+	larval = crypto_larval_lookup(alg->cra_driver_name,
+				      CRYPTO_ALG_TYPE_GIVCIPHER,
+				      CRYPTO_ALG_TYPE_MASK);
+	err = PTR_ERR(larval);
+	if (IS_ERR(larval))
+		goto out;
+
+	err = -EAGAIN;
+	if (!crypto_is_larval(larval))
+		goto drop_larval;
+
+	ptype.attr.rta_len = sizeof(ptype);
+	ptype.attr.rta_type = CRYPTOA_TYPE;
+	ptype.data.type = type | CRYPTO_ALG_GENIV;
+	/* GENIV tells the template that we're making a default geniv. */
+	ptype.data.mask = mask | CRYPTO_ALG_GENIV;
+	tb[0] = &ptype.attr;
+
+	palg.attr.rta_len = sizeof(palg);
+	palg.attr.rta_type = CRYPTOA_ALG;
+	/* Must use the exact name to locate ourselves. */
+	memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME);
+	tb[1] = &palg.attr;
+
+	tb[2] = NULL;
+
+	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+	    CRYPTO_ALG_TYPE_BLKCIPHER)
+		geniv = alg->cra_blkcipher.geniv;
+	else
+		geniv = alg->cra_ablkcipher.geniv;
+
+	if (!geniv)
+		geniv = crypto_default_geniv(alg);
+
+	tmpl = crypto_lookup_template(geniv);
+	err = -ENOENT;
+	if (!tmpl)
+		goto kill_larval;
+
+	inst = tmpl->alloc(tb);
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto put_tmpl;
+
+	if ((err = crypto_register_instance(tmpl, inst))) {
+		tmpl->free(inst);
+		goto put_tmpl;
+	}
+
+	/* Redo the lookup to use the instance we just registered. */
+	err = -EAGAIN;
+
+put_tmpl:
+	crypto_tmpl_put(tmpl);
+kill_larval:
+	crypto_larval_kill(larval);
+drop_larval:
+	crypto_mod_put(larval);
+out:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type,
+						 u32 mask)
+{
+	struct crypto_alg *alg;
+
+	alg = crypto_alg_mod_lookup(name, type, mask);
+	if (IS_ERR(alg))
+		return alg;
+
+	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+	    CRYPTO_ALG_TYPE_GIVCIPHER)
+		return alg;
+
+	if (!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+	      CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize :
+					  alg->cra_ablkcipher.ivsize))
+		return alg;
+
+	return ERR_PTR(crypto_givcipher_default(alg, type, mask));
+}
+
+int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name,
+			 u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	int err;
+
+	type = crypto_skcipher_type(type);
+	mask = crypto_skcipher_mask(mask);
+
+	alg = crypto_lookup_skcipher(name, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+	crypto_mod_put(alg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_skcipher);
+
+struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name,
+						  u32 type, u32 mask)
+{
+	struct crypto_tfm *tfm;
+	int err;
+
+	type = crypto_skcipher_type(type);
+	mask = crypto_skcipher_mask(mask);
+
+	for (;;) {
+		struct crypto_alg *alg;
+
+		alg = crypto_lookup_skcipher(alg_name, type, mask);
+		if (IS_ERR(alg)) {
+			err = PTR_ERR(alg);
+			goto err;
+		}
+
+		tfm = __crypto_alloc_tfm(alg, type, mask);
+		if (!IS_ERR(tfm))
+			return __crypto_ablkcipher_cast(tfm);
+
+		crypto_mod_put(alg);
+		err = PTR_ERR(tfm);
+
+err:
+		if (err != -EAGAIN)
+			break;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+	}
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous block chaining cipher type");
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -12,14 +12,17 @@
 *
 */

-#include <crypto/algapi.h>
-#include <linux/errno.h>
+#include <crypto/internal/aead.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>

+#include "internal.h"
+
 static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
 			    unsigned int keylen)
 {
@@ -53,25 +56,54 @@ static int setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
 	return aead->setkey(tfm, key, keylen);
 }

+int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	struct aead_tfm *crt = crypto_aead_crt(tfm);
+	int err;
+
+	if (authsize > crypto_aead_alg(tfm)->maxauthsize)
+		return -EINVAL;
+
+	if (crypto_aead_alg(tfm)->setauthsize) {
+		err = crypto_aead_alg(tfm)->setauthsize(crt->base, authsize);
+		if (err)
+			return err;
+	}
+
+	crypto_aead_crt(crt->base)->authsize = authsize;
+	crt->authsize = authsize;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_aead_setauthsize);
+
 static unsigned int crypto_aead_ctxsize(struct crypto_alg *alg, u32 type,
 					u32 mask)
 {
 	return alg->cra_ctxsize;
 }

+static int no_givcrypt(struct aead_givcrypt_request *req)
+{
+	return -ENOSYS;
+}
+
 static int crypto_init_aead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
 	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
 	struct aead_tfm *crt = &tfm->crt_aead;

-	if (max(alg->authsize, alg->ivsize) > PAGE_SIZE / 8)
+	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
 		return -EINVAL;

-	crt->setkey = setkey;
+	crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
+		      alg->setkey : setkey;
 	crt->encrypt = alg->encrypt;
 	crt->decrypt = alg->decrypt;
+	crt->givencrypt = alg->givencrypt ?: no_givcrypt;
+	crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
+	crt->base = __crypto_aead_cast(tfm);
 	crt->ivsize = alg->ivsize;
-	crt->authsize = alg->authsize;
+	crt->authsize = alg->maxauthsize;

 	return 0;
 }
@@ -83,9 +115,12 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 	struct aead_alg *aead = &alg->cra_aead;

 	seq_printf(m, "type         : aead\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
 	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
-	seq_printf(m, "authsize     : %u\n", aead->authsize);
+	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+	seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
 }

 const struct crypto_type crypto_aead_type = {
@@ -97,5 +132,358 @@ const struct crypto_type crypto_aead_type = {
 };
 EXPORT_SYMBOL_GPL(crypto_aead_type);

+static int aead_null_givencrypt(struct aead_givcrypt_request *req)
+{
+	return crypto_aead_encrypt(&req->areq);
+}
+
+static int aead_null_givdecrypt(struct aead_givcrypt_request *req)
+{
+	return crypto_aead_decrypt(&req->areq);
+}
+
+static int crypto_init_nivaead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+{
+	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
+	struct aead_tfm *crt = &tfm->crt_aead;
+
+	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	crt->setkey = setkey;
+	crt->encrypt = alg->encrypt;
+	crt->decrypt = alg->decrypt;
+	if (!alg->ivsize) {
+		crt->givencrypt = aead_null_givencrypt;
+		crt->givdecrypt = aead_null_givdecrypt;
+	}
+	crt->base = __crypto_aead_cast(tfm);
+	crt->ivsize = alg->ivsize;
+	crt->authsize = alg->maxauthsize;
+
+	return 0;
+}
+
+static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	struct aead_alg *aead = &alg->cra_aead;
+
+	seq_printf(m, "type         : nivaead\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
+	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+	seq_printf(m, "geniv        : %s\n", aead->geniv);
+}
+
+const struct crypto_type crypto_nivaead_type = {
+	.ctxsize = crypto_aead_ctxsize,
+	.init = crypto_init_nivaead_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_nivaead_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_nivaead_type);
+
+static int crypto_grab_nivaead(struct crypto_aead_spawn *spawn,
+			       const char *name, u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	int err;
+
+	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	type |= CRYPTO_ALG_TYPE_AEAD;
+	mask |= CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV;
+
+	alg = crypto_alg_mod_lookup(name, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+	crypto_mod_put(alg);
+	return err;
+}
+
+struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+					 struct rtattr **tb, u32 type,
+					 u32 mask)
+{
+	const char *name;
+	struct crypto_aead_spawn *spawn;
+	struct crypto_attr_type *algt;
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		return ERR_PTR(err);
+
+	if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) &
+	    algt->mask)
+		return ERR_PTR(-EINVAL);
+
+	name = crypto_attr_alg_name(tb[1]);
+	err = PTR_ERR(name);
+	if (IS_ERR(name))
+		return ERR_PTR(err);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	spawn = crypto_instance_ctx(inst);
+
+	/* Ignore async algorithms if necessary. */
+	mask |= crypto_requires_sync(algt->type, algt->mask);
+
+	crypto_set_aead_spawn(spawn, inst);
+	err = crypto_grab_nivaead(spawn, name, type, mask);
+	if (err)
+		goto err_free_inst;
+
+	alg = crypto_aead_spawn_alg(spawn);
+
+	err = -EINVAL;
+	if (!alg->cra_aead.ivsize)
+		goto err_drop_alg;
+
+	/*
+	 * This is only true if we're constructing an algorithm with its
+	 * default IV generator.  For the default generator we elide the
+	 * template name and double-check the IV generator.
+	 */
+	if (algt->mask & CRYPTO_ALG_GENIV) {
+		if (strcmp(tmpl->name, alg->cra_aead.geniv))
+			goto err_drop_alg;
+
+		memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+		memcpy(inst->alg.cra_driver_name, alg->cra_driver_name,
+		       CRYPTO_MAX_ALG_NAME);
+	} else {
+		err = -ENAMETOOLONG;
+		if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "%s(%s)", tmpl->name, alg->cra_name) >=
+		    CRYPTO_MAX_ALG_NAME)
+			goto err_drop_alg;
+		if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+			     "%s(%s)", tmpl->name, alg->cra_driver_name) >=
+		    CRYPTO_MAX_ALG_NAME)
+			goto err_drop_alg;
+	}
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV;
+	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_aead_type;
+
+	inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
+	inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
+	inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+
+	inst->alg.cra_aead.setkey = alg->cra_aead.setkey;
+	inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
+	inst->alg.cra_aead.encrypt = alg->cra_aead.encrypt;
+	inst->alg.cra_aead.decrypt = alg->cra_aead.decrypt;
+
+out:
+	return inst;
+
+err_drop_alg:
+	crypto_drop_aead(spawn);
+err_free_inst:
+	kfree(inst);
+	inst = ERR_PTR(err);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(aead_geniv_alloc);
+
+void aead_geniv_free(struct crypto_instance *inst)
+{
+	crypto_drop_aead(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+EXPORT_SYMBOL_GPL(aead_geniv_free);
+
+int aead_geniv_init(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_aead *aead;
+
+	aead = crypto_spawn_aead(crypto_instance_ctx(inst));
+	if (IS_ERR(aead))
+		return PTR_ERR(aead);
+
+	tfm->crt_aead.base = aead;
+	tfm->crt_aead.reqsize += crypto_aead_reqsize(aead);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(aead_geniv_init);
+
+void aead_geniv_exit(struct crypto_tfm *tfm)
+{
+	crypto_free_aead(tfm->crt_aead.base);
+}
+EXPORT_SYMBOL_GPL(aead_geniv_exit);
+
+static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
+{
+	struct rtattr *tb[3];
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_type data;
+	} ptype;
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_alg data;
+	} palg;
+	struct crypto_template *tmpl;
+	struct crypto_instance *inst;
+	struct crypto_alg *larval;
+	const char *geniv;
+	int err;
+
+	larval = crypto_larval_lookup(alg->cra_driver_name,
+				      CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV,
+				      CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	err = PTR_ERR(larval);
+	if (IS_ERR(larval))
+		goto out;
+
+	err = -EAGAIN;
+	if (!crypto_is_larval(larval))
+		goto drop_larval;
+
+	ptype.attr.rta_len = sizeof(ptype);
+	ptype.attr.rta_type = CRYPTOA_TYPE;
+	ptype.data.type = type | CRYPTO_ALG_GENIV;
+	/* GENIV tells the template that we're making a default geniv. */
+	ptype.data.mask = mask | CRYPTO_ALG_GENIV;
+	tb[0] = &ptype.attr;
+
+	palg.attr.rta_len = sizeof(palg);
+	palg.attr.rta_type = CRYPTOA_ALG;
+	/* Must use the exact name to locate ourselves. */
+	memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME);
+	tb[1] = &palg.attr;
+
+	tb[2] = NULL;
+
+	geniv = alg->cra_aead.geniv;
+
+	tmpl = crypto_lookup_template(geniv);
+	err = -ENOENT;
+	if (!tmpl)
+		goto kill_larval;
+
+	inst = tmpl->alloc(tb);
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto put_tmpl;
+
+	if ((err = crypto_register_instance(tmpl, inst))) {
+		tmpl->free(inst);
+		goto put_tmpl;
+	}
+
+	/* Redo the lookup to use the instance we just registered. */
+	err = -EAGAIN;
+
+put_tmpl:
+	crypto_tmpl_put(tmpl);
+kill_larval:
+	crypto_larval_kill(larval);
+drop_larval:
+	crypto_mod_put(larval);
+out:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct crypto_alg *crypto_lookup_aead(const char *name, u32 type,
+					     u32 mask)
+{
+	struct crypto_alg *alg;
+
+	alg = crypto_alg_mod_lookup(name, type, mask);
+	if (IS_ERR(alg))
+		return alg;
+
+	if (alg->cra_type == &crypto_aead_type)
+		return alg;
+
+	if (!alg->cra_aead.ivsize)
+		return alg;
+
+	return ERR_PTR(crypto_nivaead_default(alg, type, mask));
+}
+
+int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name,
+		     u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	int err;
+
+	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	type |= CRYPTO_ALG_TYPE_AEAD;
+	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	alg = crypto_lookup_aead(name, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+	crypto_mod_put(alg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_aead);
+
+struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_tfm *tfm;
+	int err;
+
+	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	type |= CRYPTO_ALG_TYPE_AEAD;
+	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	for (;;) {
+		struct crypto_alg *alg;
+
+		alg = crypto_lookup_aead(alg_name, type, mask);
+		if (IS_ERR(alg)) {
+			err = PTR_ERR(alg);
+			goto err;
+		}
+
+		tfm = __crypto_alloc_tfm(alg, type, mask);
+		if (!IS_ERR(tfm))
+			return __crypto_aead_cast(tfm);
+
+		crypto_mod_put(alg);
+		err = PTR_ERR(tfm);
+
+err:
+		if (err != -EAGAIN)
+			break;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+	}
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_aead);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Authenticated Encryption with Associated Data (AEAD)");
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -472,7 +472,7 @@ int crypto_check_attr_type(struct rtattr **tb, u32 type)
 }
 EXPORT_SYMBOL_GPL(crypto_check_attr_type);

-struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
+const char *crypto_attr_alg_name(struct rtattr *rta)
 {
 	struct crypto_attr_alg *alga;

@@ -486,7 +486,21 @@ struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
 	alga = RTA_DATA(rta);
 	alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;

-	return crypto_alg_mod_lookup(alga->name, type, mask);
+	return alga->name;
+}
+EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
+
+struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
+{
+	const char *name;
+	int err;
+
+	name = crypto_attr_alg_name(rta);
+	err = PTR_ERR(name);
+	if (IS_ERR(name))
+		return ERR_PTR(err);
+
+	return crypto_alg_mod_lookup(name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_attr_alg);

@@ -605,6 +619,53 @@ int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm)
 }
 EXPORT_SYMBOL_GPL(crypto_tfm_in_queue);

+static inline void crypto_inc_byte(u8 *a, unsigned int size)
+{
+	u8 *b = (a + size);
+	u8 c;
+
+	for (; size; size--) {
+		c = *--b + 1;
+		*b = c;
+		if (c)
+			break;
+	}
+}
+
+void crypto_inc(u8 *a, unsigned int size)
+{
+	__be32 *b = (__be32 *)(a + size);
+	u32 c;
+
+	for (; size >= 4; size -= 4) {
+		c = be32_to_cpu(*--b) + 1;
+		*b = cpu_to_be32(c);
+		if (c)
+			return;
+	}
+
+	crypto_inc_byte(a, size);
+}
+EXPORT_SYMBOL_GPL(crypto_inc);
+
+static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+{
+	for (; size; size--)
+		*a++ ^= *b++;
+}
+
+void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
+{
+	u32 *a = (u32 *)dst;
+	u32 *b = (u32 *)src;
+
+	for (; size >= 4; size -= 4)
+		*a++ ^= *b++;
+
+	crypto_xor_byte((u8 *)a, (u8 *)b, size);
+}
+EXPORT_SYMBOL_GPL(crypto_xor);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();

--- a/crypto/api.c
+++ b/crypto/api.c
@@ -137,7 +137,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type,
 	return alg;
 }

-static void crypto_larval_kill(struct crypto_alg *alg)
+void crypto_larval_kill(struct crypto_alg *alg)
 {
 	struct crypto_larval *larval = (void *)alg;

@@ -147,6 +147,7 @@ static void crypto_larval_kill(struct crypto_alg *alg)
 	complete_all(&larval->completion);
 	crypto_alg_put(alg);
 }
+EXPORT_SYMBOL_GPL(crypto_larval_kill);

 static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 {
@@ -176,11 +177,9 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 	return alg;
 }

-struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
+struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 {
 	struct crypto_alg *alg;
-	struct crypto_alg *larval;
-	int ok;

 	if (!name)
 		return ERR_PTR(-ENOENT);
@@ -193,7 +192,17 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 	if (alg)
 		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;

-	larval = crypto_larval_alloc(name, type, mask);
+	return crypto_larval_alloc(name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_larval_lookup);
+
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	struct crypto_alg *larval;
+	int ok;
+
+	larval = crypto_larval_lookup(name, type, mask);
 	if (IS_ERR(larval) || !crypto_is_larval(larval))
 		return larval;


--- a/crypto/authenc.c
+++ b/crypto/authenc.c
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
--- a/crypto/camellia.c
+++ b/crypto/camellia.c
--- a/crypto/cast6.c
+++ b/crypto/cast6.c
@@ -369,7 +369,7 @@ static const u8 Tr[4][8] = {
 };

 /* forward octave */
-static inline void W(u32 *key, unsigned int i) {
+static void W(u32 *key, unsigned int i) {
 	u32 I;
 	key[6] ^= F1(key[7], Tr[i % 4][0], Tm[i][0]);
 	key[5] ^= F2(key[6], Tr[i % 4][1], Tm[i][1]);
@@ -428,7 +428,7 @@ static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 }

 /*forward quad round*/
-static inline void Q (u32 * block, u8 * Kr, u32 * Km) {
+static void Q (u32 * block, u8 * Kr, u32 * Km) {
 	u32 I;
 	block[2] ^= F1(block[3], Kr[0], Km[0]);
 	block[1] ^= F2(block[2], Kr[1], Km[1]);
@@ -437,7 +437,7 @@ static inline void Q (u32 * block, u8 * Kr, u32 * Km) {
 }

 /*reverse quad round*/
-static inline void QBAR (u32 * block, u8 * Kr, u32 * Km) {
+static void QBAR (u32 * block, u8 * Kr, u32 * Km) {
 	u32 I;
        block[3] ^= F1(block[0], Kr[3], Km[3]);
        block[0] ^= F3(block[1], Kr[2], Km[2]);

--- a/crypto/cbc.c
+++ b/crypto/cbc.c
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
--- a/crypto/chainiv.c
+++ b/crypto/chainiv.c
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -228,7 +228,7 @@ static struct crypto_instance *cryptd_alloc_blkcipher(
 	struct crypto_alg *alg;

 	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-				  CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+				  CRYPTO_ALG_TYPE_MASK);
 	if (IS_ERR(alg))
 		return ERR_PTR(PTR_ERR(alg));

@@ -236,13 +236,15 @@ static struct crypto_instance *cryptd_alloc_blkcipher(
 	if (IS_ERR(inst))
 		goto out_put_alg;

-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_ASYNC;
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
 	inst->alg.cra_type = &crypto_ablkcipher_type;

 	inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize;
 	inst->alg.cra_ablkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
 	inst->alg.cra_ablkcipher.max_keysize = alg->cra_blkcipher.max_keysize;

+	inst->alg.cra_ablkcipher.geniv = alg->cra_blkcipher.geniv;
+
 	inst->alg.cra_ctxsize = sizeof(struct cryptd_blkcipher_ctx);

 	inst->alg.cra_init = cryptd_blkcipher_init_tfm;

--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -12,6 +12,7 @@
 *
 */

+#include <crypto/scatterwalk.h>
 #include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/hardirq.h>
@@ -20,9 +21,6 @@
 #include <linux/module.h>
 #include <linux/scatterlist.h>

-#include "internal.h"
-#include "scatterwalk.h"
-
 static int init(struct hash_desc *desc)
 {
 	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);

--- a/crypto/eseqiv.c
+++ b/crypto/eseqiv.c
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
--- a/crypto/internal.h
+++ b/crypto/internal.h
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
--- a/drivers/char/hw_random/intel-rng.c
+++ b/drivers/char/hw_random/intel-rng.c
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
--- a/drivers/char/hw_random/pasemi-rng.c
+++ b/drivers/char/hw_random/pasemi-rng.c
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
+obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
--- a/drivers/crypto/geode-aes.h
+++ b/drivers/crypto/geode-aes.h
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
--- a/include/crypto/authenc.h
+++ b/include/crypto/authenc.h
--- a/include/crypto/ctr.h
+++ b/include/crypto/ctr.h
--- a/include/crypto/des.h
+++ b/include/crypto/des.h
--- a/include/crypto/internal/aead.h
+++ b/include/crypto/internal/aead.h
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h