提交 797994f8 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:

 - XTS mode optimisation for twofish/cast6/camellia/aes on x86

 - AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia

 - SSSE3/AVX/AVX2 optimisations for sha256/sha512

 - Added driver for SAHARA2 crypto accelerator

 - Fix for GMAC when used in non-IPsec secnarios

 - Added generic CMAC implementation (including IPsec glue)

 - IP update for crypto/atmel

 - Support for more than one device in hwrng/timeriomem

 - Added Broadcom BCM2835 RNG driver

 - Misc fixes

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits)
  crypto: caam - fix job ring cleanup code
  crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher
  crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher
  crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher
  crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher
  crypto: tcrypt - add async cipher speed tests for blowfish
  crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2
  crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86
  crypto: aesni_intel - add more optimized XTS mode for x86-64
  crypto: x86/camellia-aesni-avx - add more optimized XTS code
  crypto: cast6-avx: use new optimized XTS code
  crypto: x86/twofish-avx - use optimized XTS code
  crypto: x86 - add more optimized XTS-mode for serpent-avx
  xfrm: add rfc4494 AES-CMAC-96 support
  crypto: add CMAC support to CryptoAPI
  crypto: testmgr - add empty test vectors for null ciphers
  crypto: testmgr - add AES GMAC test vectors
  crypto: gcm - fix rfc4543 to handle async crypto correctly
  crypto: gcm - make GMAC work when dst and src are different
  hwrng: timeriomem - added devicetree hooks
  ...
Freescale SAHARA Cryptographic Accelerator included in some i.MX chips.
Currently only i.MX27 is supported.
Required properties:
- compatible : Should be "fsl,<soc>-sahara"
- reg : Should contain SAHARA registers location and length
- interrupts : Should contain SAHARA interrupt number
Example:
sah@10025000 {
compatible = "fsl,imx27-sahara";
reg = < 0x10025000 0x800>;
interrupts = <75>;
};
HWRNG support for the timeriomem_rng driver
Required properties:
- compatible : "timeriomem_rng"
- reg : base address to sample from
- period : wait time in microseconds to use between samples
N.B. currently 'reg' must be four bytes wide and aligned
Example:
hwrng@44 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "timeriomem_rng";
reg = <0x44 0x04>;
period = <1000000>;
};
BCM2835 Random number generator
Required properties:
- compatible : should be "brcm,bcm2835-rng"
- reg : Specifies base physical address and size of the registers.
Example:
rng {
compatible = "brcm,bcm2835-rng";
reg = <0x7e104000 0x10>;
};
......@@ -63,7 +63,7 @@ Intel RNG Driver notes:
* FIXME: support poll(2)
NOTE: request_mem_region was removed, for two reasons:
NOTE: request_mem_region was removed, for three reasons:
1) Only one RNG is supported by this driver, 2) The location
used by the RNG is a fixed location in MMIO-addressable memory,
3) users with properly working BIOS e820 handling will always
......
......@@ -18,7 +18,7 @@
#include <linux/platform_device.h>
#include <linux/i2c-gpio.h>
#include <linux/atmel-mci.h>
#include <linux/platform_data/atmel-aes.h>
#include <linux/platform_data/crypto-atmel.h>
#include <linux/platform_data/at91_adc.h>
......@@ -1900,7 +1900,8 @@ static void __init at91_add_device_tdes(void) {}
* -------------------------------------------------------------------- */
#if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE)
static struct aes_platform_data aes_data;
static struct crypto_platform_data aes_data;
static struct crypto_dma_data alt_atslave;
static u64 aes_dmamask = DMA_BIT_MASK(32);
static struct resource aes_resources[] = {
......@@ -1931,23 +1932,20 @@ static struct platform_device at91sam9g45_aes_device = {
static void __init at91_add_device_aes(void)
{
struct at_dma_slave *atslave;
struct aes_dma_data *alt_atslave;
alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL);
/* DMA TX slave channel configuration */
atslave = &alt_atslave->txdata;
atslave = &alt_atslave.txdata;
atslave->dma_dev = &at_hdmac_device.dev;
atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW |
ATC_SRC_PER(AT_DMA_ID_AES_RX);
/* DMA RX slave channel configuration */
atslave = &alt_atslave->rxdata;
atslave = &alt_atslave.rxdata;
atslave->dma_dev = &at_hdmac_device.dev;
atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW |
ATC_DST_PER(AT_DMA_ID_AES_TX);
aes_data.dma_slave = alt_atslave;
aes_data.dma_slave = &alt_atslave;
platform_device_register(&at91sam9g45_aes_device);
}
#else
......
......@@ -2,6 +2,10 @@
# Arch-specific CryptoAPI modules.
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
......@@ -12,22 +16,37 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
camellia-aesni-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
endif
# These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
......@@ -36,21 +55,35 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
twofish_avx_glue.o
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
serpent_avx_glue.o
endif
ifeq ($(avx2_supported),yes)
blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
......@@ -34,6 +34,10 @@
#ifdef __x86_64__
.data
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
POLY: .octa 0xC2000000000000000000000000000001
TWOONE: .octa 0x00000001000000000000000000000001
......@@ -105,6 +109,8 @@ enc: .octa 0x2
#define CTR %xmm11
#define INC %xmm12
#define GF128MUL_MASK %xmm10
#ifdef __x86_64__
#define AREG %rax
#define KEYP %rdi
......@@ -2636,4 +2642,115 @@ ENTRY(aesni_ctr_enc)
.Lctr_enc_just_ret:
ret
ENDPROC(aesni_ctr_enc)
/*
* _aesni_gf128mul_x_ble: internal ABI
* Multiply in GF(2^128) for XTS IVs
* input:
* IV: current IV
* GF128MUL_MASK == mask with 0x87 and 0x01
* output:
* IV: next IV
* changed:
* CTR: == temporary value
*/
#define _aesni_gf128mul_x_ble() \
pshufd $0x13, IV, CTR; \
paddq IV, IV; \
psrad $31, CTR; \
pand GF128MUL_MASK, CTR; \
pxor CTR, IV;
/*
* void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* bool enc, u8 *iv)
*/
ENTRY(aesni_xts_crypt8)
cmpb $0, %cl
movl $0, %ecx
movl $240, %r10d
leaq _aesni_enc4, %r11
leaq _aesni_dec4, %rax
cmovel %r10d, %ecx
cmoveq %rax, %r11
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
movups (IVP), IV
mov 480(KEYP), KLEN
addq %rcx, KEYP
movdqa IV, STATE1
pxor 0x00(INP), STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
pxor 0x10(INP), STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
pxor 0x20(INP), STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
pxor 0x30(INP), STATE4
movdqu IV, 0x30(OUTP)
call *%r11
pxor 0x00(OUTP), STATE1
movdqu STATE1, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE1
pxor 0x40(INP), STATE1
movdqu IV, 0x40(OUTP)
pxor 0x10(OUTP), STATE2
movdqu STATE2, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
pxor 0x50(INP), STATE2
movdqu IV, 0x50(OUTP)
pxor 0x20(OUTP), STATE3
movdqu STATE3, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
pxor 0x60(INP), STATE3
movdqu IV, 0x60(OUTP)
pxor 0x30(OUTP), STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
pxor 0x70(INP), STATE4
movdqu IV, 0x70(OUTP)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
call *%r11
pxor 0x40(OUTP), STATE1
movdqu STATE1, 0x40(OUTP)
pxor 0x50(OUTP), STATE2
movdqu STATE2, 0x50(OUTP)
pxor 0x60(OUTP), STATE3
movdqu STATE3, 0x60(OUTP)
pxor 0x70(OUTP), STATE4
movdqu STATE4, 0x70(OUTP)
ret
ENDPROC(aesni_xts_crypt8)
#endif
......@@ -39,6 +39,9 @@
#include <crypto/internal/aead.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#ifdef CONFIG_X86_64
#include <asm/crypto/glue_helper.h>
#endif
#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
#define HAS_PCBC
......@@ -102,6 +105,9 @@ void crypto_fpu_exit(void);
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, bool enc, u8 *iv);
/* asmlinkage void aesni_gcm_enc()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, Ciphertext output. Encrypt in-place is allowed.
......@@ -510,6 +516,78 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
aesni_enc(ctx, out, in);
}
#ifdef CONFIG_X86_64
static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
}
static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
}
static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
}
static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
}
static const struct common_glue_ctx aesni_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
} }
};
static const struct common_glue_ctx aesni_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
} }
};
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(aesni_xts_tweak),
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx));
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(aesni_xts_tweak),
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx));
}
#else
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
......@@ -560,6 +638,8 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
#endif
#ifdef CONFIG_X86_64
static int rfc4106_init(struct crypto_tfm *tfm)
{
......
/*
* x86_64/AVX2 assembler optimized version of Blowfish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/linkage.h>
.file "blowfish-avx2-asm_64.S"
.data
.align 32
.Lprefetch_mask:
.long 0*64
.long 1*64
.long 2*64
.long 3*64
.long 4*64
.long 5*64
.long 6*64
.long 7*64
.Lbswap32_mask:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
.long 0x0c0d0e0f
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.text
/* structure of crypto context */
#define p 0
#define s0 ((16 + 2) * 4)
#define s1 ((16 + 2 + (1 * 256)) * 4)
#define s2 ((16 + 2 + (2 * 256)) * 4)
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
#define CTX %rdi
#define RIO %rdx
#define RS0 %rax
#define RS1 %r8
#define RS2 %r9
#define RS3 %r10
#define RLOOP %r11
#define RLOOPd %r11d
#define RXr0 %ymm8
#define RXr1 %ymm9
#define RXr2 %ymm10
#define RXr3 %ymm11
#define RXl0 %ymm12
#define RXl1 %ymm13
#define RXl2 %ymm14
#define RXl3 %ymm15
/* temp regs */
#define RT0 %ymm0
#define RT0x %xmm0
#define RT1 %ymm1
#define RT1x %xmm1
#define RIDX0 %ymm2
#define RIDX1 %ymm3
#define RIDX1x %xmm3
#define RIDX2 %ymm4
#define RIDX3 %ymm5
/* vpgatherdd mask and '-1' */
#define RNOT %ymm6
/* byte mask, (-1 >> 24) */
#define RBYTE %ymm7
/***********************************************************************
* 32-way AVX2 blowfish
***********************************************************************/
#define F(xl, xr) \
vpsrld $24, xl, RIDX0; \
vpsrld $16, xl, RIDX1; \
vpsrld $8, xl, RIDX2; \
vpand RBYTE, RIDX1, RIDX1; \
vpand RBYTE, RIDX2, RIDX2; \
vpand RBYTE, xl, RIDX3; \
\
vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpcmpeqd RIDX0, RIDX0, RIDX0; \
\
vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
vpcmpeqd RIDX1, RIDX1, RIDX1; \
vpaddd RT0, RT1, RT0; \
\
vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
vpxor RT0, RT1, RT0; \
\
vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpaddd RT0, RT1, RT0; \
\
vpxor RT0, xr, xr;
#define add_roundkey(xl, nmem) \
vpbroadcastd nmem, RT0; \
vpxor RT0, xl ## 0, xl ## 0; \
vpxor RT0, xl ## 1, xl ## 1; \
vpxor RT0, xl ## 2, xl ## 2; \
vpxor RT0, xl ## 3, xl ## 3;
#define round_enc() \
add_roundkey(RXr, p(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define round_dec() \
add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define init_round_constants() \
vpcmpeqd RNOT, RNOT, RNOT; \
leaq s0(CTX), RS0; \
leaq s1(CTX), RS1; \
leaq s2(CTX), RS2; \
leaq s3(CTX), RS3; \
vpsrld $24, RNOT, RBYTE;
#define transpose_2x2(x0, x1, t0) \
vpunpckldq x0, x1, t0; \
vpunpckhdq x0, x1, x1; \
\
vpunpcklqdq t0, x1, x0; \
vpunpckhqdq t0, x1, x1;
#define read_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0);
#define write_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0); \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3;
.align 8
__blowfish_enc_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: plaintext
* output:
* RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $1, RLOOPd;
add_roundkey(RXl, p+4*(0)(CTX));
.align 4
.L__enc_loop:
round_enc();
leal 2(RLOOPd), RLOOPd;
cmpl $17, RLOOPd;
jne .L__enc_loop;
add_roundkey(RXr, p+4*(17)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_enc_blk32)
.align 8
__blowfish_dec_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: ciphertext
* output:
* RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $14, RLOOPd;
add_roundkey(RXl, p+4*(17)(CTX));
.align 4
.L__dec_loop:
round_dec();
addl $-2, RLOOPd;
jns .L__dec_loop;
add_roundkey(RXr, p+4*(0)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_dec_blk32)
ENTRY(blowfish_ecb_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_enc_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_enc_32way)
ENTRY(blowfish_ecb_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_dec_32way)
ENTRY(blowfish_cbc_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
/* xor with src */
vmovq (%rdx), RT0x;
vpshufd $0x4f, RT0x, RT0x;
vinserti128 $1, 8(%rdx), RT0, RT0;
vpxor RT0, RXr0, RXr0;
vpxor 0*32+24(%rdx), RXl0, RXl0;
vpxor 1*32+24(%rdx), RXr1, RXr1;
vpxor 2*32+24(%rdx), RXl1, RXl1;
vpxor 3*32+24(%rdx), RXr2, RXr2;
vpxor 4*32+24(%rdx), RXl2, RXl2;
vpxor 5*32+24(%rdx), RXr3, RXr3;
vpxor 6*32+24(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_cbc_dec_32way)
ENTRY(blowfish_ctr_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
vzeroupper;
vpcmpeqd RT0, RT0, RT0;
vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
vpcmpeqd RT1x, RT1x, RT1x;
vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
vpxor RIDX0, RIDX0, RIDX0;
vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
vpcmpeqd RT1, RT1, RT1;
vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
vbroadcasti128 .Lbswap_iv_mask, RIDX0;
vbroadcasti128 .Lbswap128_mask, RIDX1;
/* load IV and byteswap */
vmovq (%rcx), RT1x;
vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
/* construct IVs */
vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl3;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr3;
/* store last IV */
vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
vmovq RT1x, (%rcx);
call __blowfish_enc_blk32;
/* dst = src ^ iv */
vpxor 0*32(%rdx), RXr0, RXr0;
vpxor 1*32(%rdx), RXl0, RXl0;
vpxor 2*32(%rdx), RXr1, RXr1;
vpxor 3*32(%rdx), RXl1, RXl1;
vpxor 4*32(%rdx), RXr2, RXr2;
vpxor 5*32(%rdx), RXl2, RXl2;
vpxor 6*32(%rdx), RXr3, RXr3;
vpxor 7*32(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ctr_32way)
/*
* Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/blowfish.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/blowfish.h>
#include <asm/crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#define BF_AVX2_PARALLEL_BLOCKS 32
/* 32-way AVX2 parallel cipher functions */
asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
if (fpu_enabled)
return true;
/* FPU is only used when chunk to be processed is large enough, so
* do not enable FPU until it is necessary.
*/
if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
return false;
kernel_fpu_begin();
return true;
}
static inline void bf_fpu_end(bool fpu_enabled)
{
if (fpu_enabled)
kernel_fpu_end();
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
{
bool fpu_enabled = false;
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
if (enc)
blowfish_ecb_enc_32way(ctx, wdst, wsrc);
else
blowfish_ecb_dec_32way(ctx, wdst, wsrc);
wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process multi-block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
do {
if (enc)
blowfish_enc_blk_4way(ctx, wdst, wsrc);
else
blowfish_dec_blk_4way(ctx, wdst, wsrc);
wsrc += bsize * BF_PARALLEL_BLOCKS;
wdst += bsize * BF_PARALLEL_BLOCKS;
nbytes -= bsize * BF_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (enc)
blowfish_enc_blk(ctx, wdst, wsrc);
else
blowfish_dec_blk(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
bf_fpu_end(fpu_enabled);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, false);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 last_iv;
int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
src -= BF_AVX2_PARALLEL_BLOCKS - 1;
dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process multi-block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
u64 ivs[BF_PARALLEL_BLOCKS - 1];
do {
nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
src -= BF_PARALLEL_BLOCKS - 1;
dst -= BF_PARALLEL_BLOCKS - 1;
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
ivs[i] = src[i];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
dst[i + 1] ^= ivs[i];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
bf_fpu_end(fpu_enabled);
return err;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[BF_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
blowfish_enc_blk(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
int i;
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
(__be64 *)walk->iv);
src += BF_AVX2_PARALLEL_BLOCKS;
dst += BF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process four block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
__be64 ctrblocks[BF_PARALLEL_BLOCKS];
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
do {
/* create ctrblks for parallel encrypt */
for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
ctrblocks[i] = cpu_to_be64(ctrblk++);
}
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += BF_PARALLEL_BLOCKS;
dst += BF_PARALLEL_BLOCKS;
nbytes -= bsize * BF_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
u64 ctrblk;
if (dst != src)
*dst = *src;
ctrblk = *(u64 *)walk->iv;
be64_add_cpu((__be64 *)walk->iv, 1);
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
bf_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
static struct crypto_alg bf_algs[6] = { {
.cra_name = "__ecb-blowfish-avx2",
.cra_driver_name = "__driver-ecb-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-blowfish-avx2",
.cra_driver_name = "__driver-cbc-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-blowfish-avx2",
.cra_driver_name = "__driver-ctr-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "ecb(blowfish)",
.cra_driver_name = "ecb-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(blowfish)",
.cra_driver_name = "cbc-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
} };
static int __init init(void)
{
u64 xcr0;
if (!cpu_has_avx2 || !cpu_has_osxsave) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
static void __exit fini(void)
{
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
MODULE_ALIAS("blowfish");
MODULE_ALIAS("blowfish-asm");
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
......@@ -32,40 +32,24 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/crypto/blowfish.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(blowfish_dec_blk);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
......
/*
* x86_64/AVX/AES-NI assembler implementation of Camellia
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -589,6 +589,10 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For XTS mode IV generation */
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
/*
* pre-SubByte transform
*
......@@ -1090,3 +1094,177 @@ ENTRY(camellia_ctr_16way)
ret;
ENDPROC(camellia_ctr_16way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
.align 8
camellia_xts_crypt_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
* %r8: index for input whitening key
* %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
*/
subq $(16 * 16), %rsp;
movq %rsp, %rax;
vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
/* load IV */
vmovdqu (%rcx), %xmm0;
vpxor 0 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 15 * 16(%rax);
vmovdqu %xmm0, 0 * 16(%rsi);
/* construct IVs */
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 1 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 14 * 16(%rax);
vmovdqu %xmm0, 1 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 2 * 16(%rdx), %xmm0, %xmm13;
vmovdqu %xmm0, 2 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 3 * 16(%rdx), %xmm0, %xmm12;
vmovdqu %xmm0, 3 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 4 * 16(%rdx), %xmm0, %xmm11;
vmovdqu %xmm0, 4 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 5 * 16(%rdx), %xmm0, %xmm10;
vmovdqu %xmm0, 5 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 6 * 16(%rdx), %xmm0, %xmm9;
vmovdqu %xmm0, 6 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 7 * 16(%rdx), %xmm0, %xmm8;
vmovdqu %xmm0, 7 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 8 * 16(%rdx), %xmm0, %xmm7;
vmovdqu %xmm0, 8 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 9 * 16(%rdx), %xmm0, %xmm6;
vmovdqu %xmm0, 9 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 10 * 16(%rdx), %xmm0, %xmm5;
vmovdqu %xmm0, 10 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 11 * 16(%rdx), %xmm0, %xmm4;
vmovdqu %xmm0, 11 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 12 * 16(%rdx), %xmm0, %xmm3;
vmovdqu %xmm0, 12 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 13 * 16(%rdx), %xmm0, %xmm2;
vmovdqu %xmm0, 13 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 14 * 16(%rdx), %xmm0, %xmm1;
vmovdqu %xmm0, 14 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 15 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 0 * 16(%rax);
vmovdqu %xmm0, 15 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vmovdqu %xmm0, (%rcx);
/* inpack16_pre: */
vmovq (key_table)(CTX, %r8, 8), %xmm15;
vpshufb .Lpack_bswap, %xmm15, %xmm15;
vpxor 0 * 16(%rax), %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
vpxor %xmm3, %xmm15, %xmm3;
vpxor %xmm4, %xmm15, %xmm4;
vpxor %xmm5, %xmm15, %xmm5;
vpxor %xmm6, %xmm15, %xmm6;
vpxor %xmm7, %xmm15, %xmm7;
vpxor %xmm8, %xmm15, %xmm8;
vpxor %xmm9, %xmm15, %xmm9;
vpxor %xmm10, %xmm15, %xmm10;
vpxor %xmm11, %xmm15, %xmm11;
vpxor %xmm12, %xmm15, %xmm12;
vpxor %xmm13, %xmm15, %xmm13;
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
call *%r9;
addq $(16 * 16), %rsp;
vpxor 0 * 16(%rsi), %xmm7, %xmm7;
vpxor 1 * 16(%rsi), %xmm6, %xmm6;
vpxor 2 * 16(%rsi), %xmm5, %xmm5;
vpxor 3 * 16(%rsi), %xmm4, %xmm4;
vpxor 4 * 16(%rsi), %xmm3, %xmm3;
vpxor 5 * 16(%rsi), %xmm2, %xmm2;
vpxor 6 * 16(%rsi), %xmm1, %xmm1;
vpxor 7 * 16(%rsi), %xmm0, %xmm0;
vpxor 8 * 16(%rsi), %xmm15, %xmm15;
vpxor 9 * 16(%rsi), %xmm14, %xmm14;
vpxor 10 * 16(%rsi), %xmm13, %xmm13;
vpxor 11 * 16(%rsi), %xmm12, %xmm12;
vpxor 12 * 16(%rsi), %xmm11, %xmm11;
vpxor 13 * 16(%rsi), %xmm10, %xmm10;
vpxor 14 * 16(%rsi), %xmm9, %xmm9;
vpxor 15 * 16(%rsi), %xmm8, %xmm8;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
ret;
ENDPROC(camellia_xts_crypt_16way)
ENTRY(camellia_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
xorl %r8d, %r8d; /* input whitening key, 0 for enc */
leaq __camellia_enc_blk16, %r9;
jmp camellia_xts_crypt_16way;
ENDPROC(camellia_xts_enc_16way)
ENTRY(camellia_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
cmpl $16, key_length(CTX);
movl $32, %r8d;
movl $24, %eax;
cmovel %eax, %r8d; /* input whitening key, last for dec */
leaq __camellia_dec_blk16, %r9;
jmp camellia_xts_crypt_16way;
ENDPROC(camellia_xts_dec_16way)
此差异已折叠。
/*
* Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
/* 32-way AVX2/AES-NI parallel cipher functions */
asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
nbytes);
}
static inline void camellia_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
&tfm->crt_flags);
}
struct crypt_priv {
struct camellia_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni-avx2",
.cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-camellia-aesni-avx2",
.cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-camellia-aesni-avx2",
.cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-camellia-aesni-avx2",
.cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = lrw_camellia_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-camellia-aesni-avx2",
.cra_driver_name = "__driver-xts-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(camellia)",
.cra_driver_name = "ecb-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(camellia)",
.cra_driver_name = "cbc-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(camellia)",
.cra_driver_name = "ctr-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(camellia)",
.cra_driver_name = "lrw-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(camellia)",
.cra_driver_name = "xts-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init camellia_aesni_init(void)
{
u64 xcr0;
if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX2 detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
static void __exit camellia_aesni_fini(void)
{
crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
module_init(camellia_aesni_init);
module_exit(camellia_aesni_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
MODULE_ALIAS("camellia");
MODULE_ALIAS("camellia-asm");
/*
* Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -26,16 +26,44 @@
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
/* 16-way AES-NI parallel cipher functions */
/* 16-way parallel cipher functions (avx/aes-ni) */
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_ctr_16way);
asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_enc_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_enc);
void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_dec_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_dec);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 3,
......@@ -69,6 +97,19 @@ static const struct common_glue_ctx camellia_ctr = {
} }
};
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
......@@ -101,6 +142,19 @@ static const struct common_glue_ctx camellia_dec_cbc = {
} }
};
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
......@@ -261,54 +315,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cmll_algs[10] = { {
......
......@@ -4,7 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -227,6 +227,8 @@
.data
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
......@@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way)
ret;
ENDPROC(cast6_ctr_8way)
ENTRY(cast6_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_enc_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_enc_8way)
ENTRY(cast6_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_dec_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_dec_8way)
......@@ -4,6 +4,8 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
......@@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
le128 *iv);
asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_encrypt));
}
static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_decrypt));
}
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
......@@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = {
} }
};
static const struct common_glue_ctx cast6_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
} }
};
static const struct common_glue_ctx cast6_dec = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
......@@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = {
} }
};
static const struct common_glue_ctx cast6_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
......@@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cast6_algs[10] = { {
......
......@@ -101,9 +101,8 @@
* uint crc32_pclmul_le_16(unsigned char const *buffer,
* size_t len, uint crc32)
*/
.globl crc32_pclmul_le_16
.align 4, 0x90
crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
movdqa 0x20(BUF), %xmm3
......@@ -244,3 +243,4 @@ fold_64:
pextrd $0x01, %xmm1, %eax
ret
ENDPROC(crc32_pclmul_le_16)
/*
* Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
*
* The white paper on CRC32C calculations with PCLMULQDQ instruction can be
* The white papers on CRC32C calculations with PCLMULQDQ instruction can be
* downloaded from:
* http://download.intel.com/design/intarch/papers/323405.pdf
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
*
* Copyright (C) 2012 Intel Corporation.
*
......@@ -42,6 +43,7 @@
* SOFTWARE.
*/
#include <asm/inst.h>
#include <linux/linkage.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
......@@ -225,10 +227,10 @@ LABEL crc_ %i
movdqa (bufp), %xmm0 # 2 consts: K1:K2
movq crc_init, %xmm1 # CRC for block 1
pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2
PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax
......
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -89,3 +89,62 @@
vpxor (6*16)(src), x6, x6; \
vpxor (7*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
t1, xts_gf128mul_and_shl1_mask) \
vmovdqa xts_gf128mul_and_shl1_mask, t0; \
\
/* load IV */ \
vmovdqu (iv), tiv; \
vpxor (0*16)(src), tiv, x0; \
vmovdqu tiv, (0*16)(dst); \
\
/* construct and store IVs, also xor with source */ \
gf128mul_x_ble(tiv, t0, t1); \
vpxor (1*16)(src), tiv, x1; \
vmovdqu tiv, (1*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (2*16)(src), tiv, x2; \
vmovdqu tiv, (2*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (3*16)(src), tiv, x3; \
vmovdqu tiv, (3*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (4*16)(src), tiv, x4; \
vmovdqu tiv, (4*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (5*16)(src), tiv, x5; \
vmovdqu tiv, (5*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (6*16)(src), tiv, x6; \
vmovdqu tiv, (6*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (7*16)(src), tiv, x7; \
vmovdqu tiv, (7*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vmovdqu tiv, (iv);
#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(dst), x0, x0; \
vpxor (1*16)(dst), x1, x1; \
vpxor (2*16)(dst), x2, x2; \
vpxor (3*16)(dst), x3, x3; \
vpxor (4*16)(dst), x4, x4; \
vpxor (5*16)(dst), x5, x5; \
vpxor (6*16)(dst), x6, x6; \
vpxor (7*16)(dst), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
/*
* Shared glue code for 128bit block ciphers, AVX2 assembler macros
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu (0*32)(src), x0; \
vmovdqu (1*32)(src), x1; \
vmovdqu (2*32)(src), x2; \
vmovdqu (3*32)(src), x3; \
vmovdqu (4*32)(src), x4; \
vmovdqu (5*32)(src), x5; \
vmovdqu (6*32)(src), x6; \
vmovdqu (7*32)(src), x7;
#define store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu x0, (0*32)(dst); \
vmovdqu x1, (1*32)(dst); \
vmovdqu x2, (2*32)(dst); \
vmovdqu x3, (3*32)(dst); \
vmovdqu x4, (4*32)(dst); \
vmovdqu x5, (5*32)(dst); \
vmovdqu x6, (6*32)(dst); \
vmovdqu x7, (7*32)(dst);
#define store_cbc_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7, t0) \
vpxor t0, t0, t0; \
vinserti128 $1, (src), t0, t0; \
vpxor t0, x0, x0; \
vpxor (0*32+16)(src), x1, x1; \
vpxor (1*32+16)(src), x2, x2; \
vpxor (2*32+16)(src), x3, x3; \
vpxor (3*32+16)(src), x4, x4; \
vpxor (4*32+16)(src), x5, x5; \
vpxor (5*32+16)(src), x6, x6; \
vpxor (6*32+16)(src), x7, x7; \
store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
vpcmpeqq minus_one, x, tmp1; \
vpcmpeqq minus_two, x, tmp2; \
vpsubq minus_two, x, x; \
vpor tmp2, tmp1, tmp1; \
vpslldq $8, tmp1, tmp1; \
vpsubq tmp1, x, x;
#define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \
t1x, t2, t2x, t3, t3x, t4, t5) \
vpcmpeqd t0, t0, t0; \
vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \
vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\
\
/* load IV and byteswap */ \
vmovdqu (iv), t2x; \
vmovdqa t2x, t3x; \
inc_le128(t2x, t0x, t1x); \
vbroadcasti128 bswap, t1; \
vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \
vpshufb t1, t2, x0; \
\
/* construct IVs */ \
add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \
vpshufb t1, t2, x1; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x2; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x3; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x4; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x5; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x6; \
add2_le128(t2, t0, t4, t3, t5); \
vpshufb t1, t2, x7; \
vextracti128 $1, t2, t2x; \
inc_le128(t2x, t0x, t3x); \
vmovdqu t2x, (iv);
#define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*32)(src), x0, x0; \
vpxor (1*32)(src), x1, x1; \
vpxor (2*32)(src), x2, x2; \
vpxor (3*32)(src), x3, x3; \
vpxor (4*32)(src), x4, x4; \
vpxor (5*32)(src), x5, x5; \
vpxor (6*32)(src), x6, x6; \
vpxor (7*32)(src), x7, x7; \
store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
vpsrad $31, iv, tmp0; \
vpaddq iv, iv, tmp1; \
vpsllq $2, iv, iv; \
vpshufd $0x13, tmp0, tmp0; \
vpsrad $31, tmp1, tmp1; \
vpand mask2, tmp0, tmp0; \
vpshufd $0x13, tmp1, tmp1; \
vpxor tmp0, iv, iv; \
vpand mask1, tmp1, tmp1; \
vpxor tmp1, iv, iv;
#define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \
tivx, t0, t0x, t1, t1x, t2, t2x, t3, \
xts_gf128mul_and_shl1_mask_0, \
xts_gf128mul_and_shl1_mask_1) \
vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \
\
/* load IV and construct second IV */ \
vmovdqu (iv), tivx; \
vmovdqa tivx, t0x; \
gf128mul_x_ble(tivx, t1x, t2x); \
vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \
vinserti128 $1, tivx, t0, tiv; \
vpxor (0*32)(src), tiv, x0; \
vmovdqu tiv, (0*32)(dst); \
\
/* construct and store IVs, also xor with source */ \
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (1*32)(src), tiv, x1; \
vmovdqu tiv, (1*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (2*32)(src), tiv, x2; \
vmovdqu tiv, (2*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (3*32)(src), tiv, x3; \
vmovdqu tiv, (3*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (4*32)(src), tiv, x4; \
vmovdqu tiv, (4*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (5*32)(src), tiv, x5; \
vmovdqu tiv, (5*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (6*32)(src), tiv, x6; \
vmovdqu tiv, (6*32)(dst); \
\
gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
vpxor (7*32)(src), tiv, x7; \
vmovdqu tiv, (7*32)(dst); \
\
vextracti128 $1, tiv, tivx; \
gf128mul_x_ble(tivx, t1x, t2x); \
vmovdqu tivx, (iv);
#define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*32)(dst), x0, x0; \
vpxor (1*32)(dst), x1, x1; \
vpxor (2*32)(dst), x2, x2; \
vpxor (3*32)(dst), x3, x3; \
vpxor (4*32)(dst), x4, x4; \
vpxor (5*32)(dst), x5, x5; \
vpxor (6*32)(dst), x6, x6; \
vpxor (7*32)(dst), x7, x7; \
store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
/*
* Shared glue code for 128bit block ciphers
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
......@@ -304,4 +304,99 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
void *ctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
unsigned int num_blocks, func_bytes;
unsigned int i;
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.xts(ctx, dst, src,
(le128 *)walk->iv);
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
return nbytes;
}
/* for implementations implementing faster XTS IV generator */
int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes,
void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
void *tweak_ctx, void *crypt_ctx)
{
const unsigned int bsize = 128 / 8;
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
nbytes = walk.nbytes;
if (!nbytes)
return err;
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled,
nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
while (nbytes) {
nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
nbytes = walk.nbytes;
}
glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
common_glue_func_t fn)
{
le128 ivblk = *iv;
/* generate next IV */
le128_gf128mul_x_ble(iv, &ivblk);
/* CC <- T xor C */
u128_xor(dst, src, (u128 *)&ivblk);
/* PP <- D(Key2,CC) */
fn(ctx, (u8 *)dst, (u8 *)dst);
/* P <- T xor PP */
u128_xor(dst, dst, (u128 *)&ivblk);
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
MODULE_LICENSE("GPL");
......@@ -4,8 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -34,6 +33,8 @@
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.text
......@@ -739,3 +740,43 @@ ENTRY(serpent_ctr_8way_avx)
ret;
ENDPROC(serpent_ctr_8way_avx)
ENTRY(serpent_xts_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
call __serpent_enc_blk8_avx;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(serpent_xts_enc_8way_avx)
ENTRY(serpent_xts_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
call __serpent_dec_blk8_avx;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
ret;
ENDPROC(serpent_xts_dec_8way_avx)
此差异已折叠。
此差异已折叠。
......@@ -4,8 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Glue code based on serpent_sse2_glue.c by:
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -42,7 +41,32 @@
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
/* 8-way parallel cipher functions */
asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
......@@ -52,6 +76,22 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__serpent_encrypt));
}
EXPORT_SYMBOL_GPL(serpent_xts_enc);
void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__serpent_decrypt));
}
EXPORT_SYMBOL_GPL(serpent_xts_dec);
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
......@@ -75,7 +115,20 @@ static const struct common_glue_ctx serpent_ctr = {
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
} }
};
static const struct common_glue_ctx serpent_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
} }
};
......@@ -105,6 +158,19 @@ static const struct common_glue_ctx serpent_dec_cbc = {
} }
};
static const struct common_glue_ctx serpent_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
......@@ -187,13 +253,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
__serpent_decrypt(ctx->ctx, srcdst, srcdst);
}
struct serpent_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct serpent_ctx serpent_ctx;
};
static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
......@@ -206,6 +267,7 @@ static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
return lrw_init_table(&ctx->lrw_table, key + keylen -
SERPENT_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
......@@ -259,20 +321,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
static void lrw_exit_tfm(struct crypto_tfm *tfm)
void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
{
struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
struct serpent_xts_ctx {
struct serpent_ctx tweak_ctx;
struct serpent_ctx crypt_ctx;
};
static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
......@@ -294,59 +352,26 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
/* second half of xts-key is for tweak */
return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
}
EXPORT_SYMBOL_GPL(xts_serpent_setkey);
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__serpent_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__serpent_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg serpent_algs[10] = { {
......@@ -417,7 +442,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_exit_tfm,
.cra_exit = lrw_serpent_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE +
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
/*
* Cryptographic API.
*
* Glue code for the SHA256 Secure Hash Algorithm assembler
* implementation using supplemental SSE3 / AVX / AVX2 instructions.
*
* This file is based on sha256_generic.c
*
* Copyright (C) 2013 Intel Corporation.
*
* Author:
* Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>
asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
u64 rounds);
#endif
static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
static int sha256_ssse3_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_transform_asm(sctx->buf, sctx->state, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_transform_asm(data + done, sctx->state, (u64) rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
return 0;
}
if (!irq_fpu_usable()) {
res = crypto_sha256_update(desc, data, len);
} else {
kernel_fpu_begin();
res = __sha256_ssse3_update(desc, data, len, partial);
kernel_fpu_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
if (!irq_fpu_usable()) {
crypto_sha256_update(desc, padding, padlen);
crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_fpu_begin();
/* We need to fill a whole block for __sha256_ssse3_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_ssse3_update(desc, padding, padlen, index);
}
__sha256_ssse3_update(desc, (const u8 *)&bits,
sizeof(bits), 56);
kernel_fpu_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha256_ssse3_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_ssse3_init,
.update = sha256_ssse3_update,
.final = sha256_ssse3_final,
.export = sha256_ssse3_export,
.import = sha256_ssse3_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
u64 xcr0;
if (!cpu_has_avx || !cpu_has_osxsave)
return false;
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return false;
}
return true;
}
#endif
static int __init sha256_ssse3_mod_init(void)
{
/* test for SSE3 first */
if (cpu_has_ssse3)
sha256_transform_asm = sha256_transform_ssse3;
#ifdef CONFIG_AS_AVX
/* allow AVX to override SSSE3, it's a little faster */
if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
if (boot_cpu_has(X86_FEATURE_AVX2))
sha256_transform_asm = sha256_transform_rorx;
else
#endif
sha256_transform_asm = sha256_transform_avx;
}
#endif
if (sha256_transform_asm) {
#ifdef CONFIG_AS_AVX
if (sha256_transform_asm == sha256_transform_avx)
pr_info("Using AVX optimized SHA-256 implementation\n");
#ifdef CONFIG_AS_AVX2
else if (sha256_transform_asm == sha256_transform_rorx)
pr_info("Using AVX2 optimized SHA-256 implementation\n");
#endif
else
#endif
pr_info("Using SSSE3 optimized SHA-256 implementation\n");
return crypto_register_shash(&alg);
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
return -ENODEV;
}
static void __exit sha256_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha256");
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -293,6 +293,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3)
#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
......
此差异已折叠。
此差异已折叠。
......@@ -32,6 +32,7 @@ cryptomgr-y := algboss.o testmgr.o
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -26,3 +26,4 @@ obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o
obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册