提交 9b17608f 编写于 作者: M Martin Willi 提交者: Herbert Xu

crypto: x86/chacha20 - Use larger block functions more aggressively

Now that all block functions support partial lengths, engage the wider
block sizes more aggressively. This prevents using smaller block
functions multiple times, where the next larger block function would
have been faster.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
上级 c3b734dd
...@@ -29,6 +29,12 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, ...@@ -29,6 +29,12 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
static bool chacha20_use_avx2; static bool chacha20_use_avx2;
#endif #endif
/*
 * chacha20_advance - count the blocks covered by one block-function call
 * @len:       number of bytes handed to the block function
 * @maxblocks: upper bound on the number of blocks to count
 *
 * Clamps @len to at most @maxblocks * CHACHA20_BLOCK_SIZE bytes, then
 * returns how many CHACHA20_BLOCK_SIZE-sized blocks that length spans.
 * A trailing partial block is counted as one whole block.  Callers add
 * the result to state[12].
 */
static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
{
	unsigned int limit = maxblocks * CHACHA20_BLOCK_SIZE;
	unsigned int capped = min(len, limit);

	/* Round up first so a partial final block still counts as one. */
	return round_up(capped, CHACHA20_BLOCK_SIZE) / CHACHA20_BLOCK_SIZE;
}
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes) unsigned int bytes)
{ {
...@@ -41,6 +47,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, ...@@ -41,6 +47,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
dst += CHACHA20_BLOCK_SIZE * 8; dst += CHACHA20_BLOCK_SIZE * 8;
state[12] += 8; state[12] += 8;
} }
if (bytes > CHACHA20_BLOCK_SIZE * 4) {
chacha20_8block_xor_avx2(state, dst, src, bytes);
state[12] += chacha20_advance(bytes, 8);
return;
}
} }
#endif #endif
while (bytes >= CHACHA20_BLOCK_SIZE * 4) { while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
...@@ -50,15 +61,14 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, ...@@ -50,15 +61,14 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
dst += CHACHA20_BLOCK_SIZE * 4; dst += CHACHA20_BLOCK_SIZE * 4;
state[12] += 4; state[12] += 4;
} }
while (bytes >= CHACHA20_BLOCK_SIZE) { if (bytes > CHACHA20_BLOCK_SIZE) {
chacha20_block_xor_ssse3(state, dst, src, bytes); chacha20_4block_xor_ssse3(state, dst, src, bytes);
bytes -= CHACHA20_BLOCK_SIZE; state[12] += chacha20_advance(bytes, 4);
src += CHACHA20_BLOCK_SIZE; return;
dst += CHACHA20_BLOCK_SIZE;
state[12]++;
} }
if (bytes) { if (bytes) {
chacha20_block_xor_ssse3(state, dst, src, bytes); chacha20_block_xor_ssse3(state, dst, src, bytes);
state[12]++;
} }
} }
...@@ -82,17 +92,16 @@ static int chacha20_simd(struct skcipher_request *req) ...@@ -82,17 +92,16 @@ static int chacha20_simd(struct skcipher_request *req)
kernel_fpu_begin(); kernel_fpu_begin();
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { while (walk.nbytes > 0) {
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, unsigned int nbytes = walk.nbytes;
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
err = skcipher_walk_done(&walk, if (nbytes < walk.total)
walk.nbytes % CHACHA20_BLOCK_SIZE); nbytes = round_down(nbytes, walk.stride);
}
if (walk.nbytes) {
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes); nbytes);
err = skcipher_walk_done(&walk, 0);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
} }
kernel_fpu_end(); kernel_fpu_end();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册