提交 b2ccb8c3 编写于 作者: A Ard Biesheuvel 提交者: Zheng Zengkai

crypto: arm64/sha1-ce - simplify NEON yield

mainline inclusion
from mainline-v5.12-rc1
commit 5a69e1b7
category: bugfix
bugzilla: 172149 https://gitee.com/openeuler/kernel/issues/I4CZ7H
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5a69e1b73d54

-----------------------------------------------

Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.

This reverts commit 7df8d164.
Signed-off-by: NArd Biesheuvel <ardb@kernel.org>
Signed-off-by: NHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NWei Li <liwei391@huawei.com>
Reviewed-by: NJason Yan <yanaijie@huawei.com>
Reviewed-by: NHanjun Guo <guohanjun@huawei.com>
Signed-off-by: NChen Jun <chenjun102@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 7c8417dd
...@@ -62,40 +62,34 @@ ...@@ -62,40 +62,34 @@
.endm .endm
/* /*
* void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
* int blocks) * int blocks)
*/ */
SYM_FUNC_START(sha1_ce_transform) SYM_FUNC_START(sha1_ce_transform)
frame_push 3
mov x19, x0
mov x20, x1
mov x21, x2
/* load round constants */ /* load round constants */
0: loadrc k0.4s, 0x5a827999, w6 loadrc k0.4s, 0x5a827999, w6
loadrc k1.4s, 0x6ed9eba1, w6 loadrc k1.4s, 0x6ed9eba1, w6
loadrc k2.4s, 0x8f1bbcdc, w6 loadrc k2.4s, 0x8f1bbcdc, w6
loadrc k3.4s, 0xca62c1d6, w6 loadrc k3.4s, 0xca62c1d6, w6
/* load state */ /* load state */
ld1 {dgav.4s}, [x19] ld1 {dgav.4s}, [x0]
ldr dgb, [x19, #16] ldr dgb, [x0, #16]
/* load sha1_ce_state::finalize */ /* load sha1_ce_state::finalize */
ldr_l w4, sha1_ce_offsetof_finalize, x4 ldr_l w4, sha1_ce_offsetof_finalize, x4
ldr w4, [x19, x4] ldr w4, [x0, x4]
/* load input */ /* load input */
1: ld1 {v8.4s-v11.4s}, [x20], #64 0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w21, w21, #1 sub w2, w2, #1
CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b ) CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b ) CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b ) CPU_LE( rev32 v11.16b, v11.16b )
2: add t0.4s, v8.4s, k0.4s 1: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb add_update c, ev, k0, 8, 9, 10, 11, dgb
...@@ -126,25 +120,18 @@ CPU_LE( rev32 v11.16b, v11.16b ) ...@@ -126,25 +120,18 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s add dgav.4s, dgav.4s, dg0v.4s
cbz w21, 3f cbz w2, 2f
cond_yield 3f, x5
if_will_cond_yield_neon
st1 {dgav.4s}, [x19]
str dgb, [x19, #16]
do_cond_yield_neon
b 0b b 0b
endif_yield_neon
b 1b
/* /*
* Final block: add padding and total bit count. * Final block: add padding and total bit count.
* Skip if the input size was not a round multiple of the block size, * Skip if the input size was not a round multiple of the block size,
* the padding is handled by the C code in that case. * the padding is handled by the C code in that case.
*/ */
3: cbz x4, 4f 2: cbz x4, 3f
ldr_l w4, sha1_ce_offsetof_count, x4 ldr_l w4, sha1_ce_offsetof_count, x4
ldr x4, [x19, x4] ldr x4, [x0, x4]
movi v9.2d, #0 movi v9.2d, #0
mov x8, #0x80000000 mov x8, #0x80000000
movi v10.2d, #0 movi v10.2d, #0
...@@ -153,11 +140,11 @@ CPU_LE( rev32 v11.16b, v11.16b ) ...@@ -153,11 +140,11 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0 mov x4, #0
mov v11.d[0], xzr mov v11.d[0], xzr
mov v11.d[1], x7 mov v11.d[1], x7
b 2b b 1b
/* store new state */ /* store new state */
4: st1 {dgav.4s}, [x19] 3: st1 {dgav.4s}, [x0]
str dgb, [x19, #16] str dgb, [x0, #16]
frame_pop mov w0, w2
ret ret
SYM_FUNC_END(sha1_ce_transform) SYM_FUNC_END(sha1_ce_transform)
...@@ -29,14 +29,22 @@ struct sha1_ce_state { ...@@ -29,14 +29,22 @@ struct sha1_ce_state {
extern const u32 sha1_ce_offsetof_count; extern const u32 sha1_ce_offsetof_count;
extern const u32 sha1_ce_offsetof_finalize; extern const u32 sha1_ce_offsetof_finalize;
asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, asmlinkage int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
int blocks); int blocks);
static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src, static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src,
int blocks) int blocks)
{ {
sha1_ce_transform(container_of(sst, struct sha1_ce_state, sst), src, while (blocks) {
blocks); int rem;
kernel_neon_begin();
rem = sha1_ce_transform(container_of(sst, struct sha1_ce_state,
sst), src, blocks);
kernel_neon_end();
src += (blocks - rem) * SHA1_BLOCK_SIZE;
blocks = rem;
}
} }
const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
...@@ -51,9 +59,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, ...@@ -51,9 +59,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
return crypto_sha1_update(desc, data, len); return crypto_sha1_update(desc, data, len);
sctx->finalize = 0; sctx->finalize = 0;
kernel_neon_begin();
sha1_base_do_update(desc, data, len, __sha1_ce_transform); sha1_base_do_update(desc, data, len, __sha1_ce_transform);
kernel_neon_end();
return 0; return 0;
} }
...@@ -73,11 +79,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, ...@@ -73,11 +79,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
*/ */
sctx->finalize = finalize; sctx->finalize = finalize;
kernel_neon_begin();
sha1_base_do_update(desc, data, len, __sha1_ce_transform); sha1_base_do_update(desc, data, len, __sha1_ce_transform);
if (!finalize) if (!finalize)
sha1_base_do_finalize(desc, __sha1_ce_transform); sha1_base_do_finalize(desc, __sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out); return sha1_base_finish(desc, out);
} }
...@@ -89,9 +93,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) ...@@ -89,9 +93,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
return crypto_sha1_finup(desc, NULL, 0, out); return crypto_sha1_finup(desc, NULL, 0, out);
sctx->finalize = 0; sctx->finalize = 0;
kernel_neon_begin();
sha1_base_do_finalize(desc, __sha1_ce_transform); sha1_base_do_finalize(desc, __sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out); return sha1_base_finish(desc, out);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册