提交 d3dfbfe2 编写于 作者: Josh Poimboeuf 提交者: Herbert Xu

crypto: x86/sha256-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

There's no need to use RBP as a temporary register for the TBL value,
because it always stores the same value: the address of the K256 table.
Instead just reference the address of K256 directly.
Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
上级 673ac6fb
...@@ -98,8 +98,6 @@ d = %r8d ...@@ -98,8 +98,6 @@ d = %r8d
e = %edx # clobbers NUM_BLKS e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP y3 = %esi # clobbers INP
TBL = %rbp
SRND = CTX # SRND is same register as CTX SRND = CTX # SRND is same register as CTX
a = %eax a = %eax
...@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE ...@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
ENTRY(sha256_transform_rorx) ENTRY(sha256_transform_rorx)
.align 32 .align 32
pushq %rbx pushq %rbx
pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
...@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx) ...@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
mov CTX, _CTX(%rsp) mov CTX, _CTX(%rsp)
loop0: loop0:
lea K256(%rip), TBL
## Load first 16 dwords from two blocks ## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0 VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1 VMOVDQ 1*32(INP),XTMP1
...@@ -597,19 +592,19 @@ last_block_enter: ...@@ -597,19 +592,19 @@ last_block_enter:
.align 16 .align 16
loop1: loop1:
vpaddd 0*32(TBL, SRND), X0, XFER vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND) vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32 FOUR_ROUNDS_AND_SCHED _XFER + 0*32
vpaddd 1*32(TBL, SRND), X0, XFER vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND) vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32 FOUR_ROUNDS_AND_SCHED _XFER + 1*32
vpaddd 2*32(TBL, SRND), X0, XFER vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND) vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32 FOUR_ROUNDS_AND_SCHED _XFER + 2*32
vpaddd 3*32(TBL, SRND), X0, XFER vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND) vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32 FOUR_ROUNDS_AND_SCHED _XFER + 3*32
...@@ -619,10 +614,11 @@ loop1: ...@@ -619,10 +614,11 @@ loop1:
loop2: loop2:
## Do last 16 rounds with no scheduling ## Do last 16 rounds with no scheduling
vpaddd 0*32(TBL, SRND), X0, XFER vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND) vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32 DO_4ROUNDS _XFER + 0*32
vpaddd 1*32(TBL, SRND), X1, XFER
vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND) vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32 DO_4ROUNDS _XFER + 1*32
add $2*32, SRND add $2*32, SRND
...@@ -676,9 +672,6 @@ loop3: ...@@ -676,9 +672,6 @@ loop3:
ja done_hash ja done_hash
do_last_block: do_last_block:
#### do last block
lea K256(%rip), TBL
VMOVDQ 0*16(INP),XWORD0 VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1 VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2 VMOVDQ 2*16(INP),XWORD2
...@@ -718,7 +711,6 @@ done_hash: ...@@ -718,7 +711,6 @@ done_hash:
popq %r14 popq %r14
popq %r13 popq %r13
popq %r12 popq %r12
popq %rbp
popq %rbx popq %rbx
ret ret
ENDPROC(sha256_transform_rorx) ENDPROC(sha256_transform_rorx)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册