提交 120a9e1a 编写于 作者: A Andy Polyakov

bn/asm/sparcv9-mont.pl: fix squaring code path.

This module is used only with odd input lengths, i.e. not used in normal
PKI cases, on contemporary processors. The problem was "illuminated" by
fuzzing tests.
Reviewed-by: NRichard Levitte <levitte@openssl.org>
上级 3ba4dac6
......@@ -300,7 +300,7 @@ ___
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
######## code without following dedicated squaring procedure.
########
$sbit="%i2"; # re-use $bp!
$sbit="%o5";
$code.=<<___;
.align 32
......@@ -413,7 +413,7 @@ $code.=<<___;
mulx $apj,$mul0,$acc0
mulx $npj,$mul1,$acc1
add $acc0,$car0,$car0
add $tpj,$car1,$car1
add $tpj,$sbit,$sbit
ld [$ap+$j],$apj ! ap[j]
and $car0,$mask,$acc0
ld [$np+$j],$npj ! np[j]
......@@ -422,7 +422,7 @@ $code.=<<___;
ld [$tp+8],$tpj ! tp[j]
add $acc0,$acc0,$acc0
add $j,4,$j ! j++
or $sbit,$acc0,$acc0
add $sbit,$acc0,$acc0
srlx $acc0,32,$sbit
and $acc0,$mask,$acc0
cmp $j,$num
......@@ -436,12 +436,12 @@ $code.=<<___;
mulx $apj,$mul0,$acc0
mulx $npj,$mul1,$acc1
add $acc0,$car0,$car0
add $tpj,$car1,$car1
add $tpj,$sbit,$sbit
and $car0,$mask,$acc0
srlx $car0,32,$car0
add $acc1,$car1,$car1
add $acc0,$acc0,$acc0
or $sbit,$acc0,$acc0
add $sbit,$acc0,$acc0
srlx $acc0,32,$sbit
and $acc0,$mask,$acc0
add $acc0,$car1,$car1
......@@ -449,7 +449,7 @@ $code.=<<___;
srlx $car1,32,$car1
add $car0,$car0,$car0
or $sbit,$car0,$car0
add $sbit,$car0,$car0
add $car0,$car1,$car1
add $car2,$car1,$car1
st $car1,[$tp+4]
......@@ -509,7 +509,7 @@ $code.=<<___;
.Lsqr_inner2:
mulx $apj,$mul0,$acc0
mulx $npj,$mul1,$acc1
add $tpj,$car1,$car1
add $tpj,$sbit,$sbit
add $acc0,$car0,$car0
ld [$ap+$j],$apj ! ap[j]
and $car0,$mask,$acc0
......@@ -517,7 +517,7 @@ $code.=<<___;
srlx $car0,32,$car0
add $acc0,$acc0,$acc0
ld [$tp+8],$tpj ! tp[j]
or $sbit,$acc0,$acc0
add $sbit,$acc0,$acc0
add $j,4,$j ! j++
srlx $acc0,32,$sbit
and $acc0,$mask,$acc0
......@@ -532,12 +532,12 @@ $code.=<<___;
.Lsqr_no_inner2:
mulx $apj,$mul0,$acc0
mulx $npj,$mul1,$acc1
add $tpj,$car1,$car1
add $tpj,$sbit,$sbit
add $acc0,$car0,$car0
and $car0,$mask,$acc0
srlx $car0,32,$car0
add $acc0,$acc0,$acc0
or $sbit,$acc0,$acc0
add $sbit,$acc0,$acc0
srlx $acc0,32,$sbit
and $acc0,$mask,$acc0
add $acc0,$car1,$car1
......@@ -546,7 +546,7 @@ $code.=<<___;
srlx $car1,32,$car1
add $car0,$car0,$car0
or $sbit,$car0,$car0
add $sbit,$car0,$car0
add $car0,$car1,$car1
add $car2,$car1,$car1
st $car1,[$tp+4]
......@@ -591,14 +591,17 @@ $code.=<<___;
!.Lsqr_last
mulx $npj,$mul1,$acc1
add $tpj,$car1,$car1
add $tpj,$acc0,$acc0
srlx $acc0,32,$tmp0
and $acc0,$mask,$acc0
add $tmp0,$sbit,$sbit
add $acc0,$car1,$car1
add $acc1,$car1,$car1
st $car1,[$tp]
srlx $car1,32,$car1
add $car0,$car0,$car0 ! recover $car0
or $sbit,$car0,$car0
add $sbit,$car0,$car0
add $car0,$car1,$car1
add $car2,$car1,$car1
st $car1,[$tp+4]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册