提交 87a75b3e 编写于 作者: Andy Polyakov

ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function.

The ecp_nistz256_scatter_w7 function is called when an application
attempts to use a custom generator, i.e. rarely. Even though the
non-x86_64 versions were wrong, this didn't affect point operations;
they were just not as fast as expected.
Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6738)
上级 f40e0a34
......@@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7:
.Loop_scatter_w7:
ldr $mask,[$inp],#4
subs $index,$index,#1
strb $mask,[$out,#64*0-1]
strb $mask,[$out,#64*0]
mov $mask,$mask,lsr#8
strb $mask,[$out,#64*1-1]
strb $mask,[$out,#64*1]
mov $mask,$mask,lsr#8
strb $mask,[$out,#64*2-1]
strb $mask,[$out,#64*2]
mov $mask,$mask,lsr#8
strb $mask,[$out,#64*3-1]
strb $mask,[$out,#64*3]
add $out,$out,#64*4
bne .Loop_scatter_w7
......
......@@ -1776,21 +1776,21 @@ ecp_nistz256_scatter_w7:
prfm pstl1strm,[$out,#4096+64*5]
prfm pstl1strm,[$out,#4096+64*6]
prfm pstl1strm,[$out,#4096+64*7]
strb w3,[$out,#64*0-1]
strb w3,[$out,#64*0]
lsr x3,x3,#8
strb w3,[$out,#64*1-1]
strb w3,[$out,#64*1]
lsr x3,x3,#8
strb w3,[$out,#64*2-1]
strb w3,[$out,#64*2]
lsr x3,x3,#8
strb w3,[$out,#64*3-1]
strb w3,[$out,#64*3]
lsr x3,x3,#8
strb w3,[$out,#64*4-1]
strb w3,[$out,#64*4]
lsr x3,x3,#8
strb w3,[$out,#64*5-1]
strb w3,[$out,#64*5]
lsr x3,x3,#8
strb w3,[$out,#64*6-1]
strb w3,[$out,#64*6]
lsr x3,x3,#8
strb w3,[$out,#64*7-1]
strb w3,[$out,#64*7]
add $out,$out,#64*8
b.ne .Loop_scatter_w7
......
......@@ -2297,21 +2297,21 @@ ecp_nistz256_scatter_w7:
.Loop_scatter_w7:
ldu r0,8($inp)
stb r0,64*0-1($out)
stb r0,64*0($out)
srdi r0,r0,8
stb r0,64*1-1($out)
stb r0,64*1($out)
srdi r0,r0,8
stb r0,64*2-1($out)
stb r0,64*2($out)
srdi r0,r0,8
stb r0,64*3-1($out)
stb r0,64*3($out)
srdi r0,r0,8
stb r0,64*4-1($out)
stb r0,64*4($out)
srdi r0,r0,8
stb r0,64*5-1($out)
stb r0,64*5($out)
srdi r0,r0,8
stb r0,64*6-1($out)
stb r0,64*6($out)
srdi r0,r0,8
stb r0,64*7-1($out)
stb r0,64*7($out)
addi $out,$out,64*8
bdnz .Loop_scatter_w7
......
......@@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7:
ld [$inp],%l0
add $inp,4,$inp
subcc $index,1,$index
stb %l0,[$out+64*0-1]
stb %l0,[$out+64*0]
srl %l0,8,%l1
stb %l1,[$out+64*1-1]
stb %l1,[$out+64*1]
srl %l0,16,%l2
stb %l2,[$out+64*2-1]
stb %l2,[$out+64*2]
srl %l0,24,%l3
stb %l3,[$out+64*3-1]
stb %l3,[$out+64*3]
bne .Loop_scatter_w7
add $out,64*4,$out
......
......@@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) {
&mov ("esi",&wparam(1));
&mov ("ebp",&wparam(2));
&lea ("edi",&DWP(-1,"edi","ebp"));
&lea ("edi",&DWP(0,"edi","ebp"));
&mov ("ebp",64/4);
&set_label("scatter_w7_loop");
&mov ("eax",&DWP(0,"esi"));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册