From 87a75b3e5c04a1696208c279f32d1114b862cfed Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 18 Jul 2018 15:14:44 +0200 Subject: [PATCH] ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function. The ecp_nistz256_scatter_w7 function is called when an application attempts to use a custom generator, i.e. rarely. Even though the non-x86_64 versions were wrong, this didn't affect point operations; they were just not as fast as expected. Reviewed-by: Rich Salz (Merged from https://github.com/openssl/openssl/pull/6738) --- crypto/ec/asm/ecp_nistz256-armv4.pl | 8 ++++---- crypto/ec/asm/ecp_nistz256-armv8.pl | 16 ++++++++-------- crypto/ec/asm/ecp_nistz256-ppc64.pl | 16 ++++++++-------- crypto/ec/asm/ecp_nistz256-sparcv9.pl | 8 ++++---- crypto/ec/asm/ecp_nistz256-x86.pl | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl index 84dcb6e17f..83abbdd895 100755 --- a/crypto/ec/asm/ecp_nistz256-armv4.pl +++ b/crypto/ec/asm/ecp_nistz256-armv4.pl @@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7: .Loop_scatter_w7: ldr $mask,[$inp],#4 subs $index,$index,#1 - strb $mask,[$out,#64*0-1] + strb $mask,[$out,#64*0] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*1-1] + strb $mask,[$out,#64*1] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*2-1] + strb $mask,[$out,#64*2] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*3-1] + strb $mask,[$out,#64*3] add $out,$out,#64*4 bne .Loop_scatter_w7 diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl index 2705d7cf06..1361cb395f 100644 --- a/crypto/ec/asm/ecp_nistz256-armv8.pl +++ b/crypto/ec/asm/ecp_nistz256-armv8.pl @@ -1776,21 +1776,21 @@ ecp_nistz256_scatter_w7: prfm pstl1strm,[$out,#4096+64*5] prfm pstl1strm,[$out,#4096+64*6] prfm pstl1strm,[$out,#4096+64*7] - strb w3,[$out,#64*0-1] + strb w3,[$out,#64*0] lsr x3,x3,#8 - strb w3,[$out,#64*1-1] + strb w3,[$out,#64*1] lsr x3,x3,#8 - strb w3,[$out,#64*2-1] + strb w3,[$out,#64*2] lsr
x3,x3,#8 - strb w3,[$out,#64*3-1] + strb w3,[$out,#64*3] lsr x3,x3,#8 - strb w3,[$out,#64*4-1] + strb w3,[$out,#64*4] lsr x3,x3,#8 - strb w3,[$out,#64*5-1] + strb w3,[$out,#64*5] lsr x3,x3,#8 - strb w3,[$out,#64*6-1] + strb w3,[$out,#64*6] lsr x3,x3,#8 - strb w3,[$out,#64*7-1] + strb w3,[$out,#64*7] add $out,$out,#64*8 b.ne .Loop_scatter_w7 diff --git a/crypto/ec/asm/ecp_nistz256-ppc64.pl b/crypto/ec/asm/ecp_nistz256-ppc64.pl index 0c3c186b31..116792f7d2 100755 --- a/crypto/ec/asm/ecp_nistz256-ppc64.pl +++ b/crypto/ec/asm/ecp_nistz256-ppc64.pl @@ -2297,21 +2297,21 @@ ecp_nistz256_scatter_w7: .Loop_scatter_w7: ldu r0,8($inp) - stb r0,64*0-1($out) + stb r0,64*0($out) srdi r0,r0,8 - stb r0,64*1-1($out) + stb r0,64*1($out) srdi r0,r0,8 - stb r0,64*2-1($out) + stb r0,64*2($out) srdi r0,r0,8 - stb r0,64*3-1($out) + stb r0,64*3($out) srdi r0,r0,8 - stb r0,64*4-1($out) + stb r0,64*4($out) srdi r0,r0,8 - stb r0,64*5-1($out) + stb r0,64*5($out) srdi r0,r0,8 - stb r0,64*6-1($out) + stb r0,64*6($out) srdi r0,r0,8 - stb r0,64*7-1($out) + stb r0,64*7($out) addi $out,$out,64*8 bdnz .Loop_scatter_w7 diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/ec/asm/ecp_nistz256-sparcv9.pl index 9af1fae853..59df0f73b6 100755 --- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl +++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl @@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7: ld [$inp],%l0 add $inp,4,$inp subcc $index,1,$index - stb %l0,[$out+64*0-1] + stb %l0,[$out+64*0] srl %l0,8,%l1 - stb %l1,[$out+64*1-1] + stb %l1,[$out+64*1] srl %l0,16,%l2 - stb %l2,[$out+64*2-1] + stb %l2,[$out+64*2] srl %l0,24,%l3 - stb %l3,[$out+64*3-1] + stb %l3,[$out+64*3] bne .Loop_scatter_w7 add $out,64*4,$out diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl index a89a9f93bc..8fdd76b84f 100755 --- a/crypto/ec/asm/ecp_nistz256-x86.pl +++ b/crypto/ec/asm/ecp_nistz256-x86.pl @@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) { &mov ("esi",&wparam(1)); &mov ("ebp",&wparam(2)); - &lea 
("edi",&DWP(-1,"edi","ebp")); + &lea ("edi",&DWP(0,"edi","ebp")); &mov ("ebp",64/4); &set_label("scatter_w7_loop"); &mov ("eax",&DWP(0,"esi")); -- GitLab