From 563d3e5948a17328819f90b2ab20477256ada1f4 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 14 Sep 2007 21:06:14 +0000 Subject: [PATCH] Engage new x86 assembler modules. --- Configure | 6 ++-- TABLE | 56 ++++++++++++++++++------------------ crypto/sha/Makefile | 18 ++++++++---- crypto/sha/asm/sha512-586.pl | 14 ++++++--- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/Configure b/Configure index e26e6e4e6e..b35cc61459 100755 --- a/Configure +++ b/Configure @@ -116,9 +116,9 @@ my $tlib="-lnsl -lsocket"; my $bits1="THIRTY_TWO_BIT "; my $bits2="SIXTY_FOUR_BIT "; -my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86-elf.o:ax86-elf.o:bx86-elf.o:mx86-elf.o:sx86-elf.o s512sse2-elf.o:cx86-elf.o:rx86-elf.o:rm86-elf.o:r586-elf.o:wp_block.o w86mmx-elf.o:"; -my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o s512sse2-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o:"; -my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o:"; +my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86-elf.o:ax86-elf.o:bx86-elf.o:mx86-elf.o:sx86-elf.o sha256x86-elf.o sha512x86-elf.o:cx86-elf.o:rx86-elf.o:rm86-elf.o:r586-elf.o:wp_block.o w86mmx-elf.o:"; +my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o sha256x86-cof.o sha512x86-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o:"; +my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o sha256x86-out.o sha512x86-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o:"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:"; my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::"; diff --git a/TABLE b/TABLE index 195cc64860..3219b8bc10 100644 --- a/TABLE +++ b/TABLE @@ -217,7 +217,7 @@ $des_obj = dx86-out.o yx86-out.o $aes_obj = ax86-out.o $bf_obj = bx86-out.o $md5_obj = mx86-out.o -$sha1_obj = sx86-out.o s512sse2-out.o +$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o $cast_obj = cx86-out.o $rc4_obj = rx86-out.o $rmd160_obj = rm86-out.o @@ -246,7 +246,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -304,7 +304,7 @@ $des_obj = dx86-cof.o yx86-cof.o $aes_obj = ax86-cof.o $bf_obj = bx86-cof.o $md5_obj = mx86-cof.o -$sha1_obj = sx86-cof.o s512sse2-cof.o +$sha1_obj = sx86-cof.o sha256x86-cof.o sha512x86-cof.o $cast_obj = cx86-cof.o $rc4_obj = rx86-cof.o $rmd160_obj = rm86-cof.o @@ -362,7 +362,7 @@ $des_obj = dx86-out.o yx86-out.o $aes_obj = ax86-out.o $bf_obj = bx86-out.o $md5_obj = mx86-out.o -$sha1_obj = sx86-out.o s512sse2-out.o +$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o $cast_obj = cx86-out.o $rc4_obj = rx86-out.o $rmd160_obj = rm86-out.o @@ -913,7 +913,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -942,7 +942,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -971,7 +971,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1232,7 +1232,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1435,7 +1435,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1522,7 +1522,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1551,7 +1551,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1638,7 +1638,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1667,7 +1667,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1725,7 +1725,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1754,7 +1754,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1783,7 +1783,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -1986,7 +1986,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -2131,7 +2131,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -2624,7 +2624,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -2943,7 +2943,7 @@ $des_obj = dx86-out.o yx86-out.o $aes_obj = ax86-out.o $bf_obj = bx86-out.o $md5_obj = mx86-out.o -$sha1_obj = sx86-out.o s512sse2-out.o +$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o $cast_obj = cx86-out.o $rc4_obj = rx86-out.o $rmd160_obj = rm86-out.o @@ -2972,7 +2972,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -3059,7 +3059,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -3378,7 +3378,7 @@ $des_obj = dx86-cof.o yx86-cof.o $aes_obj = ax86-cof.o $bf_obj = bx86-cof.o $md5_obj = mx86-cof.o -$sha1_obj = sx86-cof.o s512sse2-cof.o +$sha1_obj = sx86-cof.o sha256x86-cof.o sha512x86-cof.o $cast_obj = cx86-cof.o $rc4_obj = rx86-cof.o $rmd160_obj = rm86-cof.o @@ -3813,7 +3813,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -3842,7 +3842,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -4074,7 +4074,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -4422,7 +4422,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o @@ -4451,7 +4451,7 @@ $des_obj = dx86-elf.o yx86-elf.o $aes_obj = ax86-elf.o $bf_obj = bx86-elf.o $md5_obj = mx86-elf.o -$sha1_obj = sx86-elf.o s512sse2-elf.o +$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o $cast_obj = cx86-elf.o $rc4_obj = rx86-elf.o $rmd160_obj = rm86-elf.o diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index 1231f1da38..e02d9f1dc3 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -45,18 +45,24 @@ lib: $(LIBOBJ) # ELF sx86-elf.s: asm/sha1-586.pl ../perlasm/x86asm.pl (cd asm; $(PERL) sha1-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@) -s512sse2-elf.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) sha512-sse2.pl elf $(CFLAGS) $(PROCESSOR) > ../$@) +sha256x86-elf.s: asm/sha256-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha256-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@) +sha512x86-elf.s: asm/sha512-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha512-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@) # COFF sx86-cof.s: asm/sha1-586.pl ../perlasm/x86asm.pl (cd asm; $(PERL) sha1-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@) -s512sse2-cof.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) sha512-sse2.pl coff $(CFLAGS) $(PROCESSOR) > ../$@) +sha256x86-cof.s: asm/sha256-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha256-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@) +sha512x86-cof.s: asm/sha512-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha512-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@) # a.out sx86-out.s: asm/sha1-586.pl ../perlasm/x86asm.pl (cd asm; $(PERL) sha1-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@) -s512sse2-out.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) sha512-sse2.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@) +sha256x86-out.s: asm/sha256-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha256-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@) +sha512x86-out.s: asm/sha512-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) sha512-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@) sha1-ia64.s: asm/sha1-ia64.pl (cd asm; $(PERL) sha1-ia64.pl $(CFLAGS) ) > $@ diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl index 8f215ac247..49a9814544 100644 --- a/crypto/sha/asm/sha512-586.pl +++ b/crypto/sha/asm/sha512-586.pl @@ -38,6 +38,11 @@ require "x86asm.pl"; &asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386"); +$sse2=0; +for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } + +&external_label("OPENSSL_ia32cap_P") if ($sse2); + $Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp"); $Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp"); $Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp"); @@ -70,7 +75,7 @@ sub BODY_00_15_sse2 { &movq ("mm1",$E); # %mm1 is sliding right &movq ("mm2",$E); # %mm2 is sliding left &psrlq ("mm1",14); - &movq ($Esse2,$E); # module-scheduled save e + &movq ($Esse2,$E); # modulo-scheduled save e &psllq ("mm2",23); &movq ("mm3","mm1"); # %mm3 is T1 &psrlq ("mm1",4); @@ -109,7 +114,7 @@ sub BODY_00_15_sse2 { &pxor ("mm7","mm6"); &psllq ("mm6",6); &pxor ("mm7","mm5"); - &movq (&QWP(0,"esp"),$A); # module-scheduled save a + &movq (&QWP(0,"esp"),$A); # modulo-scheduled save a &pxor ("mm7","mm6"); # T2=Sigma0_512(a) &movq ("mm5",$A); # %mm5=a @@ -274,7 +279,8 @@ sub BODY_00_15_x86 { &mov (&DWP(8,"esp"),"eax"); # inp+num*128 &mov (&DWP(12,"esp"),"ebx"); # saved sp - &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("pic_point")); +if ($sse2) { + &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); &bt (&DWP(0,"edx"),26); &jnc (&label("loop_x86")); @@ -403,7 +409,7 @@ sub BODY_00_15_x86 { &emms (); &mov ("esp",&DWP(8*10+12,"esp")); # restore sp &function_end_A(); - +} &set_label("loop_x86",16); # copy input block to stack reversing byte and qword order for ($i=0;$i<8;$i++) { -- GitLab