From a9c32ace06f53df167e59d6d3e95e69d063bd608 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 1 May 2006 13:35:03 +0000 Subject: [PATCH] SHA1 for PowerPC. --- Configure | 18 +-- crypto/md32_common.h | 3 +- crypto/sha/Makefile | 5 + crypto/sha/asm/sha1-ppc.pl | 309 +++++++++++++++++++++++++++++++++++++ crypto/sha/sha512.c | 2 +- crypto/sha/sha_locl.h | 7 + 6 files changed, 333 insertions(+), 11 deletions(-) create mode 100755 crypto/sha/asm/sha1-ppc.pl diff --git a/Configure b/Configure index f8e4ca7afe..c514b57747 100755 --- a/Configure +++ b/Configure @@ -314,7 +314,7 @@ my %table=( # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### IA-32 targets... "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -322,7 +322,7 @@ my %table=( #### "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -bpowerpc64-linux is transient option, -m64 should be the one to use... -"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -407,12 +407,12 @@ my %table=( #### IBM's AIX. "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", -"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:", -"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn::::::-X64", +"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:", +"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64", # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE # at build time. $OBJECT_MODE is respected at ./config stage! -"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", -"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", +"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", +"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", # # Cray T90 and similar (SDSC) @@ -504,10 +504,10 @@ my %table=( ##### MacOS X (a.k.a. Rhapsody or Darwin) setup "rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::", -"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", diff --git a/crypto/md32_common.h b/crypto/md32_common.h index 2afe045c5d..3ed16f380f 100644 --- a/crypto/md32_common.h +++ b/crypto/md32_common.h @@ -206,7 +206,8 @@ : "cc"); \ ret; \ }) -# elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) +# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) # define ROTATE(a,n) ({ register unsigned int ret; \ asm ( \ "rlwinm %0,%1,%2,0,31" \ diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index 91c5fe0cee..01d8b2e469 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -71,6 +71,11 @@ sha256-x86_64.s: asm/sha512-x86_64.pl sha512-x86_64.s: asm/sha512-x86_64.pl $(PERL) asm/sha512-x86_64.pl $@ +sha1-ppc_aix32.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ +sha1-ppc_aix64.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ +# non-AIX targets are believed to be armed with GNU make +sha1-ppc_%.s: asm/sha1-ppc.pl; $(PERL) $< $@ + files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl new file mode 100755 index 0000000000..9c955ef036 --- /dev/null +++ b/crypto/sha/asm/sha1-ppc.pl @@ -0,0 +1,309 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. Rights for redistribution and usage in source and binary +# forms are granted according to the OpenSSL license. +# ==================================================================== + +# I let hardware handle unaligned input, except on page boundaries +# (see below for details). Otherwise straightforward implementation +# with X vector in register bank. The module is big-endian [which is +# not big deal as there're no little-endian targets left around]. + +# gcc-4.0.0 -m64 -m32 +# -------------------------- +# sha1 +76% +59% + +$output = shift; + +if ($output =~ /64\.s/) { + $SIZE_T =8; + $RZONE =288; + $UCMP ="cmpld"; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; +} elsif ($output =~ /32\.s/) { + $SIZE_T =4; + $RZONE =224; + $UCMP ="cmplw"; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; +} else { die "nonsense $output"; } + +( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) || + die "can't call ../perlasm/ppc-xlate.pl: $!"; + +$FRAME=24*$SIZE_T; + +$K ="r0"; +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$t0 ="r15"; +$t1 ="r6"; + +$A ="r7"; +$B ="r8"; +$C ="r9"; +$D ="r10"; +$E ="r11"; +$T ="r12"; + +@V=($A,$B,$C,$D,$E,$T); +@X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e,$f)=@_; +my $j=$i+1; +$code.=<<___ if ($i==0); + lwz @X[$i],$i*4($inp) +___ +$code.=<<___ if ($i<15); + lwz @X[$j],$j*4($inp) + add $f,$K,$e + rotlwi $e,$a,5 + add $f,$f,@X[$i] + and $t0,$c,$b + add $f,$f,$e + andc $t1,$d,$b + rotlwi $b,$b,30 + or $t0,$t0,$t1 + add $f,$f,$t0 +___ +$code.=<<___ if ($i>=15); + add $f,$K,$e + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] + and $t0,$c,$b + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] + add $f,$f,$e + andc $t1,$d,$b + rotlwi $b,$b,30 + or $t0,$t0,$t1 + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] + add $f,$f,$t0 + rotlwi @X[$j%16],@X[$j%16],1 +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e,$f)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); + add $f,$K,$e + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] + xor $t0,$b,$c + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] + add $f,$f,$e + rotlwi $b,$b,30 + xor $t0,$t0,$d + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] + add $f,$f,$t0 + rotlwi @X[$j%16],@X[$j%16],1 +___ +$code.=<<___ if ($i==79); + add $f,$K,$e + rotlwi $e,$a,5 + lwz r16,0($ctx) + add $f,$f,@X[$i%16] + xor $t0,$b,$c + lwz r17,4($ctx) + add $f,$f,$e + rotlwi $b,$b,30 + lwz r18,8($ctx) + xor $t0,$t0,$d + lwz r19,12($ctx) + add $f,$f,$t0 + lwz r20,16($ctx) +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e,$f)=@_; +my $j=$i+1; +$code.=<<___; + add $f,$K,$e + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] + and $t0,$b,$c + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] + add $f,$f,$e + or $t1,$b,$c + rotlwi $b,$b,30 + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] + and $t1,$t1,$d + or $t0,$t0,$t1 + rotlwi @X[$j%16],@X[$j%16],1 + add $f,$f,$t0 +___ +} + +$code=<<___; +.text + +.globl .sha1_block_asm_data_order +.align 4 +.sha1_block_asm_data_order: + mflr r0 + $STU $sp,`-($FRAME+64+$RZONE)`($sp) + $PUSH r0,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) + $PUSH r17,`$FRAME-$SIZE_T*15`($sp) + $PUSH r18,`$FRAME-$SIZE_T*14`($sp) + $PUSH r19,`$FRAME-$SIZE_T*13`($sp) + $PUSH r20,`$FRAME-$SIZE_T*12`($sp) + $PUSH r21,`$FRAME-$SIZE_T*11`($sp) + $PUSH r22,`$FRAME-$SIZE_T*10`($sp) + $PUSH r23,`$FRAME-$SIZE_T*9`($sp) + $PUSH r24,`$FRAME-$SIZE_T*8`($sp) + $PUSH r25,`$FRAME-$SIZE_T*7`($sp) + $PUSH r26,`$FRAME-$SIZE_T*6`($sp) + $PUSH r27,`$FRAME-$SIZE_T*5`($sp) + $PUSH r28,`$FRAME-$SIZE_T*4`($sp) + $PUSH r29,`$FRAME-$SIZE_T*3`($sp) + $PUSH r30,`$FRAME-$SIZE_T*2`($sp) + $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + lwz $A,0($ctx) + lwz $B,4($ctx) + lwz $C,8($ctx) + lwz $D,12($ctx) + lwz $E,16($ctx) + andi. r0,$inp,3 + bne Lunaligned +Laligned: + mtctr $num + bl Lsha1_block_private +Ldone: + $POP r0,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) + $POP r17,`$FRAME-$SIZE_T*15`($sp) + $POP r18,`$FRAME-$SIZE_T*14`($sp) + $POP r19,`$FRAME-$SIZE_T*13`($sp) + $POP r20,`$FRAME-$SIZE_T*12`($sp) + $POP r21,`$FRAME-$SIZE_T*11`($sp) + $POP r22,`$FRAME-$SIZE_T*10`($sp) + $POP r23,`$FRAME-$SIZE_T*9`($sp) + $POP r24,`$FRAME-$SIZE_T*8`($sp) + $POP r25,`$FRAME-$SIZE_T*7`($sp) + $POP r26,`$FRAME-$SIZE_T*6`($sp) + $POP r27,`$FRAME-$SIZE_T*5`($sp) + $POP r28,`$FRAME-$SIZE_T*4`($sp) + $POP r29,`$FRAME-$SIZE_T*3`($sp) + $POP r30,`$FRAME-$SIZE_T*2`($sp) + $POP r31,`$FRAME-$SIZE_T*1`($sp) + mtlr r0 + addi $sp,$sp,`$FRAME+64+$RZONE` + blr +___ + +# PowerPC specification allows an implementation to be ill-behaved +# upon unaligned access which crosses page boundary. "Better safe +# than sorry" principle makes me treat it specially. But I don't +# look for particular offending word, but rather for 64-byte input +# block which crosses the boundary. Once found that block is aligned +# and hashed separately... +$code.=<<___; +.align 4 +Lunaligned: + li $t1,4096 + subf $t1,$inp,$t1 + andi. $t1,$t1,4095 ; distance to closest page boundary + srwi. $t1,$t1,6 ; t1/=64 + beq Lcross_page + $UCMP $num,$t1 + ble- Laligned ; didn't cross the page boundary + mtctr $t1 + subf $num,$t1,$num + bl Lsha1_block_private +Lcross_page: + li $t1,16 + mtctr $t1 + addi r20,$sp,$FRAME ; spot below the frame +Lmemcpy: + lbz r16,0($inp) + lbz r17,1($inp) + lbz r18,2($inp) + lbz r19,3($inp) + addi $inp,$inp,4 + stb r16,0(r20) + stb r17,1(r20) + stb r18,2(r20) + stb r19,3(r20) + addi r20,r20,4 + bdnz Lmemcpy + + $PUSH $inp,`$FRAME-$SIZE_T*19`($sp) + li $t1,1 + addi $inp,$sp,$FRAME + mtctr $t1 + bl Lsha1_block_private + $POP $inp,`$FRAME-$SIZE_T*19`($sp) + addic. $num,$num,-1 + bne- Lunaligned + b Ldone +___ + +# This is private block function, which uses tailored calling +# interface, namely upon entry SHA_CTX is pre-loaded to given +# registers and counter register contains amount of chunks to +# digest... +$code.=<<___; +.align 4 +Lsha1_block_private: +___ +$code.=<<___; # load K_00_19 + lis $K,0x5a82 + ori $K,$K,0x7999 +___ +for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; # load K_20_39 + lis $K,0x6ed9 + ori $K,$K,0xeba1 +___ +for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; # load K_40_59 + lis $K,0x8f1b + ori $K,$K,0xbcdc +___ +for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; # load K_60_79 + lis $K,0xca62 + ori $K,$K,0xc1d6 +___ +for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + add r16,r16,$E + add r17,r17,$T + add r18,r18,$A + add r19,r19,$B + add r20,r20,$C + stw r16,0($ctx) + mr $A,r16 + stw r17,4($ctx) + mr $B,r17 + stw r18,8($ctx) + mr $C,r18 + stw r19,12($ctx) + mr $D,r19 + stw r20,16($ctx) + mr $E,r20 + addi $inp,$inp,`16*4` + bdnz- Lsha1_block_private + blr +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/crypto/sha/sha512.c b/crypto/sha/sha512.c index e48e2febbe..dba4c49c0b 100644 --- a/crypto/sha/sha512.c +++ b/crypto/sha/sha512.c @@ -330,7 +330,7 @@ static const SHA_LONG64 K512[80] = { : "0"(p[1]),"1"(p[0])); \ ((SHA_LONG64)hi)<<32|lo; }) # endif -# elif defined(_ARCH_PPC) && defined(__64BIT__) +# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) # define ROTR(a,n) ({ unsigned long ret; \ asm ("rotrdi %0,%1,%2" \ : "=r"(ret) \ diff --git a/crypto/sha/sha_locl.h b/crypto/sha/sha_locl.h index 28fd930032..2f5ab1f58e 100644 --- a/crypto/sha/sha_locl.h +++ b/crypto/sha/sha_locl.h @@ -127,6 +127,13 @@ # define DONT_IMPLEMENT_BLOCK_HOST_ORDER # define sha1_block_data_order sha1_block_asm_data_order # define DONT_IMPLEMENT_BLOCK_DATA_ORDER +# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__ppc) || defined(__ppc__) || defined(__powerpc) || \ + defined(__ppc64) || defined(__ppc64__) || defined(__powerpc64) +# define sha1_block_host_order sha1_block_asm_data_order +# define DONT_IMPLEMENT_BLOCK_HOST_ORDER +# define sha1_block_data_order sha1_block_asm_data_order +# define DONT_IMPLEMENT_BLOCK_DATA_ORDER # endif # endif void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num); -- GitLab