From 670ad0fbf6ebcf113e278d8174081a7e2d2fa44c Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Fri, 15 Apr 2016 16:39:22 +0200
Subject: [PATCH] s390x assembly pack: cache capability query results.

IBM argues that in certain scenarios capability query is really
expensive. At the same time it's asserted that query results can
be safely cached, because disabling CPACF is incompatible with
reboot-free operation.

Reviewed-by: Tim Hudson
---
Reviewer notes (commentary only: text between the "---" marker above and
the first "diff --git" line is not recorded in the commit).

What the patch does
  OPENSSL_s390x_facilities (crypto/s390xcpuid.S) now runs the function-code-0
  query of kimd, km, kmc and kmctr once and stores the results in
  OPENSSL_s390xcap_P. The AES, GHASH, SHA-1 and SHA-512 hunks drop their own
  per-call query (previously written to a stack slot) and test the stored
  vectors instead.

Layout of OPENSSL_s390xcap_P after this patch (80 bytes, 8-byte aligned,
per the .comm lines):
  +0  .. +15  facility list written by "stfle 0(%r4)"
  +16 .. +31  kimd query result   (la %r1,16(%r4))
  +32 .. +47  km query result     (la %r1,32(%r4))
  +48 .. +63  kmc query result    (la %r1,48(%r4))
  +64 .. +79  kmctr query result  (la %r1,64(%r4))
  Bytes +8 .. +79 are zeroed before stfle is issued. The kimd/km/kmc queries
  are skipped when "tmhl %r2,0x4000" finds the message-security-assist bit
  clear, and the kmctr query is skipped when "tmhh %r3,0x0004" finds the
  message-security-assist-4 bit clear; skipped vectors therefore stay zero.

Readers of the cached vectors in this patch
  aes-s390x.pl   ng %r0,48(%r1)  kmc vector   (hunk branching to .Lekey_internal)
  aes-s390x.pl   ng %r0,64(%r1)  kmctr vector (block is disabled: "if (0)")
  aes-s390x.pl   ng %r0,32(%r1)  km vector    (hunk branching to .Lxts_km_vanilla)
  ghash-s390x.pl lg %r1,24(%r1)  second doubleword of the kimd vector
                                 (block is disabled: "if(!$softonly && 0)")
  sha1-s390x.pl, sha512-s390x.pl
                 lg %r0,16(%r1)  first doubleword of the kimd vector

NOTE(review): the .Lekey_internal, ghash, sha1 and sha512 hunks add no larl
  of their own; they rely on %r1 still holding the base address used by the
  preceding "lg %r0,0(%r1)" context line. Presumably that is
  OPENSSL_s390xcap_P, loaded just above the hunk; confirm in the full files.
NOTE(review): OPENSSL_s390xcap_P is declared with .comm in every file touched
  here; all five declarations are changed from 16 to 80 bytes together.
NOTE(review): in this copy the From:/Reviewed-by: headers carry no e-mail
  address and the CRYPTOGAMS .string context lines end in "by "; addresses
  appear to have been stripped. Line breaks, tabs and blank context lines
  were reconstructed from the "@@" line counts. Verify against the upstream
  commit before applying.

 crypto/aes/asm/aes-s390x.pl     | 29 ++++++++--------------
 crypto/modes/asm/ghash-s390x.pl |  4 +--
 crypto/s390xcpuid.S             | 44 ++++++++++++++++++++++++++++-----
 crypto/sha/asm/sha1-s390x.pl    |  7 ++----
 crypto/sha/asm/sha512-s390x.pl  |  7 ++----
 5 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl
index 4aacf1b6b5..231a29982c 100644
--- a/crypto/aes/asm/aes-s390x.pl
+++ b/crypto/aes/asm/aes-s390x.pl
@@ -818,13 +818,9 @@ $code.=<<___ if (!$softonly);
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lekey_internal
 
-	lghi	%r0,0		# query capability vector
-	la	%r1,16($sp)
-	.long	0xb92f0042	# kmc %r4,%r2
-
-	llihh	%r1,0x8000
-	srlg	%r1,%r1,0(%r5)
-	ng	%r1,16($sp)
+	llihh	%r0,0x8000
+	srlg	%r0,%r0,0(%r5)
+	ng	%r0,48(%r1)	# check kmc capability vector
 	jz	.Lekey_internal
 
 	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
@@ -1444,13 +1440,10 @@ $code.=<<___ if (0);	######### kmctr code was measured to be ~12% slower
 
 	llgfr	$s0,%r0
 	lgr	$s1,%r1
-	lghi	%r0,0
-	la	%r1,16($sp)
-	.long	0xb92d2042	# kmctr %r4,%r2,%r2
-
+	larl	%r1,OPENSSL_s390xcap_P
 	llihh	%r0,0x8000	# check if kmctr supports the function code
 	srlg	%r0,%r0,0($s0)
-	ng	%r0,16($sp)
+	ng	%r0,64(%r1)	# check kmctr capability vector
 	lgr	%r0,$s0
 	lgr	%r1,$s1
 	jz	.Lctr32_km_loop
@@ -1597,12 +1590,10 @@ $code.=<<___ if(1);
 	llgfr	$s0,%r0			# put aside the function code
 	lghi	$s1,0x7f
 	nr	$s1,%r0
-	lghi	%r0,0			# query capability vector
-	la	%r1,$tweak-16($sp)
-	.long	0xb92e0042		# km %r4,%r2
-	llihh	%r1,0x8000
-	srlg	%r1,%r1,32($s1)		# check for 32+function code
-	ng	%r1,$tweak-16($sp)
+	larl	%r1,OPENSSL_s390xcap_P
+	llihh	%r0,0x8000
+	srlg	%r0,%r0,32($s1)		# check for 32+function code
+	ng	%r0,32(%r1)		# check km capability vector
 	lgr	%r0,$s0			# restore the function code
 	la	%r1,0($key1)		# restore $key1
 	jz	.Lxts_km_vanilla
@@ -2229,7 +2220,7 @@ ___
 }
 $code.=<<___;
 .string	"AES for s390x, CRYPTOGAMS by "
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/crypto/modes/asm/ghash-s390x.pl b/crypto/modes/asm/ghash-s390x.pl
index a46f3eba6a..8c3ce6928d 100644
--- a/crypto/modes/asm/ghash-s390x.pl
+++ b/crypto/modes/asm/ghash-s390x.pl
@@ -85,9 +85,7 @@ $code.=<<___ if(!$softonly && 0);	# hardware is slow for single block...
 	tmhl	%r0,0x4000	# check for message-security-assist
 	jz	.Lsoft_gmult
 	lghi	%r0,0
-	la	%r1,16($sp)
-	.long	0xb93e0004	# kimd %r0,%r4
-	lg	%r1,24($sp)
+	lg	%r1,24(%r1)	# load second word of kimd capabilities vector
 	tmhh	%r1,0x4000	# check for function 65
 	jz	.Lsoft_gmult
 	stg	%r0,16($sp)	# arrange 16 bytes of zero input
diff --git a/crypto/s390xcpuid.S b/crypto/s390xcpuid.S
index 3402a2404b..3efad5506b 100644
--- a/crypto/s390xcpuid.S
+++ b/crypto/s390xcpuid.S
@@ -5,14 +5,46 @@
 .align	16
 OPENSSL_s390x_facilities:
 	lghi	%r0,0
-	larl	%r2,OPENSSL_s390xcap_P
-	stg	%r0,8(%r2)
-	.long	0xb2b02000	# stfle 0(%r2)
+	larl	%r4,OPENSSL_s390xcap_P
+	stg	%r0,8(%r4)	# wipe capability vectors
+	stg	%r0,16(%r4)
+	stg	%r0,24(%r4)
+	stg	%r0,32(%r4)
+	stg	%r0,40(%r4)
+	stg	%r0,48(%r4)
+	stg	%r0,56(%r4)
+	stg	%r0,64(%r4)
+	stg	%r0,72(%r4)
+
+	.long	0xb2b04000	# stfle 0(%r4)
 	brc	8,.Ldone
 	lghi	%r0,1
-	.long	0xb2b02000	# stfle 0(%r2)
+	.long	0xb2b04000	# stfle 0(%r4)
 .Ldone:
-	lg	%r2,0(%r2)
+	lmg	%r2,%r3,0(%r4)
+	tmhl	%r2,0x4000	# check for message-security-assist
+	jz	.Lret
+
+	lghi	%r0,0		# query kimd capabilities
+	la	%r1,16(%r4)
+	.long	0xb93e0002	# kimd %r0,%r2
+
+	lghi	%r0,0		# query km capability vector
+	la	%r1,32(%r4)
+	.long	0xb92e0042	# km %r4,%r2
+
+	lghi	%r0,0		# query kmc capability vector
+	la	%r1,48(%r4)
+	.long	0xb92f0042	# kmc %r4,%r2
+
+	tmhh	%r3,0x0004	# check for message-security-assist-4
+	jz	.Lret
+
+	lghi	%r0,0		# query kmctr capability vector
+	la	%r1,64(%r4)
+	.long	0xb92d2042	# kmctr %r4,%r2,%r2
+
+.Lret:
 	br	%r14
 .size	OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities
 
@@ -112,4 +144,4 @@ OPENSSL_instrument_bus2:
 .section	.init
 	brasl	%r14,OPENSSL_cpuid_setup
 
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
diff --git a/crypto/sha/asm/sha1-s390x.pl b/crypto/sha/asm/sha1-s390x.pl
index a62cc31892..003ba7647e 100644
--- a/crypto/sha/asm/sha1-s390x.pl
+++ b/crypto/sha/asm/sha1-s390x.pl
@@ -168,10 +168,7 @@ $code.=<<___ if ($kimdfunc);
 	lg	%r0,0(%r1)
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lsoftware
-	lghi	%r0,0
-	la	%r1,`2*$SIZE_T`($sp)
-	.long	0xb93e0002	# kimd %r0,%r2
-	lg	%r0,`2*$SIZE_T`($sp)
+	lg	%r0,16(%r1)	# check kimd capabilities
 	tmhh	%r0,`0x8000>>$kimdfunc`
 	jz	.Lsoftware
 	lghi	%r0,$kimdfunc
@@ -238,7 +235,7 @@ $code.=<<___;
 	br	%r14
 .size	sha1_block_data_order,.-sha1_block_data_order
 .string	"SHA1 block transform for s390x, CRYPTOGAMS by "
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/crypto/sha/asm/sha512-s390x.pl b/crypto/sha/asm/sha512-s390x.pl
index 7780627131..00cee396b6 100644
--- a/crypto/sha/asm/sha512-s390x.pl
+++ b/crypto/sha/asm/sha512-s390x.pl
@@ -240,10 +240,7 @@ $code.=<<___ if ($kimdfunc);
 	lg	%r0,0(%r1)
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lsoftware
-	lghi	%r0,0
-	la	%r1,`2*$SIZE_T`($sp)
-	.long	0xb93e0002	# kimd %r0,%r2
-	lg	%r0,`2*$SIZE_T`($sp)
+	lg	%r0,16(%r1)	# check kimd capabilities
 	tmhh	%r0,`0x8000>>$kimdfunc`
 	jz	.Lsoftware
 	lghi	%r0,$kimdfunc
@@ -311,7 +308,7 @@ $code.=<<___;
 	br	%r14
 .size	$Func,.-$Func
 .string	"SHA${label} block transform for s390x, CRYPTOGAMS by "
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
-- 
GitLab