提交 0ab8fd58 编写于 作者: A Andy Polyakov

s390x assembler pack: tune-up and support for new z196 hardware.

上级 8aa6cff4
此差异已折叠。
...@@ -41,8 +41,8 @@ ...@@ -41,8 +41,8 @@
# processor, as long as it's "z-CPU". Latter implies that the code # processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG # remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
# is achieved by swapping words after 64-bit loads, follow _dswap-s. # is achieved by swapping words after 64-bit loads, follow _dswap-s.
# On z990 it was measured to perform 2.6-2.2 times better, less for # On z990 it was measured to perform 2.6-2.2 times better than
# longer keys... # compiler-generated code, less for longer keys...
$flavour = shift; $flavour = shift;
...@@ -102,8 +102,8 @@ $code.=<<___ if ($flavour =~ /3[12]/); ...@@ -102,8 +102,8 @@ $code.=<<___ if ($flavour =~ /3[12]/);
bnzr %r14 # if ($num&1) return 0; bnzr %r14 # if ($num&1) return 0;
___ ___
$code.=<<___ if ($flavour !~ /3[12]/); $code.=<<___ if ($flavour !~ /3[12]/);
cghi $num,128 # cghi $num,96 #
bhr %r14 # if($num>128) return 0; bhr %r14 # if($num>96) return 0;
___ ___
$code.=<<___; $code.=<<___;
stm${g} %r3,%r15,3*$SIZE_T($sp) stm${g} %r3,%r15,3*$SIZE_T($sp)
......
...@@ -28,6 +28,15 @@ ...@@ -28,6 +28,15 @@
# remains z/Architecture specific. On z990 it was measured to perform # remains z/Architecture specific. On z990 it was measured to perform
# 2.8x better than 32-bit code generated by gcc 4.3. # 2.8x better than 32-bit code generated by gcc 4.3.
# March 2011.
#
# Support for hardware KIMD-GHASH is verified to produce correct
# results and is therefore engaged. On z196 it was measured to process
# an 8KB buffer ~7 times faster than the software implementation. It's
# not as impressive for smaller buffer sizes, and for the smallest
# 16-byte buffer it's actually almost 2 times slower, which is the
# reason why KIMD-GHASH is not used in gcm_gmult_4bit.
$flavour = shift; $flavour = shift;
if ($flavour =~ /3[12]/) { if ($flavour =~ /3[12]/) {
...@@ -41,7 +50,7 @@ if ($flavour =~ /3[12]/) { ...@@ -41,7 +50,7 @@ if ($flavour =~ /3[12]/) {
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output"; open STDOUT,">$output";
$softonly=1; # disable hardware support for now $softonly=0;
$Zhi="%r0"; $Zhi="%r0";
$Zlo="%r1"; $Zlo="%r1";
...@@ -70,7 +79,7 @@ $code.=<<___; ...@@ -70,7 +79,7 @@ $code.=<<___;
.align 32 .align 32
gcm_gmult_4bit: gcm_gmult_4bit:
___ ___
$code.=<<___ if(!$softonly); $code.=<<___ if(!$softonly && 0); # hardware is slow for single block...
larl %r1,OPENSSL_s390xcap_P larl %r1,OPENSSL_s390xcap_P
lg %r0,0(%r1) lg %r0,0(%r1)
tmhl %r0,0x4000 # check for message-security-assist tmhl %r0,0x4000 # check for message-security-assist
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册