提交 3b848d34 编写于 作者: A Andy Polyakov

aesni-sha1-x86_64.pl: update performance data.

上级 42b9a417
...@@ -22,20 +22,23 @@ ...@@ -22,20 +22,23 @@
# #
# AES-128-CBC +SHA1 stitch gain # AES-128-CBC +SHA1 stitch gain
# Westmere 3.77[+5.5] 9.26 6.58 +41% # Westmere 3.77[+5.5] 9.26 6.58 +41%
# Sandy Bridge 5.05[+5.0(6.2)] 10.06(11.21) 6.09(7.05) +65%(+59%) # Sandy Bridge 5.05[+5.0(6.2)] 10.06(11.21) 5.98(7.05) +68%(+59%)
# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74% # Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%
# Haswell 4.43[+3.6(4.4)] 8.00(8.80) 4.55(5.21) +75%(+69%)
# Bulldozer 5.77[+6.0] 11.72 6.37 +84% # Bulldozer 5.77[+6.0] 11.72 6.37 +84%
# #
# AES-192-CBC # AES-192-CBC
# Westmere 4.51 10.00 6.87 +46% # Westmere 4.51 10.00 6.87 +46%
# Sandy Bridge 6.05 11.06(12.21) 6.11(7.20) +81%(+70%) # Sandy Bridge 6.05 11.06(12.21) 6.11(7.20) +81%(+70%)
# Ivy Bridge 6.05 10.65 6.07 +75% # Ivy Bridge 6.05 10.65 6.07 +75%
# Haswell 5.29 8.86(9.65) 5.32(5.32) +67%(+81%)
# Bulldozer 6.89 12.84 6.96 +84% # Bulldozer 6.89 12.84 6.96 +84%
# #
# AES-256-CBC # AES-256-CBC
# Westmere 5.25 10.74 7.19 +49% # Westmere 5.25 10.74 7.19 +49%
# Sandy Bridge 7.05 12.06(13.21) 7.12(7.68) +69%(+72%) # Sandy Bridge 7.05 12.06(13.21) 7.12(7.68) +69%(+72%)
# Ivy Bridge 7.05 11.65 7.12 +64% # Ivy Bridge 7.05 11.65 7.12 +64%
# Haswell 6.19 9.76(10.6) 6.21(6.41) +57%(+65%)
# Bulldozer 8.00 13.95 8.25 +69% # Bulldozer 8.00 13.95 8.25 +69%
# #
# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for # (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for
...@@ -51,9 +54,10 @@ ...@@ -51,9 +54,10 @@
# standalone AESNI-CBC decrypt: # standalone AESNI-CBC decrypt:
# #
# AES-128-CBC AES-192-CBC AES-256-CBC # AES-128-CBC AES-192-CBC AES-256-CBC
# Westmere 1.31 1.55 1.80 # Westmere 1.25 1.50 1.75
# Sandy Bridge 0.74 0.91 1.09 # Sandy Bridge 0.74 0.91 1.09
# Ivy Bridge 0.74 0.90 1.11 # Ivy Bridge 0.74 0.90 1.11
# Haswell 0.63 0.76 0.88
# Bulldozer 0.70 0.85 0.99 # Bulldozer 0.70 0.85 0.99
$flavour = shift; $flavour = shift;
...@@ -94,7 +98,7 @@ $code.=<<___; ...@@ -94,7 +98,7 @@ $code.=<<___;
.globl aesni_cbc_sha1_enc .globl aesni_cbc_sha1_enc
.type aesni_cbc_sha1_enc,\@abi-omnipotent .type aesni_cbc_sha1_enc,\@abi-omnipotent
.align 16 .align 32
aesni_cbc_sha1_enc: aesni_cbc_sha1_enc:
# caller should check for SSSE3 and AES-NI bits # caller should check for SSSE3 and AES-NI bits
mov OPENSSL_ia32cap_P+0(%rip),%r10d mov OPENSSL_ia32cap_P+0(%rip),%r10d
...@@ -144,7 +148,7 @@ my $_ror=sub { &ror(@_) }; ...@@ -144,7 +148,7 @@ my $_ror=sub { &ror(@_) };
$code.=<<___; $code.=<<___;
.type aesni_cbc_sha1_enc_ssse3,\@function,6 .type aesni_cbc_sha1_enc_ssse3,\@function,6
.align 16 .align 32
aesni_cbc_sha1_enc_ssse3: aesni_cbc_sha1_enc_ssse3:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact #shr \$6,$len # debugging artefact
...@@ -570,7 +574,7 @@ sub body_40_59 () { # ((b^c)&(c^d))^c ...@@ -570,7 +574,7 @@ sub body_40_59 () { # ((b^c)&(c^d))^c
return @r; return @r;
} }
$code.=<<___; $code.=<<___;
.align 16 .align 32
.Loop_ssse3: .Loop_ssse3:
___ ___
&Xupdate_ssse3_16_31(\&body_00_19); &Xupdate_ssse3_16_31(\&body_00_19);
...@@ -618,7 +622,6 @@ $code.=<<___; ...@@ -618,7 +622,6 @@ $code.=<<___;
and @T[1],@T[0] and @T[1],@T[0]
jmp .Loop_ssse3 jmp .Loop_ssse3
.align 16
.Ldone_ssse3: .Ldone_ssse3:
___ ___
$jj=$j=$saved_j; @V=@saved_V; $jj=$j=$saved_j; @V=@saved_V;
...@@ -689,7 +692,7 @@ my $_ror=sub { &shrd(@_[0],@_) }; ...@@ -689,7 +692,7 @@ my $_ror=sub { &shrd(@_[0],@_) };
$code.=<<___; $code.=<<___;
.type aesni_cbc_sha1_enc_avx,\@function,6 .type aesni_cbc_sha1_enc_avx,\@function,6
.align 16 .align 32
aesni_cbc_sha1_enc_avx: aesni_cbc_sha1_enc_avx:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact #shr \$6,$len # debugging artefact
...@@ -773,14 +776,14 @@ my $aesenc=sub { ...@@ -773,14 +776,14 @@ my $aesenc=sub {
my ($n,$k)=($r/10,$r%10); my ($n,$k)=($r/10,$r%10);
if ($k==0) { if ($k==0) {
$code.=<<___; $code.=<<___;
vmovups `16*$n`($in0),$in # load input vmovdqu `16*$n`($in0),$in # load input
vxorps $rndkey[1],$in,$in vpxor $rndkey[1],$in,$in
___ ___
$code.=<<___ if ($n); $code.=<<___ if ($n);
vmovups $iv,`16*($n-1)`($out,$in0) # write output vmovups $iv,`16*($n-1)`($out,$in0) # write output
___ ___
$code.=<<___; $code.=<<___;
vxorps $in,$iv,$iv vpxor $in,$iv,$iv
vaesenc $rndkey[0],$iv,$iv vaesenc $rndkey[0],$iv,$iv
vmovups `32+16*$k-112`($key),$rndkey[1] vmovups `32+16*$k-112`($key),$rndkey[1]
___ ___
...@@ -1014,7 +1017,7 @@ sub Xtail_avx() ...@@ -1014,7 +1017,7 @@ sub Xtail_avx()
} }
$code.=<<___; $code.=<<___;
.align 16 .align 32
.Loop_avx: .Loop_avx:
___ ___
&Xupdate_avx_16_31(\&body_00_19); &Xupdate_avx_16_31(\&body_00_19);
...@@ -1062,7 +1065,6 @@ $code.=<<___; ...@@ -1062,7 +1065,6 @@ $code.=<<___;
and @T[1],@T[0] and @T[1],@T[0]
jmp .Loop_avx jmp .Loop_avx
.align 16
.Ldone_avx: .Ldone_avx:
___ ___
$jj=$j=$saved_j; @V=@saved_V; $jj=$j=$saved_j; @V=@saved_V;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册