提交 7a1a1223 作者: Andy Polyakov

crypto/modes/asm/aesni-gcm-x86_64.pl: minor optimization.

Avoid occasional performance drops of up to 8% by moving the stack frame when it would alias with the key.
上级 72a15870
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
# justify. This module is based on combination of Intel submissions, # justify. This module is based on combination of Intel submissions,
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffles # Locktyukhin of Intel Corp. who verified that it reduces shuffles
# pressure with notable relative improvement on upcoming Haswell # pressure with notable relative improvement, achieving 1.0 cycle per
# processor. [Exact performance numbers to be added at launch.] # byte processed with 128-bit key on Haswell processor.
# #
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
...@@ -422,17 +422,28 @@ $code.=<<___; ...@@ -422,17 +422,28 @@ $code.=<<___;
vzeroupper vzeroupper
vmovdqu ($ivp),$T1 # input counter value vmovdqu ($ivp),$T1 # input counter value
sub \$128,%rsp add \$-128,%rsp
mov 12($ivp),$counter mov 12($ivp),$counter
lea .Lbswap_mask(%rip),$const lea .Lbswap_mask(%rip),$const
lea -0x80($key),$in0 # borrow $in0
mov \$0xf80,$end0 # borrow $end0
vmovdqu ($Xip),$Xi # load Xi vmovdqu ($Xip),$Xi # load Xi
and \$-64,%rsp # ensure stack alignment and \$-128,%rsp # ensure stack alignment
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
lea 0x80($key),$key # size optimization lea 0x80($key),$key # size optimization
lea 0x20+0x20($Xip),$Xip # size optimization lea 0x20+0x20($Xip),$Xip # size optimization
mov 0xf0-0x80($key),$rounds mov 0xf0-0x80($key),$rounds
vpshufb $Ii,$Xi,$Xi vpshufb $Ii,$Xi,$Xi
and $end0,$in0
and %rsp,$end0
sub $in0,$end0
jc .Ldec_no_key_aliasing
cmp \$768,$end0
jnc .Ldec_no_key_aliasing
sub $end0,%rsp # avoid aliasing with key
.Ldec_no_key_aliasing:
vmovdqu 0x50($inp),$Z3 # I[5] vmovdqu 0x50($inp),$Z3 # I[5]
lea ($inp),$in0 lea ($inp),$in0
vmovdqu 0x40($inp),$Z0 vmovdqu 0x40($inp),$Z0
...@@ -621,14 +632,25 @@ $code.=<<___; ...@@ -621,14 +632,25 @@ $code.=<<___;
vzeroupper vzeroupper
vmovdqu ($ivp),$T1 # input counter value vmovdqu ($ivp),$T1 # input counter value
sub \$128,%rsp add \$-128,%rsp
mov 12($ivp),$counter mov 12($ivp),$counter
lea .Lbswap_mask(%rip),$const lea .Lbswap_mask(%rip),$const
lea -0x80($key),$in0 # borrow $in0
mov \$0xf80,$end0 # borrow $end0
lea 0x80($key),$key # size optimization lea 0x80($key),$key # size optimization
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
and \$-64,%rsp # ensure stack alignment and \$-128,%rsp # ensure stack alignment
mov 0xf0-0x80($key),$rounds mov 0xf0-0x80($key),$rounds
and $end0,$in0
and %rsp,$end0
sub $in0,$end0
jc .Lenc_no_key_aliasing
cmp \$768,$end0
jnc .Lenc_no_key_aliasing
sub $end0,%rsp # avoid aliasing with key
.Lenc_no_key_aliasing:
lea ($out),$in0 lea ($out),$in0
lea -0xc0($out,$len),$end0 lea -0xc0($out,$len),$end0
shr \$4,$len shr \$4,$len
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册