Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
btwise
openssl
提交
25f7117f
O
openssl
项目概览
btwise
/
openssl
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
25f7117f
编写于
1月 04, 2014
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
aesni-sha1-x86_64.pl: refine Atom-specific optimization.
(and update performance data, and fix typo)
上级
5b7f36e8
变更
2
隐藏空白更改
内联
并排
Showing 2 changed files with 33 additions and 21 deletions
+33
-21
crypto/aes/asm/aesni-sha1-x86_64.pl
crypto/aes/asm/aesni-sha1-x86_64.pl
+32
-20
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha1-x86_64.pl
+1
-1
未找到文件。
crypto/aes/asm/aesni-sha1-x86_64.pl
浏览文件 @
25f7117f
...
...
@@ -28,8 +28,8 @@
# Bulldozer 5.77[+6.0] 11.72 6.37 +84%
#
# AES-192-CBC
# Westmere 4.51 10.00 6.87 +46%
# Sandy Bridge 6.05 11.06(12.21) 6.11(7.20) +81%(+70%)
# Westmere 4.51 10.00 6.91 +45%
# Sandy Bridge 6.05 11.06(12.21) 6.11(7.18) +81%(+70%)
# Ivy Bridge 6.05 10.65 6.07 +75%
# Haswell 5.29 8.86(9.42) 5.32(5.32) +67%(+77%)
# Bulldozer 6.89 12.84 6.96 +84%
...
...
@@ -66,8 +66,13 @@
# Westmere 1.75 7.20 6.68 +7.8%
# Sandy Bridge 1.09 6.09(7.22) 5.82(6.95) +4.6%(+3.9%)
# Ivy Bridge 1.11 5.70 5.45 +4.6%
# Haswell 0.88 4.45(5.00) 4.39(4.69) +1.4%(+6.6%)
# Bulldozer 0.99 6.95 5.95 +17%
# Haswell 0.88 4.45(5.00) 4.39(4.69) +1.4%(*)(+6.6%)
# Bulldozer 0.99 6.95 5.95 +17%(**)
#
# (*) Tiny improvement coefficient on Haswell is because we compare
# AVX1 stitch to sum with AVX2 SHA1.
# (**) Execution is fully dominated by integer code sequence and
# SIMD still hardly shows [in single-process benchmark;-]
$flavour
=
shift
;
$output
=
shift
;
...
...
@@ -142,11 +147,13 @@ my @rndkey=("%xmm14","%xmm15"); # for enc
my
(
$inout0
,
$inout1
,
$inout2
,
$inout3
)
=
map
("
%xmm
$_
",(
12
..
15
));
# for dec
if
(
1
)
{
# reassign for Atom Silvermont
@X
=
map
("
%xmm
$_
",(
8
..
15
));
@Tx
=
map
("
%xmm
$_
",(
5
..
7
));
(
$iv
,
$in
,
$rndkey0
)
=
map
("
%xmm
$_
",(
2
..
4
));
# for enc
@rndkey
=
("
%xmm0
","
%xmm1
");
# for enc
(
$inout0
,
$inout1
,
$inout2
,
$inout3
)
=
map
("
%xmm
$_
",(
0
..
3
));
# for dec
# The goal is to minimize amount of instructions with more than
# 3 prefix bytes. Or in more practical terms to keep AES-NI *and*
# SSSE3 instructions to upper half of the register bank.
@X
=
map
("
%xmm
$_
",(
8
..
11
,
4
..
7
));
@Tx
=
map
("
%xmm
$_
",(
12
,
13
,
3
));
(
$iv
,
$in
,
$rndkey0
)
=
map
("
%xmm
$_
",(
2
,
14
,
15
));
@rndkey
=
("
%xmm0
","
%xmm1
");
}
sub
AUTOLOAD
()
#
thunk
[
simplified
]
32-
bit
style
perlasm
...
...
@@ -216,17 +223,17 @@ $code.=<<___;
xor $D,@T[1]
and @T[1],@T[0]
movdqa 64($K_XX_XX),@
X
[2] # pbswap mask
movdqa 64($K_XX_XX),@
Tx
[2] # pbswap mask
movdqa 0($K_XX_XX),@Tx[1] # K_00_19
movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
movdqu 16($inp),@X[-3&7]
movdqu 32($inp),@X[-2&7]
movdqu 48($inp),@X[-1&7]
pshufb @
X
[2],@X[-4&7] # byte swap
pshufb @
Tx
[2],@X[-4&7] # byte swap
add \$64,$inp
pshufb @
X
[2],@X[-3&7]
pshufb @
X
[2],@X[-2&7]
pshufb @
X
[2],@X[-1&7]
pshufb @
Tx
[2],@X[-3&7]
pshufb @
Tx
[2],@X[-2&7]
pshufb @
Tx
[2],@X[-1&7]
paddd @Tx[1],@X[-4&7] # add K_00_19
paddd @Tx[1],@X[-3&7]
paddd @Tx[1],@X[-2&7]
...
...
@@ -704,6 +711,11 @@ ___
$j
=
$jj
=
$r
=
$sn
=
$rx
=
0
;
$Xi
=
4
;
# reassign for Atom Silvermont (see above)
(
$inout0
,
$inout1
,
$inout2
,
$inout3
,
$rndkey0
)
=
map
("
%xmm
$_
",(
0
..
4
));
@X
=
map
("
%xmm
$_
",(
8
..
13
,
6
,
7
));
@Tx
=
map
("
%xmm
$_
",(
14
,
15
,
5
));
my
@aes256_dec
=
(
'
&movdqu($inout0,"0x00($in0)");
',
'
&movdqu($inout1,"0x10($in0)"); &pxor ($inout0,$rndkey0);
',
...
...
@@ -844,17 +856,17 @@ $code.=<<___;
xor $D,@T[1]
and @T[1],@T[0]
movdqa 64($K_XX_XX),@
X
[2] # pbswap mask
movdqa 64($K_XX_XX),@
Tx
[2] # pbswap mask
movdqa 0($K_XX_XX),@Tx[1] # K_00_19
movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
movdqu 16($inp),@X[-3&7]
movdqu 32($inp),@X[-2&7]
movdqu 48($inp),@X[-1&7]
pshufb @
X
[2],@X[-4&7] # byte swap
pshufb @
Tx
[2],@X[-4&7] # byte swap
add \$64,$inp
pshufb @
X
[2],@X[-3&7]
pshufb @
X
[2],@X[-2&7]
pshufb @
X
[2],@X[-1&7]
pshufb @
Tx
[2],@X[-3&7]
pshufb @
Tx
[2],@X[-2&7]
pshufb @
Tx
[2],@X[-1&7]
paddd @Tx[1],@X[-4&7] # add K_00_19
paddd @Tx[1],@X[-3&7]
paddd @Tx[1],@X[-2&7]
...
...
@@ -1407,7 +1419,7 @@ $code.=<<___;
.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
___
if
(
$stiched_decrypt
)
{{{
if
(
$sti
t
ched_decrypt
)
{{{
# reset
(
$in0
,
$out
,
$len
,
$key
,
$ivp
,
$ctx
,
$inp
)
=
("
%rdi
","
%rsi
","
%rdx
","
%rcx
","
%r8
","
%r9
","
%r10
");
...
...
crypto/sha/asm/sha1-x86_64.pl
浏览文件 @
25f7117f
...
...
@@ -68,7 +68,7 @@
# Westmere 7.08 5.44/+30% -
# Sandy Bridge 7.93 6.16/+28% 4.99/+59%
# Ivy Bridge 6.30 4.63/+36% 4.60/+37%
# Haswell 5.98 4.36/+37% 3.57/+67%
# Haswell 5.98 4.12/+45% 3.57/+67%
# Bulldozer 10.9 5.95/+82%
# VIA Nano 10.2 7.46/+37%
# Atom 11.0 9.61/+14%
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录