Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
d4bb6bdd
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
1 年多 前同步成功
通知
10
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d4bb6bdd
编写于
12年前
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
sha256-586.pl: tune away regression on Nehalem core and incidentally
improve performance on Atom and P4.
上级
ee9bf3eb
无相关合并请求
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
26 addition
and
27 deletion
+26
-27
crypto/sha/asm/sha256-586.pl
crypto/sha/asm/sha256-586.pl
+26
-27
未找到文件。
crypto/sha/asm/sha256-586.pl
浏览文件 @
d4bb6bdd
...
...
@@ -16,9 +16,9 @@
# May 2012.
#
# Optimization including one of Pavel Semjanov's ideas resulted in
-# ~5% improvement on P4, AMD and Sandy Bridge, and ~13% on Atom.
+# ~5% improvement on AMD and Sandy Bridge, and ~15% on Atom and P4.
# Pavel also suggested full unroll. While his code runs ~20%/13%/6%
-# faster on K8/Core2/Sandy Bridge, it's 9.6x larger and ~6%/18%/24%
+# faster on K8/Core2/Sandy Bridge, it's 9.6x larger and ~14%/23%/24%
# slower on P4/Atom/Pentium...
#
# Performance in clock cycles per processed byte (less is better):
...
...
@@ -26,7 +26,7 @@
# Pentium PIII P4 AMD K8 Core2 SB(**) Atom
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
-# x86 asm 39 29 31 19 18 19(**) 31
+# x86 asm 39 31 29 19 18 19(**) 30
# x86_64 asm(*) - - 21 16 16 18 25
#
# (*) x86_64 assembler performance is presented for reference
...
...
@@ -63,34 +63,35 @@ sub BODY_00_15() {
&mov
("
esi
",
$Foff
);
&ror
("
ecx
",
25
-
11
);
&add
(
$T
,"
edi
")
if
(
$in_16_63
);
# T += sigma1(X[-2])
&xor
("
ecx
",
$E
);
&mov
("
edi
",
$Goff
);
&ror
("
ecx
",
11
-
6
);
&xor
("
esi
","
edi
");
&xor
("
ecx
",
$E
);
&xor
("
esi
","
edi
");
&mov
(
&DWP
(
4
*
(
9
+
15
),"
esp
"),
$T
)
if
(
$in_16_63
);
# save X[0]
&ror
("
ecx
",
6
);
# Sigma1(e)
&ror
("
ecx
",
11
-
6
);
&and
("
esi
",
$E
);
&add
(
$T
,"
ecx
");
# T += Sigma1(e)
&mov
(
$Eoff
,
$E
);
# modulo-scheduled
&xor
("
esi
","
edi
");
# Ch(e,f,g)
&xor
(
$E
,"
ecx
");
&xor
("
esi
","
edi
");
# Ch(e,f,g)
&add
(
$T
,
$Hoff
);
# T += h
&ror
(
$E
,
6
);
# Sigma1(e)
&mov
("
ecx
",
$A
);
&add
(
$T
,"
esi
");
# T += Ch(e,f,g)
&mov
("
ecx
",
$A
);
&mov
(
$E
,
$Doff
);
# e becomes d, which is e in next iteration
&ror
("
ecx
",
22
-
13
);
&add
(
$T
,"
esi
");
# T += Ch(e,f,g)
&xor
("
ecx
",
$A
);
&mov
("
esi
",
&DWP
(
0
,
$K256
));
&ror
("
ecx
",
13
-
2
);
&add
(
$T
,
$E
);
# T += Sigma1(e)
&mov
("
edi
",
$Boff
);
&xor
("
ecx
",
$A
);
&mov
(
$Aoff
,
$A
);
# modulo-scheduled
&lea
("
esp
",
&DWP
(
-
4
,"
esp
"));
&ror
("
ecx
",
13
-
2
);
&mov
("
esi
",
&DWP
(
0
,
$K256
));
&xor
("
ecx
",
$A
);
&mov
(
$E
,
$Eoff
);
# e becomes d, which is e in next iteration
&xor
(
$A
,"
edi
");
# a ^= b
&ror
("
ecx
",
2
);
# Sigma0(a)
&xor
(
$A
,"
edi
");
# a ^= b
&add
(
$T
,"
esi
");
&push
(
$A
);
# (b^c) in next round
&add
(
$T
,"
esi
");
# T+= K[i]
&mov
(
&DWP
(
0
,"
esp
"),
$A
);
# (b^c) in next round
&add
(
$E
,
$T
);
# d += T
&and
(
$A
,
&DWP
(
4
,"
esp
"));
# a &= (b^c)
&add
(
$T
,"
ecx
");
# T += Sigma0(a)
...
...
@@ -176,19 +177,17 @@ sub BODY_00_15() {
&mov
("
ecx
",
&DWP
(
4
*
(
9
+
15
+
16
-
14
),"
esp
"));
&ror
("
esi
",
18
-
7
);
&mov
("
edi
","
ecx
");
&xor
("
esi
",
$T
);
&shr
(
$T
,
3
);
&ror
("
ecx
",
19
-
17
);
&xor
("
esi
",
$T
);
&shr
(
$T
,
3
);
&xor
("
ecx
","
edi
");
&ror
("
esi
",
7
);
&ror
("
edi
",
19
-
17
);
&xor
(
$T
,"
esi
");
# T = sigma0(X[-15])
&xor
("
edi
","
ecx
");
&shr
("
ecx
",
10
);
&ror
("
edi
",
17
);
&ror
("
ecx
",
17
);
&add
(
$T
,
&DWP
(
4
*
(
9
+
15
+
16
),"
esp
"));
# T += X[-16]
&xor
("
edi
","
ecx
");
# sigma1(X[-2])
&shr
("
edi
",
10
);
&add
(
$T
,
&DWP
(
4
*
(
9
+
15
+
16
-
9
),"
esp
"));
# T += X[-7]
&xor
("
edi
","
ecx
");
# sigma1(X[-2])
# &add ($T,"edi"); # T += sigma1(X[-2])
# &mov (&DWP(4*(9+15),"esp"),$T); # save X[0]
...
...
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
反馈
建议
客服
返回
顶部