Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
80bbc9ce
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
接近 2 年 前同步成功
通知
12
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
80bbc9ce
编写于
7月 01, 2004
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Minor (+12% on P4) performance tweak for sha512_block_sse2.
上级
51ce5230
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
29 addition
and
16 deletion
+29
-16
crypto/sha/asm/sha512-sse2.pl
crypto/sha/asm/sha512-sse2.pl
+29
-16
未找到文件。
crypto/sha/asm/sha512-sse2.pl
浏览文件 @
80bbc9ce
...
@@ -23,7 +23,7 @@
...
@@ -23,7 +23,7 @@
# 2.4GHz P4 1.4GHz AMD32 1.4GHz AMD64(*)
# 2.4GHz P4 1.4GHz AMD32 1.4GHz AMD64(*)
# SHA256/gcc(*) 54 43 59
# SHA256/gcc(*) 54 43 59
# SHA512/gcc 17 23 92
# SHA512/gcc 17 23 92
# SHA512/sse2
54(**) 55
(**)
# SHA512/sse2
61(**) 57
(**)
# SHA512/icc 26 28
# SHA512/icc 26 28
# SHA256/icc(*) 65 54
# SHA256/icc(*) 65 54
#
#
...
@@ -81,9 +81,6 @@ sub SHA2_ROUND()
...
@@ -81,9 +81,6 @@ sub SHA2_ROUND()
&movq
("
mm4
",
&QWP
(
$Foff
,
$W512
));
# load f
&movq
("
mm4
",
&QWP
(
$Foff
,
$W512
));
# load f
&movq
("
mm5
",
&QWP
(
$Goff
,
$W512
));
# load g
&movq
("
mm5
",
&QWP
(
$Goff
,
$W512
));
# load g
&movq
("
mm6
",
&QWP
(
$Hoff
,
$W512
));
# load h
&movq
("
mm6
",
&QWP
(
$Hoff
,
$W512
));
# load h
&movq
(
&QWP
(
$Foff
,
$W512
),
$E
);
# f = e
&movq
(
&QWP
(
$Goff
,
$W512
),"
mm4
");
# g = f
&movq
(
&QWP
(
$Hoff
,
$W512
),"
mm5
");
# h = g
&movq
("
mm2
",
$E
);
# %mm2 is sliding right
&movq
("
mm2
",
$E
);
# %mm2 is sliding right
&movq
("
mm3
",
$E
);
# %mm3 is sliding left
&movq
("
mm3
",
$E
);
# %mm3 is sliding left
...
@@ -100,6 +97,10 @@ sub SHA2_ROUND()
...
@@ -100,6 +97,10 @@ sub SHA2_ROUND()
&pxor
("
mm7
","
mm2
");
&pxor
("
mm7
","
mm2
");
&pxor
("
mm7
","
mm3
");
# T1=Sigma1_512(e)
&pxor
("
mm7
","
mm3
");
# T1=Sigma1_512(e)
&movq
(
&QWP
(
$Foff
,
$W512
),
$E
);
# f = e
&movq
(
&QWP
(
$Goff
,
$W512
),"
mm4
");
# g = f
&movq
(
&QWP
(
$Hoff
,
$W512
),"
mm5
");
# h = g
&pxor
("
mm4
","
mm5
");
# f^=g
&pxor
("
mm4
","
mm5
");
# f^=g
&pand
("
mm4
",
$E
);
# f&=e
&pand
("
mm4
",
$E
);
# f&=e
&pxor
("
mm4
","
mm5
");
# f^=g
&pxor
("
mm4
","
mm5
");
# f^=g
...
@@ -108,9 +109,6 @@ sub SHA2_ROUND()
...
@@ -108,9 +109,6 @@ sub SHA2_ROUND()
&movq
("
mm2
",
&QWP
(
$Boff
,
$W512
));
# load b
&movq
("
mm2
",
&QWP
(
$Boff
,
$W512
));
# load b
&movq
("
mm3
",
&QWP
(
$Coff
,
$W512
));
# load c
&movq
("
mm3
",
&QWP
(
$Coff
,
$W512
));
# load c
&movq
(
$E
,
&QWP
(
$Doff
,
$W512
));
# e = d
&movq
(
$E
,
&QWP
(
$Doff
,
$W512
));
# e = d
&movq
(
&QWP
(
$Boff
,
$W512
),
$A
);
# b = a
&movq
(
&QWP
(
$Coff
,
$W512
),"
mm2
");
# c = b
&movq
(
&QWP
(
$Doff
,
$W512
),"
mm3
");
# d = c
&paddq
("
mm7
","
mm6
");
# T1+=h
&paddq
("
mm7
","
mm6
");
# T1+=h
&paddq
("
mm7
",
&QWP
(
0
,
$K512
,
$kidx
,
8
));
# T1+=K512[i]
&paddq
("
mm7
",
&QWP
(
0
,
$K512
,
$kidx
,
8
));
# T1+=K512[i]
...
@@ -132,12 +130,15 @@ sub SHA2_ROUND()
...
@@ -132,12 +130,15 @@ sub SHA2_ROUND()
&pxor
("
mm6
","
mm4
");
&pxor
("
mm6
","
mm4
");
&pxor
("
mm6
","
mm5
");
# T2=Sigma0_512(a)
&pxor
("
mm6
","
mm5
");
# T2=Sigma0_512(a)
&movq
("
mm4
","
mm2
");
# %mm4=b
&movq
(
&QWP
(
$Boff
,
$W512
),
$A
);
# b = a
&pand
("
mm2
",
$A
);
# b&=a
&movq
(
&QWP
(
$Coff
,
$W512
),"
mm2
");
# c = b
&pand
("
mm4
","
mm3
");
# %mm4&=c
&movq
(
&QWP
(
$Doff
,
$W512
),"
mm3
");
# d = c
&pand
("
mm3
",
$A
);
# c&=a
&pxor
("
mm4
","
mm2
");
# %mm4^=b&a
&movq
("
mm4
",
$A
);
# %mm4=a
&pxor
("
mm4
","
mm3
");
# %mm4^=c&a
&por
(
$A
,"
mm3
");
# a=a|c
&pand
("
mm4
","
mm3
");
# %mm4=a&c
&pand
(
$A
,"
mm2
");
# a=(a|c)&b
&por
("
mm4
",
$A
);
# %mm4=(a&c)|((a|c)&b)
&paddq
("
mm6
","
mm4
");
# T2+=Maj(a,b,c)
&paddq
("
mm6
","
mm4
");
# T2+=Maj(a,b,c)
&movq
(
$A
,"
mm7
");
# a=T1
&movq
(
$A
,"
mm7
");
# a=T1
...
@@ -201,8 +202,6 @@ $func="sha512_block_sse2";
...
@@ -201,8 +202,6 @@ $func="sha512_block_sse2";
# available memory slots to fill. It will only relieve some
# available memory slots to fill. It will only relieve some
# pressure off memory bus...
# pressure off memory bus...
&align
(
8
);
&set_label
("
_1st_loop
");
# 0-15
# flip input stream byte order...
# flip input stream byte order...
&mov
("
eax
",
&DWP
(
0
,
$data
,
$Widx
,
8
));
&mov
("
eax
",
&DWP
(
0
,
$data
,
$Widx
,
8
));
&mov
("
ebx
",
&DWP
(
4
,
$data
,
$Widx
,
8
));
&mov
("
ebx
",
&DWP
(
4
,
$data
,
$Widx
,
8
));
...
@@ -213,10 +212,24 @@ $func="sha512_block_sse2";
...
@@ -213,10 +212,24 @@ $func="sha512_block_sse2";
&mov
(
&DWP
(
128
+
0
,
$W512
,
$Widx
,
8
),"
ebx
");
# copy of W512[i]
&mov
(
&DWP
(
128
+
0
,
$W512
,
$Widx
,
8
),"
ebx
");
# copy of W512[i]
&mov
(
&DWP
(
128
+
4
,
$W512
,
$Widx
,
8
),"
eax
");
&mov
(
&DWP
(
128
+
4
,
$W512
,
$Widx
,
8
),"
eax
");
&align
(
8
);
&set_label
("
_1st_loop
");
# 0-15
# flip input stream byte order...
&mov
("
eax
",
&DWP
(
0
+
8
,
$data
,
$Widx
,
8
));
&mov
("
ebx
",
&DWP
(
4
+
8
,
$data
,
$Widx
,
8
));
&bswap
("
eax
");
&bswap
("
ebx
");
&mov
(
&DWP
(
0
+
8
,
$W512
,
$Widx
,
8
),"
ebx
");
# W512[i]
&mov
(
&DWP
(
4
+
8
,
$W512
,
$Widx
,
8
),"
eax
");
&mov
(
&DWP
(
128
+
0
+
8
,
$W512
,
$Widx
,
8
),"
ebx
");
# copy of W512[i]
&mov
(
&DWP
(
128
+
4
+
8
,
$W512
,
$Widx
,
8
),"
eax
");
&set_label
("
_1st_looplet
");
&SHA2_ROUND
(
$Widx
,
$Widx
);
&inc
(
$Widx
);
&SHA2_ROUND
(
$Widx
,
$Widx
);
&inc
(
$Widx
);
&cmp
(
$Widx
,
1
6
)
&cmp
(
$Widx
,
1
5
)
&jl
(
&label
("
_1st_loop
"));
&jl
(
&label
("
_1st_loop
"));
&je
(
&label
("
_1st_looplet
"));
# playing similar trick on 2nd loop
# does not improve performance...
$Kidx
=
"
ebx
";
# start using %ebx as Kidx
$Kidx
=
"
ebx
";
# start using %ebx as Kidx
&mov
(
$Kidx
,
$Widx
);
&mov
(
$Kidx
,
$Widx
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录