Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
34736de4
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
大约 1 年 前同步成功
通知
9
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
34736de4
编写于
10月 14, 2005
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Flip saved argument block and tp [required for non-SSE2 path].
上级
5f50d597
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
26 addition
and
26 deletion
+26
-26
crypto/bn/asm/x86-mont.pl
crypto/bn/asm/x86-mont.pl
+26
-26
未找到文件。
crypto/bn/asm/x86-mont.pl
浏览文件 @
34736de4
...
@@ -37,16 +37,15 @@ $ap="esi";
...
@@ -37,16 +37,15 @@ $ap="esi";
$rp
=
"
edi
";
$bp
=
"
edi
";
# overlapping variables!!!
$rp
=
"
edi
";
$bp
=
"
edi
";
# overlapping variables!!!
$np
=
"
edx
";
$np
=
"
edx
";
$num
=
"
ebp
";
$num
=
"
ebp
";
$tp
=
"
esp
";
$
bias
=
2
;
# amount of extra words in tp
$
_rp
=
&DWP
(
4
*
0
,"
esp
");
# stack top layout
# (rounded up to even value)
$_ap
=
&DWP
(
4
*
1
,"
esp
");
$_
rp
=
&DWP
(
4
*
(
$bias
+
0
),"
esp
",
$num
,
4
);
# stack frame layout below tp
$_
bp
=
&DWP
(
4
*
2
,"
esp
");
$_
ap
=
&DWP
(
4
*
(
$bias
+
1
),"
esp
",
$num
,
4
);
$_
np
=
&DWP
(
4
*
3
,"
esp
"
);
$_
bp
=
&DWP
(
4
*
(
$bias
+
2
),"
esp
",
$num
,
4
);
$_
n0
=
&DWP
(
4
*
4
,"
esp
"
);
$_n
p
=
&DWP
(
4
*
(
$bias
+
3
),"
esp
",
$num
,
4
);
$_n
um
=
&DWP
(
4
*
5
,"
esp
"
);
$_
n0
=
&DWP
(
4
*
(
$bias
+
4
),"
esp
",
$num
,
4
);
$_
sp
=
&DWP
(
4
*
6
,"
esp
"
);
$
_sp
=
&DWP
(
4
*
(
$bias
+
5
),"
esp
",
$num
,
4
);
$
frame
=
32
;
# size of above frame rounded up to 16n
$acc0
=
"
mm0
";
# mmx register bank layout
$acc0
=
"
mm0
";
# mmx register bank layout
$acc1
=
"
mm1
";
$acc1
=
"
mm1
";
...
@@ -71,12 +70,12 @@ if($sse2) {
...
@@ -71,12 +70,12 @@ if($sse2) {
&mov
(
$num
,
&wparam
(
5
));
# int num
&mov
(
$num
,
&wparam
(
5
));
# int num
&mov
("
edi
","
esp
");
# saved stack pointer!
&mov
("
edi
","
esp
");
# saved stack pointer!
&add
(
$num
,
$bias
+
6
);
&add
(
$num
,
1
);
# extra word on top of tp
&neg
(
$num
);
&neg
(
$num
);
&lea
("
esp
",
&DWP
(
0
,"
esp
",
$num
,
4
));
# alloca(4*(num+$bias+6
))
&lea
("
esp
",
&DWP
(
-
$frame
,"
esp
",
$num
,
4
));
# alloca($frame+8*($num+1
))
&neg
(
$num
);
&neg
(
$num
);
&and
("
esp
",
-
1024
);
# minimize TLB utilization
&and
("
esp
",
-
1024
);
# minimize TLB utilization
&sub
(
$num
,
$bias
+
6
);
# num is restored to its original value
&sub
(
$num
,
1
);
# num is restored to its original value
# and will remain constant from now...
# and will remain constant from now...
&mov
(
$_rp
,"
eax
");
# ... save a copy of argument block
&mov
(
$_rp
,"
eax
");
# ... save a copy of argument block
...
@@ -84,6 +83,7 @@ if($sse2) {
...
@@ -84,6 +83,7 @@ if($sse2) {
&mov
(
$_bp
,"
ecx
");
&mov
(
$_bp
,"
ecx
");
&mov
(
$_np
,"
edx
");
&mov
(
$_np
,"
edx
");
&mov
(
$_n0
,"
esi
");
&mov
(
$_n0
,"
esi
");
#&mov ($_num,$num); # redundant in sse2 context
&mov
(
$_sp
,"
edi
");
# saved stack pointer!
&mov
(
$_sp
,"
edi
");
# saved stack pointer!
&mov
("
eax
",
-
1
);
&mov
("
eax
",
-
1
);
...
@@ -126,7 +126,7 @@ if($sse2) {
...
@@ -126,7 +126,7 @@ if($sse2) {
&paddq
(
$car1
,
$acc1
);
# +=c1
&paddq
(
$car1
,
$acc1
);
# +=c1
&paddq
(
$car1
,
$acc0
);
# +=ap[j]*bp[0];
&paddq
(
$car1
,
$acc0
);
# +=ap[j]*bp[0];
&movd
(
&DWP
(
-
4
,"
esp
",
$j
,
4
),
$car1
);
# tp[j-1]=
&movd
(
&DWP
(
$frame
-
4
,"
esp
",
$j
,
4
),
$car1
);
# tp[j-1]=
&psrlq
(
$car0
,
32
);
&psrlq
(
$car0
,
32
);
&psrlq
(
$car1
,
32
);
&psrlq
(
$car1
,
32
);
...
@@ -136,7 +136,7 @@ if($sse2) {
...
@@ -136,7 +136,7 @@ if($sse2) {
&jl
(
&label
("
1st
"));
&jl
(
&label
("
1st
"));
&paddq
(
$car1
,
$car0
);
&paddq
(
$car1
,
$car0
);
&movq
(
&DWP
(
-
4
,"
esp
",
$num
,
4
),
$car1
);
&movq
(
&DWP
(
$frame
-
4
,"
esp
",
$num
,
4
),
$car1
);
&inc
(
$i
);
# i++
&inc
(
$i
);
# i++
&set_label
("
outer
");
&set_label
("
outer
");
...
@@ -144,8 +144,8 @@ if($sse2) {
...
@@ -144,8 +144,8 @@ if($sse2) {
&movd
(
$mul0
,
&DWP
(
0
,
$bp
,
$i
,
4
));
# bp[i]
&movd
(
$mul0
,
&DWP
(
0
,
$bp
,
$i
,
4
));
# bp[i]
&movd
(
$mul1
,
&DWP
(
0
,
$ap
));
# ap[0]
&movd
(
$mul1
,
&DWP
(
0
,
$ap
));
# ap[0]
&movd
(
$temp
,
&DWP
(
0
,"
esp
"));
# tp[0]
&movd
(
$temp
,
&DWP
(
$frame
,"
esp
"));
# tp[0]
&movd
(
$car1
,
&DWP
(
0
,
$np
,
$j
,
4
));
# np[0]
&movd
(
$car1
,
&DWP
(
0
,
$np
));
# np[0]
&pmuludq
(
$mul1
,
$mul0
);
# ap[0]*bp[i]
&pmuludq
(
$mul1
,
$mul0
);
# ap[0]*bp[i]
&paddq
(
$mul1
,
$temp
);
# +=tp[0]
&paddq
(
$mul1
,
$temp
);
# +=tp[0]
...
@@ -165,7 +165,7 @@ if($sse2) {
...
@@ -165,7 +165,7 @@ if($sse2) {
&set_label
("
inner
");
&set_label
("
inner
");
&movd
(
$acc0
,
&DWP
(
0
,
$ap
,
$j
,
4
));
# ap[j]
&movd
(
$acc0
,
&DWP
(
0
,
$ap
,
$j
,
4
));
# ap[j]
&movd
(
$acc1
,
&DWP
(
0
,
$np
,
$j
,
4
));
# np[j]
&movd
(
$acc1
,
&DWP
(
0
,
$np
,
$j
,
4
));
# np[j]
&movd
(
$temp
,
&DWP
(
0
,"
esp
",
$j
,
4
));
# tp[j]
&movd
(
$temp
,
&DWP
(
$frame
,"
esp
",
$j
,
4
));
# tp[j]
&pmuludq
(
$acc0
,
$mul0
);
# ap[j]*bp[i]
&pmuludq
(
$acc0
,
$mul0
);
# ap[j]*bp[i]
&pmuludq
(
$acc1
,
$mul1
);
# np[j]*m1
&pmuludq
(
$acc1
,
$mul1
);
# np[j]*m1
&paddq
(
$car0
,
$temp
);
# +=tp[j]
&paddq
(
$car0
,
$temp
);
# +=tp[j]
...
@@ -175,7 +175,7 @@ if($sse2) {
...
@@ -175,7 +175,7 @@ if($sse2) {
&paddq
(
$car1
,
$acc1
);
# +=c1
&paddq
(
$car1
,
$acc1
);
# +=c1
&paddq
(
$car1
,
$acc0
);
# +=ap[j]*bp[i]+tp[j]
&paddq
(
$car1
,
$acc0
);
# +=ap[j]*bp[i]+tp[j]
&movd
(
&DWP
(
-
4
,"
esp
",
$j
,
4
),
$car1
);
# tp[j-1]
&movd
(
&DWP
(
$frame
-
4
,"
esp
",
$j
,
4
),
$car1
);
# tp[j-1]=
&psrlq
(
$car0
,
32
);
&psrlq
(
$car0
,
32
);
&psrlq
(
$car1
,
32
);
&psrlq
(
$car1
,
32
);
...
@@ -184,10 +184,10 @@ if($sse2) {
...
@@ -184,10 +184,10 @@ if($sse2) {
&cmp
(
$j
,
$num
);
&cmp
(
$j
,
$num
);
&jl
(
&label
("
inner
"));
&jl
(
&label
("
inner
"));
&movd
(
$temp
,
&DWP
(
0
,"
esp
",
$num
,
4
));
&movd
(
$temp
,
&DWP
(
$frame
,"
esp
",
$num
,
4
));
&paddq
(
$car1
,
$car0
);
&paddq
(
$car1
,
$car0
);
&paddq
(
$car1
,
$temp
);
&paddq
(
$car1
,
$temp
);
&movq
(
&DWP
(
-
4
,"
esp
",
$num
,
4
),
$car1
);
&movq
(
&DWP
(
$frame
-
4
,"
esp
",
$num
,
4
),
$car1
);
&lea
(
$i
,
&DWP
(
1
,
$i
));
# i++
&lea
(
$i
,
&DWP
(
1
,
$i
));
# i++
&cmp
(
$i
,
$num
);
&cmp
(
$i
,
$num
);
...
@@ -195,26 +195,26 @@ if($sse2) {
...
@@ -195,26 +195,26 @@ if($sse2) {
&emms
();
# done with mmx bank
&emms
();
# done with mmx bank
&mov
("
esi
",
&DWP
(
0
,"
esp
",
$num
,
4
));
# load upmost overflow bit
&mov
("
esi
",
&DWP
(
$frame
,"
esp
",
$num
,
4
));
# load upmost overflow bit
&mov
(
$rp
,
$_rp
);
# load result pointer
&mov
(
$rp
,
$_rp
);
# load result pointer
# [$ap and $bp are zapped]
# [$ap and $bp are zapped]
&xor
(
$i
,
$i
);
# i=0
&xor
(
$i
,
$i
);
# i=0
&lea
(
$j
,
&DWP
(
-
1
,
$num
));
# j=num-1
&lea
(
$j
,
&DWP
(
-
1
,
$num
));
# j=num-1
&cmp
("
esi
",
0
);
# clears CF unconditionally
&cmp
("
esi
",
0
);
# clears CF unconditionally
&jnz
(
&label
("
sub
"));
&jnz
(
&label
("
sub
"));
&mov
("
eax
",
&DWP
(
0
,"
esp
",
$j
,
4
));
&mov
("
eax
",
&DWP
(
$frame
,"
esp
",
$j
,
4
));
&cmp
("
eax
",
&DWP
(
0
,
$np
,
$j
,
4
));
# tp[num-1]-np[num-1]?
&cmp
("
eax
",
&DWP
(
0
,
$np
,
$j
,
4
));
# tp[num-1]-np[num-1]?
&jae
(
&label
("
sub
"));
# if taken CF is cleared
&jae
(
&label
("
sub
"));
# if taken CF is cleared
&set_label
("
copy
");
&set_label
("
copy
");
&mov
("
eax
",
&DWP
(
0
,"
esp
",
$j
,
4
));
&mov
("
eax
",
&DWP
(
$frame
,"
esp
",
$j
,
4
));
&mov
(
&DWP
(
0
,
$rp
,
$j
,
4
),"
eax
");
# rp[i]=tp[i]
&mov
(
&DWP
(
0
,
$rp
,
$j
,
4
),"
eax
");
# rp[i]=tp[i]
&mov
(
&DWP
(
0
,"
esp
",
$j
,
4
),
$j
);
# zap temporary vector
&mov
(
&DWP
(
$frame
,"
esp
",
$j
,
4
),
$j
);
# zap temporary vector
&dec
(
$j
);
&dec
(
$j
);
&jge
(
&label
("
copy
"));
&jge
(
&label
("
copy
"));
&jmp
(
&label
("
exit_sse2
"));
&jmp
(
&label
("
exit_sse2
"));
&set_label
("
sub
",
4
);
&set_label
("
sub
",
4
);
&mov
("
eax
",
&DWP
(
0
,"
esp
",
$i
,
4
));
&mov
("
eax
",
&DWP
(
$frame
,"
esp
",
$i
,
4
));
&sbb
("
eax
",
&DWP
(
0
,
$np
,
$i
,
4
));
&sbb
("
eax
",
&DWP
(
0
,
$np
,
$i
,
4
));
&mov
(
&DWP
(
0
,
$rp
,
$i
,
4
),"
eax
");
# rp[i]=tp[i]-np[i]
&mov
(
&DWP
(
0
,
$rp
,
$i
,
4
),"
eax
");
# rp[i]=tp[i]-np[i]
&lea
(
$i
,
&DWP
(
1
,
$i
));
# i++
&lea
(
$i
,
&DWP
(
1
,
$i
));
# i++
...
@@ -224,7 +224,7 @@ if($sse2) {
...
@@ -224,7 +224,7 @@ if($sse2) {
&sbb
("
esi
",
0
);
# esi holds upmost overflow bit
&sbb
("
esi
",
0
);
# esi holds upmost overflow bit
&jc
(
&label
("
copy
"));
&jc
(
&label
("
copy
"));
&set_label
("
zap
");
&set_label
("
zap
");
&mov
(
&DWP
(
0
,"
esp
",
$j
,
4
),
$i
);
# zap temporary vector
&mov
(
&DWP
(
$frame
,"
esp
",
$j
,
4
),
$i
);
# zap temporary vector
&dec
(
$j
);
&dec
(
$j
);
&jge
(
&label
("
zap
"));
&jge
(
&label
("
zap
"));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录