OpenHarmony / Third Party Openssl, commit c372482c
Commit c372482c
Authored August 18, 2009 by Andy Polyakov

sha1-x86* assembler update: F_40_59 and Atom-specific optimizations.

Parent: ba4526e0
Showing 2 changed files with 189 additions and 180 deletions:

    crypto/sha/asm/sha1-586.pl     +56  -46
    crypto/sha/asm/sha1-x86_64.pl  +133 -134
crypto/sha/asm/sha1-586.pl
...
@@ -12,6 +12,8 @@
 # commentary below], and in 2006 the rest was rewritten in order to
 # gain freedom to liberate licensing terms.
+# January, September 2004.
+#
 # It was noted that Intel IA-32 C compiler generates code which
 # performs ~30% *faster* on P4 CPU than original *hand-coded*
 # SHA1 assembler implementation. To address this problem (and
...
@@ -31,6 +33,17 @@
 # ----------------------------------------------------------------
 # <appro@fy.chalmers.se>
+# August 2009.
+#
+# George Spelvin has tipped that F_40_59(b,c,d) can be rewritten as
+# '(c&d) + (b&(c^d))', which allows to accumulate partial results
+# and lighten "pressure" on scratch registers. This resulted in
+# >12% performance improvement on contemporary AMD cores (with no
+# degradation on other CPUs:-). Also, the code was revised to maximize
+# "distance" between instructions producing input to 'lea' instruction
+# and the 'lea' instruction itself, which is essential for Intel Atom
+# core.

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
...
@@ -59,15 +72,16 @@ sub BODY_00_15
 	&rotl($tmp1,5);                         # tmp1=ROTATE(a,5)
+	&xor($f,$d);
 	&add($tmp1,$e);                         # tmp1+=e;
+	&and($f,$b);
-	&mov($e,&swtmp($n%16));                 # e becomes volatile and is loaded
+	&mov($e,&swtmp($n%16));                 # e becomes volatile and is loaded
 	                                        # with xi, also note that e becomes
 	                                        # f in next round...
-	&xor($f,$d);                            # f holds F_00_19(b,c,d)
-	&and($f,$b);
 	&rotr($b,2);                            # b=ROTATE(b,30)
-	&lea($tmp1,&DWP(0x5a827999,$tmp1,$e));  # tmp1+=K_00_19+xi
+	&xor($f,$d);                            # f holds F_00_19(b,c,d)
+	&lea($tmp1,&DWP(0x5a827999,$tmp1,$e));  # tmp1+=K_00_19+xi
-	if ($n==15) { &add($f,$tmp1); }         # f+=tmp1
+	if ($n==15) { &mov($e,&swtmp(($n+1)%16));# pre-fetch f for next round
+	              &add($f,$tmp1); }         # f+=tmp1
 	else        { &add($tmp1,$f); }         # f becomes a in next round
 	}
...
@@ -77,22 +91,22 @@ sub BODY_16_19
 	&comment("16_19 $n");

 	&mov($f,&swtmp($n%16));                 # f to hold Xupdate(xi,xa,xb,xc,xd)
-	&mov($tmp1,$c);                         # tmp1 to hold F_00_19(b,c,d)
-	&xor($f,&swtmp(($n+2)%16));
-	&xor($tmp1,$d);
-	&xor($f,&swtmp(($n+8)%16));
-	&and($tmp1,$b);                         # tmp1 holds F_00_19(b,c,d)
-	&rotr($b,2);                            # b=ROTATE(b,30)
+	&mov($tmp1,$c);                         # tmp1 to hold F_00_19(b,c,d)
+	&xor($f,&swtmp(($n+2)%16));             # f to hold Xupdate(xi,xa,xb,xc,xd)
+	&xor($tmp1,$d);
+	&xor($f,&swtmp(($n+8)%16));
+	&and($tmp1,$b);
 	&xor($f,&swtmp(($n+13)%16));            # f holds xa^xb^xc^xd
 	&rotl($f,1);                            # f=ROTATE(f,1)
 	&xor($tmp1,$d);                         # tmp1=F_00_19(b,c,d)
-	&mov(&swtmp($n%16),$f);                 # xi=f
-	&lea($f,&DWP(0x5a827999,$f,$e));        # f+=K_00_19+e
-	&mov($e,$a);                            # e becomes volatile
-	&rotl($e,5);                            # e=ROTATE(a,5)
-	&add($f,$tmp1);                         # f+=F_00_19(b,c,d)
-	&add($f,$e);                            # f+=ROTATE(a,5)
+	&add($e,$tmp1);                         # e+=F_00_19(b,c,d)
+	&mov($tmp1,$a);
+	&rotr($b,2);                            # b=ROTATE(b,30)
+	&mov(&swtmp($n%16),$f);                 # xi=f
+	&rotl($tmp1,5);                         # ROTATE(a,5)
+	&lea($f,&DWP(0x5a827999,$f,$e));        # f+=F_00_19(b,c,d)+e
+	&mov($e,&swtmp(($n+1)%16));             # pre-fetch f for next round
+	&add($f,$tmp1);                         # f+=ROTATE(a,5)
 	}

 sub BODY_20_39
...
@@ -103,20 +117,20 @@ sub BODY_20_39
 	&comment("20_39 $n");

 	&mov($tmp1,$b);                         # tmp1 to hold F_20_39(b,c,d)
 	&mov($f,&swtmp($n%16));                 # f to hold Xupdate(xi,xa,xb,xc,xd)
-	&rotr($b,2);                            # b=ROTATE(b,30)
-	&xor($f,&swtmp(($n+2)%16));
+	&xor($f,&swtmp(($n+2)%16));             # f to hold Xupdate(xi,xa,xb,xc,xd)
 	&xor($tmp1,$c);
 	&xor($f,&swtmp(($n+8)%16));
 	&xor($tmp1,$d);                         # tmp1 holds F_20_39(b,c,d)
 	&xor($f,&swtmp(($n+13)%16));            # f holds xa^xb^xc^xd
 	&rotl($f,1);                            # f=ROTATE(f,1)
-	&add($tmp1,$e);
-	&mov(&swtmp($n%16),$f);                 # xi=f
-	&mov($e,$a);                            # e becomes volatile
-	&rotl($e,5);                            # e=ROTATE(a,5)
-	&lea($f,&DWP($K,$f,$tmp1));             # f+=K_20_39+e
-	&add($f,$e);                            # f+=ROTATE(a,5)
+	&add($e,$tmp1);                         # e+=F_20_39(b,c,d)
+	&rotr($b,2);                            # b=ROTATE(b,30)
+	&mov($tmp1,$a);
+	&rotl($tmp1,5);                         # ROTATE(a,5)
+	&mov(&swtmp($n%16),$f) if ($n<77);      # xi=f
+	&lea($f,&DWP($K,$f,$e));                # f+=e+K_XX_YY
+	&mov($e,&swtmp(($n+1)%16)) if ($n<79);  # pre-fetch f for next round
+	&add($f,$tmp1);                         # f+=ROTATE(a,5)
 	}

 sub BODY_40_59
...
@@ -125,28 +139,24 @@ sub BODY_40_59
 	&comment("40_59 $n");

 	&mov($f,&swtmp($n%16));                 # f to hold Xupdate(xi,xa,xb,xc,xd)
-	&mov($tmp1,&swtmp(($n+2)%16));
-	&xor($f,$tmp1);
-	&mov($tmp1,&swtmp(($n+8)%16));
-	&xor($f,$tmp1);
-	&mov($tmp1,&swtmp(($n+13)%16));
-	&xor($f,$tmp1);                         # f holds xa^xb^xc^xd
-	&mov($tmp1,$b);                         # tmp1 to hold F_40_59(b,c,d)
+	&mov($tmp1,$c);                         # tmp1 to hold F_40_59(b,c,d)
+	&xor($f,&swtmp(($n+2)%16));             # f to hold Xupdate(xi,xa,xb,xc,xd)
+	&xor($tmp1,$d);
+	&xor($f,&swtmp(($n+8)%16));
+	&and($tmp1,$b);
+	&xor($f,&swtmp(($n+13)%16));            # f holds xa^xb^xc^xd
 	&rotl($f,1);                            # f=ROTATE(f,1)
-	&or($tmp1,$c);
-	&mov(&swtmp($n%16),$f);                 # xi=f
-	&and($tmp1,$d);
-	&lea($f,&DWP(0x8f1bbcdc,$f,$e));        # f+=K_40_59+e
-	&mov($e,$b);                            # e becomes volatile and is used
-	                                        # to calculate F_40_59(b,c,d)
+	&add($tmp1,$e);                         # b&(c^d)+=e
 	&rotr($b,2);                            # b=ROTATE(b,30)
-	&and($e,$c);
-	&or($tmp1,$e);                          # tmp1 holds F_40_59(b,c,d)
-	&mov($e,$a);
-	&rotl($e,5);                            # e=ROTATE(a,5)
-	&add($f,$tmp1);                         # f+=tmp1;
+	&mov($e,$a);                            # e becomes volatile
+	&rotl($e,5);                            # ROTATE(a,5)
+	&mov(&swtmp($n%16),$f);                 # xi=f
+	&lea($f,&DWP(0x8f1bbcdc,$f,$tmp1));     # f+=K_40_59+e+(b&(c^d))
+	&mov($tmp1,$c);
 	&add($f,$e);                            # f+=ROTATE(a,5)
+	&and($tmp1,$d);
+	&mov($e,&swtmp(($n+1)%16));             # pre-fetch f for next round
+	&add($f,$tmp1);                         # f+=c&d
 	}

 &function_begin("sha1_block_data_order");
...
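A note on the F_40_59 rewrite above: the two addends never overlap, since (c&d) & (b&(c^d)) == 0 in every bit position, so replacing the OR of the majority function with an ordinary addition can never generate a carry and the identity holds for full 32-bit words. That disjointness is also what lets the new BODY_40_59 accumulate partial results: tmp1 collects b&(c^d) plus e, a single 'lea' folds that into f together with K_40_59, and the remaining c&d term is added at the end. A standalone Perl sanity check (illustration only, not part of the commit):

    #!/usr/bin/env perl
    # Check the George Spelvin identity used in BODY_40_59:
    #   (b&c)|(b&d)|(c&d) == (c&d) + (b&(c^d))   for 32-bit words.
    # The addition never carries because (c&d) & (b&(c^d)) == 0.
    use strict;
    use warnings;

    for (1..100_000) {
        my ($b,$c,$d) = map { int(rand(2**32)) } 1..3;
        my $maj = ($b&$c)|($b&$d)|($c&$d);               # original majority form
        my $alt = (($c&$d) + ($b&($c^$d))) & 0xffffffff; # rewritten form
        die sprintf("mismatch: b=%08x c=%08x d=%08x\n",$b,$c,$d)
            if $maj != $alt;
    }
    print "F_40_59 identity holds on 100000 random inputs\n";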
crypto/sha/asm/sha1-x86_64.pl
...
@@ -16,7 +16,7 @@
 # There was suggestion to mechanically translate 32-bit code, but I
 # dismissed it, reasoning that x86_64 offers enough register bank
 # capacity to fully utilize SHA-1 parallelism. Therefore this fresh
-# implementation:-) However! While 64-bit code does performs better
+# implementation:-) However! While 64-bit code does perform better
 # on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
 # x86_64 does offer larger *addressable* bank, but out-of-order core
 # reaches for even more registers through dynamic aliasing, and EM64T
...
@@ -29,6 +29,13 @@
 # Xeon P4       +65%            +0%             9.9
 # Core2         +60%            +10%            7.0

+# August 2009.
+#
+# The code was revised to minimize code size and to maximize
+# "distance" between instructions producing input to 'lea'
+# instruction and the 'lea' instruction itself, which is essential
+# for Intel Atom core.
+
 $flavour = shift;
 $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
...
@@ -51,194 +58,184 @@ $ctx="%r8";
 $inp="%r9";
 $num="%r10";

-$xi="%eax";
-$t0="%ebx";
-$t1="%ecx";
-$A="%edx";
-$B="%esi";
-$C="%edi";
-$D="%ebp";
-$E="%r11d";
-$T="%r12d";
+$t0="%eax";
+$t1="%ebx";
+$t2="%ecx";
+@xi=("%edx","%ebp");
+$A="%esi";
+$B="%edi";
+$C="%r11d";
+$D="%r12d";
+$E="%r13d";

-@V=($A,$B,$C,$D,$E,$T);
-
-sub PROLOGUE {
-my $func=shift;
-$code.=<<___;
-.globl $func
-.type $func,\@function,3
-.align 16
-$func:
-	push	%rbx
-	push	%rbp
-	push	%r12
-	mov	%rsp,%r11
-	mov	%rdi,$ctx	# reassigned argument
-	sub	\$`8+16*4`,%rsp
-	mov	%rsi,$inp	# reassigned argument
-	and	\$-64,%rsp
-	mov	%rdx,$num	# reassigned argument
-	mov	%r11,`16*4`(%rsp)
-.Lprologue:
-	mov	0($ctx),$A
-	mov	4($ctx),$B
-	mov	8($ctx),$C
-	mov	12($ctx),$D
-	mov	16($ctx),$E
-___
-}
-
-sub EPILOGUE {
-my $func=shift;
-$code.=<<___;
-	mov	`16*4`(%rsp),%rsi
-	mov	(%rsi),%r12
-	mov	8(%rsi),%rbp
-	mov	16(%rsi),%rbx
-	lea	24(%rsi),%rsp
-.Lepilogue:
-	ret
-.size	$func,.-$func
-___
-}
+@V=($A,$B,$C,$D,$E);
 sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e,$f,$host)=@_;
+my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i==0);
-	mov	`4*$i`($inp),$xi
-	`"bswap	$xi"	if(!defined($host))`
-	mov	$xi,`4*$i`(%rsp)
+	mov	`4*$i`($inp),$xi[0]
+	bswap	$xi[0]
+	mov	$xi[0],`4*$i`(%rsp)
 ___
 $code.=<<___ if ($i<15);
-	lea	0x5a827999($xi,$e),$f
 	mov	$c,$t0
-	mov	`4*$j`($inp),$xi
-	mov	$a,$e
+	mov	`4*$j`($inp),$xi[1]
+	mov	$a,$t2
 	xor	$d,$t0
-	`"bswap	$xi"	if(!defined($host))`
-	rol	\$5,$e
+	bswap	$xi[1]
+	rol	\$5,$t2
+	lea	0x5a827999($xi[0],$e),$e
 	and	$b,$t0
-	mov	$xi,`4*$j`(%rsp)
-	add	$e,$f
+	mov	$xi[1],`4*$j`(%rsp)
+	add	$t2,$e
 	xor	$d,$t0
 	rol	\$30,$b
-	add	$t0,$f
+	add	$t0,$e
 ___
 $code.=<<___ if ($i>=15);
-	lea	0x5a827999($xi,$e),$f
-	mov	`4*($j%16)`(%rsp),$xi
+	mov	`4*($j%16)`(%rsp),$xi[1]
 	mov	$c,$t0
-	mov	$a,$e
-	xor	`4*(($j+2)%16)`(%rsp),$xi
+	mov	$a,$t2
+	xor	`4*(($j+2)%16)`(%rsp),$xi[1]
 	xor	$d,$t0
-	rol	\$5,$e
-	xor	`4*(($j+8)%16)`(%rsp),$xi
+	rol	\$5,$t2
+	xor	`4*(($j+8)%16)`(%rsp),$xi[1]
 	and	$b,$t0
-	add	$e,$f
-	xor	`4*(($j+13)%16)`(%rsp),$xi
+	lea	0x5a827999($xi[0],$e),$e
+	xor	`4*(($j+13)%16)`(%rsp),$xi[1]
 	xor	$d,$t0
+	rol	\$1,$xi[1]
+	add	$t2,$e
 	rol	\$30,$b
-	add	$t0,$f
-	rol	\$1,$xi
-	mov	$xi,`4*($j%16)`(%rsp)
+	mov	$xi[1],`4*($j%16)`(%rsp)
+	add	$t0,$e
 ___
+unshift(@xi,pop(@xi));
 }
 sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e,$f)=@_;
+my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 my $K=($i<40)?0x6ed9eba1:0xca62c1d6;
 $code.=<<___ if ($i<79);
-	lea	$K($xi,$e),$f
-	mov	`4*($j%16)`(%rsp),$xi
+	mov	`4*($j%16)`(%rsp),$xi[1]
 	mov	$c,$t0
-	mov	$a,$e
-	xor	`4*(($j+2)%16)`(%rsp),$xi
+	mov	$a,$t2
+	xor	`4*(($j+2)%16)`(%rsp),$xi[1]
 	xor	$b,$t0
-	rol	\$5,$e
-	xor	`4*(($j+8)%16)`(%rsp),$xi
+	rol	\$5,$t2
+	lea	$K($xi[0],$e),$e
+	xor	`4*(($j+8)%16)`(%rsp),$xi[1]
 	xor	$d,$t0
-	add	$e,$f
-	xor	`4*(($j+13)%16)`(%rsp),$xi
+	add	$t2,$e
+	xor	`4*(($j+13)%16)`(%rsp),$xi[1]
 	rol	\$30,$b
-	add	$t0,$f
-	rol	\$1,$xi
+	add	$t0,$e
+	rol	\$1,$xi[1]
 ___
 $code.=<<___ if ($i<76);
-	mov	$xi,`4*($j%16)`(%rsp)
+	mov	$xi[1],`4*($j%16)`(%rsp)
 ___
 $code.=<<___ if ($i==79);
-	lea	$K($xi,$e),$f
 	mov	$c,$t0
-	mov	$a,$e
+	mov	$a,$t2
 	xor	$b,$t0
-	rol	\$5,$e
+	lea	$K($xi[0],$e),$e
+	rol	\$5,$t2
 	xor	$d,$t0
-	add	$e,$f
+	add	$t2,$e
 	rol	\$30,$b
-	add	$t0,$f
+	add	$t0,$e
 ___
+unshift(@xi,pop(@xi));
 }
 sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e,$f)=@_;
+my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___;
-	lea	0x8f1bbcdc($xi,$e),$f
-	mov	`4*($j%16)`(%rsp),$xi
-	mov	$b,$t0
-	mov	$b,$t1
-	xor	`4*(($j+2)%16)`(%rsp),$xi
-	mov	$a,$e
-	and	$c,$t0
-	xor	`4*(($j+8)%16)`(%rsp),$xi
-	or	$c,$t1
-	rol	\$5,$e
-	xor	`4*(($j+13)%16)`(%rsp),$xi
-	and	$d,$t1
-	add	$e,$f
-	rol	\$1,$xi
-	or	$t1,$t0
+	mov	`4*($j%16)`(%rsp),$xi[1]
+	mov	$c,$t0
+	mov	$c,$t1
+	xor	`4*(($j+2)%16)`(%rsp),$xi[1]
+	and	$d,$t0
+	mov	$a,$t2
+	xor	`4*(($j+8)%16)`(%rsp),$xi[1]
+	xor	$d,$t1
+	lea	0x8f1bbcdc($xi[0],$e),$e
+	rol	\$5,$t2
+	xor	`4*(($j+13)%16)`(%rsp),$xi[1]
+	add	$t0,$e
+	and	$b,$t1
+	rol	\$1,$xi[1]
+	add	$t1,$e
 	rol	\$30,$b
-	mov	$xi,`4*($j%16)`(%rsp)
-	add	$t0,$f
+	mov	$xi[1],`4*($j%16)`(%rsp)
+	add	$t2,$e
 ___
+unshift(@xi,pop(@xi));
 }
-$code=".text\n";
-
-&PROLOGUE("sha1_block_data_order");
-$code.=".align	4\n.Lloop:\n";
+$code.=<<___;
+.text
+.globl	sha1_block_data_order
+.type	sha1_block_data_order,\@function,3
+.align	16
+sha1_block_data_order:
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	mov	%rsp,%r11
+	mov	%rdi,$ctx	# reassigned argument
+	sub	\$`8+16*4`,%rsp
+	mov	%rsi,$inp	# reassigned argument
+	and	\$-64,%rsp
+	mov	%rdx,$num	# reassigned argument
+	mov	%r11,`16*4`(%rsp)
+.Lprologue:
+
+	mov	0($ctx),$A
+	mov	4($ctx),$B
+	mov	8($ctx),$C
+	mov	12($ctx),$D
+	mov	16($ctx),$E
+
+.align	4
+.Lloop:
+___
 for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
 for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
 for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
 for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
 $code.=<<___;
-	add	0($ctx),$E
-	add	4($ctx),$T
-	add	8($ctx),$A
-	add	12($ctx),$B
-	add	16($ctx),$C
-	mov	$E,0($ctx)
-	mov	$T,4($ctx)
-	mov	$A,8($ctx)
-	mov	$B,12($ctx)
-	mov	$C,16($ctx)
-
-	xchg	$E,$A	# mov	$E,$A
-	xchg	$T,$B	# mov	$T,$B
-	xchg	$E,$C	# mov	$A,$C
-	xchg	$T,$D	# mov	$B,$D
-			# mov	$C,$E
-	lea	`16*4`($inp),$inp
+	add	0($ctx),$A
+	add	4($ctx),$B
+	add	8($ctx),$C
+	add	12($ctx),$D
+	add	16($ctx),$E
+	mov	$A,0($ctx)
+	mov	$B,4($ctx)
+	mov	$C,8($ctx)
+	mov	$D,12($ctx)
+	mov	$E,16($ctx)
 	sub	\$1,$num
+	lea	`16*4`($inp),$inp
 	jnz	.Lloop
 ___
-&EPILOGUE("sha1_block_data_order");
 $code.=<<___;
+	mov	`16*4`(%rsp),%rsi
+	mov	(%rsi),%r13
+	mov	8(%rsi),%r12
+	mov	16(%rsi),%rbp
+	mov	24(%rsi),%rbx
+	lea	32(%rsi),%rsp
+.Lepilogue:
+	ret
+.size	sha1_block_data_order,.-sha1_block_data_order
 .asciz	"SHA1 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 .align	16
 ___
...
@@ -281,14 +278,16 @@ se_handler:
 	jae	.Lin_prologue

 	mov	`16*4`(%rax),%rax	# pull saved stack pointer
-	lea	24(%rax),%rax
+	lea	32(%rax),%rax

 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
 	mov	-24(%rax),%r12
+	mov	-32(%rax),%r13

 	mov	%rbx,144($context)	# restore context->Rbx
 	mov	%rbp,160($context)	# restore context->Rbp
 	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13

 .Lin_prologue:
 	mov	8(%rax),%rdi
...
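A note on the unshift(@V,pop(@V)) and unshift(@xi,pop(@xi)) calls that recur above: instead of moving the working state between registers (compare the deleted xchg block), the generator rotates the array of register names after every round, so the register that accumulated this round's result is simply referred to as 'a' in the next round, and the two @xi slots alternate between adjacent rounds. A minimal Perl illustration of the renaming (not from the commit; register names taken from the new assignments above):

    #!/usr/bin/env perl
    # @V holds the register names assigned to (a,b,c,d,e).  Rotating the
    # array after each round makes the register that held e (where the
    # new round value was accumulated) become next round's a, with no
    # data movement in the generated assembler.
    use strict;
    use warnings;

    my @V = ("%esi","%edi","%r11d","%r12d","%r13d");   # $A,$B,$C,$D,$E
    for my $i (0..4) {
        my ($a,$b,$c,$d,$e) = @V;
        printf "round %d: a=%-5s b=%-5s c=%-6s d=%-6s e=%-6s\n",
               $i,$a,$b,$c,$d,$e;
        unshift(@V,pop(@V));   # same rotation the script performs
    }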