Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
btwise
openssl
提交
30cb9ec7
O
openssl
项目概览
btwise
/
openssl
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
30cb9ec7
编写于
1月 21, 2004
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
SHA-1 assembler tune-up for Intel P4
上级
af6dab9b
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
88 addition
and
203 deletion
+88
-203
crypto/sha/asm/sha1-586.pl
crypto/sha/asm/sha1-586.pl
+88
-203
未找到文件。
crypto/sha/asm/sha1-586.pl
浏览文件 @
30cb9ec7
#!/usr/local/bin/perl
#!/usr/local/bin/perl
# It was noted that Intel IA-32 C compiler generates code which
# performs ~30% *faster* on P4 CPU than original *hand-coded*
# SHA1 assembler implementation. To address this problem (and
# prove that humans are still better than machines:-), the
# original code was overhauled, which resulted in following
# performance changes:
#
# compared with original compared with Intel cc
# assembler impl. generated code
# Pentium -25% +37%
# PIII/AMD +8% +16%
# P4 +85%(!) +45%
#
# As you can see Pentium came out as looser:-( Yet I reckoned that
# improvement on P4 outweights the loss and incorporate this
# re-tuned code to 0.9.7 and later.
# ----------------------------------------------------------------
# Those who for any particular reason absolutely must score on
# Pentium can replace this module with one from 0.9.6 distribution.
# This "offer" shall be revoked the moment programming interface to
# this module is changed, in which case this paragraph should be
# removed.
# ----------------------------------------------------------------
# <appro@fy.chalmers.se>
$normal
=
0
;
$normal
=
0
;
push
(
@INC
,"
perlasm
","
../../perlasm
");
push
(
@INC
,"
perlasm
","
../../perlasm
");
...
@@ -77,54 +102,21 @@ sub BODY_00_15
...
@@ -77,54 +102,21 @@ sub BODY_00_15
{
{
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
return
if
$n
&
1
;
&comment
("
00_15
$n
");
&comment
("
00_15
$n
");
&mov
(
$f
,
$c
);
&mov
(
$tmp1
,
$a
);
&xor
(
$f
,
$d
);
# F2
&rotl
(
$tmp1
,
5
);
# A2
&and
(
$f
,
$b
);
# F3
&add
(
$tmp1
,
$e
);
&rotr
(
$b
,
1
);
# B1 <- F
&mov
(
$e
,
&swtmp
(
$n
));
# G1
&rotr
(
$b
,
1
);
# B1 <- F
&xor
(
$f
,
$d
);
# F4
&lea
(
$tmp1
,
&DWP
(
$K
,
$tmp1
,
$e
,
1
));
############################
# &BODY_40_59( 0,$K[2],$X,42,$A,$B,$C,$D,$E,$T);
# &BODY_40_59( 0,$K[2],$X,43,$T,$A,$B,$C,$D,$E);
$n
++
;
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
(
$b
,
$c
,
$d
,
$e
,
$f
,
$a
)
=
(
$a
,
$b
,
$c
,
$d
,
$e
,
$f
);
&mov
(
$f
,
$c
);
&add
(
$a
,
$tmp1
);
# MOVED DOWN
&xor
(
$f
,
$d
);
# F2
&mov
(
$tmp1
,
$a
);
&mov
(
$tmp1
,
$a
);
&and
(
$f
,
$b
);
# F3
&mov
(
$f
,
$c
);
# f to hold F_00_19(b,c,d)
&rotl
(
$tmp1
,
5
);
# tmp1=ROTATE(a,5)
&rotl
(
$tmp1
,
5
);
# A2
&xor
(
$f
,
$d
);
&and
(
$f
,
$b
);
&add
(
$tmp1
,
$e
);
&rotr
(
$b
,
2
);
# b=ROTATE(b,30)
&mov
(
$e
,
&swtmp
(
$n
));
# G1
&add
(
$tmp1
,
$e
);
# tmp1+=e;
&mov
(
$e
,
&swtmp
(
$n
));
# e becomes volatile and
&rotr
(
$b
,
1
);
# B1 <- F
# is loaded with xi
&xor
(
$f
,
$d
);
# F4
&xor
(
$f
,
$d
);
# f holds F_00_19(b,c,d)
&lea
(
$tmp1
,
&DWP
(
$K
,
$tmp1
,
$e
,
1
));
# tmp1+=K_00_19+xi
&rotr
(
$b
,
1
);
# B1 <- F
&lea
(
$tmp1
,
&DWP
(
$K
,
$tmp1
,
$e
,
1
));
&add
(
$f
,
$tmp1
);
# f+=tmp1
&add
(
$f
,
$tmp1
);
}
}
sub
BODY_16_19
sub
BODY_16_19
...
@@ -132,66 +124,24 @@ sub BODY_16_19
...
@@ -132,66 +124,24 @@ sub BODY_16_19
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
return
if
$n
&
1
;
&comment
("
16_19
$n
");
&comment
("
16_19
$n
");
&nop
()
if
(
$pos
<
0
);
&mov
(
$f
,
&swtmp
(
$n1
));
# f to hold Xupdate(xi,xa,xb,xc,xd)
&mov
(
$tmp1
,
&swtmp
(
$n0
));
# X1
&mov
(
$tmp1
,
$c
);
# tmp1 to hold F_00_19(b,c,d)
&mov
(
$f
,
&swtmp
(
$n1
));
# X2
&xor
(
$f
,
&swtmp
(
$n0
));
&xor
(
$f
,
$tmp1
);
# X3
&xor
(
$tmp1
,
$d
);
&mov
(
$tmp1
,
&swtmp
(
$n2
));
# X4
&xor
(
$f
,
&swtmp
(
$n2
));
&xor
(
$f
,
$tmp1
);
# X5
&and
(
$tmp1
,
$b
);
# tmp1 holds F_00_19(b,c,d)
&mov
(
$tmp1
,
&swtmp
(
$n3
));
# X6
&xor
(
$f
,
&swtmp
(
$n3
));
# f holds xa^xb^xc^xd
&xor
(
$f
,
$tmp1
);
# X7 - slot
&rotr
(
$b
,
2
);
# b=ROTATE(b,30)
&mov
(
$tmp1
,
$c
);
# F1
&xor
(
$tmp1
,
$d
);
# tmp1=F_00_19(b,c,d)
&rotl
(
$f
,
1
);
# X8 - slot
&rotl
(
$f
,
1
);
# f=ROATE(f,1)
&xor
(
$tmp1
,
$d
);
# F2
&mov
(
&swtmp
(
$n0
),
$f
);
# xi=f
&mov
(
&swtmp
(
$n0
),
$f
);
# X9 - anytime
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# f+=K_00_19+e
&and
(
$tmp1
,
$b
);
# F3
&mov
(
$e
,
$a
);
# e becomes volatile
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# tot=X+K+e
&add
(
$f
,
$tmp1
);
# f+=F_00_19(b,c,d)
&xor
(
$tmp1
,
$d
);
# F4
&rotl
(
$e
,
5
);
# e=ROTATE(a,5)
&mov
(
$e
,
$a
);
# A1
&add
(
$f
,
$e
);
# f+=ROTATE(a,5)
&add
(
$f
,
$tmp1
);
# tot+=F();
&rotl
(
$e
,
5
);
# A2
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$f
,
$e
);
# tot+=a
############################
# &BODY_40_59( 0,$K[2],$X,42,$A,$B,$C,$D,$E,$T);
# &BODY_40_59( 0,$K[2],$X,43,$T,$A,$B,$C,$D,$E);
$n
++
;
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
(
$b
,
$c
,
$d
,
$e
,
$f
,
$a
)
=
(
$a
,
$b
,
$c
,
$d
,
$e
,
$f
);
&mov
(
$f
,
&swtmp
(
$n0
));
# X1
&mov
(
$tmp1
,
&swtmp
(
$n1
));
# X2
&xor
(
$f
,
$tmp1
);
# X3
&mov
(
$tmp1
,
&swtmp
(
$n2
));
# X4
&xor
(
$f
,
$tmp1
);
# X5
&mov
(
$tmp1
,
&swtmp
(
$n3
));
# X6
&rotr
(
$c
,
1
);
#&rotr($b,1); # B1 <- F # MOVED DOWN
&xor
(
$f
,
$tmp1
);
# X7 - slot
&rotl
(
$f
,
1
);
# X8 - slot
&mov
(
$tmp1
,
$c
);
# F1
&xor
(
$tmp1
,
$d
);
# F2
&mov
(
&swtmp
(
$n0
),
$f
);
# X9 - anytime
&and
(
$tmp1
,
$b
);
# F3
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# tot=X+K+e
&xor
(
$tmp1
,
$d
);
# F4
&mov
(
$e
,
$a
);
# A1
&rotl
(
$e
,
5
);
# A2
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$f
,
$e
);
# tot+=a
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$f
,
$tmp1
);
# tot+=F();
}
}
sub
BODY_20_39
sub
BODY_20_39
...
@@ -201,42 +151,21 @@ sub BODY_20_39
...
@@ -201,42 +151,21 @@ sub BODY_20_39
&comment
("
20_39
$n
");
&comment
("
20_39
$n
");
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
&mov
(
$f
,
&swtmp
(
$n0
));
# X1
&mov
(
$f
,
&swtmp
(
$n0
));
# f to hold Xupdate(xi,xa,xb,xc,xd)
&mov
(
$tmp1
,
&swtmp
(
$n1
));
# X2
&mov
(
$tmp1
,
$b
);
# tmp1 to hold F_20_39(b,c,d)
&xor
(
$f
,
$tmp1
);
# X3
&xor
(
$f
,
&swtmp
(
$n1
));
&mov
(
$tmp1
,
&swtmp
(
$n2
));
# X4
&rotr
(
$b
,
2
);
# b=ROTATE(b,30)
&xor
(
$f
,
$tmp1
);
# X5
&xor
(
$f
,
&swtmp
(
$n2
));
&mov
(
$tmp1
,
&swtmp
(
$n3
));
# X6
&xor
(
$tmp1
,
$c
);
&xor
(
$f
,
$tmp1
);
# X7 - slot
&xor
(
$f
,
&swtmp
(
$n3
));
# f holds xa^xb^xc^xd
&mov
(
$tmp1
,
$b
);
# F1
&xor
(
$tmp1
,
$d
);
# tmp1 holds F_20_39(b,c,d)
&rotl
(
$f
,
1
);
# X8 - slot
&rotl
(
$f
,
1
);
# f=ROTATE(f,1)
&xor
(
$tmp1
,
$c
);
# F2
&mov
(
&swtmp
(
$n0
),
$f
);
# xi=f
&mov
(
&swtmp
(
$n0
),
$f
);
# X9 - anytime
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# f+=K_20_39+e
&xor
(
$tmp1
,
$d
);
# F3
&mov
(
$e
,
$a
);
# e becomes volatile
&rotl
(
$e
,
5
);
# e=ROTATE(a,5)
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# tot=X+K+e
&add
(
$f
,
$tmp1
);
# f+=F_20_39(b,c,d)
&mov
(
$e
,
$a
);
# A1
&add
(
$f
,
$e
);
# f+=ROTATE(a,5)
&rotl
(
$e
,
5
);
# A2
if
(
$n
!=
79
)
# last loop
{
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$e
,
$tmp1
);
# tmp1=F()+a
&rotr
(
$b
,
1
);
# B2 <- F
&add
(
$f
,
$e
);
# tot+=tmp1;
}
else
{
&add
(
$e
,
$tmp1
);
# tmp1=F()+a
&mov
(
$tmp1
,
&wparam
(
0
));
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$f
,
$e
);
# tot+=tmp1;
&rotr
(
$b
,
1
);
# B2 <- F
}
}
}
sub
BODY_40_59
sub
BODY_40_59
...
@@ -244,70 +173,27 @@ sub BODY_40_59
...
@@ -244,70 +173,27 @@ sub BODY_40_59
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
local
(
$pos
,
$K
,
$X
,
$n
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
)
=
@_
;
&comment
("
40_59
$n
");
&comment
("
40_59
$n
");
return
if
$n
&
1
;
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
&mov
(
$f
,
&swtmp
(
$n0
));
# X1
&mov
(
$f
,
&swtmp
(
$n0
));
# f to hold Xupdate(xi,xa,xb,xc,xd)
&mov
(
$tmp1
,
&swtmp
(
$n1
));
# X2
&mov
(
$tmp1
,
$b
);
# tmp1 to hold F_40_59(b,c,d)
&xor
(
$f
,
$tmp1
);
# X3
&xor
(
$f
,
&swtmp
(
$n1
));
&mov
(
$tmp1
,
&swtmp
(
$n2
));
# X4
&or
(
$tmp1
,
$c
);
&xor
(
$f
,
$tmp1
);
# X5
&xor
(
$f
,
&swtmp
(
$n2
));
&mov
(
$tmp1
,
&swtmp
(
$n3
));
# X6
&and
(
$tmp1
,
$d
);
&xor
(
$f
,
$tmp1
);
# X7 - slot
&xor
(
$f
,
&swtmp
(
$n3
));
# f holds xa^xb^xc^xd
&mov
(
$tmp1
,
$b
);
# F1
&rotl
(
$f
,
1
);
# f=ROTATE(f,1)
&rotl
(
$f
,
1
);
# X8 - slot
&mov
(
&swtmp
(
$n0
),
$f
);
# xi=f
&or
(
$tmp1
,
$c
);
# F2
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# f+=K_40_59+e
&mov
(
&swtmp
(
$n0
),
$f
);
# X9 - anytime
&mov
(
$e
,
$b
);
# e becomes volatile and is used
&and
(
$tmp1
,
$d
);
# F3
# to calculate F_40_59(b,c,d)
&rotr
(
$b
,
2
);
# b=ROTATE(b,30)
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# tot=X+K+e
&and
(
$e
,
$c
);
&mov
(
$e
,
$b
);
# F4
&or
(
$tmp1
,
$e
);
# tmp1 holds F_40_59(b,c,d)
&mov
(
$e
,
$a
);
&rotr
(
$b
,
1
);
# B1 <- F
&rotl
(
$e
,
5
);
# e=ROTATE(a,5)
&and
(
$e
,
$c
);
# F5
&add
(
$tmp1
,
$e
);
# tmp1+=ROTATE(a,5)
&add
(
$f
,
$tmp1
);
# f+=tmp1;
&or
(
$tmp1
,
$e
);
# F6
&mov
(
$e
,
$a
);
# A1
&rotl
(
$e
,
5
);
# A2
&add
(
$tmp1
,
$e
);
# tmp1=F()+a
############################
# &BODY_40_59( 0,$K[2],$X,42,$A,$B,$C,$D,$E,$T);
# &BODY_40_59( 0,$K[2],$X,43,$T,$A,$B,$C,$D,$E);
$n
++
;
local
(
$n0
,
$n1
,
$n2
,
$n3
,
$np
)
=
&Na
(
$n
);
(
$b
,
$c
,
$d
,
$e
,
$f
,
$a
)
=
(
$a
,
$b
,
$c
,
$d
,
$e
,
$f
);
&mov
(
$f
,
&swtmp
(
$n0
));
# X1
&add
(
$a
,
$tmp1
);
# tot+=tmp1; # moved was add f,tmp1
&mov
(
$tmp1
,
&swtmp
(
$n1
));
# X2
&xor
(
$f
,
$tmp1
);
# X3
&mov
(
$tmp1
,
&swtmp
(
$n2
));
# X4
&xor
(
$f
,
$tmp1
);
# X5
&mov
(
$tmp1
,
&swtmp
(
$n3
));
# X6
&rotr
(
$c
,
1
);
# B2 <- F # moved was rotr b,1
&xor
(
$f
,
$tmp1
);
# X7 - slot
&rotl
(
$f
,
1
);
# X8 - slot
&mov
(
$tmp1
,
$b
);
# F1
&mov
(
&swtmp
(
$n0
),
$f
);
# X9 - anytime
&or
(
$tmp1
,
$c
);
# F2
&lea
(
$f
,
&DWP
(
$K
,
$f
,
$e
,
1
));
# tot=X+K+e
&mov
(
$e
,
$b
);
# F4
&and
(
$tmp1
,
$d
);
# F3
&and
(
$e
,
$c
);
# F5
&or
(
$tmp1
,
$e
);
# F6
&mov
(
$e
,
$a
);
# A1
&rotl
(
$e
,
5
);
# A2
&rotr
(
$b
,
1
);
# B1 <- F
&add
(
$tmp1
,
$e
);
# tmp1=F()+a
&rotr
(
$b
,
1
);
# B2 <- F
&add
(
$f
,
$tmp1
);
# tot+=tmp1;
}
}
sub
BODY_60_79
sub
BODY_60_79
...
@@ -495,8 +381,7 @@ sub sha1_block_data
...
@@ -495,8 +381,7 @@ sub sha1_block_data
# C -> E
# C -> E
# D -> T
# D -> T
# The last 2 have been moved into the last loop
&mov
(
$tmp1
,
&wparam
(
0
));
# &mov($tmp1,&wparam(0));
&mov
(
$D
,
&DWP
(
12
,
$tmp1
,"",
0
));
&mov
(
$D
,
&DWP
(
12
,
$tmp1
,"",
0
));
&add
(
$D
,
$B
);
&add
(
$D
,
$B
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录