Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
btwise
openssl
提交
a0a17fcb
O
openssl
项目概览
btwise
/
openssl
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a0a17fcb
编写于
5月 20, 2014
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
aesv8-armx.pl: optimize by adding 128-bit code paths.
上级
d8ac1ea7
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
127 addition
and
16 deletion
+127
-16
crypto/aes/asm/aesv8-armx.pl
crypto/aes/asm/aesv8-armx.pl
+127
-16
未找到文件。
crypto/aes/asm/aesv8-armx.pl
浏览文件 @
a0a17fcb
...
...
@@ -13,8 +13,8 @@
# of operation. Latter is achieved by limiting amount of utilized
# registers to 16, which implies additional instructions. This has
# no effect on mighty Apple A7, as results are literally equal to
# the theoretical estimates. It remains to be seen how does it
# affect other platforms...
# the theoretical estimates based on instruction latencies and issue
# rate. It remains to be seen how does it affect other platforms...
#
# Performance in cycles per byte processed with 128-bit key:
#
...
...
@@ -274,17 +274,17 @@ ${prefix}_${dir}crypt:
.Loop_${dir}c:
aes$e $inout,$rndkey0
aes$mc $inout,$inout
vld1.32 {$rndkey0},[$key],#16
aes$mc $inout,$inout
subs $rounds,$rounds,#2
aes$e $inout,$rndkey1
aes$mc $inout,$inout
vld1.32 {$rndkey1},[$key],#16
aes$mc $inout,$inout
b.gt .Loop_${dir}c
aes$e $inout,$rndkey0
aes$mc $inout,$inout
vld1.32 {$rndkey0},[$key]
aes$mc $inout,$inout
aes$e $inout,$rndkey1
veor $inout,$inout,$rndkey0
...
...
@@ -298,7 +298,7 @@ ___
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
my ($rounds,$cnt,$key_,$step)=($enc,"w6","x7","x8");
my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
...
...
@@ -346,16 +346,19 @@ $code.=<<___;
mov $cnt,$rounds
b.eq .Lcbc_dec
cmp $rounds,#2
veor $dat,$dat,$ivec
veor $rndzero_n_last,q8,$rndlast
b.eq .Lcbc_enc128
.Loop_cbc_enc:
aese $dat,q8
aesmc $dat,$dat
vld1.32 {q8},[$key_],#16
aesmc $dat,$dat
subs $cnt,$cnt,#2
aese $dat,q9
aesmc $dat,$dat
vld1.32 {q9},[$key_],#16
aesmc $dat,$dat
b.gt .Loop_cbc_enc
aese $dat,q8
...
...
@@ -387,6 +390,111 @@ $code.=<<___;
b .Lcbc_done
.align 5
.Lcbc_enc128:
vld1.32 {$in0-$in1},[$key_]
aese $dat,q8
aesmc $dat,$dat
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese $dat,q8
aesmc $dat,$dat
vst1.8 {$ivec},[$out],#16
.Lenter_cbc_enc128:
aese $dat,q9
aesmc $dat,$dat
subs $len,$len,#16
aese $dat,$in0
aesmc $dat,$dat
cclr $step,eq
aese $dat,$in1
aesmc $dat,$dat
aese $dat,q10
aesmc $dat,$dat
aese $dat,q11
aesmc $dat,$dat
vld1.8 {q8},[$inp],$step
aese $dat,q12
aesmc $dat,$dat
aese $dat,q13
aesmc $dat,$dat
aese $dat,q14
aesmc $dat,$dat
veor q8,q8,$rndzero_n_last
aese $dat,q15
veor $ivec,$dat,$rndlast
b.hs .Loop_cbc_enc128
vst1.8 {$ivec},[$out],#16
b .Lcbc_done
.align 5
.Lcbc_dec128:
vld1.32 {$tmp0-$tmp1},[$key_]
veor $ivec,$ivec,$rndlast
veor $in0,$dat0,$rndlast
mov $step1,$step
.Loop2x_cbc_dec128:
aesd $dat0,q8
aesd $dat1,q8
aesimc $dat0,$dat0
aesimc $dat1,$dat1
subs $len,$len,#32
aesd $dat0,q9
aesd $dat1,q9
aesimc $dat0,$dat0
aesimc $dat1,$dat1
cclr $step,lo
aesd $dat0,$tmp0
aesd $dat1,$tmp0
aesimc $dat0,$dat0
aesimc $dat1,$dat1
cclr $step1,ls
aesd $dat0,$tmp1
aesd $dat1,$tmp1
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q10
aesd $dat1,q10
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q11
aesd $dat1,q11
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q12
aesd $dat1,q12
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q13
aesd $dat1,q13
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q14
aesd $dat1,q14
aesimc $dat0,$dat0
aesimc $dat1,$dat1
aesd $dat0,q15
aesd $dat1,q15
veor $ivec,$ivec,$dat0
veor $in0,$in0,$dat1
vld1.8 {$dat0},[$inp],$step
vld1.8 {$dat1},[$inp],$step1
vst1.8 {$ivec},[$out],#16
veor $ivec,$in1,$rndlast
vst1.8 {$in0},[$out],#16
veor $in0,$dat0,$rndlast
vorr $in1,$dat1,$dat1
b.hs .Loop2x_cbc_dec128
adds $len,$len,#32
veor $ivec,$ivec,$rndlast
b.eq .Lcbc_done
veor $in0,$in0,$rndlast
b .Lcbc_dec_tail
.align 5
.Lcbc_dec:
subs $len,$len,#16
...
...
@@ -394,34 +502,36 @@ $code.=<<___;
b.lo .Lcbc_dec_tail
cclr $step,eq
cmp $rounds,#2
vld1.8 {$dat1},[$inp],$step
vorr $in1,$dat1,$dat1
b.eq .Lcbc_dec128
.Loop2x_cbc_dec:
aesd $dat0,q8
aesd $dat1,q8
vld1.32 {q8},[$key_],#16
aesimc $dat0,$dat0
aesimc $dat1,$dat1
vld1.64 {q8},[$key_],#16
subs $cnt,$cnt,#2
aesd $dat0,q9
aesd $dat1,q9
vld1.32 {q9},[$key_],#16
aesimc $dat0,$dat0
aesimc $dat1,$dat1
vld1.64 {q9},[$key_],#16
b.gt .Loop2x_cbc_dec
aesd $dat0,q8
aesd $dat1,q8
aesimc $dat0,$dat0
veor $tmp0,$ivec,$rndlast
aesimc $dat1,$dat1
veor $tmp0,$ivec,$rndlast
veor $tmp1,$in0,$rndlast
aesd $dat0,q9
aesd $dat1,q9
aesimc $dat0,$dat0
vorr $ivec,$in1,$in1
aesimc $dat1,$dat1
vorr $ivec,$in1,$in1
subs $len,$len,#32
aesd $dat0,q10
aesd $dat1,q10
...
...
@@ -455,10 +565,11 @@ $code.=<<___;
mov $cnt,$rounds
veor $tmp0,$tmp0,$dat0
vorr $dat0,$in0,$in0
veor $tmp1,$tmp1,$dat1
vorr $dat0,$in0,$in0
vst1.8 {$tmp0},[$out],#32
vorr $dat1,$in1,$in1
vst1.8 {$tmp0-$tmp1},[$out],#32
vst1.8 {$tmp1},[$out],#32
b.hs .Loop2x_cbc_dec
adds $len,$len,#32
...
...
@@ -466,12 +577,12 @@ $code.=<<___;
.Lcbc_dec_tail:
aesd $dat,q8
vld1.32 {q8},[$key_],#16
aesimc $dat,$dat
vld1.64 {q8},[$key_],#16
subs $cnt,$cnt,#2
aesd $dat,q9
vld1.32 {q9},[$key_],#16
aesimc $dat,$dat
vld1.64 {q9},[$key_],#16
b.gt .Lcbc_dec_tail
aesd $dat,q8
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录