Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
btwise
openssl
提交
ce24d2ed
O
openssl
项目概览
btwise
/
openssl
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ce24d2ed
编写于
11月 13, 2015
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
aes/asm/vpaes-ppc.pl: eliminate overhung stores in misaligned cases.
Reviewed-by:
N
Richard Levitte
<
levitte@openssl.org
>
上级
0ee05b73
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
136 addition
and
62 deletion
+136
-62
crypto/aes/asm/vpaes-ppc.pl
crypto/aes/asm/vpaes-ppc.pl
+136
-62
未找到文件。
crypto/aes/asm/vpaes-ppc.pl
浏览文件 @
ce24d2ed
...
...
@@ -337,24 +337,27 @@ Lenc_entry:
addi $inp, $inp, 15 # 15 is not a typo
?lvsr $outperm, 0, $out
?lvsl $keyperm, 0, $key # prepare for unaligned access
vnor $outmask, v7, v7 # 0xff..ff
lvx $inptail, 0, $inp # redundant in aligned case
?vperm $outmask, v7, $outmask, $outperm
lvx $outhead, 0, $out
?vperm v0, v0, $inptail, $inpperm
bl _vpaes_encrypt_core
andi. r8, $out, 15
li r9, 16
beq Lenc_out_aligned
vperm v0, v0, v0, $outperm # rotate right/left
vsel v1, $outhead, v0, $outmask
vmr $outhead, v0
stvx v1, 0, $out
addi $out, $out, 15 # 15 is not a typo
########
mtctr r9
Lenc_out_unaligned:
stvebx v0, 0, $out
addi $out, $out, 1
bdnz Lenc_out_unaligned
b Lenc_done
lvx v1, 0, $out # redundant in aligned case
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
.align 4
Lenc_out_aligned:
stvx v0, 0, $out
Lenc_done:
li r10,`15+6*$SIZE_T`
li r11,`31+6*$SIZE_T`
...
...
@@ -566,24 +569,27 @@ Ldec_entry:
addi $inp, $inp, 15 # 15 is not a typo
?lvsr $outperm, 0, $out
?lvsl $keyperm, 0, $key
vnor $outmask, v7, v7 # 0xff..ff
lvx $inptail, 0, $inp # redundant in aligned case
?vperm $outmask, v7, $outmask, $outperm
lvx $outhead, 0, $out
?vperm v0, v0, $inptail, $inpperm
bl _vpaes_decrypt_core
andi. r8, $out, 15
li r9, 16
beq Ldec_out_aligned
vperm v0, v0, v0, $outperm # rotate right/left
vsel v1, $outhead, v0, $outmask
vmr $outhead, v0
stvx v1, 0, $out
addi $out, $out, 15 # 15 is not a typo
########
mtctr r9
Ldec_out_unaligned:
stvebx v0, 0, $out
addi $out, $out, 1
bdnz Ldec_out_unaligned
b Ldec_done
lvx v1, 0, $out # redundant in aligned case
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
.align 4
Ldec_out_aligned:
stvx v0, 0, $out
Ldec_done:
li r10,`15+6*$SIZE_T`
li r11,`31+6*$SIZE_T`
...
...
@@ -658,11 +664,11 @@ Ldec_entry:
$PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
and r30, r5, r9 # copy length&-16
andi. r9, $out, 15 # is $out aligned?
mr r5, r6 # copy pointer to key
mr r31, r7 # copy pointer to iv
blt Lcbc_abort
cmpwi r8, 0 # test direction
li r6, -1
mcrf cr1, cr0 # put aside $out alignment flag
mr r7, r12 # copy vrsave
mtspr 256, r6 # preserve all AltiVec registers
...
...
@@ -672,6 +678,7 @@ Ldec_entry:
lvx v25, r9, r31
?vperm v24, v24, v25, $inpperm
cmpwi r8, 0 # test direction
neg r8, $inp # prepare for unaligned access
vxor v7, v7, v7
?lvsl $keyperm, 0, $key
...
...
@@ -681,13 +688,37 @@ Ldec_entry:
lvx $inptail, 0, $inp
?vperm $outmask, v7, $outmask, $outperm
addi $inp, $inp, 15 # 15 is not a typo
lvx $outhead, 0, $out
beq Lcbc_decrypt
bl _vpaes_encrypt_preheat
li r0, 16
beq cr1, Lcbc_enc_loop # $out is aligned
vmr v0, $inptail
lvx $inptail, 0, $inp
addi $inp, $inp, 16
?vperm v0, v0, $inptail, $inpperm
vxor v0, v0, v24 # ^= iv
bl _vpaes_encrypt_core
andi. r8, $out, 15
vmr v24, v0 # put aside iv
sub r9, $out, r8
vperm $outhead, v0, v0, $outperm # rotate right/left
Lcbc_enc_head:
stvebx $outhead, r8, r9
cmpwi r8, 15
addi r8, r8, 1
bne Lcbc_enc_head
sub. r30, r30, r0 # len -= 16
addi $out, $out, 16
beq Lcbc_unaligned_done
Lcbc_enc_loop:
vmr v0, $inptail
lvx $inptail, 0, $inp
...
...
@@ -713,6 +744,32 @@ Lcbc_decrypt:
bl _vpaes_decrypt_preheat
li r0, 16
beq cr1, Lcbc_dec_loop # $out is aligned
vmr v0, $inptail
lvx $inptail, 0, $inp
addi $inp, $inp, 16
?vperm v0, v0, $inptail, $inpperm
vmr v25, v0 # put aside input
bl _vpaes_decrypt_core
andi. r8, $out, 15
vxor v0, v0, v24 # ^= iv
vmr v24, v25
sub r9, $out, r8
vperm $outhead, v0, v0, $outperm # rotate right/left
Lcbc_dec_head:
stvebx $outhead, r8, r9
cmpwi r8, 15
addi r8, r8, 1
bne Lcbc_dec_head
sub. r30, r30, r0 # len -= 16
addi $out, $out, 16
beq Lcbc_unaligned_done
Lcbc_dec_loop:
vmr v0, $inptail
lvx $inptail, 0, $inp
...
...
@@ -733,23 +790,29 @@ Lcbc_dec_loop:
bne Lcbc_dec_loop
Lcbc_done:
addi $out, $out, -1
lvx v1, 0, $out # redundant in aligned case
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
beq cr1, Lcbc_write_iv # $out is aligned
Lcbc_unaligned_done:
andi. r8, $out, 15
sub $out, $out, r8
li r9, 0
Lcbc_tail:
stvebx $outhead, r9, $out
addi r9, r9, 1
cmpw r9, r8
bne Lcbc_tail
Lcbc_write_iv:
neg r8, r31 # write [potentially unaligned] iv
li r10, 4
?lvsl $outperm, 0, r8
li r6, 15
vnor $outmask, v7, v7 # 0xff..ff
?vperm $outmask, v7, $outmask, $outperm
lvx $outhead, 0, r31
li r11, 8
li r12, 12
vperm v24, v24, v24, $outperm # rotate right/left
vsel v0, $outhead, v24, $outmask
lvx v1, r6, r31
stvx v0, 0, r31
vsel v1, v24, v1, $outmask
stvx v1, r6, r31
stvewx v24, 0, r31 # ivp is at least 32-bit aligned
stvewx v24, r10, r31
stvewx v24, r11, r31
stvewx v24, r12, r31
mtspr 256, r7 # restore vrsave
li r10,`15+6*$SIZE_T`
...
...
@@ -872,18 +935,21 @@ _vpaes_schedule_core:
# encrypting, output zeroth round key after transform
li r8, 0x30 # mov \$0x30,%r8d
addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
li r9, 4
li r10, 8
li r11, 12
?lvsr $outperm, 0, $out # prepare for unaligned access
vnor $outmask, v9, v9 # 0xff..ff
lvx $outhead, 0, $out
?vperm $outmask, v9, $outmask, $outperm
#stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
vperm v1, v0, v0, $outperm # rotate right/left
vsel v2, $outhead, v1, $outmask
vmr $outhead, v1
stvx v2, 0, $out
vperm $outhead, v0, v0, $outperm # rotate right/left
stvewx $outhead, 0, $out # some are superfluous
stvewx $outhead, r9, $out
stvewx $outhead, r10, $out
addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
stvewx $outhead, r11, $out
b Lschedule_go
Lschedule_am_decrypting:
...
...
@@ -893,20 +959,24 @@ Lschedule_am_decrypting:
addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
# decrypting, output zeroth round key after shiftrows
lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
li r9, 4
li r10, 8
li r11, 12
vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
neg r0, $out # prepare for unaligned access
?lvsl $outperm, 0, r0
addi $out, $out, 15 # 15 is not typo
vnor $outmask, v9, v9 # 0xff..ff
lvx $outhead, 0, $out
?vperm $outmask, $outmask, v9, $outperm
#stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
vperm v4, v4, v4, $outperm # rotate right/left
vsel v2, $outhead, v4, $outmask
vmr $outhead, v4
stvx v2, 0, $out
vperm $outhead, v4, v4, $outperm # rotate right/left
stvewx $outhead, 0, $out # some are superfluous
stvewx $outhead, r9, $out
stvewx $outhead, r10, $out
addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
stvewx $outhead, r11, $out
addi $out, $out, 15 # 15 is not typo
xori r8, r8, 0x30 # xor \$0x30, %r8
Lschedule_go:
...
...
@@ -1038,14 +1108,15 @@ Lschedule_mangle_last:
#stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
vperm v0, v0, v0, $outperm # rotate right/left
li r10, 4
vsel v2, $outhead, v0, $outmask
vmr $outhead, v0
li r11, 8
stvx v2, 0, $out
addi $out, $out, 15 # 15 is not typo
lvx v1, 0, $out # redundant in aligned case
vsel v1, $outhead, v1, $outmask
stv
x v1, 0
, $out
li r12, 12
stvewx v0, 0, $out # some (or all) are redundant
stvewx v0, r10, $out
stvewx v0, r11, $out
stv
ewx v0, r12
, $out
b Lschedule_mangle_done
.align 4
...
...
@@ -1057,15 +1128,18 @@ Lschedule_mangle_last_dec:
bl _vpaes_schedule_transform # output transform
#stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
addi r9, $out, -15 # -15 is not typo
vperm v0, v0, v0, $outperm # rotate right/left
li r10, 4
vsel v2, $outhead, v0, $outmask
vmr $outhead, v0
li r11, 8
stvx v2, 0, $out
li r12, 12
stvewx v0, 0, r9 # some (or all) are redundant
stvewx v0, r10, r9
stvewx v0, r11, r9
stvewx v0, r12, r9
addi $out, $out, -15 # -15 is not typo
lvx v1, 0, $out # redundant in aligned case
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
Lschedule_mangle_done:
mtlr r7
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录