Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
42feba47
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
接近 2 年 前同步成功
通知
12
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
42feba47
编写于
4月 10, 2010
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add ghash-alpha.pl assembler module.
上级
3c01a1e8
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
453 addition
and
0 deletion
+453
-0
crypto/modes/asm/ghash-alpha.pl
crypto/modes/asm/ghash-alpha.pl
+453
-0
未找到文件。
crypto/modes/asm/ghash-alpha.pl
0 → 100644
浏览文件 @
42feba47
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# March 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. Even though
# loops are aggressively modulo-scheduled in respect to references to
# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
# scheduling "glitch," because uprofile(1) indicates uniform sample
# distribution, as if all instruction bundles execute in 1.5 cycles.
# Meaning that it could have been even faster, yet 12 cycles is ~60%
# better than gcc-generated code and ~80% than code generated by vendor
# compiler.
$cnt
=
"
v0
";
# $0
$t0
=
"
t0
";
$t1
=
"
t1
";
$t2
=
"
t2
";
$Thi0
=
"
t3
";
# $4
$Tlo0
=
"
t4
";
$Thi1
=
"
t5
";
$Tlo1
=
"
t6
";
$rem
=
"
t7
";
# $8
#################
$Xi
=
"
a0
";
# $16
$Htbl
=
"
a1
";
$nlo
=
"
a4
";
# $20
$nhi
=
"
a5
";
$Zhi
=
"
t8
";
$Zlo
=
"
t9
";
$Xhi
=
"
t10
";
# $24
$Xlo
=
"
t11
";
$remp
=
"
t12
";
$rem_4bit
=
"
AT
";
# $28
{
my
$N
;
sub
loop
()
{
$N
++
;
$code
.=
<<___;
.align 4
extbl $Xlo,7,$nlo
and $nlo,0xf0,$nhi
sll $nlo,4,$nlo
and $nlo,0xf0,$nlo
addq $nlo,$Htbl,$nlo
ldq $Zlo,8($nlo)
addq $nhi,$Htbl,$nhi
ldq $Zhi,0($nlo)
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
lda $cnt,6(zero)
extbl $Xlo,6,$nlo
ldq $Tlo1,8($nhi)
s8addq $remp,$rem_4bit,$remp
ldq $Thi1,0($nhi)
srl $Zlo,4,$Zlo
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $t0,$Zlo,$Zlo
and $nlo,0xf0,$nhi
xor $Tlo1,$Zlo,$Zlo
sll $nlo,4,$nlo
xor $Thi1,$Zhi,$Zhi
and $nlo,0xf0,$nlo
addq $nlo,$Htbl,$nlo
ldq $Tlo0,8($nlo)
addq $nhi,$Htbl,$nhi
ldq $Thi0,0($nlo)
.Looplo$N:
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
subq $cnt,1,$cnt
srl $Zlo,4,$Zlo
ldq $Tlo1,8($nhi)
xor $rem,$Zhi,$Zhi
ldq $Thi1,0($nhi)
s8addq $remp,$rem_4bit,$remp
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $t0,$Zlo,$Zlo
extbl $Xlo,$cnt,$nlo
and $nlo,0xf0,$nhi
xor $Thi0,$Zhi,$Zhi
xor $Tlo0,$Zlo,$Zlo
sll $nlo,4,$nlo
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
and $nlo,0xf0,$nlo
srl $Zlo,4,$Zlo
s8addq $remp,$rem_4bit,$remp
xor $rem,$Zhi,$Zhi
addq $nlo,$Htbl,$nlo
addq $nhi,$Htbl,$nhi
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
ldq $Tlo0,8($nlo)
xor $t0,$Zlo,$Zlo
xor $Tlo1,$Zlo,$Zlo
xor $Thi1,$Zhi,$Zhi
ldq $Thi0,0($nlo)
bne $cnt,.Looplo$N
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
lda $cnt,7(zero)
srl $Zlo,4,$Zlo
ldq $Tlo1,8($nhi)
xor $rem,$Zhi,$Zhi
ldq $Thi1,0($nhi)
s8addq $remp,$rem_4bit,$remp
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $t0,$Zlo,$Zlo
extbl $Xhi,$cnt,$nlo
and $nlo,0xf0,$nhi
xor $Thi0,$Zhi,$Zhi
xor $Tlo0,$Zlo,$Zlo
sll $nlo,4,$nlo
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
and $nlo,0xf0,$nlo
srl $Zlo,4,$Zlo
s8addq $remp,$rem_4bit,$remp
xor $rem,$Zhi,$Zhi
addq $nlo,$Htbl,$nlo
addq $nhi,$Htbl,$nhi
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
ldq $Tlo0,8($nlo)
xor $t0,$Zlo,$Zlo
xor $Tlo1,$Zlo,$Zlo
xor $Thi1,$Zhi,$Zhi
ldq $Thi0,0($nlo)
unop
.Loophi$N:
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
subq $cnt,1,$cnt
srl $Zlo,4,$Zlo
ldq $Tlo1,8($nhi)
xor $rem,$Zhi,$Zhi
ldq $Thi1,0($nhi)
s8addq $remp,$rem_4bit,$remp
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $t0,$Zlo,$Zlo
extbl $Xhi,$cnt,$nlo
and $nlo,0xf0,$nhi
xor $Thi0,$Zhi,$Zhi
xor $Tlo0,$Zlo,$Zlo
sll $nlo,4,$nlo
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
and $nlo,0xf0,$nlo
srl $Zlo,4,$Zlo
s8addq $remp,$rem_4bit,$remp
xor $rem,$Zhi,$Zhi
addq $nlo,$Htbl,$nlo
addq $nhi,$Htbl,$nhi
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
ldq $Tlo0,8($nlo)
xor $t0,$Zlo,$Zlo
xor $Tlo1,$Zlo,$Zlo
xor $Thi1,$Zhi,$Zhi
ldq $Thi0,0($nlo)
bne $cnt,.Loophi$N
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
srl $Zlo,4,$Zlo
ldq $Tlo1,8($nhi)
xor $rem,$Zhi,$Zhi
ldq $Thi1,0($nhi)
s8addq $remp,$rem_4bit,$remp
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $t0,$Zlo,$Zlo
xor $Tlo0,$Zlo,$Zlo
xor $Thi0,$Zhi,$Zhi
and $Zlo,0x0f,$remp
sll $Zhi,60,$t0
srl $Zlo,4,$Zlo
s8addq $remp,$rem_4bit,$remp
xor $rem,$Zhi,$Zhi
ldq $rem,0($remp)
srl $Zhi,4,$Zhi
xor $Tlo1,$Zlo,$Zlo
xor $Thi1,$Zhi,$Zhi
xor $t0,$Zlo,$Zlo
xor $rem,$Zhi,$Zhi
___
}}
$code
=
<<___;
#include <asm.h>
#include <regdef.h>
.text
.set noat
.set noreorder
.globl gcm_gmult_4bit
.align 4
.ent gcm_gmult_4bit
gcm_gmult_4bit:
.frame sp,0,ra
.prologue 0
ldq $Xlo,8($Xi)
ldq $Xhi,0($Xi)
br $rem_4bit,.Lpic1
.Lpic1: lda $rem_4bit,rem_4bit-.Lpic1($rem_4bit)
___
&loop
();
$code
.=
<<___;
srl $Zlo,24,$t0 # byte swap
srl $Zlo,8,$t1
sll $Zlo,8,$t2
sll $Zlo,24,$Zlo
zapnot $t0,0x11,$t0
zapnot $t1,0x22,$t1
zapnot $Zlo,0x88,$Zlo
or $t0,$t1,$t0
zapnot $t2,0x44,$t2
or $Zlo,$t0,$Zlo
srl $Zhi,24,$t0
srl $Zhi,8,$t1
or $Zlo,$t2,$Zlo
sll $Zhi,8,$t2
sll $Zhi,24,$Zhi
srl $Zlo,32,$Xlo
sll $Zlo,32,$Zlo
zapnot $t0,0x11,$t0
zapnot $t1,0x22,$t1
or $Zlo,$Xlo,$Xlo
zapnot $Zhi,0x88,$Zhi
or $t0,$t1,$t0
zapnot $t2,0x44,$t2
or $Zhi,$t0,$Zhi
or $Zhi,$t2,$Zhi
srl $Zhi,32,$Xhi
sll $Zhi,32,$Zhi
or $Zhi,$Xhi,$Xhi
stq $Xlo,8($Xi)
stq $Xhi,0($Xi)
ret (ra)
.end gcm_gmult_4bit
___
# argument block for gcm_ghash_4bit
$inp
=
"
a0
";
# $16
$len
=
"
a1
";
$Xi
=
"
a2
";
$Htbl
=
"
a3
";
$inhi
=
"
s0
";
$inlo
=
"
s1
";
$code
.=
<<___;
.globl gcm_ghash_4bit
.align 4
.ent gcm_ghash_4bit
gcm_ghash_4bit:
lda sp,-32(sp)
stq ra,0(sp)
stq s0,8(sp)
stq s1,16(sp)
.mask 0x04000600,-32
.frame sp,32,ra
.prologue 0
ldq_u $inhi,0($inp)
ldq_u $Thi0,7($inp)
ldq_u $inlo,8($inp)
ldq_u $Tlo0,15($inp)
ldq $Xhi,0($Xi)
ldq $Xlo,8($Xi)
br $rem_4bit,.Lpic2
.Lpic2: lda $rem_4bit,rem_4bit-.Lpic2($rem_4bit)
.Louter:
extql $inhi,$inp,$inhi
extqh $Thi0,$inp,$Thi0
or $inhi,$Thi0,$inhi
lda $inp,16($inp)
extql $inlo,$inp,$inlo
extqh $Tlo0,$inp,$Tlo0
or $inlo,$Tlo0,$inlo
subq $len,16,$len
xor $Xlo,$inlo,$Xlo
xor $Xhi,$inhi,$Xhi
___
&loop
();
$code
.=
<<___;
srl $Zlo,24,$t0 # byte swap
srl $Zlo,8,$t1
sll $Zlo,8,$t2
sll $Zlo,24,$Zlo
zapnot $t0,0x11,$t0
zapnot $t1,0x22,$t1
zapnot $Zlo,0x88,$Zlo
or $t0,$t1,$t0
zapnot $t2,0x44,$t2
or $Zlo,$t0,$Zlo
srl $Zhi,24,$t0
srl $Zhi,8,$t1
or $Zlo,$t2,$Zlo
sll $Zhi,8,$t2
sll $Zhi,24,$Zhi
srl $Zlo,32,$Xlo
sll $Zlo,32,$Zlo
beq $len,.Ldone
zapnot $t0,0x11,$t0
zapnot $t1,0x22,$t1
or $Zlo,$Xlo,$Xlo
ldq_u $inhi,0($inp)
zapnot $Zhi,0x88,$Zhi
or $t0,$t1,$t0
zapnot $t2,0x44,$t2
ldq_u $Thi0,7($inp)
or $Zhi,$t0,$Zhi
or $Zhi,$t2,$Zhi
ldq_u $inlo,8($inp)
ldq_u $Tlo0,15($inp)
srl $Zhi,32,$Xhi
sll $Zhi,32,$Zhi
or $Zhi,$Xhi,$Xhi
br zero,.Louter
.Ldone:
zapnot $t0,0x11,$t0
zapnot $t1,0x22,$t1
or $Zlo,$Xlo,$Xlo
zapnot $Zhi,0x88,$Zhi
or $t0,$t1,$t0
zapnot $t2,0x44,$t2
or $Zhi,$t0,$Zhi
or $Zhi,$t2,$Zhi
srl $Zhi,32,$Xhi
sll $Zhi,32,$Zhi
or $Zhi,$Xhi,$Xhi
stq $Xlo,8($Xi)
stq $Xhi,0($Xi)
.set noreorder
/*ldq ra,0(sp)*/
ldq s0,8(sp)
ldq s1,16(sp)
lda sp,32(sp)
ret (ra)
.end gcm_ghash_4bit
.align 4
rem_4bit:
.quad 0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48
.quad 0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48
.quad 0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48
.quad 0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48
.asciiz "GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
$output
=
shift
and
open
STDOUT
,"
>
$output
";
print
$code
;
close
STDOUT
;
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录