Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
d4571f43
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
1 年多 前同步成功
通知
10
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d4571f43
编写于
1月 19, 2013
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
sha512-ppc.pl: add PPC32 code, >2x improvement on in-order cores.
上级
bba43f3f
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
291 addition
and
3 deletion
+291
-3
Configure
Configure
+1
-1
crypto/sha/asm/sha512-ppc.pl
crypto/sha/asm/sha512-ppc.pl
+290
-2
未找到文件。
Configure
浏览文件 @
d4571f43
...
@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes
...
@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes
my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
my $ppc32_asm=$ppc64_asm;
my $no_asm=":::::::::::::::void";
my $no_asm=":::::::::::::::void";
# As for $BSDthreads. Idea is to maintain "collective" set of flags,
# As for $BSDthreads. Idea is to maintain "collective" set of flags,
...
...
crypto/sha/asm/sha512-ppc.pl
浏览文件 @
d4571f43
#!/usr/bin/env perl
#!/usr/bin/env perl
# ====================================================================
# ====================================================================
# Written by Andy Polyakov <appro@
fy.chalmers.se
> for the OpenSSL
# Written by Andy Polyakov <appro@
openssl.org
> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# details see http://www.openssl.org/~appro/cryptogams/.
...
@@ -91,6 +91,10 @@ if ($output =~ /512/) {
...
@@ -91,6 +91,10 @@ if ($output =~ /512/) {
$FRAME
=
32
*$SIZE_T
+
16
*$SZ
;
$FRAME
=
32
*$SIZE_T
+
16
*$SZ
;
$LOCALS
=
6
*$SIZE_T
;
$LOCALS
=
6
*$SIZE_T
;
if
(
$SZ
==
8
&&
$SIZE_T
==
4
)
{
$FRAME
+=
16
*$SZ
;
$XOFF
=
$LOCALS
+
16
*$SZ
;
}
$sp
=
"
r1
";
$sp
=
"
r1
";
$toc
=
"
r2
";
$toc
=
"
r2
";
...
@@ -118,7 +122,7 @@ $H ="r15";
...
@@ -118,7 +122,7 @@ $H ="r15";
@X
=
("
r16
","
r17
","
r18
","
r19
","
r20
","
r21
","
r22
","
r23
",
@X
=
("
r16
","
r17
","
r18
","
r19
","
r20
","
r21
","
r22
","
r23
",
"
r24
","
r25
","
r26
","
r27
","
r28
","
r29
","
r30
","
r31
");
"
r24
","
r25
","
r26
","
r27
","
r28
","
r29
","
r30
","
r31
");
$inp
=
"
r31
";
# reassigned $inp! aliases with @X[15]
$inp
=
"
r31
"
if
(
$SZ
==
4
||
$SIZE_T
==
8
)
;
# reassigned $inp! aliases with @X[15]
sub
ROUND_00_15
{
sub
ROUND_00_15
{
my
(
$i
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
,
$g
,
$h
)
=
@_
;
my
(
$i
,
$a
,
$b
,
$c
,
$d
,
$e
,
$f
,
$g
,
$h
)
=
@_
;
...
@@ -212,7 +216,10 @@ $func:
...
@@ -212,7 +216,10 @@ $func:
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
___
if
(
$SZ
==
4
||
$SIZE_T
==
8
)
{
$code
.=
<<___;
$LD $A,`0*$SZ`($ctx)
$LD $A,`0*$SZ`($ctx)
mr $inp,r4 ; incarnate $inp
mr $inp,r4 ; incarnate $inp
$LD $B,`1*$SZ`($ctx)
$LD $B,`1*$SZ`($ctx)
...
@@ -222,7 +229,16 @@ $func:
...
@@ -222,7 +229,16 @@ $func:
$LD $F,`5*$SZ`($ctx)
$LD $F,`5*$SZ`($ctx)
$LD $G,`6*$SZ`($ctx)
$LD $G,`6*$SZ`($ctx)
$LD $H,`7*$SZ`($ctx)
$LD $H,`7*$SZ`($ctx)
___
}
else
{
for
(
$i
=
16
;
$i
<
32
;
$i
++
)
{
$code
.=
<<___;
lwz r$i,`4*($i-16)`($ctx)
___
}
}
$code
.=
<<___;
bl LPICmeup
bl LPICmeup
LPICedup:
LPICedup:
andi. r0,$inp,3
andi. r0,$inp,3
...
@@ -258,6 +274,9 @@ Lunaligned:
...
@@ -258,6 +274,9 @@ Lunaligned:
Lcross_page:
Lcross_page:
li $t1,`16*$SZ/4`
li $t1,`16*$SZ/4`
mtctr $t1
mtctr $t1
___
if
(
$SZ
==
4
||
$SIZE_T
==
8
)
{
$code
.=
<<___;
addi r20,$sp,$LOCALS ; aligned spot below the frame
addi r20,$sp,$LOCALS ; aligned spot below the frame
Lmemcpy:
Lmemcpy:
lbz r16,0($inp)
lbz r16,0($inp)
...
@@ -271,7 +290,26 @@ Lmemcpy:
...
@@ -271,7 +290,26 @@ Lmemcpy:
stb r19,3(r20)
stb r19,3(r20)
addi r20,r20,4
addi r20,r20,4
bdnz Lmemcpy
bdnz Lmemcpy
___
}
else
{
$code
.=
<<___;
addi r12,$sp,$LOCALS ; aligned spot below the frame
Lmemcpy:
lbz r8,0($inp)
lbz r9,1($inp)
lbz r10,2($inp)
lbz r11,3($inp)
addi $inp,$inp,4
stb r8,0(r12)
stb r9,1(r12)
stb r10,2(r12)
stb r11,3(r12)
addi r12,r12,4
bdnz Lmemcpy
___
}
$code
.=
<<___;
$PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
$PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
addi $inp,$sp,$LOCALS ; fictitious inp pointer
addi $inp,$sp,$LOCALS ; fictitious inp pointer
...
@@ -310,7 +348,10 @@ Ldone:
...
@@ -310,7 +348,10 @@ Ldone:
.long 0
.long 0
.byte 0,12,4,1,0x80,18,3,0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.long 0
___
if
(
$SZ
==
4
||
$SIZE_T
==
8
)
{
$code
.=
<<___;
.align 4
.align 4
Lsha2_block_private:
Lsha2_block_private:
$LD $t1,0($Tbl)
$LD $t1,0($Tbl)
...
@@ -380,6 +421,253 @@ $code.=<<___;
...
@@ -380,6 +421,253 @@ $code.=<<___;
.long 0
.long 0
.byte 0,12,0x14,0,0,0,0,0
.byte 0,12,0x14,0,0,0,0,0
___
___
}
else
{
########################################################################
# SHA512 for PPC32, X vector is off-loaded to stack...
#
# | sha512
# | -m32
# ----------------------+-----------------------
# PPC74x0,gcc-4.0.1 | +48%
# POWER6,gcc-4.4.6 | +124%(*)
# POWER7,gcc-4.4.6 | +79%(*)
# e300,gcc-4.1.0 | +167%
#
# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated
# by xlc-12.1]
my
@V
=
map
("
r
$_
",(
16
..
31
));
# A..H
my
(
$s0
,
$s1
,
$t0
,
$t1
,
$t2
,
$t3
,
$a0
,
$a1
,
$a2
,
$a3
)
=
map
("
r
$_
",(
0
,
5
,
6
,
8
..
12
,
14
,
15
));
my
(
$x0
,
$x1
)
=
("
r3
","
r4
");
# zaps $ctx and $inp
sub
ROUND_00_15_ppc32
{
my
(
$i
,
$ahi
,
$alo
,
$bhi
,
$blo
,
$chi
,
$clo
,
$dhi
,
$dlo
,
$ehi
,
$elo
,
$fhi
,
$flo
,
$ghi
,
$glo
,
$hhi
,
$hlo
)
=
@_
;
$code
.=
<<___;
lwz $t2,`$SZ*($i%16)+4`($Tbl)
xor $a0,$flo,$glo
lwz $t3,`$SZ*($i%16)+0`($Tbl)
xor $a1,$fhi,$ghi
addc $hlo,$hlo,$t0 ; h+=x[i]
stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i]
srwi $s0,$elo,$Sigma1[0]
srwi $s1,$ehi,$Sigma1[0]
and $a0,$a0,$elo
adde $hhi,$hhi,$t1
and $a1,$a1,$ehi
stw $t1,`$XOFF+4+$SZ*($i%16)`($sp)
srwi $t0,$elo,$Sigma1[1]
srwi $t1,$ehi,$Sigma1[1]
addc $hlo,$hlo,$t2 ; h+=K512[i]
insrwi $s0,$ehi,$Sigma1[0],0
insrwi $s1,$elo,$Sigma1[0],0
xor $a0,$a0,$glo ; Ch(e,f,g)
adde $hhi,$hhi,$t3
xor $a1,$a1,$ghi
insrwi $t0,$ehi,$Sigma1[1],0
insrwi $t1,$elo,$Sigma1[1],0
addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g)
srwi $t2,$ehi,$Sigma1[2]-32
srwi $t3,$elo,$Sigma1[2]-32
xor $s0,$s0,$t0
xor $s1,$s1,$t1
insrwi $t2,$elo,$Sigma1[2]-32,0
insrwi $t3,$ehi,$Sigma1[2]-32,0
xor $a0,$alo,$blo ; a^b, b^c in next round
adde $hhi,$hhi,$a1
xor $a1,$ahi,$bhi
xor $s0,$s0,$t2 ; Sigma1(e)
xor $s1,$s1,$t3
srwi $t0,$alo,$Sigma0[0]
and $a2,$a2,$a0
addc $hlo,$hlo,$s0 ; h+=Sigma1(e)
and $a3,$a3,$a1
srwi $t1,$ahi,$Sigma0[0]
srwi $s0,$ahi,$Sigma0[1]-32
adde $hhi,$hhi,$s1
srwi $s1,$alo,$Sigma0[1]-32
insrwi $t0,$ahi,$Sigma0[0],0
insrwi $t1,$alo,$Sigma0[0],0
xor $a2,$a2,$blo ; Maj(a,b,c)
addc $dlo,$dlo,$hlo ; d+=h
xor $a3,$a3,$bhi
insrwi $s0,$alo,$Sigma0[1]-32,0
insrwi $s1,$ahi,$Sigma0[1]-32,0
adde $dhi,$dhi,$hhi
srwi $t2,$ahi,$Sigma0[2]-32
srwi $t3,$alo,$Sigma0[2]-32
xor $s0,$s0,$t0
addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c)
xor $s1,$s1,$t1
insrwi $t2,$alo,$Sigma0[2]-32,0
insrwi $t3,$ahi,$Sigma0[2]-32,0
adde $hhi,$hhi,$a3
___
$code
.=<<
___
if
(
$i
>=
15
);
lwz
$t0
,`
$XOFF
+0+
$SZ
*((
$i
+2)%16)
`(
$sp
)
lwz
$t1
,`
$XOFF
+4+
$SZ
*((
$i
+2)%16)
`(
$sp
)
___
$code
.=<<
___
if
(
$i
<
15
);
lwz
$t1
,`
$SZ
*(
$i
+1)+0
`(
$inp
)
lwz
$t0
,`
$SZ
*(
$i
+1)+4
`(
$inp
)
___
$code
.=
<<___;
xor $s0,$s0,$t2 ; Sigma0(a)
xor $s1,$s1,$t3
addc $hlo,$hlo,$s0 ; h+=Sigma0(a)
adde $hhi,$hhi,$s1
___
$code
.=<<
___
if
(
$i
==
15
);
lwz
$x0
,`
$XOFF
+0+
$SZ
*((
$i
+1)%16)
`(
$sp
)
lwz
$x1
,`
$XOFF
+4+
$SZ
*((
$i
+1)%16)
`(
$sp
)
___
}
sub
ROUND_16_xx_ppc32
{
my
(
$i
,
$ahi
,
$alo
,
$bhi
,
$blo
,
$chi
,
$clo
,
$dhi
,
$dlo
,
$ehi
,
$elo
,
$fhi
,
$flo
,
$ghi
,
$glo
,
$hhi
,
$hlo
)
=
@_
;
$code
.=
<<___;
srwi $s0,$t0,$sigma0[0]
srwi $s1,$t1,$sigma0[0]
srwi $t2,$t0,$sigma0[1]
srwi $t3,$t1,$sigma0[1]
insrwi $s0,$t1,$sigma0[0],0
insrwi $s1,$t0,$sigma0[0],0
srwi $a0,$t0,$sigma0[2]
insrwi $t2,$t1,$sigma0[1],0
insrwi $t3,$t0,$sigma0[1],0
insrwi $a0,$t1,$sigma0[2],0
xor $s0,$s0,$t2
lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp)
srwi $a1,$t1,$sigma0[2]
xor $s1,$s1,$t3
lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp)
xor $a0,$a0,$s0
srwi $s0,$t2,$sigma1[0]
xor $a1,$a1,$s1
srwi $s1,$t3,$sigma1[0]
addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1])
srwi $a0,$t3,$sigma1[1]-32
insrwi $s0,$t3,$sigma1[0],0
insrwi $s1,$t2,$sigma1[0],0
adde $x1,$x1,$a1
srwi $a1,$t2,$sigma1[1]-32
insrwi $a0,$t2,$sigma1[1]-32,0
srwi $t2,$t2,$sigma1[2]
insrwi $a1,$t3,$sigma1[1]-32,0
insrwi $t2,$t3,$sigma1[2],0
xor $s0,$s0,$a0
lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp)
srwi $t3,$t3,$sigma1[2]
xor $s1,$s1,$a1
lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp)
xor $s0,$s0,$t2
addc $x0,$x0,$a0 ; x[i]+=x[i+9]
xor $s1,$s1,$t3
adde $x1,$x1,$a1
addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14])
adde $x1,$x1,$s1
___
(
$t0
,
$t1
,
$x0
,
$x1
)
=
(
$x0
,
$x1
,
$t0
,
$t1
);
&ROUND_00_15_ppc32
(
@
_
);
}
$code
.=
<<___;
.align 4
Lsha2_block_private:
lwz $t1,0($inp)
xor $a2,@V[3],@V[5] ; B^C, magic seed
lwz $t0,4($inp)
xor $a3,@V[2],@V[4]
___
for
(
$i
=
0
;
$i
<
16
;
$i
++
)
{
&ROUND_00_15_ppc32
(
$i
,
@V
);
unshift
(
@V
,
pop
(
@V
));
unshift
(
@V
,
pop
(
@V
));
(
$a0
,
$a1
,
$a2
,
$a3
)
=
(
$a2
,
$a3
,
$a0
,
$a1
);
}
$code
.=
<<___;
li $a0,`$rounds/16-1`
mtctr $a0
.align 4
Lrounds:
addi $Tbl,$Tbl,`16*$SZ`
___
for
(;
$i
<
32
;
$i
++
)
{
&ROUND_16_xx_ppc32
(
$i
,
@V
);
unshift
(
@V
,
pop
(
@V
));
unshift
(
@V
,
pop
(
@V
));
(
$a0
,
$a1
,
$a2
,
$a3
)
=
(
$a2
,
$a3
,
$a0
,
$a1
);
}
$code
.=
<<___;
bdnz- Lrounds
$POP $ctx,`$FRAME-$SIZE_T*22`($sp)
$POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
$POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
lwz $t0,0($ctx)
lwz $t1,4($ctx)
lwz $t2,8($ctx)
lwz $t3,12($ctx)
lwz $a0,16($ctx)
lwz $a1,20($ctx)
lwz $a2,24($ctx)
addc @V[1],@V[1],$t1
lwz $a3,28($ctx)
adde @V[0],@V[0],$t0
lwz $t0,32($ctx)
addc @V[3],@V[3],$t3
lwz $t1,36($ctx)
adde @V[2],@V[2],$t2
lwz $t2,40($ctx)
addc @V[5],@V[5],$a1
lwz $t3,44($ctx)
adde @V[4],@V[4],$a0
lwz $a0,48($ctx)
addc @V[7],@V[7],$a3
lwz $a1,52($ctx)
adde @V[6],@V[6],$a2
lwz $a2,56($ctx)
addc @V[9],@V[9],$t1
lwz $a3,60($ctx)
adde @V[8],@V[8],$t0
stw @V[0],0($ctx)
stw @V[1],4($ctx)
addc @V[11],@V[11],$t3
stw @V[2],8($ctx)
stw @V[3],12($ctx)
adde @V[10],@V[10],$t2
stw @V[4],16($ctx)
stw @V[5],20($ctx)
addc @V[13],@V[13],$a1
stw @V[6],24($ctx)
stw @V[7],28($ctx)
adde @V[12],@V[12],$a0
stw @V[8],32($ctx)
stw @V[9],36($ctx)
addc @V[15],@V[15],$a3
stw @V[10],40($ctx)
stw @V[11],44($ctx)
adde @V[14],@V[14],$a2
stw @V[12],48($ctx)
stw @V[13],52($ctx)
stw @V[14],56($ctx)
stw @V[15],60($ctx)
addi $inp,$inp,`16*$SZ` ; advance inp
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
$UCMP $inp,$num
bne Lsha2_block_private
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
___
}
# Ugly hack here, because PPC assembler syntax seem to vary too
# Ugly hack here, because PPC assembler syntax seem to vary too
# much from platforms to platform...
# much from platforms to platform...
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录