Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
67150340
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
大约 1 年 前同步成功
通知
9
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
67150340
编写于
5月 27, 2011
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
PPC assembler pack: adhere closer to ABI specs, add PowerOpen traceback data.
上级
0ca9a483
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
311 addition
and
241 deletion
+311
-241
crypto/aes/asm/aes-ppc.pl
crypto/aes/asm/aes-ppc.pl
+31
-11
crypto/bn/asm/ppc-mont.pl
crypto/bn/asm/ppc-mont.pl
+51
-45
crypto/bn/asm/ppc.pl
crypto/bn/asm/ppc.pl
+30
-13
crypto/bn/asm/ppc64-mont.pl
crypto/bn/asm/ppc64-mont.pl
+78
-76
crypto/ppccpuid.pl
crypto/ppccpuid.pl
+20
-0
crypto/sha/asm/sha1-ppc.pl
crypto/sha/asm/sha1-ppc.pl
+45
-38
crypto/sha/asm/sha512-ppc.pl
crypto/sha/asm/sha512-ppc.pl
+56
-58
未找到文件。
crypto/aes/asm/aes-ppc.pl
浏览文件 @
67150340
...
...
@@ -18,7 +18,7 @@
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline g
ives
10%
# Rescheduling instructions to favour Power6 pipeline g
ave
10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
...
...
@@ -33,11 +33,13 @@ $flavour = shift;
if
(
$flavour
=~
/64/
)
{
$SIZE_T
=
8
;
$LRSAVE
=
2
*$SIZE_T
;
$STU
=
"
stdu
";
$POP
=
"
ld
";
$PUSH
=
"
std
";
}
elsif
(
$flavour
=~
/32/
)
{
$SIZE_T
=
4
;
$LRSAVE
=
$SIZE_T
;
$STU
=
"
stwu
";
$POP
=
"
lwz
";
$PUSH
=
"
stw
";
...
...
@@ -116,15 +118,19 @@ LAES_Te:
addi $Tbl0,$Tbl0,`128-8`
mtlr r0
blr
.space `32-24`
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space `64-9*4`
LAES_Td:
mflr r0
bcl 20,31,\$+4
mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
addi $Tbl0,$Tbl0,`128-
8-32
+2048+256`
addi $Tbl0,$Tbl0,`128-
64-8
+2048+256`
mtlr r0
blr
.space `128-32-24`
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space `128-64-9*4`
___
&_data_word
(
0xc66363a5
,
0xf87c7c84
,
0xee777799
,
0xf67b7b8d
,
...
...
@@ -328,10 +334,9 @@ $code.=<<___;
.globl .AES_encrypt
.align 7
.AES_encrypt:
mflr r0
$STU $sp,-$FRAME($sp)
mflr r0
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
...
...
@@ -352,6 +357,7 @@ $code.=<<___;
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
lwz $s0,0($inp)
lwz $s1,4($inp)
...
...
@@ -364,7 +370,7 @@ $code.=<<___;
stw $s2,8($out)
stw $s3,12($out)
$POP r0,`$FRAME
-$SIZE_T*21
`($sp)
$POP r0,`$FRAME
+$LRSAVE
`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
...
...
@@ -388,6 +394,9 @@ $code.=<<___;
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.align 5
Lppc_AES_encrypt:
...
...
@@ -530,6 +539,8 @@ Lenc_loop:
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.align 4
Lppc_AES_encrypt_compact:
...
...
@@ -673,14 +684,15 @@ Lenc_compact_done:
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .AES_decrypt
.align 7
.AES_decrypt:
mflr r0
$STU $sp,-$FRAME($sp)
mflr r0
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
...
...
@@ -701,6 +713,7 @@ Lenc_compact_done:
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
lwz $s0,0($inp)
lwz $s1,4($inp)
...
...
@@ -713,7 +726,7 @@ Lenc_compact_done:
stw $s2,8($out)
stw $s3,12($out)
$POP r0,`$FRAME
-$SIZE_T*21
`($sp)
$POP r0,`$FRAME
+$LRSAVE
`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
...
...
@@ -737,6 +750,9 @@ Lenc_compact_done:
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.align 5
Lppc_AES_decrypt:
...
...
@@ -879,6 +895,8 @@ Ldec_loop:
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.align 4
Lppc_AES_decrypt_compact:
...
...
@@ -1179,7 +1197,9 @@ Ldec_compact_done:
xor
$s2
,
$s2
,
$t2
xor
$s3
,
$s3
,
$t3
blr
.
long
0
.
long
0
.
byte
0
,
12
,
0x14
,
0
,
0
,
0
,
0
,
0
.
asciz
"
AES for PPC, CRYPTOGAMS by <appro
\@
openssl.org>
"
.
align
7
___
...
...
crypto/bn/asm/ppc-mont.pl
浏览文件 @
67150340
...
...
@@ -31,7 +31,6 @@ if ($flavour =~ /32/) {
$BNSZ
=
$BITS
/
8
;
$SIZE_T
=
4
;
$RZONE
=
224
;
$FRAME
=
$SIZE_T
*
16
;
$LD
=
"
lwz
";
# load
$LDU
=
"
lwzu
";
# load and update
...
...
@@ -51,7 +50,6 @@ if ($flavour =~ /32/) {
$BNSZ
=
$BITS
/
8
;
$SIZE_T
=
8
;
$RZONE
=
288
;
$FRAME
=
$SIZE_T
*
16
;
# same as above, but 64-bit mnemonics...
$LD
=
"
ld
";
# load
...
...
@@ -69,6 +67,9 @@ if ($flavour =~ /32/) {
$POP
=
$LD
;
}
else
{
die
"
nonsense
$flavour
";
}
$FRAME
=
8
*$SIZE_T
+
$RZONE
;
$LOCALS
=
8
*$SIZE_T
;
$
0
=~
m/(.*[\/\\])[^\/\\]+$/
;
$dir
=
$
1
;
(
$xlate
=
"
${dir}
ppc-xlate.pl
"
and
-
f
$xlate
)
or
(
$xlate
=
"
${dir}
../../perlasm/ppc-xlate.pl
"
and
-
f
$xlate
)
or
...
...
@@ -89,18 +90,18 @@ $aj="r10";
$nj
=
"
r11
";
$tj
=
"
r12
";
# non-volatile registers
$i
=
"
r
14
";
$j
=
"
r
15
";
$tp
=
"
r
16
";
$m0
=
"
r
17
";
$m1
=
"
r
18
";
$lo0
=
"
r
19
";
$hi0
=
"
r2
0
";
$lo1
=
"
r2
1
";
$hi1
=
"
r2
2
";
$alo
=
"
r2
3
";
$ahi
=
"
r
24
";
$nlo
=
"
r
25
";
$i
=
"
r
20
";
$j
=
"
r
21
";
$tp
=
"
r
22
";
$m0
=
"
r
23
";
$m1
=
"
r
24
";
$lo0
=
"
r
25
";
$hi0
=
"
r2
6
";
$lo1
=
"
r2
7
";
$hi1
=
"
r2
8
";
$alo
=
"
r2
9
";
$ahi
=
"
r
30
";
$nlo
=
"
r
31
";
#
$nhi
=
"
r0
";
...
...
@@ -123,32 +124,33 @@ ___
$code
.=
<<___;
slwi $num,$num,`log($BNSZ)/log(2)`
li $tj,-4096
addi $ovf,$num,
`$FRAME+$RZONE`
addi $ovf,$num,
$FRAME
subf $ovf,$ovf,$sp ; $sp-$ovf
and $ovf,$ovf,$tj ; minimize TLB usage
subf $ovf,$sp,$ovf ; $ovf-$sp
mr $tj,$sp
srwi $num,$num,`log($BNSZ)/log(2)`
$STUX $sp,$sp,$ovf
$PUSH r
14,`4*$SIZE_T`($sp
)
$PUSH r
15,`5*$SIZE_T`($sp
)
$PUSH r
16,`6*$SIZE_T`($sp
)
$PUSH r
17,`7*$SIZE_T`($sp
)
$PUSH r
18,`8*$SIZE_T`($sp
)
$PUSH r
19,`9*$SIZE_T`($sp
)
$PUSH r2
0,`10*$SIZE_T`($sp
)
$PUSH r2
1,`11*$SIZE_T`($sp
)
$PUSH r2
2,`12*$SIZE_T`($sp
)
$PUSH r2
3,`13*$SIZE_T`($sp
)
$PUSH r
24,`14*$SIZE_T`($sp
)
$PUSH r
25,`15*$SIZE_T`($sp
)
$PUSH r
20,`-12*$SIZE_T`($tj
)
$PUSH r
21,`-11*$SIZE_T`($tj
)
$PUSH r
22,`-10*$SIZE_T`($tj
)
$PUSH r
23,`-9*$SIZE_T`($tj
)
$PUSH r
24,`-8*$SIZE_T`($tj
)
$PUSH r
25,`-7*$SIZE_T`($tj
)
$PUSH r2
6,`-6*$SIZE_T`($tj
)
$PUSH r2
7,`-5*$SIZE_T`($tj
)
$PUSH r2
8,`-4*$SIZE_T`($tj
)
$PUSH r2
9,`-3*$SIZE_T`($tj
)
$PUSH r
30,`-2*$SIZE_T`($tj
)
$PUSH r
31,`-1*$SIZE_T`($tj
)
$LD $n0,0($n0) ; pull n0[0] value
addi $num,$num,-2 ; adjust $num for counter register
$LD $m0,0($bp) ; m0=bp[0]
$LD $aj,0($ap) ; ap[0]
addi $tp,$sp,$
FRAME
addi $tp,$sp,$
LOCALS
$UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
$UMULH $hi0,$aj,$m0
...
...
@@ -210,8 +212,8 @@ L1st:
Louter:
$LDX $m0,$bp,$i ; m0=bp[i]
$LD $aj,0($ap) ; ap[0]
addi $tp,$sp,$
FRAME
$LD $tj,$
FRAME($sp)
; tp[0]
addi $tp,$sp,$
LOCALS
$LD $tj,$
LOCALS($sp)
; tp[0]
$UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
$UMULH $hi0,$aj,$m0
$LD $aj,$BNSZ($ap) ; ap[1]
...
...
@@ -278,7 +280,7 @@ Linner:
addi $num,$num,2 ; restore $num
subfc $j,$j,$j ; j=0 and "clear" XER[CA]
addi $tp,$sp,$
FRAME
addi $tp,$sp,$
LOCALS
mtctr $num
.align 4
...
...
@@ -304,23 +306,27 @@ Lcopy: ; copy or in-place refresh
addi $j,$j,$BNSZ
bdnz- Lcopy
$POP r14,`4*$SIZE_T`($sp)
$POP r15,`5*$SIZE_T`($sp)
$POP r16,`6*$SIZE_T`($sp)
$POP r17,`7*$SIZE_T`($sp)
$POP r18,`8*$SIZE_T`($sp)
$POP r19,`9*$SIZE_T`($sp)
$POP r20,`10*$SIZE_T`($sp)
$POP r21,`11*$SIZE_T`($sp)
$POP r22,`12*$SIZE_T`($sp)
$POP r23,`13*$SIZE_T`($sp)
$POP r24,`14*$SIZE_T`($sp)
$POP r25,`15*$SIZE_T`($sp)
$POP $sp,0($sp)
$POP $tj,0($sp)
li r3,1
$POP r20,`-12*$SIZE_T`($tj)
$POP r21,`-11*$SIZE_T`($tj)
$POP r22,`-10*$SIZE_T`($tj)
$POP r23,`-9*$SIZE_T`($tj)
$POP r24,`-8*$SIZE_T`($tj)
$POP r25,`-7*$SIZE_T`($tj)
$POP r26,`-6*$SIZE_T`($tj)
$POP r27,`-5*$SIZE_T`($tj)
$POP r28,`-4*$SIZE_T`($tj)
$POP r29,`-3*$SIZE_T`($tj)
$POP r30,`-2*$SIZE_T`($tj)
$POP r31,`-1*$SIZE_T`($tj)
mr $sp,$tj
blr
.long 0
.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
.byte 0,12,4,0,0x80,12,6,0
.long 0
.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
___
$code
=~
s/\`([^\`]*)\`/eval $1/g
em
;
...
...
crypto/bn/asm/ppc.pl
浏览文件 @
67150340
...
...
@@ -389,7 +389,9 @@ $data=<<EOF;
$ST r9,`6*$BNSZ`(r3) #r[6]=c1
$ST r10,`7*$BNSZ`(r3) #r[7]=c2
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -814,8 +816,9 @@ $data=<<EOF;
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -966,7 +969,9 @@ $data=<<EOF;
$ST r10,`6*$BNSZ`(r3) #r[6]=c1
$ST r11,`7*$BNSZ`(r3) #r[7]=c2
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1502,7 +1507,9 @@ $data=<<EOF;
$ST r12,`14*$BNSZ`(r3) #r[14]=c3;
$ST r10,`15*$BNSZ`(r3) #r[15]=c1;
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1550,8 +1557,9 @@ Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1594,7 +1602,9 @@ Lppcasm_add_mainloop:
Lppcasm_add_adios:
addze r3,r0 #return carry bit.
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1707,7 +1717,9 @@ Lppcasm_div8:
Lppcasm_div9:
or r3,r8,r0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1746,8 +1758,9 @@ Lppcasm_sqr_mainloop:
bdnz- Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1850,7 +1863,9 @@ Lppcasm_mw_REM:
Lppcasm_mw_OVER:
addi r3,r12,0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
#
# NOTE: The following label name should be changed to
...
...
@@ -1973,7 +1988,9 @@ Lppcasm_maw_leftover:
Lppcasm_maw_adios:
addi r3,r12,0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.align 4
EOF
$data
=~
s/\`([^\`]*)\`/eval $1/g
em
;
...
...
crypto/bn/asm/ppc64-mont.pl
浏览文件 @
67150340
...
...
@@ -70,7 +70,6 @@ $flavour = shift;
if
(
$flavour
=~
/32/
)
{
$SIZE_T
=
4
;
$RZONE
=
224
;
$FRAME
=
$SIZE_T
*
12
+
8
*
12
;
$fname
=
"
bn_mul_mont_fpu64
";
$STUX
=
"
stwux
";
# store indexed and update
...
...
@@ -79,7 +78,6 @@ if ($flavour =~ /32/) {
}
elsif
(
$flavour
=~
/64/
)
{
$SIZE_T
=
8
;
$RZONE
=
288
;
$FRAME
=
$SIZE_T
*
12
+
8
*
12
;
$fname
=
"
bn_mul_mont_fpu64
";
# same as above, but 64-bit mnemonics...
...
...
@@ -95,7 +93,7 @@ die "can't locate ppc-xlate.pl";
open
STDOUT
,"
| $^X
$xlate
$flavour
"
.
shift
||
die
"
can't call
$xlate
: $!
";
$FRAME
=
(
$FRAME
+
63
)
&~
63
;
$FRAME
=
64
;
# padded frame header
$TRANSFER
=
16
*
8
;
$carry
=
"
r0
";
...
...
@@ -112,16 +110,16 @@ $tp="r10";
$j
=
"
r11
";
$i
=
"
r12
";
# non-volatile registers
$nap_d
=
"
r
14
";
# interleaved ap and np in double format
$a0
=
"
r
15
";
# ap[0]
$t0
=
"
r
16
";
# temporary registers
$t1
=
"
r
17
";
$t2
=
"
r
18
";
$t3
=
"
r
19
";
$t4
=
"
r2
0
";
$t5
=
"
r2
1
";
$t6
=
"
r
22
";
$t7
=
"
r
23
";
$nap_d
=
"
r
22
";
# interleaved ap and np in double format
$a0
=
"
r
23
";
# ap[0]
$t0
=
"
r
24
";
# temporary registers
$t1
=
"
r
25
";
$t2
=
"
r
26
";
$t3
=
"
r
27
";
$t4
=
"
r2
8
";
$t5
=
"
r2
9
";
$t6
=
"
r
30
";
$t7
=
"
r
31
";
# PPC offers enough register bank capacity to unroll inner loops twice
#
...
...
@@ -151,28 +149,17 @@ $ba="f0"; $bb="f1"; $bc="f2"; $bd="f3";
$na
=
"
f4
";
$nb
=
"
f5
";
$nc
=
"
f6
";
$nd
=
"
f7
";
$dota
=
"
f8
";
$dotb
=
"
f9
";
$A0
=
"
f10
";
$A1
=
"
f11
";
$A2
=
"
f12
";
$A3
=
"
f13
";
$N0
=
"
f
14
";
$N1
=
"
f15
";
$N2
=
"
f16
";
$N3
=
"
f17
";
$T0a
=
"
f
18
";
$T0b
=
"
f19
";
$T1a
=
"
f2
0
";
$T1b
=
"
f21
";
$T2a
=
"
f2
2
";
$T2b
=
"
f23
";
$T3a
=
"
f
24
";
$T3b
=
"
f25
";
$N0
=
"
f
20
";
$N1
=
"
f21
";
$N2
=
"
f22
";
$N3
=
"
f23
";
$T0a
=
"
f
24
";
$T0b
=
"
f25
";
$T1a
=
"
f2
6
";
$T1b
=
"
f27
";
$T2a
=
"
f2
8
";
$T2b
=
"
f29
";
$T3a
=
"
f
30
";
$T3b
=
"
f31
";
# sp----------->+-------------------------------+
# | saved sp |
# +-------------------------------+
# | |
# +-------------------------------+
# | 10 saved gpr, r14-r23 |
# . .
# . .
# +12*size_t +-------------------------------+
# | 12 saved fpr, f14-f25 |
# . .
# . .
# +12*8 +-------------------------------+
# | padding to 64 byte boundary |
# . .
# +X +-------------------------------+
# +64 +-------------------------------+
# | 16 gpr<->fpr transfer zone |
# . .
# . .
...
...
@@ -192,6 +179,16 @@ $T3a="f24"; $T3b="f25";
# . .
# . .
# +-------------------------------+
# . .
# -12*size_t +-------------------------------+
# | 10 saved gpr, r22-r31 |
# . .
# . .
# -12*8 +-------------------------------+
# | 12 saved fpr, f20-f31 |
# . .
# . .
# +-------------------------------+
$code
=
<<___;
.machine "any"
...
...
@@ -215,30 +212,31 @@ $code=<<___;
subf $tp,$tp,$sp ; $sp-$tp
and $tp,$tp,$i ; minimize TLB usage
subf $tp,$sp,$tp ; $tp-$sp
mr $i,$sp
$STUX $sp,$sp,$tp ; alloca
$PUSH r
14,`2*$SIZE_T`($sp
)
$PUSH r
15,`3*$SIZE_T`($sp
)
$PUSH r
16,`4*$SIZE_T`($sp
)
$PUSH r
17,`5*$SIZE_T`($sp
)
$PUSH r
18,`6*$SIZE_T`($sp
)
$PUSH r
19,`7*$SIZE_T`($sp
)
$PUSH r2
0,`8*$SIZE_T`($sp
)
$PUSH r2
1,`9*$SIZE_T`($sp
)
$PUSH r
22,`10*$SIZE_T`($sp
)
$PUSH r
23,`11*$SIZE_T`($sp
)
stfd f
14,`12*$SIZE_T+0`($sp
)
stfd f
15,`12*$SIZE_T+8`($sp
)
stfd f
16,`12*$SIZE_T+16`($sp
)
stfd f
17,`12*$SIZE_T+24`($sp
)
stfd f
18,`12*$SIZE_T+32`($sp
)
stfd f
19,`12*$SIZE_T+40`($sp
)
stfd f2
0,`12*$SIZE_T+48`($sp
)
stfd f2
1,`12*$SIZE_T+56`($sp
)
stfd f2
2,`12*$SIZE_T+64`($sp
)
stfd f2
3,`12*$SIZE_T+72`($sp
)
stfd f
24,`12*$SIZE_T+80`($sp
)
stfd f
25,`12*$SIZE_T+88`($sp
)
$PUSH r
22,`-12*8-10*$SIZE_T`($i
)
$PUSH r
23,`-12*8-9*$SIZE_T`($i
)
$PUSH r
24,`-12*8-8*$SIZE_T`($i
)
$PUSH r
25,`-12*8-7*$SIZE_T`($i
)
$PUSH r
26,`-12*8-6*$SIZE_T`($i
)
$PUSH r
27,`-12*8-5*$SIZE_T`($i
)
$PUSH r2
8,`-12*8-4*$SIZE_T`($i
)
$PUSH r2
9,`-12*8-3*$SIZE_T`($i
)
$PUSH r
30,`-12*8-2*$SIZE_T`($i
)
$PUSH r
31,`-12*8-1*$SIZE_T`($i
)
stfd f
20,`-12*8`($i
)
stfd f
21,`-11*8`($i
)
stfd f
22,`-10*8`($i
)
stfd f
23,`-9*8`($i
)
stfd f
24,`-8*8`($i
)
stfd f
25,`-7*8`($i
)
stfd f2
6,`-6*8`($i
)
stfd f2
7,`-5*8`($i
)
stfd f2
8,`-4*8`($i
)
stfd f2
9,`-3*8`($i
)
stfd f
30,`-2*8`($i
)
stfd f
31,`-1*8`($i
)
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
ld
$a0
,
0
(
$ap
)
;
pull
ap
[
0
]
value
...
...
@@ -1052,33 +1050,37 @@ Lcopy: ; copy or in-place refresh
___
$code
.=
<<___;
$POP r14,`2*$SIZE_T`($sp)
$POP r15,`3*$SIZE_T`($sp)
$POP r16,`4*$SIZE_T`($sp)
$POP r17,`5*$SIZE_T`($sp)
$POP r18,`6*$SIZE_T`($sp)
$POP r19,`7*$SIZE_T`($sp)
$POP r20,`8*$SIZE_T`($sp)
$POP r21,`9*$SIZE_T`($sp)
$POP r22,`10*$SIZE_T`($sp)
$POP r23,`11*$SIZE_T`($sp)
lfd f14,`12*$SIZE_T+0`($sp)
lfd f15,`12*$SIZE_T+8`($sp)
lfd f16,`12*$SIZE_T+16`($sp)
lfd f17,`12*$SIZE_T+24`($sp)
lfd f18,`12*$SIZE_T+32`($sp)
lfd f19,`12*$SIZE_T+40`($sp)
lfd f20,`12*$SIZE_T+48`($sp)
lfd f21,`12*$SIZE_T+56`($sp)
lfd f22,`12*$SIZE_T+64`($sp)
lfd f23,`12*$SIZE_T+72`($sp)
lfd f24,`12*$SIZE_T+80`($sp)
lfd f25,`12*$SIZE_T+88`($sp)
$POP $sp,0($sp)
$POP $i,0($sp)
li r3,1 ; signal "handled"
$POP r22,`-12*8-10*$SIZE_T`($i)
$POP r23,`-12*8-9*$SIZE_T`($i)
$POP r24,`-12*8-8*$SIZE_T`($i)
$POP r25,`-12*8-7*$SIZE_T`($i)
$POP r26,`-12*8-6*$SIZE_T`($i)
$POP r27,`-12*8-5*$SIZE_T`($i)
$POP r28,`-12*8-4*$SIZE_T`($i)
$POP r29,`-12*8-3*$SIZE_T`($i)
$POP r30,`-12*8-2*$SIZE_T`($i)
$POP r31,`-12*8-1*$SIZE_T`($i)
lfd f20,`-12*8`($i)
lfd f21,`-11*8`($i)
lfd f22,`-10*8`($i)
lfd f23,`-9*8`($i)
lfd f24,`-8*8`($i)
lfd f25,`-7*8`($i)
lfd f26,`-6*8`($i)
lfd f27,`-5*8`($i)
lfd f28,`-4*8`($i)
lfd f29,`-3*8`($i)
lfd f30,`-2*8`($i)
lfd f31,`-1*8`($i)
mr $sp,$i
blr
.long 0
.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>"
.byte 0,12,4,0,0x8c,10,6,0
.long 0
.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
___
$code
=~
s/\`([^\`]*)\`/eval $1/g
em
;
...
...
crypto/ppccpuid.pl
浏览文件 @
67150340
...
...
@@ -29,12 +29,16 @@ $code=<<___;
fcfid f1,f1
extrdi r0,r0,32,0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .OPENSSL_altivec_probe
.align 4
.OPENSSL_altivec_probe:
.long 0x10000484 # vor v0,v0,v0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .OPENSSL_wipe_cpu
.align 4
...
...
@@ -65,6 +69,8 @@ $code=<<___;
fmr f12,f31
fmr f13,f31
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .OPENSSL_atomic_add
.align 4
...
...
@@ -75,6 +81,9 @@ Ladd: lwarx r5,0,r3
bne- Ladd
$SIGNX r3,r0
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.globl .OPENSSL_rdtsc
.align 4
...
...
@@ -82,6 +91,8 @@ Ladd: lwarx r5,0,r3
mftb r3
mftbu r4
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .OPENSSL_cleanse
.align 4
...
...
@@ -111,6 +122,9 @@ Laligned:
andi. r4,r4,3
bne Little
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
___
{
my
(
$out
,
$cnt
,
$max
)
=
("
r3
","
r4
","
r5
");
...
...
@@ -145,6 +159,9 @@ Loop: mftb $tick
mr r3,$cnt
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.globl .OPENSSL_instrument_bus2
.align 4
...
...
@@ -193,6 +210,9 @@ Ldone2:
srwi $cnt,$cnt,2
sub r3,r0,$cnt
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
___
}
...
...
crypto/sha/asm/sha1-ppc.pl
浏览文件 @
67150340
...
...
@@ -24,12 +24,14 @@ $flavour = shift;
if
(
$flavour
=~
/64/
)
{
$SIZE_T
=
8
;
$LRSAVE
=
2
*$SIZE_T
;
$UCMP
=
"
cmpld
";
$STU
=
"
stdu
";
$POP
=
"
ld
";
$PUSH
=
"
std
";
}
elsif
(
$flavour
=~
/32/
)
{
$SIZE_T
=
4
;
$LRSAVE
=
$SIZE_T
;
$UCMP
=
"
cmplw
";
$STU
=
"
stwu
";
$POP
=
"
lwz
";
...
...
@@ -43,7 +45,8 @@ die "can't locate ppc-xlate.pl";
open
STDOUT
,"
| $^X
$xlate
$flavour
"
.
shift
||
die
"
can't call
$xlate
: $!
";
$FRAME
=
24
*$SIZE_T
;
$FRAME
=
24
*$SIZE_T
+
64
;
$LOCALS
=
6
*$SIZE_T
;
$K
=
"
r0
";
$sp
=
"
r1
";
...
...
@@ -162,9 +165,8 @@ $code=<<___;
.globl .sha1_block_data_order
.align 4
.sha1_block_data_order:
$STU $sp,-$FRAME($sp)
mflr r0
$STU $sp,`-($FRAME+64)`($sp)
$PUSH r0,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
...
...
@@ -182,6 +184,7 @@ $code=<<___;
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
lwz $A,0($ctx)
lwz $B,4($ctx)
lwz $C,8($ctx)
...
...
@@ -192,37 +195,14 @@ $code=<<___;
Laligned:
mtctr $num
bl Lsha1_block_private
Ldone:
$POP r0,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,`$FRAME+64`
blr
___
b Ldone
# PowerPC specification allows an implementation to be ill-behaved
# upon unaligned access which crosses page boundary. "Better safe
# than sorry" principle makes me treat it specially. But I don't
# look for particular offending word, but rather for 64-byte input
# block which crosses the boundary. Once found that block is aligned
# and hashed separately...
$code
.=
<<___;
; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align 4
Lunaligned:
subfic $t1,$inp,4096
...
...
@@ -237,7 +217,7 @@ Lunaligned:
Lcross_page:
li $t1,16
mtctr $t1
addi r20,$sp,$
FRAME ; spot below
the frame
addi r20,$sp,$
LOCALS ; spot within
the frame
Lmemcpy:
lbz r16,0($inp)
lbz r17,1($inp)
...
...
@@ -251,15 +231,40 @@ Lmemcpy:
addi r20,r20,4
bdnz Lmemcpy
$PUSH $inp,`$FRAME-$SIZE_T*1
9
`($sp)
$PUSH $inp,`$FRAME-$SIZE_T*1
8
`($sp)
li $t1,1
addi $inp,$sp,$
FRAME
addi $inp,$sp,$
LOCALS
mtctr $t1
bl Lsha1_block_private
$POP $inp,`$FRAME-$SIZE_T*1
9
`($sp)
$POP $inp,`$FRAME-$SIZE_T*1
8
`($sp)
addic. $num,$num,-1
bne- Lunaligned
b Ldone
Ldone:
$POP r0,`$FRAME+$LRSAVE`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
___
# This is private block function, which uses tailored calling
...
...
@@ -309,6 +314,8 @@ $code.=<<___;
addi $inp,$inp,`16*4`
bdnz- Lsha1_block_private
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
___
$code
.=
<<___;
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
...
...
crypto/sha/asm/sha512-ppc.pl
浏览文件 @
67150340
...
...
@@ -40,6 +40,7 @@ $output =shift;
if
(
$flavour
=~
/64/
)
{
$SIZE_T
=
8
;
$LRSAVE
=
2
*$SIZE_T
;
$STU
=
"
stdu
";
$UCMP
=
"
cmpld
";
$SHL
=
"
sldi
";
...
...
@@ -47,6 +48,7 @@ if ($flavour =~ /64/) {
$PUSH
=
"
std
";
}
elsif
(
$flavour
=~
/32/
)
{
$SIZE_T
=
4
;
$LRSAVE
=
$SIZE_T
;
$STU
=
"
stwu
";
$UCMP
=
"
cmplw
";
$SHL
=
"
slwi
";
...
...
@@ -87,7 +89,8 @@ if ($output =~ /512/) {
$SHR
=
"
srwi
";
}
$FRAME
=
32
*$SIZE_T
;
$FRAME
=
32
*$SIZE_T
+
16
*$SZ
;
$LOCALS
=
6
*$SIZE_T
;
$sp
=
"
r1
";
$toc
=
"
r2
";
...
...
@@ -179,13 +182,12 @@ $code=<<___;
.globl $func
.align 6
$func:
$STU $sp,-$FRAME($sp)
mflr r0
$STU $sp,`-($FRAME+16*$SZ)`($sp)
$SHL $num,$num,`log(16*$SZ)/log(2)`
$PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
...
...
@@ -206,6 +208,7 @@ $func:
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
$LD $A,`0*$SZ`($ctx)
mr $inp,r4 ; incarnate $inp
...
...
@@ -217,7 +220,7 @@ $func:
$LD $G,`6*$SZ`($ctx)
$LD $H,`7*$SZ`($ctx)
b LPICmeup
b
l
LPICmeup
LPICedup:
andi. r0,$inp,3
bne Lunaligned
...
...
@@ -226,40 +229,14 @@ Laligned:
$PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
bl Lsha2_block_private
Ldone:
$POP r0,`$FRAME-$SIZE_T*21`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,`$FRAME+16*$SZ`
blr
___
b Ldone
# PowerPC specification allows an implementation to be ill-behaved
# upon unaligned access which crosses page boundary. "Better safe
# than sorry" principle makes me treat it specially. But I don't
# look for particular offending word, but rather for the input
# block which crosses the boundary. Once found that block is aligned
# and hashed separately...
$code
.=
<<___;
; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for the input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align 4
Lunaligned:
subfic $t1,$inp,4096
...
...
@@ -278,7 +255,7 @@ Lunaligned:
Lcross_page:
li $t1,`16*$SZ/4`
mtctr $t1
addi r20,$sp,$
FRAME
; aligned spot below the frame
addi r20,$sp,$
LOCALS
; aligned spot within the frame
Lmemcpy:
lbz r16,0($inp)
lbz r17,1($inp)
...
...
@@ -293,8 +270,8 @@ Lmemcpy:
bdnz Lmemcpy
$PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
addi $t1,$sp,`$
FRAME+16*$SZ`
; fictitious end pointer
addi $inp,$sp,$
FRAME
; fictitious inp pointer
addi $t1,$sp,`$
LOCALS+16*$SZ`
; fictitious end pointer
addi $inp,$sp,$
LOCALS
; fictitious inp pointer
$PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
$PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
...
...
@@ -303,10 +280,36 @@ Lmemcpy:
$POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
addic. $num,$num,`-16*$SZ` ; num--
bne- Lunaligned
b Ldone
___
$code
.=
<<___;
Ldone:
$POP r0,`$FRAME+$LRSAVE`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.align 4
Lsha2_block_private:
___
...
...
@@ -372,6 +375,8 @@ $code.=<<___;
$ST $H,`7*$SZ`($ctx)
bne Lsha2_block_private
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
___
# Ugly hack here, because PPC assembler syntax seem to vary too
...
...
@@ -379,22 +384,15 @@ ___
$code
.=
<<___;
.align 6
LPICmeup:
bl LPIC
addi $Tbl,$Tbl,`64-4` ; "distance" between . and last nop
b LPICedup
nop
nop
nop
nop
nop
LPIC: mflr $Tbl
mflr r0
bcl 20,31,\$+4
mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
addi $Tbl,$Tbl,`64-8`
mtlr r0
blr
nop
nop
nop
nop
nop
nop
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space `64-9*4`
___
$code
.=<<
___
if
(
$SZ
==
8
);
.
long
0x428a2f98
,
0xd728ae22
,
0x71374491
,
0x23ef65cd
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录