Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
b4b48a10
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
1 年多 前同步成功
通知
10
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b4b48a10
编写于
12月 26, 2009
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ppc64-mont.pl: adapt for 32-bit and engage for all builds.
上级
7e765bf2
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
300 addition
and
37 deletion
+300
-37
Configure
Configure
+3
-3
TABLE
TABLE
+21
-21
crypto/bn/Makefile
crypto/bn/Makefile
+1
-0
crypto/bn/asm/ppc-mont.pl
crypto/bn/asm/ppc-mont.pl
+8
-3
crypto/bn/asm/ppc64-mont.pl
crypto/bn/asm/ppc64-mont.pl
+175
-8
crypto/ppccap.c
crypto/ppccap.c
+88
-0
crypto/ppccpuid.pl
crypto/ppccpuid.pl
+4
-2
未找到文件。
Configure
浏览文件 @
b4b48a10
...
...
@@ -135,8 +135,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::::::::void";
my $mips3_asm=":bn-mips3.o::::::::::::void";
my $s390x_asm="s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::void";
my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::void";
my $ppc32_asm="ppccpuid.o
:bn-ppc
.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
my $ppc64_asm="ppccpuid.o
:bn-ppc.o ppc
-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
my $ppc32_asm="ppccpuid.o
ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont
.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
my $ppc64_asm="ppccpuid.o
ppccap.o:bn-ppc.o ppc-mont.o ppc64
-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
my $no_asm=":::::::::::::void";
# As for $BSDthreads. Idea is to maintain "collective" set of flags,
...
...
@@ -547,7 +547,7 @@ my %table=(
##### MacOS X (a.k.a. Rhapsody or Darwin) setup
"rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::",
"darwin-ppc-cc","cc:-arch ppc -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC -fno-common:-arch ppc -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"darwin-ppc-cc","cc:-arch ppc -O3 -DB_ENDIAN
-Wa,-force_cpusubtype_ALL
::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC -fno-common:-arch ppc -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"darwin64-ppc-cc","cc:-arch ppc64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc64_asm}:osx64:dlfcn:darwin-shared:-fPIC -fno-common:-arch ppc64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"darwin-i386-cc","cc:-arch i386 -O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"debug-darwin-i386-cc","cc:-arch i386 -g3 -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
...
...
TABLE
浏览文件 @
b4b48a10
...
...
@@ -814,8 +814,8 @@ $thread_cflag = -qthreaded
$sys_id = AIX
$lflags =
$bn_ops = BN_LLONG RC4_CHAR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o
ppc-mont.o ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -845,8 +845,8 @@ $thread_cflag = -pthread
$sys_id = AIX
$lflags =
$bn_ops = BN_LLONG RC4_CHAR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o
ppc-mont.o ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -907,8 +907,8 @@ $thread_cflag = -qthreaded
$sys_id = AIX
$lflags =
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o ppc-mont.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o ppc-mont.o
ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -938,8 +938,8 @@ $thread_cflag = -pthread
$sys_id = AIX
$lflags =
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o ppc-mont.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o ppc-mont.o
ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -1211,14 +1211,14 @@ $multilib =
*** darwin-ppc-cc
$cc = cc
$cflags = -arch ppc -O3 -DB_ENDIAN
$cflags = -arch ppc -O3 -DB_ENDIAN
-Wa,-force_cpusubtype_ALL
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id = MACOSX
$lflags = -Wl,-search_paths_first%
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o
ppc-mont.o ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -1248,8 +1248,8 @@ $thread_cflag = -D_REENTRANT
$sys_id = MACOSX
$lflags = -Wl,-search_paths_first%
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o ppc-mont.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o ppc-mont.o
ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -1682,8 +1682,8 @@ $thread_cflag = -D_REENTRANT
$sys_id = MACOSX
$lflags =
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o
ppc-mont.o ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -2327,7 +2327,7 @@ $multilib =
*** debug-steve32
$cc = gcc
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -
DDEBUG_UNUSED -
g -pipe
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -g -pipe
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
...
...
@@ -2358,7 +2358,7 @@ $multilib =
*** debug-steve64
$cc = gcc
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -m64 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -
DDEBUG_UNUSED -
g -DMD32_REG_T=int
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -m64 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -g -DMD32_REG_T=int
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
...
...
@@ -3666,8 +3666,8 @@ $thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o
ppc-mont.o ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
@@ -3697,8 +3697,8 @@ $thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL
$cpuid_obj = ppccpuid.o
$bn_obj = bn-ppc.o ppc-mont.o
$cpuid_obj = ppccpuid.o
ppccap.o
$bn_obj = bn-ppc.o ppc-mont.o
ppc64-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
$bf_obj =
...
...
crypto/bn/Makefile
浏览文件 @
b4b48a10
...
...
@@ -103,6 +103,7 @@ pa-risc2.o: asm/pa-risc2.s
# ppc - AIX, Linux, MacOS X...
bn-ppc.s
:
asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@
ppc-mont.s
:
asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
ppc64-mont.s
:
asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@
alpha-mont.s
:
asm/alpha-mont.pl
$(PERL)
$<
|
$(CC)
-E
- |
tee
$@
>
/dev/null
...
...
crypto/bn/asm/ppc-mont.pl
浏览文件 @
b4b48a10
...
...
@@ -108,14 +108,19 @@ $code=<<___;
.machine "any"
.text
.globl .bn_mul_mont
.globl .bn_mul_mont
_int
.align 4
.bn_mul_mont:
.bn_mul_mont
_int
:
cmpwi $num,4
mr $rp,r3 ; $rp is reassigned
li r3,0
bltlr
___
$code
.=<<
___
if
(
$BNSZ
==
4
);
cmpwi
$num
,
32
;
longer
key
performance
is
not
better
bgelr
___
$code
.=
<<___;
slwi $num,$num,`log($BNSZ)/log(2)`
li $tj,-4096
addi $ovf,$num,`$FRAME+$RZONE`
...
...
crypto/bn/asm/ppc64-mont.pl
浏览文件 @
b4b48a10
...
...
@@ -45,23 +45,41 @@
# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
# in absolute terms, but it's apparently the way Power 6 is...
# December 2009
# Adapted for 32-bit build this module delivers 25-120%, more for
# longer keys, performance improvement on 1.8GHz PPC970. However!
# This implementation utilizes even 64-bit integer operations and
# trouble is that most PPC operating systems don't preserve upper
# halves of general purpose registers upon signal delivery. They do
# preserve them upon context switch, but not signalling:-( This means
# that asynchronous signals have to be blocked upon entry to this
# subroutine. Signal masking (and complementary unmasking) has quite
# an impact on performance, naturally larger for shorter keys. It's
# so severe that shorter key performance is as low as 1/3 of expected
# one. This is why this routine should be engaged for longer key
# operations only, see crypto/ppccap.c for further details.
# Alternative is to break dependence on upper halves of GPRs...
# MacOS X is an exception from this and doesn't require signal
# masking, and that's where above improvement coefficients were
# collected.
$flavour
=
shift
;
if
(
$flavour
=~
/32/
)
{
$SIZE_T
=
4
;
$RZONE
=
224
;
$FRAME
=
$SIZE_T
*
12
+
8
*
12
;
$fname
=
"
bn_mul_mont_
ppc
64
";
$fname
=
"
bn_mul_mont_
fpu
64
";
$STUX
=
"
stwux
";
# store indexed and update
$PUSH
=
"
stw
";
$POP
=
"
lwz
";
die
"
not implemented yet
";
}
elsif
(
$flavour
=~
/64/
)
{
$SIZE_T
=
8
;
$RZONE
=
288
;
$FRAME
=
$SIZE_T
*
12
+
8
*
12
;
$fname
=
"
bn_mul_mont
";
$fname
=
"
bn_mul_mont
_fpu64
";
# same as above, but 64-bit mnemonics...
$STUX
=
"
stdux
";
# store indexed and update
...
...
@@ -181,14 +199,14 @@ $code=<<___;
.globl .$fname
.align 5
.$fname:
cmpwi $num,
4
cmpwi $num,
`3*8/$SIZE_T`
mr $rp,r3 ; $rp is reassigned
li r3,0 ; possible "not handled" return code
bltlr-
andi. r0,$num,
1 ; $num has to be even
andi. r0,$num,
`16/$SIZE_T-1` ; $num has to be "even"
bnelr-
slwi $num,$num,
3 ; num*=8
slwi $num,$num,
`log($SIZE_T)/log(2)` ; num*=sizeof(BN_LONG)
li $i,-4096
slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
add $tp,$tp,$num ; place for tp[num+1]
...
...
@@ -220,11 +238,25 @@ $code=<<___;
stfd f23,`12*$SIZE_T+72`($sp)
stfd f24,`12*$SIZE_T+80`($sp)
stfd f25,`12*$SIZE_T+88`($sp)
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
ld
$a0
,
0
(
$ap
)
;
pull
ap
[
0
]
value
ld
$n0
,
0
(
$n0
)
;
pull
n0
[
0
]
value
ld
$t3
,
0
(
$bp
)
;
bp
[
0
]
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
mr
$t1
,
$n0
lwz
$a0
,
0
(
$ap
)
;
pull
ap
[
0
,
1
]
value
lwz
$t0
,
4
(
$ap
)
lwz
$n0
,
0
(
$t1
)
;
pull
n0
[
0
,
1
]
value
lwz
$t1
,
4
(
$t1
)
lwz
$t3
,
0
(
$bp
)
;
bp
[
0
,
1
]
lwz
$t2
,
4
(
$bp
)
insrdi
$a0
,
$t0
,
32
,
0
insrdi
$n0
,
$t1
,
32
,
0
insrdi
$t3
,
$t2
,
32
,
0
___
$code
.=
<<___;
addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
li $i,-64
add $nap_d,$tp,$num
...
...
@@ -258,6 +290,8 @@ $code=<<___;
std $t5,`$FRAME+40`($sp)
std $t6,`$FRAME+48`($sp)
std $t7,`$FRAME+56`($sp)
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
lwz
$t0
,
4
(
$ap
)
;
load
a
[
j
]
as
32
-
bit
word
pair
lwz
$t1
,
0
(
$ap
)
lwz
$t2
,
12
(
$ap
)
;
load
a
[
j
+
1
]
as
32
-
bit
word
pair
...
...
@@ -266,6 +300,18 @@ $code=<<___;
lwz
$t5
,
0
(
$np
)
lwz
$t6
,
12
(
$np
)
;
load
n
[
j
+
1
]
as
32
-
bit
word
pair
lwz
$t7
,
8
(
$np
)
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
lwz
$t0
,
0
(
$ap
)
;
load
a
[
j
..
j
+
3
]
as
32
-
bit
word
pairs
lwz
$t1
,
4
(
$ap
)
lwz
$t2
,
8
(
$ap
)
lwz
$t3
,
12
(
$ap
)
lwz
$t4
,
0
(
$np
)
;
load
n
[
j
..
j
+
3
]
as
32
-
bit
word
pairs
lwz
$t5
,
4
(
$np
)
lwz
$t6
,
8
(
$np
)
lwz
$t7
,
12
(
$np
)
___
$code
.=
<<___;
lfd $ba,`$FRAME+0`($sp)
lfd $bb,`$FRAME+8`($sp)
lfd $bc,`$FRAME+16`($sp)
...
...
@@ -374,6 +420,8 @@ $code=<<___;
.align 5
L1st:
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
lwz
$t0
,
4
(
$ap
)
;
load
a
[
j
]
as
32
-
bit
word
pair
lwz
$t1
,
0
(
$ap
)
lwz
$t2
,
12
(
$ap
)
;
load
a
[
j
+
1
]
as
32
-
bit
word
pair
...
...
@@ -382,6 +430,18 @@ L1st:
lwz
$t5
,
0
(
$np
)
lwz
$t6
,
12
(
$np
)
;
load
n
[
j
+
1
]
as
32
-
bit
word
pair
lwz
$t7
,
8
(
$np
)
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
lwz
$t0
,
0
(
$ap
)
;
load
a
[
j
..
j
+
3
]
as
32
-
bit
word
pairs
lwz
$t1
,
4
(
$ap
)
lwz
$t2
,
8
(
$ap
)
lwz
$t3
,
12
(
$ap
)
lwz
$t4
,
0
(
$np
)
;
load
n
[
j
..
j
+
3
]
as
32
-
bit
word
pairs
lwz
$t5
,
4
(
$np
)
lwz
$t6
,
8
(
$np
)
lwz
$t7
,
12
(
$np
)
___
$code
.=
<<___;
std $t0,`$FRAME+64`($sp)
std $t1,`$FRAME+72`($sp)
std $t2,`$FRAME+80`($sp)
...
...
@@ -559,7 +619,17 @@ L1st:
li $i,8 ; i=1
.align 5
Louter:
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
ldx
$t3
,
$bp
,
$i
;
bp
[
i
]
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
add
$t0
,
$bp
,
$i
lwz
$t3
,
0
(
$t0
)
;
bp
[
i
,
i
+
1
]
lwz
$t0
,
4
(
$t0
)
insrdi
$t3
,
$t0
,
32
,
0
___
$code
.=
<<___;
ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
mulld $t7,$a0,$t3 ; ap[0]*bp[i]
...
...
@@ -761,6 +831,13 @@ Linner:
stfd $T0b,`$FRAME+8`($sp)
add $t7,$t7,$carry
addc $t3,$t0,$t1
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
# adjust XER[CA]
extrdi
$t0
,
$t0
,
32
,
0
extrdi
$t1
,
$t1
,
32
,
0
adde
$t0
,
$t0
,
$t1
___
$code
.=
<<___;
stfd $T1a,`$FRAME+16`($sp)
stfd $T1b,`$FRAME+24`($sp)
insrdi $t4,$t7,16,0 ; 64..127 bits
...
...
@@ -768,6 +845,13 @@ Linner:
stfd $T2a,`$FRAME+32`($sp)
stfd $T2b,`$FRAME+40`($sp)
adde $t5,$t4,$t2
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
# adjust XER[CA]
extrdi
$t4
,
$t4
,
32
,
0
extrdi
$t2
,
$t2
,
32
,
0
adde
$t4
,
$t4
,
$t2
___
$code
.=
<<___;
stfd $T3a,`$FRAME+48`($sp)
stfd $T3b,`$FRAME+56`($sp)
addze $carry,$carry
...
...
@@ -816,7 +900,21 @@ Linner:
ld $t7,`$FRAME+72`($sp)
addc $t3,$t0,$t1
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
# adjust XER[CA]
extrdi
$t0
,
$t0
,
32
,
0
extrdi
$t1
,
$t1
,
32
,
0
adde
$t0
,
$t0
,
$t1
___
$code
.=
<<___;
adde $t5,$t4,$t2
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
# adjust XER[CA]
extrdi
$t4
,
$t4
,
32
,
0
extrdi
$t2
,
$t2
,
32
,
0
adde
$t4
,
$t4
,
$t2
___
$code
.=
<<___;
addze $carry,$carry
std $t3,-16($tp) ; tp[j-1]
...
...
@@ -835,7 +933,9 @@ Linner:
subf $nap_d,$t7,$nap_d ; rewind pointer
cmpw $i,$num
blt- Louter
___
$code
.=<<
___
if
(
$SIZE_T
==
8
);
subf
$np
,
$num
,
$np
;
rewind
np
addi
$j
,
$j
,
1
;
restore
counter
subfc
$i
,
$i
,
$i
;
j
=
0
and
"
clear
"
XER
[
CA
]
...
...
@@ -883,7 +983,74 @@ Lcopy: ; copy or in-place refresh
stdx
$i
,
$t4
,
$i
addi
$i
,
$i
,
16
bdnz
-
Lcopy
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
subf
$np
,
$num
,
$np
;
rewind
np
addi
$j
,
$j
,
1
;
restore
counter
subfc
$i
,
$i
,
$i
;
j
=
0
and
"
clear
"
XER
[
CA
]
addi
$tp
,
$sp
,`
$FRAME
+
$TRANSFER
`
addi
$np
,
$np
,
-
4
addi
$rp
,
$rp
,
-
4
addi
$ap
,
$sp
,`
$FRAME
+
$TRANSFER
+4
`
mtctr
$j
.
align
4
Lsub:
ld
$t0
,
8
(
$tp
)
;
load
tp
[
j
..
j
+
3
]
in
64
-
bit
word
order
ldu
$t2
,
16
(
$tp
)
lwz
$t4
,
4
(
$np
)
;
load
np
[
j
..
j
+
3
]
in
32
-
bit
word
order
lwz
$t5
,
8
(
$np
)
lwz
$t6
,
12
(
$np
)
lwzu
$t7
,
16
(
$np
)
extrdi
$t1
,
$t0
,
32
,
0
extrdi
$t3
,
$t2
,
32
,
0
subfe
$t4
,
$t4
,
$t0
;
tp
[
j
]
-
np
[
j
]
stw
$t0
,
4
(
$ap
)
;
save
tp
[
j
..
j
+
3
]
in
32
-
bit
word
order
subfe
$t5
,
$t5
,
$t1
;
tp
[
j
+
1
]
-
np
[
j
+
1
]
stw
$t1
,
8
(
$ap
)
subfe
$t6
,
$t6
,
$t2
;
tp
[
j
+
2
]
-
np
[
j
+
2
]
stw
$t2
,
12
(
$ap
)
subfe
$t7
,
$t7
,
$t3
;
tp
[
j
+
3
]
-
np
[
j
+
3
]
stwu
$t3
,
16
(
$ap
)
stw
$t4
,
4
(
$rp
)
stw
$t5
,
8
(
$rp
)
stw
$t6
,
12
(
$rp
)
stwu
$t7
,
16
(
$rp
)
bdnz
-
Lsub
li
$i
,
0
subfe
$ovf
,
$i
,
$ovf
;
handle
upmost
overflow
bit
addi
$tp
,
$sp
,`
$FRAME
+
$TRANSFER
+4
`
subf
$rp
,
$num
,
$rp
;
rewind
rp
and
$ap
,
$tp
,
$ovf
andc
$np
,
$rp
,
$ovf
or
$ap
,
$ap
,
$np
;
ap
=
borrow
?
tp:rp
addi
$tp
,
$sp
,`
$FRAME
+
$TRANSFER
`
mtctr
$j
.
align
4
Lcopy:
;
copy
or
in
-
place
refresh
lwz
$t0
,
4
(
$ap
)
lwz
$t1
,
8
(
$ap
)
lwz
$t2
,
12
(
$ap
)
lwzu
$t3
,
16
(
$ap
)
std
$i
,
8
(
$nap_d
)
;
zap
nap_d
std
$i
,
16
(
$nap_d
)
std
$i
,
24
(
$nap_d
)
std
$i
,
32
(
$nap_d
)
std
$i
,
40
(
$nap_d
)
std
$i
,
48
(
$nap_d
)
std
$i
,
56
(
$nap_d
)
stdu
$i
,
64
(
$nap_d
)
stw
$t0
,
4
(
$rp
)
stw
$t1
,
8
(
$rp
)
stw
$t2
,
12
(
$rp
)
stwu
$t3
,
16
(
$rp
)
std
$i
,
8
(
$tp
)
;
zap
tp
at
once
stdu
$i
,
16
(
$tp
)
bdnz
-
Lcopy
___
$code
.=
<<___;
$POP r14,`2*$SIZE_T`($sp)
$POP r15,`3*$SIZE_T`($sp)
$POP r16,`4*$SIZE_T`($sp)
...
...
crypto/ppccap.c
0 → 100644
浏览文件 @
b4b48a10
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <signal.h>
#include <openssl/bn.h>
/* Capability bit in OPENSSL_ppccap_P: when set, bn_mul_mont() may dispatch
 * to bn_mul_mont_fpu64(). */
#define PPC_FPU64 (1<<0)
/* Capability mask. OPENSSL_cpuid_setup() either loads it from the
 * OPENSSL_ppccap environment variable or, on builds where
 * sizeof(size_t)==4, sets/clears PPC_FPU64 according to a SIGILL probe. */
static
int
OPENSSL_ppccap_P
=
0
;
/* Signal set filled by OPENSSL_cpuid_setup() with every signal except
 * SIGSEGV and SIGILL; bn_mul_mont() installs it around the
 * bn_mul_mont_fpu64() call on the signal-masking path. */
static
sigset_t
all_masked
;
/*
 * Montgomery multiplication dispatcher for PowerPC.
 *
 * Chooses between the integer-only routine (bn_mul_mont_int) and the
 * routine gated by the PPC_FPU64 capability bit (bn_mul_mont_fpu64):
 *
 *   - PPC_FPU64 not set in OPENSSL_ppccap_P: always bn_mul_mont_int.
 *   - sizeof(size_t) != 4: bn_mul_mont_fpu64.
 *   - sizeof(size_t) == 4 on Mac OS X: bn_mul_mont_fpu64, called directly.
 *   - sizeof(size_t) == 4 elsewhere: bn_mul_mont_fpu64 only for num >= 32,
 *     and with the all_masked signal set installed for the duration of the
 *     call; shorter operands go to bn_mul_mont_int.
 *
 * The arguments are handed to the selected routine unchanged and its return
 * value is returned as is.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    /* Back ends, declared locally because no header in scope provides them. */
    int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, const BN_ULONG *np,
                          const BN_ULONG *n0, int num);
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap,
                        const BN_ULONG *bp, const BN_ULONG *np,
                        const BN_ULONG *n0, int num);

    const int have_fpu64 = (OPENSSL_ppccap_P & PPC_FPU64) != 0;

    /* Without the capability bit every configuration uses the integer code. */
    if (!have_fpu64)
        return bn_mul_mont_int(rp, ap, bp, np, n0, num);

    /*
     * Non-32-bit builds: the original author notes this path is a "must" on
     * Power 6, but that run-time detection is not implemented yet.
     */
    if (sizeof(size_t) != 4)
        return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);

#if (defined(__APPLE__) && defined(__MACH__))
    /* Mac OS X: no signal masking is done around the call. */
    return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
#else
    /*
     * The cut-off of 32 was determined experimentally on Linux 2.6.22 and
     * might have to be adjusted on AIX.
     */
    if (num < 32)
        return bn_mul_mont_int(rp, ap, bp, np, n0, num);

    {
        sigset_t saved_mask;
        int rv;

        /* Run the routine with the all_masked set installed, then restore. */
        sigprocmask(SIG_SETMASK, &all_masked, &saved_mask);
        rv = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
        sigprocmask(SIG_SETMASK, &saved_mask, NULL);

        return rv;
    }
#endif
}
/* Jump buffer armed by sigsetjmp() before the instruction probe runs. */
static sigjmp_buf ill_jmp;

/*
 * Signal handler installed for SIGILL while probing: abandons the faulting
 * code by jumping back to the sigsetjmp() point, passing the signal number
 * as the sigsetjmp() return value.
 */
static void ill_handler(int sig)
{
    siglongjmp(ill_jmp, sig);
}
void
OPENSSL_cpuid_setup
(
void
)
{
char
*
e
;
sigfillset
(
&
all_masked
);
sigdelset
(
&
all_masked
,
SIGSEGV
);
sigdelset
(
&
all_masked
,
SIGILL
);
if
((
e
=
getenv
(
"OPENSSL_ppccap"
)))
{
OPENSSL_ppccap_P
=
strtoul
(
e
,
NULL
,
0
);
return
;
}
if
(
sizeof
(
size_t
)
==
4
)
{
struct
sigaction
ill_oact
,
ill_act
;
sigset_t
oset
;
memset
(
&
ill_act
,
0
,
sizeof
(
ill_act
));
ill_act
.
sa_handler
=
ill_handler
;
sigfillset
(
&
ill_act
.
sa_mask
);
sigdelset
(
&
ill_act
.
sa_mask
,
SIGILL
);
sigprocmask
(
SIG_SETMASK
,
&
ill_act
.
sa_mask
,
&
oset
);
sigaction
(
SIGILL
,
&
ill_act
,
&
ill_oact
);
if
(
sigsetjmp
(
ill_jmp
,
0
)
==
0
)
{
OPENSSL_ppc64_probe
();
OPENSSL_ppccap_P
|=
PPC_FPU64
;
}
else
{
OPENSSL_ppccap_P
&=
~
PPC_FPU64
;
}
sigaction
(
SIGILL
,
&
ill_oact
,
NULL
);
sigprocmask
(
SIG_SETMASK
,
&
oset
,
NULL
);
}
}
crypto/ppccpuid.pl
浏览文件 @
b4b48a10
...
...
@@ -23,9 +23,11 @@ $code=<<___;
.machine "any"
.text
.globl .OPENSSL_
cpuid_setup
.globl .OPENSSL_
ppc64_probe
.align 4
.OPENSSL_cpuid_setup:
.OPENSSL_ppc64_probe:
fcfid f1,f1
extrdi r0,r0,32,0
blr
.globl .OPENSSL_wipe_cpu
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录