Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
5e19ee96
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
1 年多 前同步成功
通知
10
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5e19ee96
编写于
4月 28, 2010
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add ghash-parisc.pl.
上级
8a1c92ce
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
770 addition
and
5 deletion
+770
-5
Configure
Configure
+3
-2
TABLE
TABLE
+34
-2
crypto/modes/Makefile
crypto/modes/Makefile
+3
-1
crypto/modes/asm/ghash-parisc.pl
crypto/modes/asm/ghash-parisc.pl
+730
-0
未找到文件。
Configure
浏览文件 @
5e19ee96
...
...
@@ -135,8 +135,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
my $mips3_asm=":bn-mips3.o:::::::::::::void";
my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o::::::void";
my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes_ctr.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o::::::::void";
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::32";
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::64";
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::
ghash-parisc.o
:32";
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::
ghash-parisc.o
:64";
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o:::::::";
my $no_asm="::::::::::::::void";
...
...
@@ -292,6 +292,7 @@ my %table=(
# Since there is mention of this in shlib/hpux10-cc.sh
"hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o:::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
...
...
TABLE
浏览文件 @
5e19ee96
...
...
@@ -2933,7 +2933,7 @@ $rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$modes_obj =
ghash-parisc.o
$perlasm_scheme = 32
$dso_scheme = dl
$shared_target= hpux-shared
...
...
@@ -2944,6 +2944,38 @@ $ranlib =
$arflags =
$multilib = /pa1.1
*** hpux-parisc1_1-gcc
$cc = gcc
$cflags = -O3 -DB_ENDIAN -DBN_DIV2W
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -Wl,+s -ldld
$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1
$cpuid_obj = pariscid.o
$bn_obj = bn_asm.o parisc-mont.o
$des_obj =
$aes_obj = aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o
$bf_obj =
$md5_obj =
$sha1_obj = sha1-parisc.o sha256-parisc.o sha512-parisc.o
$cast_obj =
$rc4_obj = rc4-parisc.o
$rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj = ghash-parisc.o
$perlasm_scheme = 32
$dso_scheme = dl
$shared_target= hpux-shared
$shared_cflag = -fPIC
$shared_ldflag = -shared
$shared_extension = .sl.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib =
*** hpux-parisc2-cc
$cc = cc
$cflags = +DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT
...
...
@@ -3093,7 +3125,7 @@ $rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$modes_obj =
ghash-parisc.o
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= hpux-shared
...
...
crypto/modes/Makefile
浏览文件 @
5e19ee96
...
...
@@ -50,9 +50,11 @@ ghash-x86.s: asm/ghash-x86.pl
ghash-x86_64.s
:
asm/ghash-x86_64.pl
$(PERL)
asm/ghash-x86_64.pl
$(PERLASM_SCHEME)
>
$@
ghash-sparcv9.s
:
asm/ghash-sparcv9.pl
$(PERL)
asm/ghash-sparcv
8
.pl
$(CFLAGS)
>
$@
$(PERL)
asm/ghash-sparcv
9
.pl
$(CFLAGS)
>
$@
ghash-alpha.s
:
asm/ghash-alpha.pl
$(PERL)
$<
|
$(CC)
-E
- |
tee
$@
>
/dev/null
ghash-parisc.s
:
asm/ghash-parisc.pl
$
(
$PERL
)
asm/ghash-parisc.pl
$(PERLASM_SCHEME)
$@
# GNU make "catch all"
ghash-%.s
:
asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $(CFLAGS) > $@
...
...
crypto/modes/asm/ghash-parisc.pl
0 → 100644
浏览文件 @
5e19ee96
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19 cycles, which is more than twice as fast
# as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for 8
# cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.
# Command line: <flavour> [<output-file>]
#   $flavour - perlasm scheme; anything containing "64" selects the
#              64-bit (PA-RISC 2.0W) code path below.
#   $output  - file that receives the generated assembly.
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested file.  Use the three-argument form so
# the file name is never parsed for a mode, and fail loudly instead of
# silently writing the assembly to the wrong place.  When no output file
# is given the generated code still goes to the inherited STDOUT.
if (defined($output) && $output ne "") {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
# ABI-dependent parameters.  A flavour containing "64" selects the wide
# PA-RISC 2.0 settings (8-byte registers, std/ldd), anything else the
# 32-bit settings (4-byte registers, stw/ldw).
#
# Note: the 32-bit level is plain "1.0"; appending "\n\t.ALLOW\t2.0"
# was considered but is left disabled.
my $wide = ($flavour =~ /64/);

($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $NREGS) =
    $wide ? ("2.0W", 8, 80, 16, 6)
          : ("1.0",  4, 48, 20, 11);

# Mnemonics used by the prologue/epilogue templates.
($PUSH, $PUSHMA, $POP, $POPMB) =
    $wide ? ("std", "std,ma", "ldd", "ldd,mb")
          : ("stw", "stwm",   "ldw", "ldwm");

# Room for saved registers (10 slots) + frame marker
# [+ argument transfer].
$FRAME = 10*$SIZE_T + $FRAME_MARKER;
################# volatile registers
# Argument block, in the order the arguments are passed.
($Xi, $Htbl, $inp, $len) = map { "%r$_" } (26, 25, 24, 23);

# Working variables.  $Hhh deliberately aliases the $Htbl argument.
$Hhh = $Htbl;
($Hll, $Zhh, $Zll, $cnt) = map { "%r$_" } (22, 21, 20, 19);
($rem_4bit, $rem, $mask0xf0) = map { "%r$_" } (28, 29, 31);

################# preserved registers
($Thh, $Tll, $nlo, $nhi, $byte) = map { "%r$_" } (1, 2, 3, 4, 5);

# The 32-bit code path splits every 64-bit quantity into two halves and
# therefore needs six additional registers.
if ($SIZE_T==4) {
    ($Zhl, $Zlh, $Hhl, $Hlh, $Thl, $Tlh) = map { "%r$_" } (6, 7, 8, 9, 10, 11);
}

# Second remainder register; used in PA-RISC 2.0 code (shares %r6 with
# $Zhl, which only the 32-bit path defines).
$rem2 = "%r6";
$code
.=
<<___;
.LEVEL $LEVEL
.SPACE \$TEXT\$
.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
.EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
.ALIGN 64
gcm_gmult_4bit
.PROC
.CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
.ENTRY
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
$PUSHMA %r3,$FRAME(%sp)
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
$PUSH
%r7
,`
-
$FRAME
+4*
$SIZE_T
`(
%sp
)
$PUSH
%r8
,`
-
$FRAME
+5*
$SIZE_T
`(
%sp
)
$PUSH
%r9
,`
-
$FRAME
+6*
$SIZE_T
`(
%sp
)
$PUSH
%r10
,`
-
$FRAME
+7*
$SIZE_T
`(
%sp
)
$PUSH
%r11
,`
-
$FRAME
+8*
$SIZE_T
`(
%sp
)
___
$code
.=
<<___;
blr %r0,$rem_4bit
ldi 3,$rem
L\$pic_gmult
andcm $rem_4bit,$rem,$rem_4bit
addl $inp,$len,$len
ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
ldi 0xf0,$mask0xf0
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
ldi
31
,
$rem
mtctl
$rem
,
%cr11
extrd
,
u
,
*=
$rem
,
%sar
,
1
,
$rem
;
executes
on
PA
-
RISC
1.0
b
L
\
$parisc1_gmult
nop
___
$code
.=
<<___;
ldb 15($Xi),$nlo
ldo 8($Htbl),$Hll
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
ldd $nlo($Hll),$Zll
ldd $nlo($Hhh),$Zhh
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldb 14($Xi),$nlo
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
b L\$oop_gmult_pa2
ldi 13,$cnt
.ALIGN 8
L\$oop_gmult_pa2
xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nlo($Hll),$Tll
ldd $nlo($Hhh),$Thh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
xor $rem,$Zhh,$Zhh
depd,z $Zll,60,4,$rem
ldbx $cnt($Xi),$nlo
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
ldd $rem($rem_4bit),$rem
xor $Tll,$Zll,$Zll
addib,uv -1,$cnt,L\$oop_gmult_pa2
xor $Thh,$Zhh,$Zhh
xor $rem,$Zhh,$Zhh
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nlo($Hll),$Tll
ldd $nlo($Hhh),$Thh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
xor $rem,$Zhh,$Zhh
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
xor $rem,$Zhh,$Zhh
std $Zll,8($Xi)
std $Zhh,0($Xi)
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
b
L
\
$done_gmult
nop
L
\
$parisc1_gmult
ldb
15
(
$Xi
),
$nlo
ldo
12
(
$Htbl
),
$Hll
ldo
8
(
$Htbl
),
$Hlh
ldo
4
(
$Htbl
),
$Hhl
and
$mask0xf0
,
$nlo
,
$nhi
zdep
$nlo
,
27
,
4
,
$nlo
ldwx
$nlo
(
$Hll
),
$Zll
ldwx
$nlo
(
$Hlh
),
$Zlh
ldwx
$nlo
(
$Hhl
),
$Zhl
ldwx
$nlo
(
$Hhh
),
$Zhh
zdep
$Zll
,
28
,
4
,
$rem
ldb
14
(
$Xi
),
$nlo
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
extru
$Zhh
,
27
,
28
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
xor
$rem
,
$Zhh
,
$Zhh
and
$mask0xf0
,
$nlo
,
$nhi
zdep
$nlo
,
27
,
4
,
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nlo
(
$Hll
),
$Tll
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nlo
(
$Hlh
),
$Tlh
xor
$Thl
,
$Zhl
,
$Zhl
b
L
\
$oop_gmult_pa1
ldi
13
,
$cnt
.
ALIGN
8
L
\
$oop_gmult_pa1
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$nlo
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nlo
(
$Hhh
),
$Thh
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
ldbx
$cnt
(
$Xi
),
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$Thl
,
$Zhl
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
xor
$rem
,
$Zhh
,
$Zhh
zdep
$Zll
,
28
,
4
,
$rem
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
and
$mask0xf0
,
$nlo
,
$nhi
extru
$Zhh
,
27
,
28
,
$Zhh
zdep
$nlo
,
27
,
4
,
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nlo
(
$Hll
),
$Tll
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nlo
(
$Hlh
),
$Tlh
xor
$rem
,
$Zhh
,
$Zhh
addib
,
uv
-
1
,
$cnt
,
L
\
$oop_gmult_pa1
xor
$Thl
,
$Zhl
,
$Zhl
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$nlo
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nlo
(
$Hhh
),
$Thh
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$rem
,
$Zhh
,
$Zhh
xor
$Thl
,
$Zhl
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$Tll
,
$Zll
,
$Zll
xor
$Tlh
,
$Zlh
,
$Zlh
xor
$rem
,
$Zhh
,
$Zhh
stw
$Zll
,
12
(
$Xi
)
xor
$Thl
,
$Zhl
,
$Zhl
stw
$Zlh
,
8
(
$Xi
)
xor
$Thh
,
$Zhh
,
$Zhh
stw
$Zhl
,
4
(
$Xi
)
stw
$Zhh
,
0
(
$Xi
)
___
$code
.=
<<___;
L\$done_gmult
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
$POP
`
-
$FRAME
+4*
$SIZE_T
`(
%sp
),
%r7
$POP
`
-
$FRAME
+5*
$SIZE_T
`(
%sp
),
%r8
$POP
`
-
$FRAME
+6*
$SIZE_T
`(
%sp
),
%r9
$POP
`
-
$FRAME
+7*
$SIZE_T
`(
%sp
),
%r10
$POP
`
-
$FRAME
+8*
$SIZE_T
`(
%sp
),
%r11
___
$code
.=
<<___;
bv (%r2)
.EXIT
$POPMB -$FRAME(%sp),%r3
.PROCEND
.EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
.ALIGN 64
gcm_ghash_4bit
.PROC
.CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
.ENTRY
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
$PUSHMA %r3,$FRAME(%sp)
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
$PUSH
%r7
,`
-
$FRAME
+4*
$SIZE_T
`(
%sp
)
$PUSH
%r8
,`
-
$FRAME
+5*
$SIZE_T
`(
%sp
)
$PUSH
%r9
,`
-
$FRAME
+6*
$SIZE_T
`(
%sp
)
$PUSH
%r10
,`
-
$FRAME
+7*
$SIZE_T
`(
%sp
)
$PUSH
%r11
,`
-
$FRAME
+8*
$SIZE_T
`(
%sp
)
___
$code
.=
<<___;
blr %r0,$rem_4bit
ldi 3,$rem
L\$pic_ghash
andcm $rem_4bit,$rem,$rem_4bit
addl $inp,$len,$len
ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
ldi 0xf0,$mask0xf0
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
ldi
31
,
$rem
mtctl
$rem
,
%cr11
extrd
,
u
,
*=
$rem
,
%sar
,
1
,
$rem
;
executes
on
PA
-
RISC
1.0
b
L
\
$parisc1_ghash
nop
___
$code
.=
<<___;
ldb 15($Xi),$nlo
ldo 8($Htbl),$Hll
L\$outer_ghash_pa2
ldb 15($inp),$nhi
xor $nhi,$nlo,$nlo
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
ldd $nlo($Hll),$Zll
ldd $nlo($Hhh),$Zhh
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldb 14($Xi),$nlo
ldb 14($inp),$byte
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
xor $byte,$nlo,$nlo
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
b L\$oop_ghash_pa2
ldi 13,$cnt
.ALIGN 8
L\$oop_ghash_pa2
xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
depd,z $Zll,60,4,$rem2
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nlo($Hll),$Tll
ldd $nlo($Hhh),$Thh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldbx $cnt($Xi),$nlo
ldbx $cnt($inp),$byte
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
ldd $rem2($rem_4bit),$rem2
xor $rem2,$Zhh,$Zhh
xor $byte,$nlo,$nlo
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
and $mask0xf0,$nlo,$nhi
depd,z $nlo,59,4,$nlo
extrd,u $Zhh,59,60,$Zhh
xor $Tll,$Zll,$Zll
ldd $rem($rem_4bit),$rem
addib,uv -1,$cnt,L\$oop_ghash_pa2
xor $Thh,$Zhh,$Zhh
xor $rem,$Zhh,$Zhh
depd,z $Zll,60,4,$rem2
shrpd $Zhh,$Zll,4,$Zll
extrd,u $Zhh,59,60,$Zhh
ldd $nlo($Hll),$Tll
ldd $nlo($Hhh),$Thh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
depd,z $Zll,60,4,$rem
shrpd $Zhh,$Zll,4,$Zll
ldd $rem2($rem_4bit),$rem2
xor $rem2,$Zhh,$Zhh
ldd $nhi($Hll),$Tll
ldd $nhi($Hhh),$Thh
extrd,u $Zhh,59,60,$Zhh
xor $Tll,$Zll,$Zll
xor $Thh,$Zhh,$Zhh
ldd $rem($rem_4bit),$rem
xor $rem,$Zhh,$Zhh
std $Zll,8($Xi)
ldo 16($inp),$inp
std $Zhh,0($Xi)
cmpb,*<> $inp,$len,L\$outer_ghash_pa2
copy $Zll,$nlo
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
b
L
\
$done_ghash
nop
L
\
$parisc1_ghash
ldb
15
(
$Xi
),
$nlo
ldo
12
(
$Htbl
),
$Hll
ldo
8
(
$Htbl
),
$Hlh
ldo
4
(
$Htbl
),
$Hhl
L
\
$outer_ghash_pa1
ldb
15
(
$inp
),
$byte
xor
$byte
,
$nlo
,
$nlo
and
$mask0xf0
,
$nlo
,
$nhi
zdep
$nlo
,
27
,
4
,
$nlo
ldwx
$nlo
(
$Hll
),
$Zll
ldwx
$nlo
(
$Hlh
),
$Zlh
ldwx
$nlo
(
$Hhl
),
$Zhl
ldwx
$nlo
(
$Hhh
),
$Zhh
zdep
$Zll
,
28
,
4
,
$rem
ldb
14
(
$Xi
),
$nlo
ldb
14
(
$inp
),
$byte
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
extru
$Zhh
,
27
,
28
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
xor
$byte
,
$nlo
,
$nlo
xor
$rem
,
$Zhh
,
$Zhh
and
$mask0xf0
,
$nlo
,
$nhi
zdep
$nlo
,
27
,
4
,
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nlo
(
$Hll
),
$Tll
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nlo
(
$Hlh
),
$Tlh
xor
$Thl
,
$Zhl
,
$Zhl
b
L
\
$oop_ghash_pa1
ldi
13
,
$cnt
.
ALIGN
8
L
\
$oop_ghash_pa1
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$nlo
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nlo
(
$Hhh
),
$Thh
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
ldbx
$cnt
(
$Xi
),
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
ldbx
$cnt
(
$inp
),
$byte
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$Thl
,
$Zhl
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
xor
$rem
,
$Zhh
,
$Zhh
zdep
$Zll
,
28
,
4
,
$rem
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
xor
$byte
,
$nlo
,
$nlo
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
and
$mask0xf0
,
$nlo
,
$nhi
extru
$Zhh
,
27
,
28
,
$Zhh
zdep
$nlo
,
27
,
4
,
$nlo
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nlo
(
$Hll
),
$Tll
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nlo
(
$Hlh
),
$Tlh
xor
$rem
,
$Zhh
,
$Zhh
addib
,
uv
-
1
,
$cnt
,
L
\
$oop_ghash_pa1
xor
$Thl
,
$Zhl
,
$Zhl
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$nlo
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
ldwx
$nlo
(
$Hhh
),
$Thh
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
xor
$Tll
,
$Zll
,
$Zll
ldwx
$nhi
(
$Hll
),
$Tll
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
xor
$Tlh
,
$Zlh
,
$Zlh
ldwx
$nhi
(
$Hlh
),
$Tlh
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$rem
,
$Zhh
,
$Zhh
xor
$Thl
,
$Zhl
,
$Zhl
ldwx
$nhi
(
$Hhl
),
$Thl
xor
$Thh
,
$Zhh
,
$Zhh
ldwx
$nhi
(
$Hhh
),
$Thh
zdep
$Zll
,
28
,
4
,
$rem
ldwx
$rem
(
$rem_4bit
),
$rem
shrpw
$Zlh
,
$Zll
,
4
,
$Zll
shrpw
$Zhl
,
$Zlh
,
4
,
$Zlh
shrpw
$Zhh
,
$Zhl
,
4
,
$Zhl
extru
$Zhh
,
27
,
28
,
$Zhh
xor
$Tll
,
$Zll
,
$Zll
xor
$Tlh
,
$Zlh
,
$Zlh
xor
$rem
,
$Zhh
,
$Zhh
stw
$Zll
,
12
(
$Xi
)
xor
$Thl
,
$Zhl
,
$Zhl
stw
$Zlh
,
8
(
$Xi
)
xor
$Thh
,
$Zhh
,
$Zhh
stw
$Zhl
,
4
(
$Xi
)
ldo
16
(
$inp
),
$inp
stw
$Zhh
,
0
(
$Xi
)
comb
,
<>
$inp
,
$len
,
L
\
$outer_ghash_pa1
copy
$Zll
,
$nlo
___
$code
.=
<<___;
L\$done_ghash
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code
.=<<
___
if
(
$SIZE_T
==
4
);
$POP
`
-
$FRAME
+4*
$SIZE_T
`(
%sp
),
%r7
$POP
`
-
$FRAME
+5*
$SIZE_T
`(
%sp
),
%r8
$POP
`
-
$FRAME
+6*
$SIZE_T
`(
%sp
),
%r9
$POP
`
-
$FRAME
+7*
$SIZE_T
`(
%sp
),
%r10
$POP
`
-
$FRAME
+8*
$SIZE_T
`(
%sp
),
%r11
___
# Tail of gcm_ghash_4bit (return through %r2, restoring %r3 and popping
# the frame in the delay slot), followed by the shared remainder table
# L$rem_4bit and the module identification string.  Each table entry is
# a 16-bit constant shifted into the upper half of a word, followed by a
# zero word; the back-quoted expressions are evaluated by the
# post-processing loop at the end of this script.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...
# Encode a PA-RISC 2.0 "ldd" by hand.
#   $mod  - completer text following the mnemonic (e.g. "" or ",mb")
#   $args - operand text
# Returns a ".WORD" line carrying the opcode (with the original
# instruction as a trailing comment) for the two operand shapes handled
# below, or the instruction unchanged for any other shape.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    # format 4: indexed load, %rX(%rB),%rT
    if (my ($index, $base, $target) =
            $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $word = (0x03<<26) | ($base<<21) | ($index<<16) | (3<<6) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    # format 5: short displacement, d(%rB),%rT
    if (my ($disp, $base, $target) =
            $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $word = (0x03<<26) | ($base<<21) | (1<<12) | (3<<6) | $target;
        # encode offset: low four bits, then bit 4 placed below them
        $word |= (($disp & 0xF) << 17) | (($disp & 0x10) << 12);
        $word |= (1<<5)  if ($mod =~ /^,m/);
        $word |= (1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    return "\t" . $orig;
};
# Encode a PA-RISC 2.0 "std" by hand.
#   $mod  - completer text following the mnemonic
#   $args - operand text
# Only the %rS,d(%rB) shape is encoded (as a ".WORD" line with the
# original instruction as a comment); anything else is passed through.
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    # format 3 suffices
    my ($source, $disp, $base) =
        $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/
        or return "\t" . $orig;

    my $word = (0x1c<<26) | ($base<<21) | ($source<<16)
             | (($disp & 0x1FF8) << 1) | (($disp >> 13) & 1);
    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
};
# Encode a PA-RISC 2.0 "extrd" by hand.
#   $mod  - completer text following the mnemonic (e.g. ",u" or ",u,*=")
#   $args - operand text
# Handles the fixed-position form (%rS,pos,len,%rT) and the %sar form
# (%rS,%sar,len,%rT); any other operand shape is passed through as is.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";

    # Only the ",u" completer is used by this module, so it is
    # implicitly encoded rather than parsed.

    # format 15
    if (my ($source, $pos, $width, $target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $word = (0x36<<26) | ($source<<21) | ($target<<16);
        my $len  = 32 - $width;
        $word |= (($pos & 0x20) << 6) | (($pos & 0x1f) << 5);   # encode pos
        $word |= (($len & 0x20) << 7) | ($len & 0x1f);          # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    # format 12
    if (my ($source, $width, $target) =
            $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        my $word = (0x34<<26) | ($source<<21) | ($target<<16) | (2<<11) | (1<<9);
        my $len  = 32 - $width;
        $word |= (($len & 0x20) << 3) | ($len & 0x1f);          # encode len
        $word |= (1<<13) if ($mod =~ /,\**=/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    return "\t" . $orig;
};
# Encode a PA-RISC 2.0 "shrpd" by hand.
#   $mod  - completer text following the mnemonic
#   $args - operand text
# Handles the immediate shift form (%rA,%rB,sa,%rT) and the %sar form
# (%rA,%rB,%sar,%rT); any other operand shape is passed through as is.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";

    # format 14
    if (my ($high, $low, $shift, $target) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $word = (0x34<<26) | ($low<<21) | ($high<<16) | (1<<10) | $target;
        my $cpos = 63 - $shift;
        $word |= (($cpos & 0x20) << 6) | (($cpos & 0x1f) << 5); # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    # format 11
    if (my ($high, $low, $target) =
            $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        my $word = (0x34<<26) | ($low<<21) | ($high<<16) | (1<<9) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
    }

    return "\t" . $orig;
};
# Encode a PA-RISC 2.0 "depd" by hand.
#   $mod  - completer text following the mnemonic
#   $args - operand text
# Only the %rS,pos,len,%rT shape is encoded (as a ".WORD" line with the
# original instruction as a comment); anything else is passed through.
my $depd = sub {
    my ($mod, $args) = @_;
    my $orig = "depd$mod\t$args";

    # Only the ",z" completer is used by this module, so it is
    # implicitly encoded rather than parsed.

    # format 16
    my ($source, $pos, $width, $target) =
        $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/
        or return "\t" . $orig;

    my $word = (0x3c<<26) | ($target<<21) | ($source<<16);
    my $cpos = 63 - $pos;
    my $len  = 32 - $width;
    $word |= (($cpos & 0x20) << 6) | (($cpos & 0x1f) << 5);     # encode pos
    $word |= (($len & 0x20) << 7) | ($len & 0x1f);              # encode len
    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $orig;
};
# Translate one instruction into its final text.
#   $mnemonic - lower-case instruction name
#   $mod      - completer text immediately following the mnemonic
#   $args     - operand text
# If a file-scoped variable named after the mnemonic holds a code
# reference, that encoder produces the line; otherwise the instruction
# is re-emitted as "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    # Look the encoder up by name; the string eval is what lets this
    # sub see the lexical encoder closures declared in this file.
    my $encoder = eval("\$$mnemonic");

    if (ref($encoder) eq 'CODE') {
        return $encoder->($mod, $args);
    }
    return "\t$mnemonic$mod\t$args";
}
# Post-process the accumulated template line by line and write it out:
#   1. every back-quoted expression is replaced by its evaluated value;
#   2. for the 32-bit build ($SIZE_T==4) each instruction line (leading
#      whitespace + lower-case mnemonic) is passed through assemble(),
#      "cmpb,*" is rewritten to "comb," and a remaining ",*" completer
#      prefix is reduced to ",".
foreach my $line (split("\n",$code)) {
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
        $line =~ s/,\*/,/;
    }

    print $line,"\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time;
# abort rather than leave a silently truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录