提交 904732f6 编写于 作者: A Andy Polyakov

C64x+ assembly pack: improve EABI support.

上级 cf5ecc3e
......@@ -410,7 +410,7 @@ my %table=(
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
#
# TI_CGT_C6000_7.3.x is a requirement
"linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
# Android: linux-* but without -DTERMIO and pointers to headers and libs.
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
......
......@@ -3995,7 +3995,7 @@ $multilib =
*** linux-c64xplus
$cc = cl6x
$cflags = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$cflags = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
......
......@@ -46,6 +46,11 @@ $code=<<___;
.text
.if __TI_EABI__
.nocmp
.asg AES_encrypt,_AES_encrypt
.asg AES_decrypt,_AES_decrypt
.asg AES_set_encrypt_key,_AES_set_encrypt_key
.asg AES_set_decrypt_key,_AES_set_decrypt_key
.asg AES_ctr32_encrypt,_AES_ctr32_encrypt
.endif
.asg B3,RA
......@@ -1021,7 +1026,11 @@ ___
}
# Tables are kept in endian-neutral manner
$code.=<<___;
.if __TI_EABI__
.sect ".text:aes_asm.const"
.else
.sect ".const:aes_asm"
.endif
.align 128
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
......@@ -1359,3 +1368,4 @@ AES_Td4:
___
print $code;
close STDOUT;
......@@ -12,6 +12,18 @@
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
.text
.if __TI_EABI__
.asg bn_mul_add_words,_bn_mul_add_words
.asg bn_mul_words,_bn_mul_words
.asg bn_sqr_words,_bn_sqr_words
.asg bn_add_words,_bn_add_words
.asg bn_sub_words,_bn_sub_words
.asg bn_div_words,_bn_div_words
.asg bn_sqr_comba8,_bn_sqr_comba8
.asg bn_mul_comba8,_bn_mul_comba8
.asg bn_sqr_comba4,_bn_sqr_comba4
.asg bn_mul_comba4,_bn_mul_comba4
.endif
.asg B3,RA
.asg A4,ARG0
......@@ -158,14 +170,39 @@ _bn_sub_words:
.endasmfunc
.global _bn_div_words
.global __divull
_bn_div_words:
.asmfunc
CALLP __divull,A3 ; jump to rts64plus.lib
|| MV ARG0,A5
|| MV ARG1,ARG0
|| MV ARG2,ARG1
|| ZERO B5
LMBD 1,A6,A0 ; leading zero bits in dv
LMBD 1,A4,A1 ; leading zero bits in hi
|| MVK 32,B0
CMPLTU A1,A0,A2
|| ADD A0,B0,B0
[ A2] BNOP RA
||[ A2] MVK -1,A4 ; return overflow
||[!A2] MV A4,A3 ; reassign hi
[!A2] MV B4,A4 ; reassign lo, will be quotient
||[!A2] MVC B0,ILC
[!A2] SHL A6,A0,A6 ; normalize dv
|| MVK 1,A1
[!A2] CMPLTU A3,A6,A1 ; hi<dv?
||[!A2] SHL A4,1,A5:A4 ; lo<<1
[!A1] SUB A3,A6,A3 ; hi-=dv
||[!A1] OR 1,A4,A4
[!A2] SHRU A3,31,A1 ; upper bit
||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
SPLOOP 3
[!A1] CMPLTU A3,A6,A1 ; hi<dv?
||[ A1] ZERO A1
|| SHL A4,1,A5:A4 ; lo<<1
[!A1] SUB A3,A6,A3 ; hi-=dv
||[!A1] OR 1,A4,A4 ; quotient
SHRU A3,31,A1 ; upper bit
|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
SPKERNEL
BNOP RA,5
.endasmfunc
;;====================================================================
......@@ -256,7 +293,7 @@ _bn_mul_comba4:
|| LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M
.else
;; This alternative is exercise in fully unrolled Comba
;; This alternative is an exercise in fully unrolled Comba
;; algorithm implementation that operates at n*(n+1)+12, or
;; as little as 32 cycles...
LDW *ARG1[0],B16 ; a[0]
......
......@@ -107,6 +107,9 @@ ___
}
$code.=<<___;
.text
.if __TI_EABI__
.asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
.endif
.global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
......
......@@ -6,6 +6,14 @@ open STDOUT,">$output";
$code.=<<___;
.text
.if __TI_EABI__
.asg OPENSSL_rdtsc,_OPENSSL_rdtsc
.asg OPENSSL_cleanse,_OPENSSL_cleanse
.asg OPENSSL_atomic_add,_OPENSSL_atomic_add
.asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
.asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus
.asg OPENSSL_instrument_bus2,_OPENSSL_instrument_bus2
.endif
.asg B3,RA
......
......@@ -35,6 +35,11 @@ open STDOUT,">$output";
$code.=<<___;
.text
.if __TI_EABI__
.asg gcm_gmult_1bit,_gcm_gmult_1bit
.asg gcm_gmult_4bit,_gcm_gmult_4bit
.asg gcm_ghash_4bit,_gcm_ghash_4bit
.endif
.asg B3,RA
......@@ -144,7 +149,7 @@ ___
# 8/2 S1 L1x S2 | ....
#####... ................|............
$code.=<<___;
XORMPY $H0,$xia,$H0x ; 0 ; HXi[i]
XORMPY $H0,$xia,$H0x ; 0 ; H(Xi[i]<<1)
|| XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1
......@@ -153,7 +158,7 @@ $code.=<<___;
XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter
XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=HXi[i]
XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H(Xi[i]<<1)
|| [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z
......
......@@ -38,6 +38,9 @@ open STDOUT,">$output";
$code=<<___;
.text
.if __TI_EABI__
.asg sha1_block_data_order,_sha1_block_data_order
.endif
.asg B3,RA
.asg A15,FP
......
......@@ -40,6 +40,7 @@ $code.=<<___;
.text
.if __TI_EABI__
.nocmp
.asg sha256_block_data_order,_sha256_block_data_order
.endif
.asg B3,RA
......@@ -275,7 +276,11 @@ outerloop?:
|| STW $H,*${CTXB}[7]
.endasmfunc
.if __TI_EABI__
.sect ".text:sha_asm.const"
.else
.sect ".const:sha_asm"
.endif
.align 128
K256:
.uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
......@@ -300,3 +305,4 @@ K256:
___
print $code;
close STDOUT;
......@@ -48,6 +48,7 @@ $code.=<<___;
.text
.if __TI_EABI__
.nocmp
.asg sha512_block_data_order,_sha512_block_data_order
.endif
.asg B3,RA
......@@ -370,7 +371,11 @@ break?:
NOP 2 ; wait till FP is committed
.endasmfunc
.if __TI_EABI__
.sect ".text:sha_asm.const"
.else
.sect ".const:sha_asm"
.endif
.align 128
K512:
.uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册