提交 904732f6 作者: Andy Polyakov

C64x+ assembly pack: improve EABI support.

上级 cf5ecc3e
...@@ -410,7 +410,7 @@ my %table=( ...@@ -410,7 +410,7 @@ my %table=(
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
# #
# TI_CGT_C6000_7.3.x is a requirement # TI_CGT_C6000_7.3.x is a requirement
"linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true", "linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
# Android: linux-* but without -DTERMIO and pointers to headers and libs. # Android: linux-* but without -DTERMIO and pointers to headers and libs.
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
......
...@@ -3995,7 +3995,7 @@ $multilib = ...@@ -3995,7 +3995,7 @@ $multilib =
*** linux-c64xplus *** linux-c64xplus
$cc = cl6x $cc = cl6x
$cflags = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT $cflags = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$unistd = $unistd =
$thread_cflag = -D_REENTRANT $thread_cflag = -D_REENTRANT
$sys_id = $sys_id =
......
...@@ -46,6 +46,11 @@ $code=<<___; ...@@ -46,6 +46,11 @@ $code=<<___;
.text .text
.if __TI_EABI__ .if __TI_EABI__
.nocmp .nocmp
.asg AES_encrypt,_AES_encrypt
.asg AES_decrypt,_AES_decrypt
.asg AES_set_encrypt_key,_AES_set_encrypt_key
.asg AES_set_decrypt_key,_AES_set_decrypt_key
.asg AES_ctr32_encrypt,_AES_ctr32_encrypt
.endif .endif
.asg B3,RA .asg B3,RA
...@@ -1021,7 +1026,11 @@ ___ ...@@ -1021,7 +1026,11 @@ ___
} }
# Tables are kept in endian-neutral manner # Tables are kept in endian-neutral manner
$code.=<<___; $code.=<<___;
.if __TI_EABI__
.sect ".text:aes_asm.const"
.else
.sect ".const:aes_asm" .sect ".const:aes_asm"
.endif
.align 128 .align 128
AES_Te: AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
...@@ -1359,3 +1368,4 @@ AES_Td4: ...@@ -1359,3 +1368,4 @@ AES_Td4:
___ ___
print $code; print $code;
close STDOUT;
...@@ -12,6 +12,18 @@ ...@@ -12,6 +12,18 @@
;; SPLOOPs spin at ... 2*n cycles [plus epilogue]. ;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;==================================================================== ;;====================================================================
.text .text
.if __TI_EABI__
.asg bn_mul_add_words,_bn_mul_add_words
.asg bn_mul_words,_bn_mul_words
.asg bn_sqr_words,_bn_sqr_words
.asg bn_add_words,_bn_add_words
.asg bn_sub_words,_bn_sub_words
.asg bn_div_words,_bn_div_words
.asg bn_sqr_comba8,_bn_sqr_comba8
.asg bn_mul_comba8,_bn_mul_comba8
.asg bn_sqr_comba4,_bn_sqr_comba4
.asg bn_mul_comba4,_bn_mul_comba4
.endif
.asg B3,RA .asg B3,RA
.asg A4,ARG0 .asg A4,ARG0
...@@ -158,14 +170,39 @@ _bn_sub_words: ...@@ -158,14 +170,39 @@ _bn_sub_words:
.endasmfunc .endasmfunc
.global _bn_div_words .global _bn_div_words
.global __divull
_bn_div_words: _bn_div_words:
.asmfunc .asmfunc
CALLP __divull,A3 ; jump to rts64plus.lib LMBD 1,A6,A0 ; leading zero bits in dv
|| MV ARG0,A5 LMBD 1,A4,A1 ; leading zero bits in hi
|| MV ARG1,ARG0 || MVK 32,B0
|| MV ARG2,ARG1 CMPLTU A1,A0,A2
|| ZERO B5 || ADD A0,B0,B0
[ A2] BNOP RA
||[ A2] MVK -1,A4 ; return overflow
||[!A2] MV A4,A3 ; reassign hi
[!A2] MV B4,A4 ; reassign lo, will be quotient
||[!A2] MVC B0,ILC
[!A2] SHL A6,A0,A6 ; normalize dv
|| MVK 1,A1
[!A2] CMPLTU A3,A6,A1 ; hi<dv?
||[!A2] SHL A4,1,A5:A4 ; lo<<1
[!A1] SUB A3,A6,A3 ; hi-=dv
||[!A1] OR 1,A4,A4
[!A2] SHRU A3,31,A1 ; upper bit
||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
SPLOOP 3
[!A1] CMPLTU A3,A6,A1 ; hi<dv?
||[ A1] ZERO A1
|| SHL A4,1,A5:A4 ; lo<<1
[!A1] SUB A3,A6,A3 ; hi-=dv
||[!A1] OR 1,A4,A4 ; quotient
SHRU A3,31,A1 ; upper bit
|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
SPKERNEL
BNOP RA,5
.endasmfunc .endasmfunc
;;==================================================================== ;;====================================================================
...@@ -256,7 +293,7 @@ _bn_mul_comba4: ...@@ -256,7 +293,7 @@ _bn_mul_comba4:
|| LDW *A5++,B6 ; ap[0] || LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M || MV A0,A3 ; const A3=M
.else .else
;; This alternative is exercise in fully unrolled Comba ;; This alternative is an exercise in fully unrolled Comba
;; algorithm implementation that operates at n*(n+1)+12, or ;; algorithm implementation that operates at n*(n+1)+12, or
;; as little as 32 cycles... ;; as little as 32 cycles...
LDW *ARG1[0],B16 ; a[0] LDW *ARG1[0],B16 ; a[0]
......
...@@ -107,6 +107,9 @@ ___ ...@@ -107,6 +107,9 @@ ___
} }
$code.=<<___; $code.=<<___;
.text .text
.if __TI_EABI__
.asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
.endif
.global _bn_GF2m_mul_2x2 .global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2: _bn_GF2m_mul_2x2:
......
...@@ -6,6 +6,14 @@ open STDOUT,">$output"; ...@@ -6,6 +6,14 @@ open STDOUT,">$output";
$code.=<<___; $code.=<<___;
.text .text
.if __TI_EABI__
.asg OPENSSL_rdtsc,_OPENSSL_rdtsc
.asg OPENSSL_cleanse,_OPENSSL_cleanse
.asg OPENSSL_atomic_add,_OPENSSL_atomic_add
.asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
.asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus
.asg OPENSSL_instrument_bus2,_OPENSSL_instrument_bus2
.endif
.asg B3,RA .asg B3,RA
......
...@@ -35,6 +35,11 @@ open STDOUT,">$output"; ...@@ -35,6 +35,11 @@ open STDOUT,">$output";
$code.=<<___; $code.=<<___;
.text .text
.if __TI_EABI__
.asg gcm_gmult_1bit,_gcm_gmult_1bit
.asg gcm_gmult_4bit,_gcm_gmult_4bit
.asg gcm_ghash_4bit,_gcm_ghash_4bit
.endif
.asg B3,RA .asg B3,RA
...@@ -144,7 +149,7 @@ ___ ...@@ -144,7 +149,7 @@ ___
# 8/2 S1 L1x S2 | .... # 8/2 S1 L1x S2 | ....
#####... ................|............ #####... ................|............
$code.=<<___; $code.=<<___;
XORMPY $H0,$xia,$H0x ; 0 ; HXi[i] XORMPY $H0,$xia,$H0x ; 0 ; H(Xi[i]<<1)
|| XORMPY $H01u,$xib,$H01y || XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0 || [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1 XORMPY $H1,$xia,$H1x ; 1
...@@ -153,7 +158,7 @@ $code.=<<___; ...@@ -153,7 +158,7 @@ $code.=<<___;
XORMPY $H3,$xia,$H3x ; 3 XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y || XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter ||[!A0] MVK.D 15,A0 ; *--${xip} counter
XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=HXi[i] XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H(Xi[i]<<1)
|| [A0] SUB.S A0,1,A0 || [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5 XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z || AND.D $H01y,$FF000000,$H0z
......
...@@ -38,6 +38,9 @@ open STDOUT,">$output"; ...@@ -38,6 +38,9 @@ open STDOUT,">$output";
$code=<<___; $code=<<___;
.text .text
.if __TI_EABI__
.asg sha1_block_data_order,_sha1_block_data_order
.endif
.asg B3,RA .asg B3,RA
.asg A15,FP .asg A15,FP
......
...@@ -40,6 +40,7 @@ $code.=<<___; ...@@ -40,6 +40,7 @@ $code.=<<___;
.text .text
.if __TI_EABI__ .if __TI_EABI__
.nocmp .nocmp
.asg sha256_block_data_order,_sha256_block_data_order
.endif .endif
.asg B3,RA .asg B3,RA
...@@ -275,7 +276,11 @@ outerloop?: ...@@ -275,7 +276,11 @@ outerloop?:
|| STW $H,*${CTXB}[7] || STW $H,*${CTXB}[7]
.endasmfunc .endasmfunc
.if __TI_EABI__
.sect ".text:sha_asm.const"
.else
.sect ".const:sha_asm" .sect ".const:sha_asm"
.endif
.align 128 .align 128
K256: K256:
.uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 .uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
...@@ -300,3 +305,4 @@ K256: ...@@ -300,3 +305,4 @@ K256:
___ ___
print $code; print $code;
close STDOUT;
...@@ -48,6 +48,7 @@ $code.=<<___; ...@@ -48,6 +48,7 @@ $code.=<<___;
.text .text
.if __TI_EABI__ .if __TI_EABI__
.nocmp .nocmp
.asg sha512_block_data_order,_sha512_block_data_order
.endif .endif
.asg B3,RA .asg B3,RA
...@@ -370,7 +371,11 @@ break?: ...@@ -370,7 +371,11 @@ break?:
NOP 2 ; wait till FP is committed NOP 2 ; wait till FP is committed
.endasmfunc .endasmfunc
.if __TI_EABI__
.sect ".text:sha_asm.const"
.else
.sect ".const:sha_asm" .sect ".const:sha_asm"
.endif
.align 128 .align 128
K512: K512:
.uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd .uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册