提交 947716c1 编写于 作者: A Andy Polyakov

MIPS assembly pack: adapt it for MIPS[32|64]R6.

MIPS[32|64]R6 is binary- and source-incompatible with previous MIPS ISA
specifications. Fortunately, it is still possible to resolve the differences
in source code by using the standard pre-processor and by switching to the
trap-free versions of the addition and subtraction instructions.
Reviewed-by: Richard Levitte <levitte@openssl.org>
上级 a4324912
...@@ -216,7 +216,7 @@ ...@@ -216,7 +216,7 @@
}, },
mips32_asm => { mips32_asm => {
template => 1, template => 1,
bn_asm_src => "bn-mips.s mips-mont.s", bn_asm_src => "bn-mips.S mips-mont.S",
aes_asm_src => "aes_cbc.c aes-mips.S", aes_asm_src => "aes_cbc.c aes-mips.S",
sha1_asm_src => "sha1-mips.S sha256-mips.S", sha1_asm_src => "sha1-mips.S sha256-mips.S",
}, },
......
...@@ -65,8 +65,8 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 ...@@ -65,8 +65,8 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) { if ($flavour =~ /64|n32/i) {
$PTR_LA="dla"; $PTR_LA="dla";
$PTR_ADD="dadd"; # incidentally works even on n32 $PTR_ADD="daddu"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32 $PTR_SUB="dsubu"; # incidentally works even on n32
$PTR_INS="dins"; $PTR_INS="dins";
$REG_S="sd"; $REG_S="sd";
$REG_L="ld"; $REG_L="ld";
...@@ -74,8 +74,8 @@ if ($flavour =~ /64|n32/i) { ...@@ -74,8 +74,8 @@ if ($flavour =~ /64|n32/i) {
$SZREG=8; $SZREG=8;
} else { } else {
$PTR_LA="la"; $PTR_LA="la";
$PTR_ADD="add"; $PTR_ADD="addu";
$PTR_SUB="sub"; $PTR_SUB="subu";
$PTR_INS="ins"; $PTR_INS="ins";
$REG_S="sw"; $REG_S="sw";
$REG_L="lw"; $REG_L="lw";
...@@ -102,15 +102,13 @@ open STDOUT,">$output"; ...@@ -102,15 +102,13 @@ open STDOUT,">$output";
my ($MSB,$LSB)=(0,3); # automatically converted to little-endian my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
$code.=<<___; $code.=<<___;
#include "mips_arch.h"
.text .text
#ifdef OPENSSL_FIPSCANISTER #ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h> # include <openssl/fipssyms.h>
#endif #endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2 .option pic2
#endif #endif
...@@ -146,7 +144,7 @@ _mips_AES_encrypt: ...@@ -146,7 +144,7 @@ _mips_AES_encrypt:
xor $s2,$t2 xor $s2,$t2
xor $s3,$t3 xor $s3,$t3
sub $cnt,1 subu $cnt,1
#if defined(__mips_smartmips) #if defined(__mips_smartmips)
ext $i0,$s1,16,8 ext $i0,$s1,16,8
.Loop_enc: .Loop_enc:
...@@ -218,7 +216,7 @@ _mips_AES_encrypt: ...@@ -218,7 +216,7 @@ _mips_AES_encrypt:
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
sub $cnt,1 subu $cnt,1
$PTR_ADD $key0,16 $PTR_ADD $key0,16
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
...@@ -409,7 +407,7 @@ _mips_AES_encrypt: ...@@ -409,7 +407,7 @@ _mips_AES_encrypt:
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
sub $cnt,1 subu $cnt,1
$PTR_ADD $key0,16 $PTR_ADD $key0,16
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
...@@ -657,6 +655,12 @@ $code.=<<___; ...@@ -657,6 +655,12 @@ $code.=<<___;
.set reorder .set reorder
$PTR_LA $Tbl,AES_Te # PIC-ified 'load address' $PTR_LA $Tbl,AES_Te # PIC-ified 'load address'
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw $s0,0($inp)
lw $s1,4($inp)
lw $s2,8($inp)
lw $s3,12($inp)
#else
lwl $s0,0+$MSB($inp) lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp) lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp) lwl $s2,8+$MSB($inp)
...@@ -665,9 +669,16 @@ $code.=<<___; ...@@ -665,9 +669,16 @@ $code.=<<___;
lwr $s1,4+$LSB($inp) lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp) lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp) lwr $s3,12+$LSB($inp)
#endif
bal _mips_AES_encrypt bal _mips_AES_encrypt
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
sw $s0,0($out)
sw $s1,4($out)
sw $s2,8($out)
sw $s3,12($out)
#else
swr $s0,0+$LSB($out) swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out) swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out) swr $s2,8+$LSB($out)
...@@ -676,6 +687,7 @@ $code.=<<___; ...@@ -676,6 +687,7 @@ $code.=<<___;
swl $s1,4+$MSB($out) swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out) swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out) swl $s3,12+$MSB($out)
#endif
.set noreorder .set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp) $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
...@@ -720,7 +732,7 @@ _mips_AES_decrypt: ...@@ -720,7 +732,7 @@ _mips_AES_decrypt:
xor $s2,$t2 xor $s2,$t2
xor $s3,$t3 xor $s3,$t3
sub $cnt,1 subu $cnt,1
#if defined(__mips_smartmips) #if defined(__mips_smartmips)
ext $i0,$s3,16,8 ext $i0,$s3,16,8
.Loop_dec: .Loop_dec:
...@@ -792,7 +804,7 @@ _mips_AES_decrypt: ...@@ -792,7 +804,7 @@ _mips_AES_decrypt:
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
sub $cnt,1 subu $cnt,1
$PTR_ADD $key0,16 $PTR_ADD $key0,16
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
...@@ -985,7 +997,7 @@ _mips_AES_decrypt: ...@@ -985,7 +997,7 @@ _mips_AES_decrypt:
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
sub $cnt,1 subu $cnt,1
$PTR_ADD $key0,16 $PTR_ADD $key0,16
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
...@@ -1228,6 +1240,12 @@ $code.=<<___; ...@@ -1228,6 +1240,12 @@ $code.=<<___;
.set reorder .set reorder
$PTR_LA $Tbl,AES_Td # PIC-ified 'load address' $PTR_LA $Tbl,AES_Td # PIC-ified 'load address'
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw $s0,0($inp)
lw $s1,4($inp)
lw $s2,8($inp)
lw $s3,12($inp)
#else
lwl $s0,0+$MSB($inp) lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp) lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp) lwl $s2,8+$MSB($inp)
...@@ -1236,9 +1254,16 @@ $code.=<<___; ...@@ -1236,9 +1254,16 @@ $code.=<<___;
lwr $s1,4+$LSB($inp) lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp) lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp) lwr $s3,12+$LSB($inp)
#endif
bal _mips_AES_decrypt bal _mips_AES_decrypt
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
sw $s0,0($out)
sw $s1,4($out)
sw $s2,8($out)
sw $s3,12($out)
#else
swr $s0,0+$LSB($out) swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out) swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out) swr $s2,8+$LSB($out)
...@@ -1247,6 +1272,7 @@ $code.=<<___; ...@@ -1247,6 +1272,7 @@ $code.=<<___;
swl $s1,4+$MSB($out) swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out) swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out) swl $s3,12+$MSB($out)
#endif
.set noreorder .set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp) $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
...@@ -1295,35 +1321,52 @@ _mips_AES_set_encrypt_key: ...@@ -1295,35 +1321,52 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $rcon,$Tbl,256 $PTR_ADD $rcon,$Tbl,256
.set reorder .set reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw $rk0,0($inp) # load 128 bits
lw $rk1,4($inp)
lw $rk2,8($inp)
lw $rk3,12($inp)
#else
lwl $rk0,0+$MSB($inp) # load 128 bits lwl $rk0,0+$MSB($inp) # load 128 bits
lwl $rk1,4+$MSB($inp) lwl $rk1,4+$MSB($inp)
lwl $rk2,8+$MSB($inp) lwl $rk2,8+$MSB($inp)
lwl $rk3,12+$MSB($inp) lwl $rk3,12+$MSB($inp)
li $at,128
lwr $rk0,0+$LSB($inp) lwr $rk0,0+$LSB($inp)
lwr $rk1,4+$LSB($inp) lwr $rk1,4+$LSB($inp)
lwr $rk2,8+$LSB($inp) lwr $rk2,8+$LSB($inp)
lwr $rk3,12+$LSB($inp) lwr $rk3,12+$LSB($inp)
#endif
li $at,128
.set noreorder .set noreorder
beq $bits,$at,.L128bits beq $bits,$at,.L128bits
li $cnt,10 li $cnt,10
.set reorder .set reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw $rk4,16($inp) # load 192 bits
lw $rk5,20($inp)
#else
lwl $rk4,16+$MSB($inp) # load 192 bits lwl $rk4,16+$MSB($inp) # load 192 bits
lwl $rk5,20+$MSB($inp) lwl $rk5,20+$MSB($inp)
li $at,192
lwr $rk4,16+$LSB($inp) lwr $rk4,16+$LSB($inp)
lwr $rk5,20+$LSB($inp) lwr $rk5,20+$LSB($inp)
#endif
li $at,192
.set noreorder .set noreorder
beq $bits,$at,.L192bits beq $bits,$at,.L192bits
li $cnt,8 li $cnt,8
.set reorder .set reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw $rk6,24($inp) # load 256 bits
lw $rk7,28($inp)
#else
lwl $rk6,24+$MSB($inp) # load 256 bits lwl $rk6,24+$MSB($inp) # load 256 bits
lwl $rk7,28+$MSB($inp) lwl $rk7,28+$MSB($inp)
li $at,256
lwr $rk6,24+$LSB($inp) lwr $rk6,24+$LSB($inp)
lwr $rk7,28+$LSB($inp) lwr $rk7,28+$LSB($inp)
#endif
li $at,256
.set noreorder .set noreorder
beq $bits,$at,.L256bits beq $bits,$at,.L256bits
li $cnt,7 li $cnt,7
...@@ -1353,7 +1396,7 @@ _mips_AES_set_encrypt_key: ...@@ -1353,7 +1396,7 @@ _mips_AES_set_encrypt_key:
sw $rk1,4($key) sw $rk1,4($key)
sw $rk2,8($key) sw $rk2,8($key)
sw $rk3,12($key) sw $rk3,12($key)
sub $cnt,1 subu $cnt,1
$PTR_ADD $key,16 $PTR_ADD $key,16
_bias $i0,24 _bias $i0,24
...@@ -1410,7 +1453,7 @@ _mips_AES_set_encrypt_key: ...@@ -1410,7 +1453,7 @@ _mips_AES_set_encrypt_key:
sw $rk3,12($key) sw $rk3,12($key)
sw $rk4,16($key) sw $rk4,16($key)
sw $rk5,20($key) sw $rk5,20($key)
sub $cnt,1 subu $cnt,1
$PTR_ADD $key,24 $PTR_ADD $key,24
_bias $i0,24 _bias $i0,24
...@@ -1471,7 +1514,7 @@ _mips_AES_set_encrypt_key: ...@@ -1471,7 +1514,7 @@ _mips_AES_set_encrypt_key:
sw $rk5,20($key) sw $rk5,20($key)
sw $rk6,24($key) sw $rk6,24($key)
sw $rk7,28($key) sw $rk7,28($key)
sub $cnt,1 subu $cnt,1
_bias $i0,24 _bias $i0,24
_bias $i1,16 _bias $i1,16
...@@ -1653,7 +1696,7 @@ $code.=<<___; ...@@ -1653,7 +1696,7 @@ $code.=<<___;
lw $tp1,16($key) # modulo-scheduled lw $tp1,16($key) # modulo-scheduled
lui $x80808080,0x8080 lui $x80808080,0x8080
sub $cnt,1 subu $cnt,1
or $x80808080,0x8080 or $x80808080,0x8080
sll $cnt,2 sll $cnt,2
$PTR_ADD $key,16 $PTR_ADD $key,16
...@@ -1716,7 +1759,7 @@ $code.=<<___; ...@@ -1716,7 +1759,7 @@ $code.=<<___;
lw $tp1,4($key) # modulo-scheduled lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2 xor $tpe,$tp2
#endif #endif
sub $cnt,1 subu $cnt,1
sw $tpe,0($key) sw $tpe,0($key)
$PTR_ADD $key,4 $PTR_ADD $key,4
bnez $cnt,.Lmix bnez $cnt,.Lmix
......
...@@ -35,6 +35,7 @@ GENERATE[aesp8-ppc.s]=asm/aesp8-ppc.pl $(PERLASM_SCHEME) ...@@ -35,6 +35,7 @@ GENERATE[aesp8-ppc.s]=asm/aesp8-ppc.pl $(PERLASM_SCHEME)
GENERATE[aes-parisc.s]=asm/aes-parisc.pl $(PERLASM_SCHEME) GENERATE[aes-parisc.s]=asm/aes-parisc.pl $(PERLASM_SCHEME)
GENERATE[aes-mips.S]=asm/aes-mips.pl $(PERLASM_SCHEME) GENERATE[aes-mips.S]=asm/aes-mips.pl $(PERLASM_SCHEME)
INCLUDE[aes-mips.o]=..
GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl $(PERLASM_SCHEME) GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl $(PERLASM_SCHEME)
INCLUDE[aesv8-armx.o]=.. INCLUDE[aesv8-armx.o]=..
......
...@@ -56,14 +56,14 @@ ...@@ -56,14 +56,14 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) { if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32 $PTR_ADD="daddu"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32 $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd"; $REG_S="sd";
$REG_L="ld"; $REG_L="ld";
$SZREG=8; $SZREG=8;
} else { } else {
$PTR_ADD="add"; $PTR_ADD="addu";
$PTR_SUB="sub"; $PTR_SUB="subu";
$REG_S="sw"; $REG_S="sw";
$REG_L="lw"; $REG_L="lw";
$SZREG=4; $SZREG=4;
...@@ -121,6 +121,8 @@ $m1=$s11; ...@@ -121,6 +121,8 @@ $m1=$s11;
$FRAMESIZE=14; $FRAMESIZE=14;
$code=<<___; $code=<<___;
#include "mips_arch.h"
.text .text
.set noat .set noat
...@@ -183,27 +185,27 @@ $code.=<<___; ...@@ -183,27 +185,27 @@ $code.=<<___;
$PTR_SUB $sp,$num $PTR_SUB $sp,$num
and $sp,$at and $sp,$at
$MULTU $aj,$bi $MULTU ($aj,$bi)
$LD $alo,$BNSZ($ap) $LD $ahi,$BNSZ($ap)
$LD $nlo,$BNSZ($np) $LD $nhi,$BNSZ($np)
mflo $lo0 mflo ($lo0,$aj,$bi)
mfhi $hi0 mfhi ($hi0,$aj,$bi)
$MULTU $lo0,$n0 $MULTU ($lo0,$n0)
mflo $m1 mflo ($m1,$lo0,$n0)
$MULTU $alo,$bi $MULTU ($ahi,$bi)
mflo $alo mflo ($alo,$ahi,$bi)
mfhi $ahi mfhi ($ahi,$ahi,$bi)
$MULTU $nj,$m1 $MULTU ($nj,$m1)
mflo $lo1 mflo ($lo1,$nj,$m1)
mfhi $hi1 mfhi ($hi1,$nj,$m1)
$MULTU $nlo,$m1 $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0 $ADDU $lo1,$lo0
sltu $at,$lo1,$lo0 sltu $at,$lo1,$lo0
$ADDU $hi1,$at $ADDU $hi1,$at
mflo $nlo mflo ($nlo,$nhi,$m1)
mfhi $nhi mfhi ($nhi,$nhi,$m1)
move $tp,$sp move $tp,$sp
li $j,2*$BNSZ li $j,2*$BNSZ
...@@ -215,25 +217,25 @@ $code.=<<___; ...@@ -215,25 +217,25 @@ $code.=<<___;
$LD $aj,($aj) $LD $aj,($aj)
$LD $nj,($nj) $LD $nj,($nj)
$MULTU $aj,$bi $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0 $ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1 $ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0 sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1 sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at $ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0 $ADDU $hi1,$nhi,$t0
mflo $alo mflo ($alo,$aj,$bi)
mfhi $ahi mfhi ($ahi,$aj,$bi)
$ADDU $lo1,$lo0 $ADDU $lo1,$lo0
sltu $at,$lo1,$lo0 sltu $at,$lo1,$lo0
$MULTU $nj,$m1 $MULTU ($nj,$m1)
$ADDU $hi1,$at $ADDU $hi1,$at
addu $j,$BNSZ addu $j,$BNSZ
$ST $lo1,($tp) $ST $lo1,($tp)
sltu $t0,$j,$num sltu $t0,$j,$num
mflo $nlo mflo ($nlo,$nj,$m1)
mfhi $nhi mfhi ($nhi,$nj,$m1)
bnez $t0,.L1st bnez $t0,.L1st
$PTR_ADD $tp,$BNSZ $PTR_ADD $tp,$BNSZ
...@@ -263,34 +265,34 @@ $code.=<<___; ...@@ -263,34 +265,34 @@ $code.=<<___;
$PTR_ADD $bi,$bp,$i $PTR_ADD $bi,$bp,$i
$LD $bi,($bi) $LD $bi,($bi)
$LD $aj,($ap) $LD $aj,($ap)
$LD $alo,$BNSZ($ap) $LD $ahi,$BNSZ($ap)
$LD $tj,($sp) $LD $tj,($sp)
$MULTU $aj,$bi $MULTU ($aj,$bi)
$LD $nj,($np) $LD $nj,($np)
$LD $nlo,$BNSZ($np) $LD $nhi,$BNSZ($np)
mflo $lo0 mflo ($lo0,$aj,$bi)
mfhi $hi0 mfhi ($hi0,$aj,$bi)
$ADDU $lo0,$tj $ADDU $lo0,$tj
$MULTU $lo0,$n0 $MULTU ($lo0,$n0)
sltu $at,$lo0,$tj sltu $at,$lo0,$tj
$ADDU $hi0,$at $ADDU $hi0,$at
mflo $m1 mflo ($m1,$lo0,$n0)
$MULTU $alo,$bi $MULTU ($ahi,$bi)
mflo $alo mflo ($alo,$ahi,$bi)
mfhi $ahi mfhi ($ahi,$ahi,$bi)
$MULTU $nj,$m1 $MULTU ($nj,$m1)
mflo $lo1 mflo ($lo1,$nj,$m1)
mfhi $hi1 mfhi ($hi1,$nj,$m1)
$MULTU $nlo,$m1 $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0 $ADDU $lo1,$lo0
sltu $at,$lo1,$lo0 sltu $at,$lo1,$lo0
$ADDU $hi1,$at $ADDU $hi1,$at
mflo $nlo mflo ($nlo,$nhi,$m1)
mfhi $nhi mfhi ($nhi,$nhi,$m1)
move $tp,$sp move $tp,$sp
li $j,2*$BNSZ li $j,2*$BNSZ
...@@ -303,19 +305,19 @@ $code.=<<___; ...@@ -303,19 +305,19 @@ $code.=<<___;
$LD $aj,($aj) $LD $aj,($aj)
$LD $nj,($nj) $LD $nj,($nj)
$MULTU $aj,$bi $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0 $ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1 $ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0 sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1 sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at $ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0 $ADDU $hi1,$nhi,$t0
mflo $alo mflo ($alo,$aj,$bi)
mfhi $ahi mfhi ($ahi,$aj,$bi)
$ADDU $lo0,$tj $ADDU $lo0,$tj
addu $j,$BNSZ addu $j,$BNSZ
$MULTU $nj,$m1 $MULTU ($nj,$m1)
sltu $at,$lo0,$tj sltu $at,$lo0,$tj
$ADDU $lo1,$lo0 $ADDU $lo1,$lo0
$ADDU $hi0,$at $ADDU $hi0,$at
...@@ -323,8 +325,8 @@ $code.=<<___; ...@@ -323,8 +325,8 @@ $code.=<<___;
$LD $tj,2*$BNSZ($tp) $LD $tj,2*$BNSZ($tp)
$ADDU $hi1,$t0 $ADDU $hi1,$t0
sltu $at,$j,$num sltu $at,$j,$num
mflo $nlo mflo ($nlo,$nj,$m1)
mfhi $nhi mfhi ($nhi,$nj,$m1)
$ST $lo1,($tp) $ST $lo1,($tp)
bnez $at,.Linner bnez $at,.Linner
$PTR_ADD $tp,$BNSZ $PTR_ADD $tp,$BNSZ
......
此差异已折叠。
...@@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=.. ...@@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=..
GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME) GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[sparcv9-gf2m.o]=.. INCLUDE[sparcv9-gf2m.o]=..
GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME) GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME) INCLUDE[bn-mips.o]=..
GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
INCLUDE[mips-mont.o]=..
GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME) GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME) GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
...@@ -64,16 +66,8 @@ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME) ...@@ -64,16 +66,8 @@ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[armv4-gf2m.o]=.. INCLUDE[armv4-gf2m.o]=..
GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME) GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c OVERRIDES=pa-risc2W.o pa-risc2.c
BEGINRAW[Makefile] BEGINRAW[Makefile]
##### BN assembler implementations
{- $builddir -}/bn-mips3.o: {- $sourcedir -}/asm/mips3.s
@if [ "$(CC)" = "gcc" ]; then \
ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \
as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \
else $(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi
# GNU assembler fails to compile PA-RISC2 modules, insist on calling # GNU assembler fails to compile PA-RISC2 modules, insist on calling
# vendor assembler... # vendor assembler...
{- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s {- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s
......
/*
* Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#ifndef __MIPS_ARCH_H__
# define __MIPS_ARCH_H__

/*
 * Normalise the ISA-level macros so that code only has to test the R2
 * baseline: SmartMIPS and any 32-bit revision R3/R5/R6 imply
 * _MIPS_ARCH_MIPS32R2.
 *
 * Every line of the multi-line condition must end in a backslash.
 * Otherwise the directive stops early and the remainder of the
 * condition is passed through to the assembler as stray text.
 */
# if (defined(__mips_smartmips) || defined(_MIPS_ARCH_MIPS32R3) || \
      defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)) \
      && !defined(_MIPS_ARCH_MIPS32R2)
#  define _MIPS_ARCH_MIPS32R2
# endif

/* Likewise, any 64-bit revision R3/R5/R6 implies _MIPS_ARCH_MIPS64R2. */
# if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \
      defined(_MIPS_ARCH_MIPS64R6)) \
      && !defined(_MIPS_ARCH_MIPS64R2)
#  define _MIPS_ARCH_MIPS64R2
# endif

/*
 * Multiplication wrappers.  Callers write the classic sequence with
 * explicit operands:
 *
 *      multu   (rs,rt)         (or dmultu)
 *      mflo    (lo,rs,rt)
 *      mfhi    (hi,rs,rt)
 *
 * Before R6 these expand to the plain multu/mflo/mfhi instructions and
 * the extra rs,rt arguments of mflo/mfhi are dropped.  On R6 the
 * multiply wrapper expands to nothing and each mflo/mfhi becomes a
 * three-operand mulu/muhu (dmulu/dmuhu on 64-bit), so the product is
 * computed at the mflo/mfhi site: rs and rt must still hold the
 * multiplicands there, and rd of mflo must not overwrite an operand
 * that a following mfhi still needs.
 *
 * NOTE(review): the 64R6 branch provides no multu() wrapper and the
 * 32R6 branch provides no dmultu() wrapper; confirm that no code path
 * uses the other width under these macros.
 */
# if defined(_MIPS_ARCH_MIPS64R6)
#  define dmultu(rs,rt)
#  define mflo(rd,rs,rt)	dmulu	rd,rs,rt
#  define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
# elif defined(_MIPS_ARCH_MIPS32R6)
#  define multu(rs,rt)
#  define mflo(rd,rs,rt)	mulu	rd,rs,rt
#  define mfhi(rd,rs,rt)	muhu	rd,rs,rt
# else
#  define dmultu(rs,rt)		dmultu	rs,rt
#  define multu(rs,rt)		multu	rs,rt
#  define mflo(rd,rs,rt)	mflo	rd
#  define mfhi(rd,rs,rt)	mfhi	rd
# endif

#endif
...@@ -67,6 +67,8 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; ...@@ -67,6 +67,8 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
$code.=<<___; $code.=<<___;
#include "mips_arch.h"
#ifdef MIPSEB #ifdef MIPSEB
# define MSB 0 # define MSB 0
# define LSB 7 # define LSB 7
...@@ -92,10 +94,15 @@ poly1305_init: ...@@ -92,10 +94,15 @@ poly1305_init:
beqz $inp,.Lno_key beqz $inp,.Lno_key
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp)
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp) ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp) ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp) ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp) ldr $in1,8+LSB($inp)
#endif
#ifdef MIPSEB #ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2) # if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap dsbh $in0,$in0 # byte swap
...@@ -182,7 +189,7 @@ poly1305_blocks_internal: ...@@ -182,7 +189,7 @@ poly1305_blocks_internal:
.frame $sp,6*8,$ra .frame $sp,6*8,$ra
.mask $SAVED_REGS_MASK,-8 .mask $SAVED_REGS_MASK,-8
.set noreorder .set noreorder
dsub $sp,6*8 dsubu $sp,6*8
sd $s5,40($sp) sd $s5,40($sp)
sd $s4,32($sp) sd $s4,32($sp)
___ ___
...@@ -204,11 +211,16 @@ $code.=<<___; ...@@ -204,11 +211,16 @@ $code.=<<___;
ld $s1,40($ctx) ld $s1,40($ctx)
.Loop: .Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp) # load input
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp) # load input ldl $in0,0+MSB($inp) # load input
ldl $in1,8+MSB($inp) ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp) ldr $in0,0+LSB($inp)
daddiu $len,-1
ldr $in1,8+LSB($inp) ldr $in1,8+LSB($inp)
#endif
daddiu $len,-1
daddiu $inp,16 daddiu $inp,16
#ifdef MIPSEB #ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2) # if defined(_MIPS_ARCH_MIPS64R2)
...@@ -258,42 +270,42 @@ $code.=<<___; ...@@ -258,42 +270,42 @@ $code.=<<___;
sltu $tmp1,$h1,$in1 sltu $tmp1,$h1,$in1
daddu $h1,$tmp0 daddu $h1,$tmp0
dmultu $r0,$h0 # h0*r0 dmultu ($r0,$h0) # h0*r0
daddu $h2,$padbit daddu $h2,$padbit
sltu $tmp0,$h1,$tmp0 sltu $tmp0,$h1,$tmp0
mflo $d0 mflo ($d0,$r0,$h0)
mfhi $d1 mfhi ($d1,$r0,$h0)
dmultu $s1,$h1 # h1*5*r1 dmultu ($s1,$h1) # h1*5*r1
daddu $tmp0,$tmp1 daddu $tmp0,$tmp1
daddu $h2,$tmp0 daddu $h2,$tmp0
mflo $tmp0 mflo ($tmp0,$s1,$h1)
mfhi $tmp1 mfhi ($tmp1,$s1,$h1)
dmultu $r1,$h0 # h0*r1 dmultu ($r1,$h0) # h0*r1
daddu $d0,$tmp0 daddu $d0,$tmp0
daddu $d1,$tmp1 daddu $d1,$tmp1
mflo $tmp2 mflo ($tmp2,$r1,$h0)
mfhi $d2 mfhi ($d2,$r1,$h0)
sltu $tmp0,$d0,$tmp0 sltu $tmp0,$d0,$tmp0
daddu $d1,$tmp0 daddu $d1,$tmp0
dmultu $r0,$h1 # h1*r0 dmultu ($r0,$h1) # h1*r0
daddu $d1,$tmp2 daddu $d1,$tmp2
sltu $tmp2,$d1,$tmp2 sltu $tmp2,$d1,$tmp2
mflo $tmp0 mflo ($tmp0,$r0,$h1)
mfhi $tmp1 mfhi ($tmp1,$r0,$h1)
daddu $d2,$tmp2 daddu $d2,$tmp2
dmultu $s1,$h2 # h2*5*r1 dmultu ($s1,$h2) # h2*5*r1
daddu $d1,$tmp0 daddu $d1,$tmp0
daddu $d2,$tmp1 daddu $d2,$tmp1
mflo $tmp2 mflo ($tmp2,$s1,$h2)
dmultu $r0,$h2 # h2*r0 dmultu ($r0,$h2) # h2*r0
sltu $tmp0,$d1,$tmp0 sltu $tmp0,$d1,$tmp0
daddu $d2,$tmp0 daddu $d2,$tmp0
mflo $tmp3 mflo ($tmp3,$r0,$h2)
daddu $d1,$tmp2 daddu $d1,$tmp2
daddu $d2,$tmp3 daddu $d2,$tmp3
...@@ -329,7 +341,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue ...@@ -329,7 +341,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
___ ___
$code.=<<___; $code.=<<___;
jr $ra jr $ra
dadd $sp,6*8 daddu $sp,6*8
.end poly1305_blocks_internal .end poly1305_blocks_internal
___ ___
} }
......
...@@ -13,6 +13,7 @@ INCLUDE[poly1305-armv4.o]=.. ...@@ -13,6 +13,7 @@ INCLUDE[poly1305-armv4.o]=..
GENERATE[poly1305-armv8.S]=asm/poly1305-armv8.pl $(PERLASM_SCHEME) GENERATE[poly1305-armv8.S]=asm/poly1305-armv8.pl $(PERLASM_SCHEME)
INCLUDE[poly1305-armv8.o]=.. INCLUDE[poly1305-armv8.o]=..
GENERATE[poly1305-mips.S]=asm/poly1305-mips.pl $(PERLASM_SCHEME) GENERATE[poly1305-mips.S]=asm/poly1305-mips.pl $(PERLASM_SCHEME)
INCLUDE[poly1305-mips.o]=..
BEGINRAW[Makefile(unix)] BEGINRAW[Makefile(unix)]
{- $builddir -}/poly1305-%.S: {- $sourcedir -}/asm/poly1305-%.pl {- $builddir -}/poly1305-%.S: {- $sourcedir -}/asm/poly1305-%.pl
......
...@@ -56,15 +56,15 @@ ...@@ -56,15 +56,15 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) { if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32 $PTR_ADD="daddu"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32 $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd"; $REG_S="sd";
$REG_L="ld"; $REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32 $PTR_SLL="dsll"; # incidentally works even on n32
$SZREG=8; $SZREG=8;
} else { } else {
$PTR_ADD="add"; $PTR_ADD="addu";
$PTR_SUB="sub"; $PTR_SUB="subu";
$REG_S="sw"; $REG_S="sw";
$REG_L="lw"; $REG_L="lw";
$PTR_SLL="sll"; $PTR_SLL="sll";
...@@ -126,10 +126,14 @@ $code.=<<___; ...@@ -126,10 +126,14 @@ $code.=<<___;
addu $e,$K # $i addu $e,$K # $i
xor $t0,$c,$d xor $t0,$c,$d
rotr $t1,$a,27 rotr $t1,$a,27
lwl @X[$j],$j*4+$MSB($inp)
and $t0,$b and $t0,$b
addu $e,$t1 addu $e,$t1
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lw @X[$j],$j*4($inp)
#else
lwl @X[$j],$j*4+$MSB($inp)
lwr @X[$j],$j*4+$LSB($inp) lwr @X[$j],$j*4+$LSB($inp)
#endif
xor $t0,$d xor $t0,$d
addu $e,@X[$i] addu $e,@X[$i]
rotr $b,$b,2 rotr $b,$b,2
...@@ -336,14 +340,12 @@ $FRAMESIZE=16; # large enough to accommodate NUBI saved registers ...@@ -336,14 +340,12 @@ $FRAMESIZE=16; # large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000"; $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code=<<___; $code=<<___;
#include "mips_arch.h"
#ifdef OPENSSL_FIPSCANISTER #ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h> # include <openssl/fipssyms.h>
#endif #endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
.text .text
.set noat .set noat
...@@ -387,10 +389,16 @@ $code.=<<___; ...@@ -387,10 +389,16 @@ $code.=<<___;
.align 4 .align 4
.Loop: .Loop:
.set reorder .set reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
lui $K,0x5a82
lw @X[0],($inp)
ori $K,0x7999 # K_00_19
#else
lwl @X[0],$MSB($inp) lwl @X[0],$MSB($inp)
lui $K,0x5a82 lui $K,0x5a82
lwr @X[0],$LSB($inp) lwr @X[0],$LSB($inp)
ori $K,0x7999 # K_00_19 ori $K,0x7999 # K_00_19
#endif
___ ___
for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); } for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); } for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
......
...@@ -60,16 +60,16 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 ...@@ -60,16 +60,16 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) { if ($flavour =~ /64|n32/i) {
$PTR_LA="dla"; $PTR_LA="dla";
$PTR_ADD="dadd"; # incidentally works even on n32 $PTR_ADD="daddu"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32 $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd"; $REG_S="sd";
$REG_L="ld"; $REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32 $PTR_SLL="dsll"; # incidentally works even on n32
$SZREG=8; $SZREG=8;
} else { } else {
$PTR_LA="la"; $PTR_LA="la";
$PTR_ADD="add"; $PTR_ADD="addu";
$PTR_SUB="sub"; $PTR_SUB="subu";
$REG_S="sw"; $REG_S="sw";
$REG_L="lw"; $REG_L="lw";
$PTR_SLL="sll"; $PTR_SLL="sll";
...@@ -135,8 +135,12 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; ...@@ -135,8 +135,12 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]); my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);
$code.=<<___ if ($i<15); $code.=<<___ if ($i<15);
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
${LD} @X[1],`($i+1)*$SZ`($inp)
#else
${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp) ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp)
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
#endif
___ ___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4); $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
...@@ -298,14 +302,12 @@ $FRAMESIZE=16*$SZ+16*$SZREG; ...@@ -298,14 +302,12 @@ $FRAMESIZE=16*$SZ+16*$SZREG;
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000"; $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code.=<<___; $code.=<<___;
#include "mips_arch.h"
#ifdef OPENSSL_FIPSCANISTER #ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h> # include <openssl/fipssyms.h>
#endif #endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
.text .text
.set noat .set noat
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
...@@ -369,8 +371,12 @@ $code.=<<___; ...@@ -369,8 +371,12 @@ $code.=<<___;
.align 5 .align 5
.Loop: .Loop:
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
${LD} @X[0],($inp)
#else
${LD}l @X[0],$MSB($inp) ${LD}l @X[0],$MSB($inp)
${LD}r @X[0],$LSB($inp) ${LD}r @X[0],$LSB($inp)
#endif
___ ___
for ($i=0;$i<16;$i++) for ($i=0;$i<16;$i++)
{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } { &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
......
...@@ -39,8 +39,11 @@ GENERATE[sha256-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME) ...@@ -39,8 +39,11 @@ GENERATE[sha256-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME)
GENERATE[sha512-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME) GENERATE[sha512-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME)
GENERATE[sha1-mips.S]=asm/sha1-mips.pl $(PERLASM_SCHEME) GENERATE[sha1-mips.S]=asm/sha1-mips.pl $(PERLASM_SCHEME)
INCLUDE[sha1-mips.o]=..
GENERATE[sha256-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME) GENERATE[sha256-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME)
INCLUDE[sha256-mips.o]=..
GENERATE[sha512-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME) GENERATE[sha512-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME)
INCLUDE[sha512-mips.o]=..
GENERATE[sha1-armv4-large.S]=asm/sha1-armv4-large.pl $(PERLASM_SCHEME) GENERATE[sha1-armv4-large.S]=asm/sha1-armv4-large.pl $(PERLASM_SCHEME)
INCLUDE[sha1-armv4-large.o]=.. INCLUDE[sha1-armv4-large.o]=..
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册