提交 31439046 编写于 作者: A Andy Polyakov

bn/asm/ppc.pl to use ppc-xlate.pl.

上级 11d0ebc8
......@@ -151,91 +151,15 @@ if ($opf =~ /32\.s/) {
$TR= "td"; # conditional trap
} else { die "nonsense $opf"; }
# Unless one more argument can be shifted off (the command line, when this
# runs at file scope), redirect STDOUT into the file named by $opf.
( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!";
# Capture the directory part of this script's own path ($0), including the
# trailing '/' or '\', so helper scripts can be found relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for ppc-xlate.pl next to this script first, then under
# ../../perlasm/ relative to it; abort if neither candidate is a plain file.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
# Function entry points from the AIX code.
#
# There are other, more elegant, ways to handle this. We (IBM) chose
# this approach as it plays well with scripts we run to 'namespace'
# OpenSSL, i.e. we add a prefix to all the public symbols so we can
# co-exist in the same process with other implementations of OpenSSL.
# 'Cleverer' ways of doing these substitutions tend to hide data we
# need to be obvious.
#
# The per-platform do_* subs iterate over this list to rewrite the
# ".name" labels found in the generated assembly text.
my @items = ("bn_sqr_comba4",
"bn_sqr_comba8",
"bn_mul_comba4",
"bn_mul_comba8",
"bn_sub_words",
"bn_add_words",
"bn_div_words",
"bn_sqr_words",
"bn_mul_words",
"bn_mul_add_words");
# Unless one more argument can be shifted off, pipe everything written to
# STDOUT through the translator script $xlate, run with the same perl
# binary that is executing this script ($^X) and given $opf as argument.
( defined shift || open STDOUT,"| $^X $xlate $opf" ) ||
die "can't call $xlate: $!";
# Choose the platform-specific emitter from the flavour named in $opf;
# anything that is not linux, aix or osx falls through to the BSD emitter.
if ($opf =~ /linux/) { do_linux(); }
elsif ($opf =~ /aix/) { do_aix(); }
elsif ($opf =~ /osx/) { do_osx(); }
else { do_bsd(); }
# Emit the assembly for Linux targets.
#
# Fetches the assembly text from data() and rewrites the entry-point
# labels of every name listed in @items:
#   * $BITS == 64: each ".name:" label is replaced by an entry in the
#     ".opd" section (a global "name:" holding ".quad .name,.TOC.@tocbase,0")
#     followed by the global, function-typed ".name:" code label;
#   * otherwise: the leading dot is dropped from each ".name".
# Afterwards every "Lppcasm_" label gets a "." prefix and the text is
# printed to the currently selected output handle.  Returns nothing useful.
sub do_linux {
    # Lexical scratch variables; the previous version assigned to the
    # undeclared package globals $d and $t, which leaked out of the sub.
    my $asm = data();

    if ($BITS == 64) {
        foreach my $sym (@items) {
            # NOTE: the replacement text is emitted verbatim, so its
            # continuation lines must stay at column 0.
            $asm =~ s/\.$sym:/\
\t.section\t".opd","aw"\
\t.align\t3\
\t.globl\t$sym\
$sym:\
\t.quad\t.$sym,.TOC.\@tocbase,0\
\t.size\t$sym,24\
\t.previous\n\
\t.type\t.$sym,\@function\
\t.globl\t.$sym\
.$sym:/g;
        }
    }
    else {
        foreach my $sym (@items) {
            $asm =~ s/\.$sym/$sym/g;
        }
    }

    # hide internal labels to avoid pollution of name table...
    $asm =~ s/Lppcasm_/.Lppcasm_/gm;
    print $asm;
}
# Emit the assembly for AIX: the text from data() is printed unchanged.
sub do_aix {
    # No label rewriting is done here; per the original note, the AIX
    # assembler output already satisfies the linker as-is.
    my @asm = data();
    print @asm;
}
# MacOSX 32 bit
#
# Fetches the assembly text from data(), adapts it and prints it:
#   * every ".name" for a name in @items becomes "_name";
#   * each ".machine" directive (through end of line) becomes ".text";
#   * every '#' becomes ';'.
sub do_osx {
    # Lexical scratch variables; the previous version assigned to the
    # undeclared package globals $d and $t, which leaked out of the sub.
    my $asm = data();

    # Change the bn symbol prefix from '.' to '_'
    foreach my $sym (@items) {
        $asm =~ s/\.$sym/_$sym/g;
    }

    # Change .machine to something OS X asm will accept
    $asm =~ s/\.machine.*/.text/g;
    $asm =~ s/\#/;/g; # change comment from '#' to ';'
    print $asm;
}
# BSD (Untested)
#
# Fetches the assembly text from data(), changes the '.' prefix of every
# entry point named in @items to '_', and prints the result.
sub do_bsd {
    # Lexical scratch variables; the previous version assigned to the
    # undeclared package globals $d and $t, which leaked out of the sub.
    my $asm = data();

    foreach my $sym (@items) {
        $asm =~ s/\.$sym/_$sym/g;
    }

    print $asm;
}
sub data {
local($data)=<<EOF;
$data=<<EOF;
#--------------------------------------------------------------------
#
#
......@@ -297,33 +221,20 @@ sub data {
#
# Defines to be used in the assembly code.
#
.set r0,0 # we use it as storage for value of 0
.set SP,1 # preserved
.set RTOC,2 # preserved
.set r3,3 # 1st argument/return value
.set r4,4 # 2nd argument/volatile register
.set r5,5 # 3rd argument/volatile register
.set r6,6 # ...
.set r7,7
.set r8,8
.set r9,9
.set r10,10
.set r11,11
.set r12,12
.set r13,13 # not used, nor any other "below" it...
.set BO_IF_NOT,4
.set BO_IF,12
.set BO_dCTR_NZERO,16
.set BO_dCTR_ZERO,18
.set BO_ALWAYS,20
.set CR0_LT,0;
.set CR0_GT,1;
.set CR0_EQ,2
.set CR1_FX,4;
.set CR1_FEX,5;
.set CR1_VX,6
.set LR,8
#.set r0,0 # we use it as storage for value of 0
#.set SP,1 # preserved
#.set RTOC,2 # preserved
#.set r3,3 # 1st argument/return value
#.set r4,4 # 2nd argument/volatile register
#.set r5,5 # 3rd argument/volatile register
#.set r6,6 # ...
#.set r7,7
#.set r8,8
#.set r9,9
#.set r10,10
#.set r11,11
#.set r12,12
#.set r13,13 # not used, nor any other "below" it...
# Declare function names to be global
# NOTE: For gcc these names MUST be changed to remove
......@@ -478,7 +389,7 @@ sub data {
$ST r9,`6*$BNSZ`(r3) #r[6]=c1
$ST r10,`7*$BNSZ`(r3) #r[7]=c2
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -903,7 +814,7 @@ sub data {
$ST r9, `15*$BNSZ`(r3) #r[15]=c1;
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
......@@ -1055,7 +966,7 @@ sub data {
$ST r10,`6*$BNSZ`(r3) #r[6]=c1
$ST r11,`7*$BNSZ`(r3) #r[7]=c2
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -1591,7 +1502,7 @@ sub data {
adde r10,r10,r9
$ST r12,`14*$BNSZ`(r3) #r[14]=c3;
$ST r10,`15*$BNSZ`(r3) #r[15]=c1;
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -1623,7 +1534,7 @@ sub data {
subfc. r7,r0,r6 # If r6 is 0 then result is 0.
# if r6 > 0 then result !=0
# In either case carry bit is set.
bc BO_IF,CR0_EQ,Lppcasm_sub_adios
beq Lppcasm_sub_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
......@@ -1635,11 +1546,11 @@ Lppcasm_sub_mainloop:
# if carry = 1 this is r7-r8. Else it
# is r7-r8 -1 as we need.
$STU r6,$BNSZ(r3)
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
bdnz- Lppcasm_sub_mainloop
Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
......@@ -1670,7 +1581,7 @@ Lppcasm_sub_adios:
# check for r6 = 0. Is this needed?
#
addic. r6,r6,0 #test r6 and clear carry bit.
bc BO_IF,CR0_EQ,Lppcasm_add_adios
beq Lppcasm_add_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
......@@ -1680,10 +1591,10 @@ Lppcasm_add_mainloop:
$LDU r8,$BNSZ(r5)
adde r8,r7,r8
$STU r8,$BNSZ(r3)
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
bdnz- Lppcasm_add_mainloop
Lppcasm_add_adios:
addze r3,r0 #return carry bit.
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -1707,24 +1618,24 @@ Lppcasm_add_adios:
# r5 = d
$UCMPI 0,r5,0 # compare r5 and 0
bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0
bne Lppcasm_div1 # proceed if d!=0
li r3,-1 # d=0 return -1
bclr BO_ALWAYS,CR0_LT
blr
Lppcasm_div1:
xor r0,r0,r0 #r0=0
li r8,$BITS
$CNTLZ. r7,r5 #r7 = num leading 0s in d.
bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros
beq Lppcasm_div2 #proceed if no leading zeros
subf r8,r7,r8 #r8 = BN_num_bits_word(d)
$SHR. r9,r3,r8 #are there any bits above r8'th?
$TR 16,r9,r0 #if there're, signal to dump core...
Lppcasm_div2:
$UCMP 0,r3,r5 #h>=d?
bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not
blt Lppcasm_div3 #goto Lppcasm_div3 if not
subf r3,r5,r3 #h-=d ;
Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
cmpi 0,0,r7,0 # is (i == 0)?
bc BO_IF,CR0_EQ,Lppcasm_div4
beq Lppcasm_div4
$SHL r3,r3,r7 # h = (h<< i)
$SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
$SHL r5,r5,r7 # d<<=i
......@@ -1741,7 +1652,7 @@ Lppcasm_divouterloop:
$SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
# compute here for innerloop.
$UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not
bne Lppcasm_div5 # goto Lppcasm_div5 if not
li r8,-1
$CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
......@@ -1762,9 +1673,9 @@ Lppcasm_divinnerloop:
# the following 2 instructions do that
$SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
$UCMP 1,r6,r7 # compare (tl <= r7)
bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
$UCMP cr1,r6,r7 # compare (tl <= r7)
bne Lppcasm_divinnerexit
ble cr1,Lppcasm_divinnerexit
addi r8,r8,-1 #q--
subf r12,r9,r12 #th -=dh
$CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
......@@ -1773,14 +1684,14 @@ Lppcasm_divinnerloop:
Lppcasm_divinnerexit:
$SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
$SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
$UCMP 1,r4,r11 # compare l and tl
$UCMP cr1,r4,r11 # compare l and tl
add r12,r12,r10 # th+=t
bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
addi r12,r12,1 # th++
Lppcasm_div7:
subf r11,r11,r4 #r11=l-tl
$UCMP 1,r3,r12 #compare h and th
bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
$UCMP cr1,r3,r12 #compare h and th
bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
addi r8,r8,-1 # q--
add r3,r5,r3 # h+=d
Lppcasm_div8:
......@@ -1791,12 +1702,12 @@ Lppcasm_div8:
# the following 2 instructions will do this.
$INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
$ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
bdz Lppcasm_div9 #if (count==0) break ;
$SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
b Lppcasm_divouterloop
Lppcasm_div9:
or r3,r8,r0
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -1822,7 +1733,7 @@ Lppcasm_div9:
# No unrolling done here. Not performance critical.
addic. r5,r5,0 #test r5.
bc BO_IF,CR0_EQ,Lppcasm_sqr_adios
beq Lppcasm_sqr_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
mtctr r5
......@@ -1833,9 +1744,9 @@ Lppcasm_sqr_mainloop:
$UMULH r8,r6,r6
$STU r7,$BNSZ(r3)
$STU r8,$BNSZ(r3)
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
bdnz- Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
......@@ -1858,7 +1769,7 @@ Lppcasm_sqr_adios:
xor r0,r0,r0
xor r12,r12,r12 # used for carry
rlwinm. r7,r5,30,2,31 # num >> 2
bc BO_IF,CR0_EQ,Lppcasm_mw_REM
beq Lppcasm_mw_REM
mtctr r7
Lppcasm_mw_LOOP:
#mul(rp[0],ap[0],w,c1);
......@@ -1896,11 +1807,11 @@ Lppcasm_mw_LOOP:
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
bdnz- Lppcasm_mw_LOOP
Lppcasm_mw_REM:
andi. r5,r5,0x3
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
beq Lppcasm_mw_OVER
#mul(rp[0],ap[0],w,c1);
$LD r8,`0*$BNSZ`(r4)
$UMULL r9,r6,r8
......@@ -1912,7 +1823,7 @@ Lppcasm_mw_REM:
addi r5,r5,-1
cmpli 0,0,r5,0
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
beq Lppcasm_mw_OVER
#mul(rp[1],ap[1],w,c1);
......@@ -1926,7 +1837,7 @@ Lppcasm_mw_REM:
addi r5,r5,-1
cmpli 0,0,r5,0
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
beq Lppcasm_mw_OVER
#mul_add(rp[2],ap[2],w,c1);
$LD r8,`2*$BNSZ`(r4)
......@@ -1939,7 +1850,7 @@ Lppcasm_mw_REM:
Lppcasm_mw_OVER:
addi r3,r12,0
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
#
......@@ -1964,7 +1875,7 @@ Lppcasm_mw_OVER:
xor r0,r0,r0 #r0 = 0
xor r12,r12,r12 #r12 = 0 . used for carry
rlwinm. r7,r5,30,2,31 # num >> 2
bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
mtctr r7
Lppcasm_maw_mainloop:
#mul_add(rp[0],ap[0],w,c1);
......@@ -2017,11 +1928,11 @@ Lppcasm_maw_mainloop:
$ST r11,`3*$BNSZ`(r3)
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
bdnz- Lppcasm_maw_mainloop
Lppcasm_maw_leftover:
andi. r5,r5,0x3
bc BO_IF,CR0_EQ,Lppcasm_maw_adios
beq Lppcasm_maw_adios
addi r3,r3,-$BNSZ
addi r4,r4,-$BNSZ
#mul_add(rp[0],ap[0],w,c1);
......@@ -2036,7 +1947,7 @@ Lppcasm_maw_leftover:
addze r12,r10
$ST r9,0(r3)
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
bdz Lppcasm_maw_adios
#mul_add(rp[1],ap[1],w,c1);
$LDU r8,$BNSZ(r4)
$UMULL r9,r6,r8
......@@ -2048,7 +1959,7 @@ Lppcasm_maw_leftover:
addze r12,r10
$ST r9,0(r3)
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
bdz Lppcasm_maw_adios
#mul_add(rp[2],ap[2],w,c1);
$LDU r8,$BNSZ(r4)
$UMULL r9,r6,r8
......@@ -2062,17 +1973,10 @@ Lppcasm_maw_leftover:
Lppcasm_maw_adios:
addi r3,r12,0
bclr BO_ALWAYS,CR0_LT
blr
.long 0x00000000
.align 4
EOF
$data =~ s/\`([^\`]*)\`/eval $1/gem;
# if some assembler chokes on some simplified mnemonic,
# this is the spot to fix it up, e.g.:
# GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
$data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
# assembler X doesn't accept li, load immediate value
#$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
return($data);
}
# Expand every `...` span embedded in the assembly template by evaluating
# its contents as Perl, then write the finished text to STDOUT.
$data =~ s/\`([^\`]*)\`/eval $1/gem;
print $data;
# STDOUT may be a pipe into the translator script.  close() is where
# buffered-write errors and a non-zero exit of that child are reported
# ($! is 0 when only the exit status was bad), so fail loudly rather than
# silently leaving a truncated or empty output behind.
close STDOUT
    or die "error closing STDOUT: " . ($! ? "$!" : "child exit status $?");
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册