提交 da30c74a 编写于 作者: A Andy Polyakov

Remove unused assembler modules.

上级 67ea999d
此差异已折叠。
# DEC Alpha assembler
# The bn_div64 is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div64.
# I've gone back and re-done most of the routines.
# The key thing to remember for the 164 CPU is that while a
# multiply operation takes 8 cycles, another one can only be issued
# after 4 cycles have elapsed. I've made modifications to help
# improve this. Also, normally, the result of a ld instruction will
# not be available for about 3 cycles.
# File-level assembler setup shared by the routines below.
.file 1 "bn_asm.c"
# The hand-written routines use $28 as an ordinary scratch register;
# "noat" presumably keeps the assembler from treating it as its own
# temporary -- confirm against the assembler manual.
.set noat
gcc2_compiled.:
__gnu_compiled_c:
.text
# bn_mul_add_words
#   For each of the $18 words: r[i] = r[i] + a[i]*w + carry, where the
#   carry out of each word feeds the next.  The final carry word is
#   left in $0.
#   In:  $16 = r (read and written), $17 = a, $18 = word count, $19 = w
#   Out: $0  = carry word
#   Leaf routine, no stack frame.  Besides $16-$18 it writes
#   $0-$8, $20-$25, $27 and $28.
.align 3
.globl bn_mul_add_words
.ent bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
# Bias the count by -4 so that a negative value means "fewer than four
# words left"; the carry starts at zero.
subq $18,4,$18
bis $31,$31,$0
blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # 1 1
ldq $1,0($16) # 1 1
.align 3
# Main loop: four words per iteration.  The multiplies (marked ######)
# are interleaved with loads and carry arithmetic of the other words.
# Per word: low = mulq, high = umulh, r += low, high += (r < low),
# r += carry, carry = high + (r < carry).
$42:
mulq $20,$19,$5 # 1 2 1 ######
ldq $21,8($17) # 2 1
ldq $2,8($16) # 2 1
umulh $20,$19,$20 # 1 2 ######
ldq $27,16($17) # 3 1
ldq $3,16($16) # 3 1
mulq $21,$19,$6 # 2 2 1 ######
ldq $28,24($17) # 4 1
addq $1,$5,$1 # 1 2 2
ldq $4,24($16) # 4 1
umulh $21,$19,$21 # 2 2 ######
cmpult $1,$5,$22 # 1 2 3 1
addq $20,$22,$20 # 1 3 1
addq $1,$0,$1 # 1 2 3 1
mulq $27,$19,$7 # 3 2 1 ######
cmpult $1,$0,$0 # 1 2 3 2
addq $2,$6,$2 # 2 2 2
addq $20,$0,$0 # 1 3 2
cmpult $2,$6,$23 # 2 2 3 1
addq $21,$23,$21 # 2 3 1
umulh $27,$19,$27 # 3 2 ######
addq $2,$0,$2 # 2 2 3 1
cmpult $2,$0,$0 # 2 2 3 2
subq $18,4,$18
mulq $28,$19,$8 # 4 2 1 ######
addq $21,$0,$0 # 2 3 2
addq $3,$7,$3 # 3 2 2
# r is advanced here, so the stores below use negative offsets.
addq $16,32,$16
cmpult $3,$7,$24 # 3 2 3 1
stq $1,-32($16) # 1 2 4
umulh $28,$19,$28 # 4 2 ######
addq $27,$24,$27 # 3 3 1
addq $3,$0,$3 # 3 2 3 1
stq $2,-24($16) # 2 2 4
cmpult $3,$0,$0 # 3 2 3 2
stq $3,-16($16) # 3 2 4
addq $4,$8,$4 # 4 2 2
addq $27,$0,$0 # 3 3 2
cmpult $4,$8,$25 # 4 2 3 1
addq $17,32,$17
addq $28,$25,$28 # 4 3 1
addq $4,$0,$4 # 4 2 3 1
cmpult $4,$0,$0 # 4 2 3 2
stq $4,-8($16) # 4 2 4
addq $28,$0,$0 # 4 3 2
blt $18,$43
# At least four more words: preload the first pair and go round again.
ldq $20,0($17) # 1 1
ldq $1,0($16) # 1 1
br $42
.align 4
# Tail loop: one word per iteration, $18 = words remaining (> 0 here).
$45:
ldq $20,0($17) # 4 1
ldq $1,0($16) # 4 1
mulq $20,$19,$5 # 4 2 1
subq $18,1,$18
addq $16,8,$16
addq $17,8,$17
umulh $20,$19,$20 # 4 2
addq $1,$5,$1 # 4 2 2
cmpult $1,$5,$22 # 4 2 3 1
addq $20,$22,$20 # 4 3 1
addq $1,$0,$1 # 4 2 3 1
cmpult $1,$0,$0 # 4 2 3 2
addq $20,$0,$0 # 4 3 2
stq $1,-8($16) # 4 2 4
bgt $18,$45
ret $31,($26),1 # else exit
.align 4
# Undo the -4 bias; anything left over goes through the tail loop.
$43:
addq $18,4,$18
bgt $18,$45 # goto tail code
ret $31,($26),1 # else exit
.end bn_mul_add_words
# bn_mul_words
#   For each of the $18 words: r[i] = low word of (a[i]*w + carry), and
#   the carry for the next word is the high word.  The final carry word
#   is left in $0.
#   In:  $16 = r (written only), $17 = a, $18 = word count, $19 = w
#   Out: $0  = carry word
#   Leaf routine, no stack frame.  Besides $16-$18 it writes
#   $0, $5-$8, $20, $21, $27 and $28.
.align 3
.globl bn_mul_words
.ent bn_mul_words
bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
# Bias the count by -4 so that a negative value means "fewer than four
# words left"; the carry starts at zero.
subq $18,4,$18
bis $31,$31,$0
blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # 1 1
.align 3
# Main loop: four words per iteration.  Per word: low = mulq,
# high = umulh, low += carry, carry = high + (low < carry).
$142:
mulq $20,$19,$5 # 1 2 1 #####
ldq $21,8($17) # 2 1
ldq $27,16($17) # 3 1
umulh $20,$19,$20 # 1 2 #####
ldq $28,24($17) # 4 1
mulq $21,$19,$6 # 2 2 1 #####
addq $5,$0,$5 # 1 2 3 1
subq $18,4,$18
cmpult $5,$0,$0 # 1 2 3 2
umulh $21,$19,$21 # 2 2 #####
addq $20,$0,$0 # 1 3 2
addq $17,32,$17
addq $6,$0,$6 # 2 2 3 1
mulq $27,$19,$7 # 3 2 1 #####
cmpult $6,$0,$0 # 2 2 3 2
addq $21,$0,$0 # 2 3 2
# r is advanced here, so the stores below use negative offsets.
addq $16,32,$16
umulh $27,$19,$27 # 3 2 #####
stq $5,-32($16) # 1 2 4
mulq $28,$19,$8 # 4 2 1 #####
addq $7,$0,$7 # 3 2 3 1
stq $6,-24($16) # 2 2 4
cmpult $7,$0,$0 # 3 2 3 2
umulh $28,$19,$28 # 4 2 #####
addq $27,$0,$0 # 3 3 2
stq $7,-16($16) # 3 2 4
addq $8,$0,$8 # 4 2 3 1
cmpult $8,$0,$0 # 4 2 3 2
addq $28,$0,$0 # 4 3 2
stq $8,-8($16) # 4 2 4
blt $18,$143
# At least four more words: preload a[0] and go round again.
ldq $20,0($17) # 1 1
br $142
.align 4
# Tail loop: one word per iteration, $18 = words remaining (> 0 here).
$145:
ldq $20,0($17) # 4 1
mulq $20,$19,$5 # 4 2 1
subq $18,1,$18
umulh $20,$19,$20 # 4 2
addq $5,$0,$5 # 4 2 3 1
addq $16,8,$16
cmpult $5,$0,$0 # 4 2 3 2
addq $17,8,$17
addq $20,$0,$0 # 4 3 2
stq $5,-8($16) # 4 2 4
bgt $18,$145
ret $31,($26),1 # else exit
.align 4
# Undo the -4 bias; anything left over goes through the tail loop.
$143:
addq $18,4,$18
bgt $18,$145 # goto tail code
ret $31,($26),1 # else exit
.end bn_mul_words
# bn_sqr_words
#   For each of the $18 input words: r[2i] = low word of a[i]*a[i],
#   r[2i+1] = high word of a[i]*a[i].
#   In:  $16 = r (2 * count words written), $17 = a, $18 = word count
#   No result register is set.
#   Leaf routine, no stack frame.  Besides $16-$18 it writes
#   $1-$8, $20, $21, $27 and $28.
.align 3
.globl bn_sqr_words
.ent bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
.frame $30,0,$26,0
.prologue 0
# Bias the count by -4 so that a negative value means "fewer than four
# words left".
subq $18,4,$18
blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # a[0]
.align 3
# Main loop: four input words (eight output words) per iteration.
$542:
mulq $20,$20,$5 ######
ldq $21,8($17) # a[1]
# Destination spelled out: every other operate instruction in this file
# uses the three-operand form, and the loop exit test below depends on
# $18 actually being decremented here.
subq $18,4,$18
umulh $20,$20,$1 ######
ldq $27,16($17) # a[2]
mulq $21,$21,$6 ######
ldq $28,24($17) # a[3]
stq $5,0($16) # r[0]
umulh $21,$21,$2 ######
stq $1,8($16) # r[1]
mulq $27,$27,$7 ######
stq $6,16($16) # r[2]
umulh $27,$27,$3 ######
stq $2,24($16) # r[3]
mulq $28,$28,$8 ######
stq $7,32($16) # r[4]
umulh $28,$28,$4 ######
stq $3,40($16) # r[5]
# r is advanced by eight words here, so the last two stores use
# negative offsets.
addq $16,64,$16
addq $17,32,$17
stq $8,-16($16) # r[6]
stq $4,-8($16) # r[7]
blt $18,$543
# At least four more words: preload a[0] and go round again.
ldq $20,0($17) # a[0]
br $542
# Tail loop: one input word per iteration, $18 = words remaining (> 0).
$442:
ldq $20,0($17) # a[0]
mulq $20,$20,$5 # a[0]*a[0] low part
addq $16,16,$16
addq $17,8,$17
subq $18,1,$18
umulh $20,$20,$1 # a[0]*a[0] high part
stq $5,-16($16) # r[0]
stq $1,-8($16) # r[1]
bgt $18,$442
ret $31,($26),1 # else exit
.align 4
# Undo the -4 bias; anything left over goes through the tail loop.
$543:
addq $18,4,$18
bgt $18,$442 # goto tail code
ret $31,($26),1 # else exit
.end bn_sqr_words
# bn_add_words
#   For each of the $19 words: r[i] = a[i] + b[i] + carry, where the
#   carry out of each word feeds the next.  The final carry (0 or 1) is
#   left in $0.
#   In:  $16 = r, $17 = a, $18 = b, $19 = word count
#   Out: $0  = carry
#   Leaf routine, no stack frame.  Besides $16-$19 it writes
#   $0-$8 and $22-$25.
.align 3
.globl bn_add_words
.ent bn_add_words
bn_add_words:
bn_add_words..ng:
.frame $30,0,$26,0
.prologue 0
# Bias the count by -4 so that a negative value means "fewer than four
# words left".
subq $19,4,$19
bis $31,$31,$0 # carry = 0
blt $19,$900
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[0]
.align 3
# Main loop: four words per iteration.  Per word:
#   s = a + b; c1 = (s < a); s += carry; c2 = (s < carry);
#   carry = c1 + c2   (at most one of c1, c2 can be set)
$901:
addq $1,$5,$1 # r=a+b;
ldq $6,8($17) # a[1]
cmpult $1,$5,$22 # did we overflow?
ldq $2,8($18) # b[1]
addq $1,$0,$1 # c+= overflow
ldq $7,16($17) # a[2]
cmpult $1,$0,$0 # overflow?
ldq $3,16($18) # b[2]
addq $0,$22,$0
ldq $8,24($17) # a[3]
addq $2,$6,$2 # r=a+b;
ldq $4,24($18) # b[3]
cmpult $2,$6,$23 # did we overflow?
addq $3,$7,$3 # r=a+b;
addq $2,$0,$2 # c+= overflow
cmpult $3,$7,$24 # did we overflow?
cmpult $2,$0,$0 # overflow?
addq $4,$8,$4 # r=a+b;
addq $0,$23,$0
cmpult $4,$8,$25 # did we overflow?
addq $3,$0,$3 # c+= overflow
stq $1,0($16) # r[0]=c
cmpult $3,$0,$0 # overflow?
stq $2,8($16) # r[1]=c
addq $0,$24,$0
stq $3,16($16) # r[2]=c
addq $4,$0,$4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $4,$0,$0 # overflow?
addq $17,32,$17 # a++
addq $0,$25,$0
stq $4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++
blt $19,$900
# At least four more words: preload the first pair and go round again.
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[0]
br $901
.align 4
# Tail loop: one word per iteration, $19 = words remaining (> 0 here).
$945:
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[0]
addq $1,$5,$1 # r=a+b;
subq $19,1,$19 # loop--
# The carry out of a+b has to be sampled BEFORE the carry-in is added.
# Testing (a+b+carry < a) instead gives 2 for a=2^64-1, b=0, carry=1
# and 0 for a=5, b=2^64-1, carry=1.
cmpult $1,$5,$22 # did a+b overflow?
addq $17,8,$17 # a++
addq $1,$0,$1 # r += carry-in
addq $18,8,$18 # b++
cmpult $1,$0,$0 # did adding the carry-in overflow?
stq $1,0($16) # r[0]=c
addq $0,$22,$0 # carry-out = sum of the two overflow bits
addq $16,8,$16 # r++
bgt $19,$945
ret $31,($26),1 # else exit
# Undo the -4 bias; anything left over goes through the tail loop.
$900:
addq $19,4,$19
bgt $19,$945 # goto tail code
ret $31,($26),1 # else exit
.end bn_add_words
#
# What follows was taken directly from the C compiler with a few
# hacks to redo the labels.
#
# bn_div64 (compiler output)
#   Divides the two-word value held in $16 (high word) and $17 (low
#   word) by $18 and leaves a one-word quotient in $0.  The quotient is
#   built as two 32-bit halves, one per pass of the loop at $123.
#   Returns all ones when the divisor is zero.  Calls abort when the
#   divisor has fewer than 64 significant bits and the high word is
#   larger than 1 << (number of significant bits).
#   Saves $26 and $9-$13 in a 48-byte stack frame; calls
#   BN_num_bits_word.
.text
.align 3
.globl bn_div64
.ent bn_div64
bn_div64:
ldgp $29,0($27)
bn_div64..ng:
lda $30,-48($30)
.frame $30,48,$26,0
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
.mask 0x4003e00,-48
.prologue 1
# $9 = high word, $10 = low word, $11 = divisor, $13 = quotient so far
# (0), $12 = number of passes left (2).
bis $16,$16,$9
bis $17,$17,$10
bis $18,$18,$11
bis $31,$31,$13
bis $31,2,$12
bne $11,$119
# Divisor is zero: return -1.
lda $0,-1
br $31,$136
.align 4
$119:
# $0 = BN_num_bits_word(divisor)
bis $11,$11,$16
jsr $26,BN_num_bits_word
ldgp $29,0($26)
# Skip the range check when the bit count is exactly 64; otherwise
# require high <= (1 << bits), else abort.
subq $0,64,$1
beq $1,$120
bis $31,1,$1
sll $1,$0,$1
cmpule $9,$1,$1
bne $1,$120
# lda $16,_IO_stderr_
# lda $17,$C32
# bis $0,$0,$18
# jsr $26,fprintf
# ldgp $29,0($26)
jsr $26,abort
ldgp $29,0($26)
.align 4
$120:
# $0 = 64 - bits (the normalising shift).  If high >= divisor, subtract
# the divisor from high once.
bis $31,64,$3
cmpult $9,$11,$2
subq $3,$0,$1
addl $1,$31,$0
subq $9,$11,$1
cmoveq $2,$1,$9
beq $0,$122
# Shift divisor and the high:low pair left by the normalising shift,
# moving the bits shifted out of low into high.
zapnot $0,15,$2
subq $3,$0,$1
sll $11,$2,$11
sll $9,$2,$3
srl $10,$1,$1
sll $10,$2,$10
bis $3,$1,$9
$122:
# $5 = upper 32 bits of the divisor, $6 = lower 32 bits, $7 = all ones.
srl $11,32,$5
zapnot $11,15,$6
lda $7,-1
.align 5
$123:
# Estimate a 32-bit quotient digit in $27: 0xffffffff when the upper
# half of high equals $5, otherwise high / $5.
srl $9,32,$1
subq $1,$5,$1
bne $1,$126
zapnot $7,15,$27
br $31,$127
.align 4
$126:
bis $9,$9,$24
bis $5,$5,$25
divqu $24,$25,$27
$127:
srl $10,32,$4
.align 5
$128:
# Correct the estimate downwards: stop when the partial remainder
# (high - q*$5) has bits in its upper half, or when
# $6*q <= (remainder << 32) + (low >> 32); otherwise decrement q.
mulq $27,$5,$1
subq $9,$1,$3
zapnot $3,240,$1
bne $1,$129
mulq $6,$27,$2
sll $3,32,$1
addq $1,$4,$1
cmpule $2,$1,$2
bne $2,$129
subq $27,1,$27
br $31,$128
.align 4
$129:
# Subtract q * divisor from high:low, with a borrow from low into the
# high-part product; if high is smaller than that product, add the
# divisor back once and decrement q.
mulq $27,$6,$1
mulq $27,$5,$4
srl $1,32,$3
sll $1,32,$1
addq $4,$3,$4
cmpult $10,$1,$2
subq $10,$1,$10
addq $2,$4,$2
cmpult $9,$2,$1
bis $2,$2,$4
beq $1,$134
addq $9,$11,$9
subq $27,1,$27
$134:
subl $12,1,$12
subq $9,$4,$9
beq $12,$124
# First pass done: keep q as the upper half of the result and shift
# high:low left by 32 bits for the second pass.
sll $27,32,$13
sll $9,32,$2
srl $10,32,$1
sll $10,32,$10
bis $2,$1,$9
br $31,$123
.align 4
$124:
# Result = (first digit << 32) | second digit.
bis $13,$27,$0
$136:
# Restore the saved registers and release the frame.
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
addq $30,48,$30
ret $31,($26),1
.end bn_div64
# bn_sub_words
#   For each of the $19 words: r[i] = a[i] - b[i] - borrow, where the
#   borrow out of each word feeds the next.  The final borrow (0 or 1)
#   is left in $0.
#   In:  $16 = r, $17 = a, $18 = b, $19 = word count
#   Out: $0  = borrow
#   Leaf routine, no stack frame.  Besides $16-$19 it writes
#   $0-$8, $20-$25, $27 and $28.
.set noat
.text
.align 3
.globl bn_sub_words
.ent bn_sub_words
bn_sub_words:
bn_sub_words..ng:
.frame $30,0,$26,0
.prologue 0
# Bias the count by -4 so that a negative value means "fewer than four
# words left"; the borrow starts at zero.
subq $19, 4, $19
bis $31, $31, $0
blt $19, $100
ldq $1, 0($17)
ldq $2, 0($18)
# Main loop: four words per iteration.  Per word:
#   b1 = (a < b); d = a - b; b2 = (d < borrow); d -= borrow;
#   borrow = b1 + b2
$101:
ldq $3, 8($17)
cmpult $1, $2, $4
ldq $5, 8($18)
subq $1, $2, $1
ldq $6, 16($17)
cmpult $1, $0, $2
ldq $7, 16($18)
subq $1, $0, $23
ldq $8, 24($17)
addq $2, $4, $0
cmpult $3, $5, $24
subq $3, $5, $3
ldq $22, 24($18)
cmpult $3, $0, $5
subq $3, $0, $25
addq $5, $24, $0
cmpult $6, $7, $27
subq $6, $7, $6
stq $23, 0($16)
cmpult $6, $0, $7
subq $6, $0, $28
addq $7, $27, $0
cmpult $8, $22, $21
subq $8, $22, $8
stq $25, 8($16)
cmpult $8, $0, $22
subq $8, $0, $20
addq $22, $21, $0
stq $28, 16($16)
subq $19, 4, $19
stq $20, 24($16)
addq $17, 32, $17
addq $18, 32, $18
addq $16, 32, $16
blt $19, $100
# At least four more words: preload the first pair and go round again.
ldq $1, 0($17)
ldq $2, 0($18)
br $101
# Tail loop: one word per iteration, $19 = words remaining (> 0 here).
$102:
ldq $1, 0($17)
ldq $2, 0($18)
cmpult $1, $2, $27
subq $1, $2, $1
cmpult $1, $0, $2
subq $1, $0, $1
stq $1, 0($16)
addq $2, $27, $0
addq $17, 8, $17
addq $18, 8, $18
addq $16, 8, $16
subq $19, 1, $19
bgt $19, $102
ret $31,($26),1
# Undo the -4 bias; anything left over goes through the tail loop.
$100:
addq $19, 4, $19
bgt $19, $102
# Nothing in this routine branches to $103.
$103:
ret $31,($26),1
.end bn_sub_words
#!/usr/local/bin/perl
# alpha assembler
sub bn_add_words
{
    # Emits the routine $name through the generator helpers (&ld, &add,
    # &cmpult, ...).  The emitted code computes, word by word,
    #     r[i] = a[i] + b[i] + carry
    # and leaves the final carry in the register obtained from GR("r0").
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = b, 3 = count.
    #
    # Layout of the emitted code:
    #   entry      count -= 4; fewer than four words -> "finish"
    #   loop       four words per iteration
    #   last_loop  one word per iteration
    #   finish     count += 4; anything left -> "last_loop"
    local($name)=@_;
    local($cc,$a,$b,$r);

    &init_pool(4);
    ($cc)=GR("r0");

    $rp=&wparam(0);
    $ap=&wparam(1);
    $bp=&wparam(2);
    $count=&wparam(3);

    &function_begin($name,"");

    &comment("");
    &sub($count,4,$count);
    &mov("zero",$cc);
    # An unconditional &br to "finish" used to sit here.  It made the
    # &blt below and the whole unrolled loop unreachable; it is removed
    # now that the loop stores to the right words.
    &blt($count,&label("finish"));

    ($a0,$b0)=&NR(2);
    &ld($a0,&QWPw(0,$ap));
    &ld($b0,&QWPw(0,$bp));

    ##########################################################
    # Four words per iteration; $a0/$b0 are already loaded on entry.
    &set_label("loop");

    ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
    ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
    ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
    ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
    ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
    ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));

    # Per word: o = a + b; t = (o < b); o += cc; cc = (o < cc) + t
    ($o0,$t0)=&NR(2);
    &add($a0,$b0,$o0);
    &cmpult($o0,$b0,$t0);
    &add($o0,$cc,$o0);
    &cmpult($o0,$cc,$cc);
    &add($cc,$t0,$cc); &FR($t0);

    ($t1,$o1)=&NR(2);
    &add($a1,$b1,$o1); &FR($a1);
    &cmpult($o1,$b1,$t1); &FR($b1);
    &add($o1,$cc,$o1);
    &cmpult($o1,$cc,$cc);
    &add($cc,$t1,$cc); &FR($t1);

    ($t2,$o2)=&NR(2);
    &add($a2,$b2,$o2); &FR($a2);
    &cmpult($o2,$b2,$t2); &FR($b2);
    &add($o2,$cc,$o2);
    &cmpult($o2,$cc,$cc);
    &add($cc,$t2,$cc); &FR($t2);

    ($t3,$o3)=&NR(2);
    &add($a3,$b3,$o3); &FR($a3);
    &cmpult($o3,$b3,$t3); &FR($b3);
    &add($o3,$cc,$o3);
    &cmpult($o3,$cc,$cc);
    &add($cc,$t3,$cc); &FR($t3);

    # Each result goes to its own word (all four used to be written to
    # word 0).
    &st($o0,&QWPw(0,$rp)); &FR($o0);
    &st($o1,&QWPw(1,$rp)); &FR($o1);
    &st($o2,&QWPw(2,$rp)); &FR($o2);
    &st($o3,&QWPw(3,$rp)); &FR($o3);

    &sub($count,4,$count);      # count-=4
    &add($ap,4*$QWS,$ap);       # a+=4 words
    &add($bp,4*$QWS,$bp);       # b+=4 words
    &add($rp,4*$QWS,$rp);       # r+=4 words

    &blt($count,&label("finish"));
    &ld($a0,&QWPw(0,$ap));
    &ld($b0,&QWPw(0,$bp));
    &br(&label("loop"));

    ##################################################
    # Do the last 0..3 words

    ($t0,$o0)=&NR(2);
    &set_label("last_loop");

    &ld($a0,&QWPw(0,$ap));      # get a
    &ld($b0,&QWPw(0,$bp));      # get b

    &add($a0,$b0,$o0);
    &cmpult($o0,$b0,$t0);       # carry out of a+b
    &add($o0,$cc,$o0);          # add the carry-in
    &cmpult($o0,$cc,$cc);       # carry out of adding the carry-in
    &add($cc,$t0,$cc);          # combine the two carries
    &st($o0,&QWPw(0,$rp));      # save

    &add($ap,$QWS,$ap);
    &add($bp,$QWS,$bp);
    &add($rp,$QWS,$rp);
    &sub($count,1,$count);
    &bgt($count,&label("last_loop"));
    &function_end_A($name);

    ######################################################
    # Undo the -4 bias and run the single-word loop if words remain.
    &set_label("finish");
    &add($count,4,$count);
    &bgt($count,&label("last_loop"));

    &FR($o0,$t0,$a0,$b0);
    &set_label("end");
    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# Hands a fixed block of Alpha assembly for bn_div64 to &asm_add.
# Takes no arguments.  The text is a single-quoted here-document, so
# the $-prefixed register names and labels in it are passed through
# without interpolation.
sub bn_div64
{
local($data)=<<'EOF';
#
# What follows was taken directly from the C compiler with a few
# hacks to redo the lables.
#
.text
.set noreorder
.set volatile
.align 3
.globl bn_div64
.ent bn_div64
bn_div64:
ldgp $29,0($27)
bn_div64..ng:
lda $30,-48($30)
.frame $30,48,$26,0
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
.mask 0x4003e00,-48
.prologue 1
bis $16,$16,$9
bis $17,$17,$10
bis $18,$18,$11
bis $31,$31,$13
bis $31,2,$12
bne $11,$9119
lda $0,-1
br $31,$9136
.align 4
$9119:
bis $11,$11,$16
jsr $26,BN_num_bits_word
ldgp $29,0($26)
subq $0,64,$1
beq $1,$9120
bis $31,1,$1
sll $1,$0,$1
cmpule $9,$1,$1
bne $1,$9120
# lda $16,_IO_stderr_
# lda $17,$C32
# bis $0,$0,$18
# jsr $26,fprintf
# ldgp $29,0($26)
jsr $26,abort
ldgp $29,0($26)
.align 4
$9120:
bis $31,64,$3
cmpult $9,$11,$2
subq $3,$0,$1
addl $1,$31,$0
subq $9,$11,$1
cmoveq $2,$1,$9
beq $0,$9122
zapnot $0,15,$2
subq $3,$0,$1
sll $11,$2,$11
sll $9,$2,$3
srl $10,$1,$1
sll $10,$2,$10
bis $3,$1,$9
$9122:
srl $11,32,$5
zapnot $11,15,$6
lda $7,-1
.align 5
$9123:
srl $9,32,$1
subq $1,$5,$1
bne $1,$9126
zapnot $7,15,$27
br $31,$9127
.align 4
$9126:
bis $9,$9,$24
bis $5,$5,$25
divqu $24,$25,$27
$9127:
srl $10,32,$4
.align 5
$9128:
mulq $27,$5,$1
subq $9,$1,$3
zapnot $3,240,$1
bne $1,$9129
mulq $6,$27,$2
sll $3,32,$1
addq $1,$4,$1
cmpule $2,$1,$2
bne $2,$9129
subq $27,1,$27
br $31,$9128
.align 4
$9129:
mulq $27,$6,$1
mulq $27,$5,$4
srl $1,32,$3
sll $1,32,$1
addq $4,$3,$4
cmpult $10,$1,$2
subq $10,$1,$10
addq $2,$4,$2
cmpult $9,$2,$1
bis $2,$2,$4
beq $1,$9134
addq $9,$11,$9
subq $27,1,$27
$9134:
subl $12,1,$12
subq $9,$4,$9
beq $12,$9124
sll $27,32,$13
sll $9,32,$2
srl $10,32,$1
sll $10,32,$10
bis $2,$1,$9
br $31,$9123
.align 4
$9124:
bis $13,$27,$0
$9136:
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
addq $30,48,$30
ret $31,($26),1
.end bn_div64
EOF
# Pass the literal text on unchanged.
&asm_add($data);
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub bn_mul_words
{
    # Emits the routine $name through the generator helpers.  The
    # emitted code computes, word by word,
    #     r[i] = low word of (a[i]*word + carry)
    #     carry = high word of (a[i]*word + carry)
    # and leaves the final carry in the register obtained from GR("r0").
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = count,
    # 3 = word.
    #
    # Only the one-word-at-a-time loop exists; there is no unrolled
    # loop.  The entry code therefore always branches to "finish",
    # which undoes the -4 bias and enters "last_loop" when count > 0.
    #
    # Removed relative to the previous version, none of which affected
    # what the emitted routine computes:
    #   - a &blt and two loads emitted right after the unconditional
    #     branch (unreachable), together with the two registers taken
    #     from the pool for them and never released;
    #   - an unused here-document holding a copy of the bn_add_words
    #     loop (it referred to $bp/$b0, which do not exist here);
    #   - the misspelled, unused local $couny.
    local($name)=@_;
    local($cc);

    &init_pool(4);
    ($cc)=GR("r0");

    $rp=&wparam(0);
    $ap=&wparam(1);
    $count=&wparam(2);
    $word=&wparam(3);

    &function_begin($name,"");

    &comment("");
    &sub($count,4,$count);
    &mov("zero",$cc);
    &br(&label("finish"));

    ##################################################
    # One word per iteration.

    &set_label("last_loop");

    &ld(($a0)=&NR(1),&QWPw(0,$ap));     # get a
    &mul($a0,$word,($l0)=&NR(1));       # low word of a*word
    &add($ap,$QWS,$ap);
    &muh($a0,$word,($h0)=&NR(1)); &FR($a0);    # high word of a*word
    &add($l0,$cc,$l0);                  # low += carry-in
    &add($rp,$QWS,$rp);
    &sub($count,1,$count);
    &cmpult($l0,$cc,$cc);               # carry out of that addition
    &st($l0,&QWPw(-1,$rp)); &FR($l0);   # r was advanced above, hence -1
    &add($h0,$cc,$cc); &FR($h0);        # next carry = high + carry

    &bgt($count,&label("last_loop"));
    &function_end_A($name);

    ######################################################
    &set_label("finish");
    &add($count,4,$count);
    &bgt($count,&label("last_loop"));

    &set_label("end");
    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub bn_mul_add_words
{
    # Emits the routine $name through the generator helpers.  The
    # emitted code computes, word by word,
    #     r[i] = r[i] + a[i]*word + carry
    # with the carry word propagated from one word to the next, and
    # leaves the final carry in the register obtained from GR("r0").
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = count,
    # 3 = word.
    #
    # Only the one-word-at-a-time loop exists; there is no unrolled
    # loop.  The entry code therefore always branches to "finish",
    # which undoes the -4 bias and enters "last_loop" when count > 0.
    #
    # Removed relative to the previous version, none of which affected
    # what the emitted routine computes:
    #   - a &blt and two loads emitted right after the unconditional
    #     branch (unreachable), together with the two registers taken
    #     from the pool for them and never released;
    #   - an unused here-document holding a copy of the bn_add_words
    #     loop (it referred to $bp/$b0, which do not exist here);
    #   - the misspelled, unused local $couny.
    local($name)=@_;
    local($cc);

    &init_pool(4);
    ($cc)=GR("r0");

    $rp=&wparam(0);
    $ap=&wparam(1);
    $count=&wparam(2);
    $word=&wparam(3);

    &function_begin($name,"");

    &comment("");
    &sub($count,4,$count);
    &mov("zero",$cc);
    &br(&label("finish"));

    ##################################################
    # One word per iteration.

    &set_label("last_loop");

    &ld(($a0)=&NR(1),&QWPw(0,$ap));     # get a
    &ld(($r0)=&NR(1),&QWPw(0,$rp));     # get r
    &mul($a0,$word,($l0)=&NR(1));       # low word of a*word
    &sub($count,1,$count);
    &add($ap,$QWS,$ap);
    &muh($a0,$word,($h0)=&NR(1)); &FR($a0);    # high word of a*word
    &add($r0,$l0,$r0);                  # r += low
    &add($rp,$QWS,$rp);
    &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);   # carry out of r += low
    &add($r0,$cc,$r0);                  # r += carry-in
    &add($h0,$t0,$h0); &FR($t0);        # fold first carry into high
    &cmpult($r0,$cc,$cc);               # carry out of r += carry-in
    &st($r0,&QWPw(-1,$rp)); &FR($r0);   # r was advanced above, hence -1
    &add($h0,$cc,$cc); &FR($h0);        # next carry = high + carry

    &bgt($count,&label("last_loop"));
    &function_end_A($name);

    ######################################################
    &set_label("finish");
    &add($count,4,$count);
    &bgt($count,&label("last_loop"));

    &set_label("end");
    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub mul_add_c
{
    # Emits code that adds the two-word product a*b into the three-word
    # accumulator (c0 low, c1 middle, c2 high):
    #   c0 += lo(a*b)
    #   c1 += hi(a*b) + (carry out of c0)
    #   c2 += (carry out of c1)
    # Arguments are register names; temporaries come from &NR and are
    # handed back with &FR as soon as they are no longer needed.
    local($a,$b,$c0,$c1,$c2)=@_;
    local($l1,$h1,$t1,$t2);

    # Low and high words of the product.
    ($l1)=&NR(1);
    &mul($a,$b,$l1);
    ($h1)=&NR(1);
    &muh($a,$b,$h1);

    # c0 += low; $t1 = carry out of that addition.
    &add($c0,$l1,$c0);
    ($t1)=&NR(1);
    &cmpult($c0,$l1,$t1);
    &FR($l1);

    # Fold the carry into the high word before it is added to c1.
    &add($t1,$h1,$h1);
    &FR($t1);

    # c1 += high; $t2 = carry out of that addition, added to c2.
    &add($c1,$h1,$c1);
    ($t2)=&NR(1);
    &cmpult($c1,$h1,$t2);
    &FR($h1);
    &add($c2,$t2,$c2);
    &FR($t2);
}
sub bn_mul_comba4
{
    # Emits the routine $name: an 8-word product r[0..7] of the 4-word
    # inputs a[0..3] and b[0..3], computed column by column.
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = b.
    #
    # $R holds the running sum of the current column; $H1 and $H2
    # collect the carries out of $R and are summed to seed the next
    # column.  The high word (&muh) of each product is added in the
    # column after the one its low word (&mul) went into.
    #
    # Fixes relative to the previous version:
    #   - $H2 is now cleared before the first column ("$H0", a name
    #     used nowhere else, was cleared instead);
    #   - in the r[2] column the low word of a[0]*b[2] ($r06) is added
    #     to $R; the already-released $r01 was added instead, while the
    #     carry test that follows already compared against $r06;
    #   - every "&add(...) &FR(...)" pair is now two statements (the
    #     missing ';' made Perl parse them as one bitwise-and
    #     expression).
    local($name)=@_;
    local(@a,@b,$r,$c0,$c1,$c2);

    $cnt=1;
    &init_pool(3);

    $rp=&wparam(0);
    $ap=&wparam(1);
    $bp=&wparam(2);

    &function_begin($name,"");

    &comment("");

    &ld(($a[0])=&NR(1),&QWPw(0,$ap));
    &ld(($b[0])=&NR(1),&QWPw(0,$bp));
    &ld(($a[1])=&NR(1),&QWPw(1,$ap));
    &ld(($b[1])=&NR(1),&QWPw(1,$bp));
    &mul($a[0],$b[0],($r00)=&NR(1));
    &ld(($a[2])=&NR(1),&QWPw(2,$ap));
    &ld(($b[2])=&NR(1),&QWPw(2,$bp));
    &muh($a[0],$b[0],($r01)=&NR(1));
    # The pointer registers are released just before their last use;
    # presumably so that &NR can hand the same register back as the
    # load destination -- confirm against the pool implementation.
    &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
    &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
    &mul($a[0],$b[1],($r02)=&NR(1));

    ($R,$H1,$H2)=&NR(3);

    # r[0] = lo(a0*b0)
    &st($r00,&QWPw(0,$rp)); &FR($r00);

    # ---- column 1: hi(a0*b0) + lo(a0*b1) + lo(a1*b0) ----
    &mov("zero",$R);
    &mul($a[1],$b[0],($r03)=&NR(1));

    &mov("zero",$H1);
    &mov("zero",$H2);
    &add($R,$r01,$R);
    &muh($a[0],$b[1],($r04)=&NR(1));
    &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
    &add($R,$r02,$R);
    &add($H1,$t01,$H1); &FR($t01);
    &muh($a[1],$b[0],($r05)=&NR(1));
    &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
    &add($R,$r03,$R);
    &add($H2,$t02,$H2); &FR($t02);
    &mul($a[0],$b[2],($r06)=&NR(1));
    &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
    &add($H1,$t03,$H1); &FR($t03);
    &st($R,&QWPw(1,$rp));

    # ---- column 2 ----
    &add($H1,$H2,$R);

    &mov("zero",$H1);
    &add($R,$r04,$R);
    &mov("zero",$H2);
    &mul($a[1],$b[1],($r07)=&NR(1));
    &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
    &add($R,$r05,$R);
    &add($H1,$t04,$H1); &FR($t04);
    &mul($a[2],$b[0],($r08)=&NR(1));
    &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
    &add($R,$r06,$R);
    &add($H2,$t05,$H2); &FR($t05);
    &muh($a[0],$b[2],($r09)=&NR(1));
    &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
    &add($R,$r07,$R);
    &add($H1,$t06,$H1); &FR($t06);
    &muh($a[1],$b[1],($r10)=&NR(1));
    &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
    &add($R,$r08,$R);
    &add($H2,$t07,$H2); &FR($t07);
    &muh($a[2],$b[0],($r11)=&NR(1));
    &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
    &add($H1,$t08,$H1); &FR($t08);
    &st($R,&QWPw(2,$rp));

    # ---- column 3 ----
    &add($H1,$H2,$R);

    &mov("zero",$H1);
    &add($R,$r09,$R);
    &mov("zero",$H2);
    &mul($a[0],$b[3],($r12)=&NR(1));
    &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
    &add($R,$r10,$R);
    &add($H1,$t09,$H1); &FR($t09);
    &mul($a[1],$b[2],($r13)=&NR(1));
    &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
    &add($R,$r11,$R);
    &add($H1,$t10,$H1); &FR($t10);
    &mul($a[2],$b[1],($r14)=&NR(1));
    &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
    &add($R,$r12,$R);
    &add($H1,$t11,$H1); &FR($t11);
    &mul($a[3],$b[0],($r15)=&NR(1));
    &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
    &add($R,$r13,$R);
    &add($H1,$t12,$H1); &FR($t12);
    &muh($a[0],$b[3],($r16)=&NR(1));
    &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
    &add($R,$r14,$R);
    &add($H1,$t13,$H1); &FR($t13);
    &muh($a[1],$b[2],($r17)=&NR(1));
    &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
    &add($R,$r15,$R);
    &add($H1,$t14,$H1); &FR($t14);
    &muh($a[2],$b[1],($r18)=&NR(1));
    &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
    &add($H1,$t15,$H1); &FR($t15);
    &st($R,&QWPw(3,$rp));

    # ---- column 4 ----
    &add($H1,$H2,$R);

    &mov("zero",$H1);
    &add($R,$r16,$R);
    &mov("zero",$H2);
    &muh($a[3],$b[0],($r19)=&NR(1));
    &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
    &add($R,$r17,$R);
    &add($H1,$t16,$H1); &FR($t16);
    &mul($a[1],$b[3],($r20)=&NR(1));
    &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
    &add($R,$r18,$R);
    &add($H1,$t17,$H1); &FR($t17);
    &mul($a[2],$b[2],($r21)=&NR(1));
    &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
    &add($R,$r19,$R);
    &add($H1,$t18,$H1); &FR($t18);
    &mul($a[3],$b[1],($r22)=&NR(1));
    &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
    &add($R,$r20,$R);
    &add($H1,$t19,$H1); &FR($t19);
    &muh($a[1],$b[3],($r23)=&NR(1));
    &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
    &add($R,$r21,$R);
    &add($H1,$t20,$H1); &FR($t20);
    &muh($a[2],$b[2],($r24)=&NR(1));
    &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
    &add($R,$r22,$R);
    &add($H1,$t21,$H1); &FR($t21);
    &muh($a[3],$b[1],($r25)=&NR(1));
    &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
    &add($H1,$t22,$H1); &FR($t22);
    &st($R,&QWPw(4,$rp));

    # ---- column 5 ----
    &add($H1,$H2,$R);

    &mov("zero",$H1);
    &add($R,$r23,$R);
    &mov("zero",$H2);
    &mul($a[2],$b[3],($r26)=&NR(1));
    &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
    &add($R,$r24,$R);
    &add($H1,$t23,$H1); &FR($t23);
    &mul($a[3],$b[2],($r27)=&NR(1));
    &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
    &add($R,$r25,$R);
    &add($H1,$t24,$H1); &FR($t24);
    &muh($a[2],$b[3],($r28)=&NR(1));
    &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
    &add($R,$r26,$R);
    &add($H1,$t25,$H1); &FR($t25);
    &muh($a[3],$b[2],($r29)=&NR(1));
    &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
    &add($R,$r27,$R);
    &add($H1,$t26,$H1); &FR($t26);
    &mul($a[3],$b[3],($r30)=&NR(1));
    &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
    &add($H1,$t27,$H1); &FR($t27);
    &st($R,&QWPw(5,$rp));

    # ---- column 6 ----
    &add($H1,$H2,$R);

    &mov("zero",$H1);
    &add($R,$r28,$R);
    &mov("zero",$H2);
    &muh($a[3],$b[3],($r31)=&NR(1));
    &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
    &add($R,$r29,$R);
    &add($H1,$t28,$H1); &FR($t28);
    ############
    &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
    &add($R,$r30,$R);
    &add($H1,$t29,$H1); &FR($t29);
    ############
    &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
    &add($H1,$t30,$H1); &FR($t30);
    &st($R,&QWPw(6,$rp));

    # ---- column 7: carries + hi(a3*b3) ----
    &add($H1,$H2,$R);

    &add($R,$r31,$R); &FR($r31);
    &st($R,&QWPw(7,$rp));

    &FR($R,$H1,$H2);
    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub mul_add_c
{
    # Emits code that adds the two-word product a*b into the three-word
    # accumulator (c0 low, c1 middle, c2 high):
    #   c0 += lo(a*b)
    #   c1 += hi(a*b) + (carry out of c0)
    #   c2 += (carry out of c1)
    # Arguments are register names; temporaries come from &NR and are
    # handed back with &FR as soon as they are no longer needed.
    # Each call also reports and bumps the global call counter $cnt on
    # STDERR.
    local($a,$b,$c0,$c1,$c2)=@_;
    local($l1,$h1,$t1,$t2);

    print STDERR "count=$cnt\n";
    $cnt++;

    # Low and high words of the product.
    ($l1)=&NR(1);
    &mul($a,$b,$l1);
    ($h1)=&NR(1);
    &muh($a,$b,$h1);

    # c0 += low; $t1 = carry out of that addition.
    &add($c0,$l1,$c0);
    ($t1)=&NR(1);
    &cmpult($c0,$l1,$t1);
    &FR($l1);

    # Fold the carry into the high word before it is added to c1.
    &add($t1,$h1,$h1);
    &FR($t1);

    # c1 += high; $t2 = carry out of that addition, added to c2.
    &add($c1,$h1,$c1);
    ($t2)=&NR(1);
    &cmpult($c1,$h1,$t2);
    &FR($h1);
    &add($c2,$t2,$c2);
    &FR($t2);
}
sub bn_mul_comba4
{
    # Emits the routine $name: an 8-word product r[0..7] of the 4-word
    # inputs a[0..3] and b[0..3], computed column by column with
    # &mul_add_c.
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = b.
    #
    # ($c0,$c1,$c2) is a three-word accumulator.  After a column is
    # complete its low word is stored, that register is recycled, the
    # three names are rotated and the new top word is cleared.
    #
    # Fix: the recycled register is now requested with &NR(1), as
    # bn_mul_comba8 does.  The previous &NR($c0) passed a register name
    # where every other call site passes a count.
    local($name)=@_;
    local(@a,@b,$r,$c0,$c1,$c2);

    $cnt=1;
    &init_pool(3);

    $rp=&wparam(0);
    $ap=&wparam(1);
    $bp=&wparam(2);

    &function_begin($name,"");

    &comment("");

    &ld(($a[0])=&NR(1),&QWPw(0,$ap));
    &ld(($b[0])=&NR(1),&QWPw(0,$bp));
    &ld(($a[1])=&NR(1),&QWPw(1,$ap));
    &ld(($b[1])=&NR(1),&QWPw(1,$bp));
    &ld(($a[2])=&NR(1),&QWPw(2,$ap));
    &ld(($b[2])=&NR(1),&QWPw(2,$bp));
    &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
    &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);

    ($c0,$c1,$c2)=&NR(3);

    # column 0: a0*b0
    &mov("zero",$c2);
    &mul($a[0],$b[0],$c0);
    &muh($a[0],$b[0],$c1);
    &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 1
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 2
    &mov("zero",$c2);
    &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 3 (a0 and b0 are used for the last time here)
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
    &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
    &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 4
    &mov("zero",$c2);
    &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
    &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
    &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 5
    &mov("zero",$c2);
    &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
    &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
    &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 6, and the top word r[7]
    &mov("zero",$c2);
    &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
    &st($c0,&QWPw(6,$rp));
    &st($c1,&QWPw(7,$rp));

    &FR($c0,$c1,$c2);

    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub bn_mul_comba8
{
    # Emits the routine $name: a 16-word product r[0..15] of the 8-word
    # inputs a[0..7] and b[0..7], computed column by column with
    # &mul_add_c.
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = b.
    #
    # $reg_s0 and $reg_s1 are saved in two stack slots on entry, made
    # available to the register pool, and reloaded before returning.
    #
    # ($c0,$c1,$c2) is a three-word accumulator.  After a column is
    # complete its low word is stored, that register is recycled, the
    # three names are rotated and the new top word is cleared.
    #
    # Fix: a[4..7] and b[4..7] are now loaded from word offsets 4..7.
    # All eight loads previously used offset 1, so the upper half of
    # both inputs was replaced by copies of a[1] and b[1].
    local($name)=@_;
    local(@a,@b,$r,$c0,$c1,$c2);

    $cnt=1;
    &init_pool(3);

    $rp=&wparam(0);
    $ap=&wparam(1);
    $bp=&wparam(2);

    &function_begin($name,"");

    &comment("");

    &stack_push(2);
    &ld(($a[0])=&NR(1),&QWPw(0,$ap));
    &ld(($b[0])=&NR(1),&QWPw(0,$bp));
    &st($reg_s0,&swtmp(0)); &FR($reg_s0);
    &st($reg_s1,&swtmp(1)); &FR($reg_s1);
    &ld(($a[1])=&NR(1),&QWPw(1,$ap));
    &ld(($b[1])=&NR(1),&QWPw(1,$bp));
    &ld(($a[2])=&NR(1),&QWPw(2,$ap));
    &ld(($b[2])=&NR(1),&QWPw(2,$bp));
    &ld(($a[3])=&NR(1),&QWPw(3,$ap));
    &ld(($b[3])=&NR(1),&QWPw(3,$bp));
    &ld(($a[4])=&NR(1),&QWPw(4,$ap));
    &ld(($b[4])=&NR(1),&QWPw(4,$bp));
    &ld(($a[5])=&NR(1),&QWPw(5,$ap));
    &ld(($b[5])=&NR(1),&QWPw(5,$bp));
    &ld(($a[6])=&NR(1),&QWPw(6,$ap));
    &ld(($b[6])=&NR(1),&QWPw(6,$bp));
    &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
    &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp);

    ($c0,$c1,$c2)=&NR(3);

    # column 0: a0*b0
    &mov("zero",$c2);
    &mul($a[0],$b[0],$c0);
    &muh($a[0],$b[0],$c1);
    &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 1
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 2
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 3
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 4
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 5
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 6
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 7 (a0 and b0 are used for the last time here; from now on
    # each column releases the pair of inputs it uses last)
    &mov("zero",$c2);
    &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
    &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
    &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 8
    &mov("zero",$c2);
    &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
    &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
    &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 9
    &mov("zero",$c2);
    &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
    &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
    &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 10
    &mov("zero",$c2);
    &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
    &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
    &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 11
    &mov("zero",$c2);
    &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
    &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
    &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 12
    &mov("zero",$c2);
    &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
    &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
    &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 13
    &mov("zero",$c2);
    &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
    &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
    &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);

    # column 14, and the top word r[15]
    &mov("zero",$c2);
    &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
    &st($c0,&QWPw(14,$rp));
    &st($c1,&QWPw(15,$rp));

    &FR($c0,$c1,$c2);

    # Reload the two registers saved on entry and release the slots.
    &ld($reg_s0,&swtmp(0));
    &ld($reg_s1,&swtmp(1));
    &stack_pop(2);

    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
sub bn_sqr_words
{
    # Emits the routine $name through the generator helpers.  For each
    # input word the emitted code stores
    #     r[2i]   = low word of a[i]*a[i]
    #     r[2i+1] = high word of a[i]*a[i]
    # Parameters of the emitted routine: 0 = r, 1 = a, 2 = count.
    # The register obtained from GR("r0") is cleared on entry and not
    # otherwise used.
    #
    # Only the one-word-at-a-time loop exists; there is no unrolled
    # loop.  The entry code therefore always branches to "finish",
    # which undoes the -4 bias and enters "last_loop" when count > 0.
    #
    # Removed relative to the previous version, none of which affected
    # what the emitted routine computes:
    #   - a &blt and two loads emitted right after the unconditional
    #     branch (unreachable), together with the two registers taken
    #     from the pool for them and never released;
    #   - an unused here-document holding a copy of the bn_add_words
    #     loop (it referred to $bp/$b0, which do not exist here);
    #   - the misspelled, unused local $couny.
    local($name)=@_;
    local($cc);

    &init_pool(3);
    ($cc)=GR("r0");

    $rp=&wparam(0);
    $ap=&wparam(1);
    $count=&wparam(2);

    &function_begin($name,"");

    &comment("");
    &sub($count,4,$count);
    &mov("zero",$cc);
    &br(&label("finish"));

    ##################################################
    # One input word (two output words) per iteration.

    &set_label("last_loop");

    &ld(($a0)=&NR(1),&QWPw(0,$ap));     # get a
    &mul($a0,$a0,($l0)=&NR(1));         # low word of a*a
    &add($ap,$QWS,$ap);
    &add($rp,2*$QWS,$rp);
    &sub($count,1,$count);
    &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);      # high word of a*a
    # r was advanced by two words above, hence the negative offsets.
    &st($l0,&QWPw(-2,$rp)); &FR($l0);
    &st($h0,&QWPw(-1,$rp)); &FR($h0);

    &bgt($count,&label("last_loop"));
    &function_end_A($name);

    ######################################################
    &set_label("finish");
    &add($count,4,$count);
    &bgt($count,&label("last_loop"));

    &set_label("end");
    &function_end($name);

    &fin_pool;
}
1;
#!/usr/local/bin/perl
# alpha assembler
# sqr_add_c(a, c0, c1, c2)
#
# Emit code that adds the double-word product a*a into the three-word
# accumulator held in registers c0 (lowest), c1 and c2 (highest).
# Arguments are register names; scratch registers come from &NR and are
# returned with &FR.
#
# Carry handling: the carry out of c0 is folded into the high product word
# *before* that word is added to c1.  The high word of a single-word by
# single-word product is at most (word max - 1), so adding a 0/1 carry to
# it cannot wrap.  Adding the carry to c1 after the c1 carry-out has been
# sampled (as this routine used to do) silently drops a carry into c2
# whenever c1 becomes all-ones.
sub sqr_add_c
	{
	local($a,$c0,$c1,$c2)=@_;
	local($l1,$h1,$t1);

	&mul($a,$a,($l1)=&NR(1));		# l1 = low  word of a*a
	&muh($a,$a,($h1)=&NR(1));		# h1 = high word of a*a

	&add($c0,$l1,$c0);			# c0 += l1
	&cmpult($c0,$l1,($t1)=&NR(1));	&FR($l1);	# t1 = carry out of c0
	&add($h1,$t1,$h1);			# h1 += carry (cannot wrap)

	&add($c1,$h1,$c1);			# c1 += h1
	&cmpult($c1,$h1,$t1);		&FR($h1);	# t1 = carry out of c1
	&add($c2,$t1,$c2);		&FR($t1);	# c2 += carry
	}
# sqr_add_c2(a, b, c0, c1, c2)
#
# Emit code that adds 2*a*b into the three-word accumulator held in
# registers c0 (lowest), c1 and c2 (highest).  Arguments are register
# names; scratch registers come from &NR and are returned with &FR.
#
# The product is computed once and then accumulated twice.  In each pass
# the carry out of c0 is folded into the high product word before that
# word is added to c1; the high word of a single-word by single-word
# product is at most (word max - 1), so this cannot wrap.  Doubling the
# product first and adding the c0 carry to c1 afterwards (the previous
# scheme) can lose a carry into c2 when c1 becomes all-ones.
sub sqr_add_c2
	{
	local($a,$b,$c0,$c1,$c2)=@_;
	local($l1,$h1,$t1);

	&mul($a,$b,($l1)=&NR(1));		# l1 = low  word of a*b
	&muh($a,$b,($h1)=&NR(1));		# h1 = high word of a*b

	# First pass: (c2:c1:c0) += h1:l1.  h1 must survive for the second
	# pass, so the carry-adjusted high word lives in t1.
	&add($c0,$l1,$c0);			# c0 += l1
	&cmpult($c0,$l1,($t1)=&NR(1));		# t1 = carry out of c0
	&add($h1,$t1,$t1);			# t1 = h1 + carry (cannot wrap)
	&add($c1,$t1,$c1);			# c1 += t1
	&cmpult($c1,$t1,$t1);			# t1 = carry out of c1
	&add($c2,$t1,$c2);			# c2 += carry

	# Second pass: same again; l1 and h1 are dead afterwards and are
	# released as soon as they have been consumed.
	&add($c0,$l1,$c0);			# c0 += l1
	&cmpult($c0,$l1,$t1);		&FR($l1);	# t1 = carry out of c0
	&add($h1,$t1,$h1);			# h1 += carry (cannot wrap)
	&add($c1,$h1,$c1);			# c1 += h1
	&cmpult($c1,$h1,$t1);		&FR($h1);	# t1 = carry out of c1
	&add($c2,$t1,$c2);		&FR($t1);	# c2 += carry
	}
# bn_sqr_comba4(name)
#
# Emit a routine called $name that reads four words from the pointer in
# wparam(1), squares them column by column and writes eight result words
# to the pointer in wparam(0).  @acc holds the three accumulator registers;
# after each column its lowest word is stored and the array is rotated so
# that the stored register becomes the new (zeroed) top word.
sub bn_sqr_comba4
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local(@acc);

	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	# Fetch the four input words; the source pointer register is released
	# once the last one has been loaded.
	($a[0])=&NR(1);		&ld($a[0],&QWPw(0,$ap));
	($a[1])=&NR(1);		&ld($a[1],&QWPw(1,$ap));
	($a[2])=&NR(1);		&ld($a[2],&QWPw(2,$ap));
	($a[3])=&NR(1);		&ld($a[3],&QWPw(3,$ap));
	&FR($ap);

	# Column 0: a0*a0 goes straight into the two low accumulator words.
	@acc=&NR(3);
	&mov("zero",$acc[2]);
	&mul($a[0],$a[0],$acc[0]);
	&muh($a[0],$a[0],$acc[1]);
	&st($acc[0],&QWPw(0,$rp));

	# Column 1: 2*a0*a1
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c2($a[0],$a[1],@acc);
	&st($acc[0],&QWPw(1,$rp));

	# Column 2: a1*a1 + 2*a2*a0
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c($a[1],@acc);
	&sqr_add_c2($a[2],$a[0],@acc);
	&st($acc[0],&QWPw(2,$rp));

	# Column 3: 2*a3*a0 + 2*a2*a1
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c2($a[3],$a[0],@acc);
	&sqr_add_c2($a[2],$a[1],@acc);
	&st($acc[0],&QWPw(3,$rp));

	# Column 4: a2*a2 + 2*a3*a1
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c($a[2],@acc);
	&sqr_add_c2($a[3],$a[1],@acc);
	&st($acc[0],&QWPw(4,$rp));

	# Column 5: 2*a3*a2
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c2($a[3],$a[2],@acc);
	&st($acc[0],&QWPw(5,$rp));

	# Column 6: a3*a3; the remaining middle word is result word 7.
	push(@acc,shift(@acc));
	&mov("zero",$acc[2]);
	&sqr_add_c($a[3],@acc);
	&st($acc[0],&QWPw(6,$rp));
	&st($acc[1],&QWPw(7,$rp));

	&function_end($name);

	&fin_pool;
	}
1;
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
#!/usr/local/bin/perl
# I have this in perl so I can use more useful register names and then convert
# them into alpha registers.
#
# Driver: load the perlasm support code and the per-routine generator
# modules, then emit every bignum routine between asm_init and asm_finish.
push(@INC,"perlasm","../../perlasm");
require "alpha.pl";

# Generator modules, loaded in this order.
foreach $bn_module ("mul_add","mul","sqr","add","sub",
		    "mul_c8","mul_c4","sqr_c4","sqr_c8","div")
	{
	require "alpha/$bn_module.pl";
	}

&asm_init($ARGV[0],$0);

# Each generator sub has the same name as the routine it emits and takes
# that name as its only argument; they are invoked in output order.
foreach $bn_routine ("bn_mul_words","bn_sqr_words","bn_mul_add_words",
		     "bn_add_words","bn_sub_words","bn_div_words",
		     "bn_mul_comba8","bn_mul_comba4",
		     "bn_sqr_comba4","bn_sqr_comba8")
	{
	&$bn_routine($bn_routine);
	}

&asm_finish();
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册