提交 8b71d354 编写于 作者: Andy Polyakov

nasm fixes.

上级 760e3535
...@@ -340,7 +340,7 @@ sub bn_sqr_words ...@@ -340,7 +340,7 @@ sub bn_sqr_words
&movd("mm0",&DWP(0,$a)); # mm0 = a[i] &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
&pmuludq("mm0","mm0"); # a[i] *= a[i] &pmuludq("mm0","mm0"); # a[i] *= a[i]
&lea($a,&DWP(4,$a)); # a++ &lea($a,&DWP(4,$a)); # a++
&movq(&DWP(0,$r),"mm0"); # r[i] = a[i]*a[i] &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
&sub($c,1); &sub($c,1);
&lea($r,&DWP(8,$r)); # r += 2 &lea($r,&DWP(8,$r)); # r += 2
&jnz(&label("sqr_sse2_loop")); &jnz(&label("sqr_sse2_loop"));
......
...@@ -51,7 +51,7 @@ $_rp=&DWP(4*1,"esp"); ...@@ -51,7 +51,7 @@ $_rp=&DWP(4*1,"esp");
$_ap=&DWP(4*2,"esp"); $_ap=&DWP(4*2,"esp");
$_bp=&DWP(4*3,"esp"); $_bp=&DWP(4*3,"esp");
$_np=&DWP(4*4,"esp"); $_np=&DWP(4*4,"esp");
$_n0=&DWP(4*5,"esp"); $_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
$_sp=&DWP(4*6,"esp"); $_sp=&DWP(4*6,"esp");
$_bpend=&DWP(4*7,"esp"); $_bpend=&DWP(4*7,"esp");
$frame=32; # size of above frame rounded up to 16n $frame=32; # size of above frame rounded up to 16n
...@@ -136,7 +136,7 @@ $mask="mm7"; ...@@ -136,7 +136,7 @@ $mask="mm7";
&movq ($acc0,$mul1); # I wish movd worked for &movq ($acc0,$mul1); # I wish movd worked for
&pand ($acc0,$mask); # inter-register transfers &pand ($acc0,$mask); # inter-register transfers
&pmuludq($mul1,$_n0); # *=n0 &pmuludq($mul1,$_n0q); # *=n0
&pmuludq($car1,$mul1); # "t[0]"*np[0]*n0 &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
&paddq ($car1,$acc0); &paddq ($car1,$acc0);
...@@ -181,7 +181,7 @@ $mask="mm7"; ...@@ -181,7 +181,7 @@ $mask="mm7";
&psrlq ($car1,32); &psrlq ($car1,32);
&paddq ($car1,$car0); &paddq ($car1,$car0);
&movq (&DWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
&inc ($i); # i++ &inc ($i); # i++
&set_label("outer"); &set_label("outer");
...@@ -198,7 +198,7 @@ $mask="mm7"; ...@@ -198,7 +198,7 @@ $mask="mm7";
&movq ($car0,$mul1); &movq ($car0,$mul1);
&pand ($acc0,$mask); &pand ($acc0,$mask);
&pmuludq($mul1,$_n0); # *=n0 &pmuludq($mul1,$_n0q); # *=n0
&pmuludq($car1,$mul1); &pmuludq($car1,$mul1);
&paddq ($car1,$acc0); &paddq ($car1,$acc0);
...@@ -250,7 +250,7 @@ $mask="mm7"; ...@@ -250,7 +250,7 @@ $mask="mm7";
&movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num] &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
&paddq ($car1,$car0); &paddq ($car1,$car0);
&paddq ($car1,$temp); &paddq ($car1,$temp);
&movq (&DWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
&lea ($i,&DWP(1,$i)); # i++ &lea ($i,&DWP(1,$i)); # i++
&cmp ($i,$num); &cmp ($i,$num);
......
...@@ -81,7 +81,7 @@ sub get_mem ...@@ -81,7 +81,7 @@ sub get_mem
} }
sub ::BP { &get_mem("BYTE",@_); } sub ::BP { &get_mem("BYTE",@_); }
sub ::DWP { &get_mem("DWORD",@_); } sub ::DWP { &get_mem("DWORD",@_); }
sub ::QWP { &get_mem("QWORD",@_); } sub ::QWP { &get_mem("",@_); }
sub ::BC { (($::mwerks)?"":"BYTE ")."@_"; } sub ::BC { (($::mwerks)?"":"BYTE ")."@_"; }
sub ::DWC { (($::mwerks)?"":"DWORD ")."@_"; } sub ::DWC { (($::mwerks)?"":"DWORD ")."@_"; }
...@@ -160,6 +160,7 @@ dd ${lprfx}OPENSSL_ia32cap_init ...@@ -160,6 +160,7 @@ dd ${lprfx}OPENSSL_ia32cap_init
segment .bss segment .bss
common ${under}OPENSSL_ia32cap_P 4 common ${under}OPENSSL_ia32cap_P 4
___ ___
grep {s/(^extern\s+${under}OPENSSL_ia32cap_P)/\;$1/} @out;
push (@out,$tmp); push (@out,$tmp);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册