提交 76e624a0 编写于 作者: A Andy Polyakov

bn/asm/x86_64*: add DWARF CFI directives.

Reviewed-by: Rich Salz <rsalz@openssl.org>
上级 a3b5684f
...@@ -168,13 +168,21 @@ $code.=<<___; ...@@ -168,13 +168,21 @@ $code.=<<___;
.type rsaz_1024_sqr_avx2,\@function,5 .type rsaz_1024_sqr_avx2,\@function,5
.align 64 .align 64
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2
.cfi_startproc
lea (%rsp), %rax lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
vzeroupper vzeroupper
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
...@@ -193,6 +201,7 @@ $code.=<<___ if ($win64); ...@@ -193,6 +201,7 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
mov %rax,%rbp mov %rax,%rbp
.cfi_def_cfa_register %rbp
mov %rdx, $np # reassigned argument mov %rdx, $np # reassigned argument
sub \$$FrameSize, %rsp sub \$$FrameSize, %rsp
mov $np, $tmp mov $np, $tmp
...@@ -825,6 +834,7 @@ $code.=<<___; ...@@ -825,6 +834,7 @@ $code.=<<___;
vzeroall vzeroall
mov %rbp, %rax mov %rbp, %rax
.cfi_def_cfa_register %rax
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
.Lsqr_1024_in_tail: .Lsqr_1024_in_tail:
...@@ -841,14 +851,22 @@ $code.=<<___ if ($win64); ...@@ -841,14 +851,22 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
mov -48(%rax),%r15 mov -48(%rax),%r15
.cfi_restore %r15
mov -40(%rax),%r14 mov -40(%rax),%r14
.cfi_restore %r14
mov -32(%rax),%r13 mov -32(%rax),%r13
.cfi_restore %r13
mov -24(%rax),%r12 mov -24(%rax),%r12
.cfi_restore %r12
mov -16(%rax),%rbp mov -16(%rax),%rbp
.cfi_restore %rbp
mov -8(%rax),%rbx mov -8(%rax),%rbx
.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp lea (%rax),%rsp # restore %rsp
.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue: .Lsqr_1024_epilogue:
ret ret
.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 .size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
___ ___
} }
...@@ -901,13 +919,21 @@ $code.=<<___; ...@@ -901,13 +919,21 @@ $code.=<<___;
.type rsaz_1024_mul_avx2,\@function,5 .type rsaz_1024_mul_avx2,\@function,5
.align 64 .align 64
rsaz_1024_mul_avx2: rsaz_1024_mul_avx2:
.cfi_startproc
lea (%rsp), %rax lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
vzeroupper vzeroupper
...@@ -926,6 +952,7 @@ $code.=<<___ if ($win64); ...@@ -926,6 +952,7 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
mov %rax,%rbp mov %rax,%rbp
.cfi_def_cfa_register %rbp
vzeroall vzeroall
mov %rdx, $bp # reassigned argument mov %rdx, $bp # reassigned argument
sub \$64,%rsp sub \$64,%rsp
...@@ -1459,6 +1486,7 @@ $code.=<<___; ...@@ -1459,6 +1486,7 @@ $code.=<<___;
vzeroupper vzeroupper
mov %rbp, %rax mov %rbp, %rax
.cfi_def_cfa_register %rax
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
.Lmul_1024_in_tail: .Lmul_1024_in_tail:
...@@ -1475,14 +1503,22 @@ $code.=<<___ if ($win64); ...@@ -1475,14 +1503,22 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
mov -48(%rax),%r15 mov -48(%rax),%r15
.cfi_restore %r15
mov -40(%rax),%r14 mov -40(%rax),%r14
.cfi_restore %r14
mov -32(%rax),%r13 mov -32(%rax),%r13
.cfi_restore %r13
mov -24(%rax),%r12 mov -24(%rax),%r12
.cfi_restore %r12
mov -16(%rax),%rbp mov -16(%rax),%rbp
.cfi_restore %rbp
mov -8(%rax),%rbx mov -8(%rax),%rbx
.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp lea (%rax),%rsp # restore %rsp
.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue: .Lmul_1024_epilogue:
ret ret
.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 .size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
___ ___
} }
...@@ -1601,8 +1637,10 @@ rsaz_1024_scatter5_avx2: ...@@ -1601,8 +1637,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,\@abi-omnipotent .type rsaz_1024_gather5_avx2,\@abi-omnipotent
.align 32 .align 32
rsaz_1024_gather5_avx2: rsaz_1024_gather5_avx2:
.cfi_startproc
vzeroupper vzeroupper
mov %rsp,%r11 mov %rsp,%r11
.cfi_def_cfa_register %r11
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
lea -0x88(%rsp),%rax lea -0x88(%rsp),%rax
...@@ -1743,7 +1781,9 @@ $code.=<<___ if ($win64); ...@@ -1743,7 +1781,9 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
lea (%r11),%rsp lea (%r11),%rsp
.cfi_def_cfa_register %rsp
ret ret
.cfi_endproc
.LSEH_end_rsaz_1024_gather5: .LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___ ___
......
...@@ -138,14 +138,22 @@ $code.=<<___; ...@@ -138,14 +138,22 @@ $code.=<<___;
.type rsaz_512_sqr,\@function,5 .type rsaz_512_sqr,\@function,5
.align 32 .align 32
rsaz_512_sqr: # 25-29% faster than rsaz_512_mul rsaz_512_sqr: # 25-29% faster than rsaz_512_mul
.cfi_startproc
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
subq \$128+24, %rsp subq \$128+24, %rsp
.cfi_adjust_cfa_offset 128+24
.Lsqr_body: .Lsqr_body:
movq $mod, %rbp # common argument movq $mod, %rbp # common argument
movq ($inp), %rdx movq ($inp), %rdx
...@@ -800,15 +808,24 @@ ___ ...@@ -800,15 +808,24 @@ ___
$code.=<<___; $code.=<<___;
leaq 128+24+48(%rsp), %rax leaq 128+24+48(%rsp), %rax
.cfi_def_cfa %rax,8
movq -48(%rax), %r15 movq -48(%rax), %r15
.cfi_restore %r15
movq -40(%rax), %r14 movq -40(%rax), %r14
.cfi_restore %r14
movq -32(%rax), %r13 movq -32(%rax), %r13
.cfi_restore %r13
movq -24(%rax), %r12 movq -24(%rax), %r12
.cfi_restore %r12
movq -16(%rax), %rbp movq -16(%rax), %rbp
.cfi_restore %rbp
movq -8(%rax), %rbx movq -8(%rax), %rbx
.cfi_restore %rbx
leaq (%rax), %rsp leaq (%rax), %rsp
.cfi_def_cfa_register %rsp
.Lsqr_epilogue: .Lsqr_epilogue:
ret ret
.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr .size rsaz_512_sqr,.-rsaz_512_sqr
___ ___
} }
...@@ -819,14 +836,22 @@ $code.=<<___; ...@@ -819,14 +836,22 @@ $code.=<<___;
.type rsaz_512_mul,\@function,5 .type rsaz_512_mul,\@function,5
.align 32 .align 32
rsaz_512_mul: rsaz_512_mul:
.cfi_startproc
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
subq \$128+24, %rsp subq \$128+24, %rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_body: .Lmul_body:
movq $out, %xmm0 # off-load arguments movq $out, %xmm0 # off-load arguments
movq $mod, %xmm1 movq $mod, %xmm1
...@@ -896,15 +921,24 @@ $code.=<<___; ...@@ -896,15 +921,24 @@ $code.=<<___;
call __rsaz_512_subtract call __rsaz_512_subtract
leaq 128+24+48(%rsp), %rax leaq 128+24+48(%rsp), %rax
.cfi_def_cfa %rax,8
movq -48(%rax), %r15 movq -48(%rax), %r15
.cfi_restore %r15
movq -40(%rax), %r14 movq -40(%rax), %r14
.cfi_restore %r14
movq -32(%rax), %r13 movq -32(%rax), %r13
.cfi_restore %r13
movq -24(%rax), %r12 movq -24(%rax), %r12
.cfi_restore %r12
movq -16(%rax), %rbp movq -16(%rax), %rbp
.cfi_restore %rbp
movq -8(%rax), %rbx movq -8(%rax), %rbx
.cfi_restore %rbx
leaq (%rax), %rsp leaq (%rax), %rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue: .Lmul_epilogue:
ret ret
.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul .size rsaz_512_mul,.-rsaz_512_mul
___ ___
} }
...@@ -915,14 +949,22 @@ $code.=<<___; ...@@ -915,14 +949,22 @@ $code.=<<___;
.type rsaz_512_mul_gather4,\@function,6 .type rsaz_512_mul_gather4,\@function,6
.align 32 .align 32
rsaz_512_mul_gather4: rsaz_512_mul_gather4:
.cfi_startproc
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
subq \$`128+24+($win64?0xb0:0)`, %rsp subq \$`128+24+($win64?0xb0:0)`, %rsp
.cfi_adjust_cfa_offset `128+24+($win64?0xb0:0)`
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
movaps %xmm6,0xa0(%rsp) movaps %xmm6,0xa0(%rsp)
...@@ -1348,15 +1390,24 @@ $code.=<<___ if ($win64); ...@@ -1348,15 +1390,24 @@ $code.=<<___ if ($win64);
lea 0xb0(%rax),%rax lea 0xb0(%rax),%rax
___ ___
$code.=<<___; $code.=<<___;
.cfi_def_cfa %rax,8
movq -48(%rax), %r15 movq -48(%rax), %r15
.cfi_restore %r15
movq -40(%rax), %r14 movq -40(%rax), %r14
.cfi_restore %r14
movq -32(%rax), %r13 movq -32(%rax), %r13
.cfi_restore %r13
movq -24(%rax), %r12 movq -24(%rax), %r12
.cfi_restore %r12
movq -16(%rax), %rbp movq -16(%rax), %rbp
.cfi_restore %rbp
movq -8(%rax), %rbx movq -8(%rax), %rbx
.cfi_restore %rbx
leaq (%rax), %rsp leaq (%rax), %rsp
.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue: .Lmul_gather4_epilogue:
ret ret
.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
___ ___
} }
...@@ -1367,15 +1418,23 @@ $code.=<<___; ...@@ -1367,15 +1418,23 @@ $code.=<<___;
.type rsaz_512_mul_scatter4,\@function,6 .type rsaz_512_mul_scatter4,\@function,6
.align 32 .align 32
rsaz_512_mul_scatter4: rsaz_512_mul_scatter4:
.cfi_startproc
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
mov $pwr, $pwr mov $pwr, $pwr
subq \$128+24, %rsp subq \$128+24, %rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body: .Lmul_scatter4_body:
leaq ($tbl,$pwr,8), $tbl leaq ($tbl,$pwr,8), $tbl
movq $out, %xmm0 # off-load arguments movq $out, %xmm0 # off-load arguments
...@@ -1458,15 +1517,24 @@ $code.=<<___; ...@@ -1458,15 +1517,24 @@ $code.=<<___;
movq %r15, 128*7($inp) movq %r15, 128*7($inp)
leaq 128+24+48(%rsp), %rax leaq 128+24+48(%rsp), %rax
.cfi_def_cfa %rax,8
movq -48(%rax), %r15 movq -48(%rax), %r15
.cfi_restore %r15
movq -40(%rax), %r14 movq -40(%rax), %r14
.cfi_restore %r14
movq -32(%rax), %r13 movq -32(%rax), %r13
.cfi_restore %r13
movq -24(%rax), %r12 movq -24(%rax), %r12
.cfi_restore %r12
movq -16(%rax), %rbp movq -16(%rax), %rbp
.cfi_restore %rbp
movq -8(%rax), %rbx movq -8(%rax), %rbx
.cfi_restore %rbx
leaq (%rax), %rsp leaq (%rax), %rsp
.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue: .Lmul_scatter4_epilogue:
ret ret
.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
___ ___
} }
...@@ -1477,14 +1545,22 @@ $code.=<<___; ...@@ -1477,14 +1545,22 @@ $code.=<<___;
.type rsaz_512_mul_by_one,\@function,4 .type rsaz_512_mul_by_one,\@function,4
.align 32 .align 32
rsaz_512_mul_by_one: rsaz_512_mul_by_one:
.cfi_startproc
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
subq \$128+24, %rsp subq \$128+24, %rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body: .Lmul_by_one_body:
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
...@@ -1539,15 +1615,24 @@ $code.=<<___; ...@@ -1539,15 +1615,24 @@ $code.=<<___;
movq %r15, 56($out) movq %r15, 56($out)
leaq 128+24+48(%rsp), %rax leaq 128+24+48(%rsp), %rax
.cfi_def_cfa %rax,8
movq -48(%rax), %r15 movq -48(%rax), %r15
.cfi_restore %r15
movq -40(%rax), %r14 movq -40(%rax), %r14
.cfi_restore %r14
movq -32(%rax), %r13 movq -32(%rax), %r13
.cfi_restore %r13
movq -24(%rax), %r12 movq -24(%rax), %r12
.cfi_restore %r12
movq -16(%rax), %rbp movq -16(%rax), %rbp
.cfi_restore %rbp
movq -8(%rax), %rbx movq -8(%rax), %rbx
.cfi_restore %rbx
leaq (%rax), %rsp leaq (%rax), %rsp
.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue: .Lmul_by_one_epilogue:
ret ret
.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
___ ___
} }
......
...@@ -54,7 +54,9 @@ $code.=<<___; ...@@ -54,7 +54,9 @@ $code.=<<___;
.type _mul_1x1,\@abi-omnipotent .type _mul_1x1,\@abi-omnipotent
.align 16 .align 16
_mul_1x1: _mul_1x1:
.cfi_startproc
sub \$128+8,%rsp sub \$128+8,%rsp
.cfi_adjust_cfa_offset 128+8
mov \$-1,$a1 mov \$-1,$a1
lea ($a,$a),$i0 lea ($a,$a),$i0
shr \$3,$a1 shr \$3,$a1
...@@ -160,8 +162,10 @@ $code.=<<___; ...@@ -160,8 +162,10 @@ $code.=<<___;
xor $i1,$hi xor $i1,$hi
add \$128+8,%rsp add \$128+8,%rsp
.cfi_adjust_cfa_offset -128-8
ret ret
.Lend_mul_1x1: .Lend_mul_1x1:
.cfi_endproc
.size _mul_1x1,.-_mul_1x1 .size _mul_1x1,.-_mul_1x1
___ ___
...@@ -174,6 +178,7 @@ $code.=<<___; ...@@ -174,6 +178,7 @@ $code.=<<___;
.type bn_GF2m_mul_2x2,\@abi-omnipotent .type bn_GF2m_mul_2x2,\@abi-omnipotent
.align 16 .align 16
bn_GF2m_mul_2x2: bn_GF2m_mul_2x2:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
mov OPENSSL_ia32cap_P(%rip),%r10 mov OPENSSL_ia32cap_P(%rip),%r10
bt \$33,%r10 bt \$33,%r10
...@@ -211,6 +216,7 @@ $code.=<<___; ...@@ -211,6 +216,7 @@ $code.=<<___;
.align 16 .align 16
.Lvanilla_mul_2x2: .Lvanilla_mul_2x2:
lea -8*17(%rsp),%rsp lea -8*17(%rsp),%rsp
.cfi_adjust_cfa_offset 8*17
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
mov `8*17+40`(%rsp),$b0 mov `8*17+40`(%rsp),$b0
...@@ -219,10 +225,15 @@ $code.=<<___ if ($win64); ...@@ -219,10 +225,15 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
mov %r14,8*10(%rsp) mov %r14,8*10(%rsp)
.cfi_rel_offset %r14,8*10
mov %r13,8*11(%rsp) mov %r13,8*11(%rsp)
.cfi_rel_offset %r13,8*11
mov %r12,8*12(%rsp) mov %r12,8*12(%rsp)
.cfi_rel_offset %r12,8*12
mov %rbp,8*13(%rsp) mov %rbp,8*13(%rsp)
.cfi_rel_offset %rbp,8*13
mov %rbx,8*14(%rsp) mov %rbx,8*14(%rsp)
.cfi_rel_offset %rbx,8*14
.Lbody_mul_2x2: .Lbody_mul_2x2:
mov $rp,32(%rsp) # save the arguments mov $rp,32(%rsp) # save the arguments
mov $a1,40(%rsp) mov $a1,40(%rsp)
...@@ -270,10 +281,15 @@ $code.=<<___; ...@@ -270,10 +281,15 @@ $code.=<<___;
mov $lo,8(%rbp) mov $lo,8(%rbp)
mov 8*10(%rsp),%r14 mov 8*10(%rsp),%r14
.cfi_restore %r14
mov 8*11(%rsp),%r13 mov 8*11(%rsp),%r13
.cfi_restore %r13
mov 8*12(%rsp),%r12 mov 8*12(%rsp),%r12
.cfi_restore %r12
mov 8*13(%rsp),%rbp mov 8*13(%rsp),%rbp
.cfi_restore %rbp
mov 8*14(%rsp),%rbx mov 8*14(%rsp),%rbx
.cfi_restore %rbx
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
mov 8*15(%rsp),%rdi mov 8*15(%rsp),%rdi
...@@ -281,9 +297,11 @@ $code.=<<___ if ($win64); ...@@ -281,9 +297,11 @@ $code.=<<___ if ($win64);
___ ___
$code.=<<___; $code.=<<___;
lea 8*17(%rsp),%rsp lea 8*17(%rsp),%rsp
.cfi_adjust_cfa_offset -8*17
.Lepilogue_mul_2x2: .Lepilogue_mul_2x2:
ret ret
.Lend_mul_2x2: .Lend_mul_2x2:
.cfi_endproc
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" .asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 16 .align 16
......
...@@ -104,8 +104,10 @@ $code=<<___; ...@@ -104,8 +104,10 @@ $code=<<___;
.type bn_mul_mont,\@function,6 .type bn_mul_mont,\@function,6
.align 16 .align 16
bn_mul_mont: bn_mul_mont:
.cfi_startproc
mov ${num}d,${num}d mov ${num}d,${num}d
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$3,${num}d test \$3,${num}d
jnz .Lmul_enter jnz .Lmul_enter
cmp \$8,${num}d cmp \$8,${num}d
...@@ -124,11 +126,17 @@ $code.=<<___; ...@@ -124,11 +126,17 @@ $code.=<<___;
.align 16 .align 16
.Lmul_enter: .Lmul_enter:
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
neg $num neg $num
mov %rsp,%r11 mov %rsp,%r11
...@@ -161,6 +169,7 @@ $code.=<<___; ...@@ -161,6 +169,7 @@ $code.=<<___;
.Lmul_page_walk_done: .Lmul_page_walk_done:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body: .Lmul_body:
mov $bp,%r12 # reassign $bp mov $bp,%r12 # reassign $bp
___ ___
...@@ -331,16 +340,25 @@ $code.=<<___; ...@@ -331,16 +340,25 @@ $code.=<<___;
jnz .Lcopy jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue: .Lmul_epilogue:
ret ret
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont .size bn_mul_mont,.-bn_mul_mont
___ ___
{{{ {{{
...@@ -350,8 +368,10 @@ $code.=<<___; ...@@ -350,8 +368,10 @@ $code.=<<___;
.type bn_mul4x_mont,\@function,6 .type bn_mul4x_mont,\@function,6
.align 16 .align 16
bn_mul4x_mont: bn_mul4x_mont:
.cfi_startproc
mov ${num}d,${num}d mov ${num}d,${num}d
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter: .Lmul4x_enter:
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
...@@ -361,11 +381,17 @@ $code.=<<___ if ($addx); ...@@ -361,11 +381,17 @@ $code.=<<___ if ($addx);
___ ___
$code.=<<___; $code.=<<___;
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
neg $num neg $num
mov %rsp,%r11 mov %rsp,%r11
...@@ -389,6 +415,7 @@ $code.=<<___; ...@@ -389,6 +415,7 @@ $code.=<<___;
.Lmul4x_page_walk_done: .Lmul4x_page_walk_done:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul4x_body: .Lmul4x_body:
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
mov %rdx,%r12 # reassign $bp mov %rdx,%r12 # reassign $bp
...@@ -767,16 +794,25 @@ ___ ...@@ -767,16 +794,25 @@ ___
} }
$code.=<<___; $code.=<<___;
mov 8(%rsp,$num,8),%rsi # restore %rsp mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi, 8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
ret ret
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont .size bn_mul4x_mont,.-bn_mul4x_mont
___ ___
}}} }}}
...@@ -804,14 +840,22 @@ $code.=<<___; ...@@ -804,14 +840,22 @@ $code.=<<___;
.type bn_sqr8x_mont,\@function,6 .type bn_sqr8x_mont,\@function,6
.align 32 .align 32
bn_sqr8x_mont: bn_sqr8x_mont:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter: .Lsqr8x_enter:
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lsqr8x_prologue: .Lsqr8x_prologue:
mov ${num}d,%r10d mov ${num}d,%r10d
...@@ -867,6 +911,7 @@ bn_sqr8x_mont: ...@@ -867,6 +911,7 @@ bn_sqr8x_mont:
mov $n0, 32(%rsp) mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lsqr8x_body: .Lsqr8x_body:
movq $nptr, %xmm2 # save pointer to modulus movq $nptr, %xmm2 # save pointer to modulus
...@@ -936,6 +981,7 @@ $code.=<<___; ...@@ -936,6 +981,7 @@ $code.=<<___;
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1 pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy jmp .Lsqr8x_cond_copy
.align 32 .align 32
...@@ -965,14 +1011,22 @@ $code.=<<___; ...@@ -965,14 +1011,22 @@ $code.=<<___;
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue: .Lsqr8x_epilogue:
ret ret
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont .size bn_sqr8x_mont,.-bn_sqr8x_mont
___ ___
}}} }}}
...@@ -984,14 +1038,22 @@ $code.=<<___; ...@@ -984,14 +1038,22 @@ $code.=<<___;
.type bn_mulx4x_mont,\@function,6 .type bn_mulx4x_mont,\@function,6
.align 32 .align 32
bn_mulx4x_mont: bn_mulx4x_mont:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter: .Lmulx4x_enter:
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lmulx4x_prologue: .Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes shl \$3,${num}d # convert $num to bytes
...@@ -1037,6 +1099,7 @@ bn_mulx4x_mont: ...@@ -1037,6 +1099,7 @@ bn_mulx4x_mont:
mov $n0, 24(%rsp) # save *n0 mov $n0, 24(%rsp) # save *n0
mov $rp, 32(%rsp) # save $rp mov $rp, 32(%rsp) # save $rp
mov %rax,40(%rsp) # save original %rsp mov %rax,40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
mov $num,48(%rsp) # inner counter mov $num,48(%rsp) # inner counter
jmp .Lmulx4x_body jmp .Lmulx4x_body
...@@ -1286,6 +1349,7 @@ $code.=<<___; ...@@ -1286,6 +1349,7 @@ $code.=<<___;
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1 pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy jmp .Lmulx4x_cond_copy
.align 32 .align 32
...@@ -1315,14 +1379,22 @@ $code.=<<___; ...@@ -1315,14 +1379,22 @@ $code.=<<___;
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue: .Lmulx4x_epilogue:
ret ret
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont .size bn_mulx4x_mont,.-bn_mulx4x_mont
___ ___
}}} }}}
......
...@@ -93,8 +93,10 @@ $code=<<___; ...@@ -93,8 +93,10 @@ $code=<<___;
.type bn_mul_mont_gather5,\@function,6 .type bn_mul_mont_gather5,\@function,6
.align 64 .align 64
bn_mul_mont_gather5: bn_mul_mont_gather5:
.cfi_startproc
mov ${num}d,${num}d mov ${num}d,${num}d
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$7,${num}d test \$7,${num}d
jnz .Lmul_enter jnz .Lmul_enter
___ ___
...@@ -108,11 +110,17 @@ $code.=<<___; ...@@ -108,11 +110,17 @@ $code.=<<___;
.Lmul_enter: .Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
neg $num neg $num
mov %rsp,%r11 mov %rsp,%r11
...@@ -145,6 +153,7 @@ $code.=<<___; ...@@ -145,6 +153,7 @@ $code.=<<___;
lea .Linc(%rip),%r10 lea .Linc(%rip),%r10
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body: .Lmul_body:
lea 128($bp),%r12 # reassign $bp (+size optimization) lea 128($bp),%r12 # reassign $bp (+size optimization)
...@@ -431,17 +440,26 @@ $code.=<<___; ...@@ -431,17 +440,26 @@ $code.=<<___;
jnz .Lcopy jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue: .Lmul_epilogue:
ret ret
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
___ ___
{{{ {{{
...@@ -451,8 +469,10 @@ $code.=<<___; ...@@ -451,8 +469,10 @@ $code.=<<___;
.type bn_mul4x_mont_gather5,\@function,6 .type bn_mul4x_mont_gather5,\@function,6
.align 32 .align 32
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
.cfi_startproc
.byte 0x67 .byte 0x67
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter: .Lmul4x_enter:
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
...@@ -462,11 +482,17 @@ $code.=<<___ if ($addx); ...@@ -462,11 +482,17 @@ $code.=<<___ if ($addx);
___ ___
$code.=<<___; $code.=<<___;
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lmul4x_prologue: .Lmul4x_prologue:
.byte 0x67 .byte 0x67
...@@ -522,22 +548,32 @@ $code.=<<___; ...@@ -522,22 +548,32 @@ $code.=<<___;
neg $num neg $num
mov %rax,40(%rsp) mov %rax,40(%rsp)
.cfi_cfa_expression %rsp+40,deref,+8
.Lmul4x_body: .Lmul4x_body:
call mul4x_internal call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
ret ret
.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,\@abi-omnipotent .type mul4x_internal,\@abi-omnipotent
...@@ -1061,7 +1097,9 @@ $code.=<<___; ...@@ -1061,7 +1097,9 @@ $code.=<<___;
.type bn_power5,\@function,6 .type bn_power5,\@function,6
.align 32 .align 32
bn_power5: bn_power5:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d mov OPENSSL_ia32cap_P+8(%rip),%r11d
...@@ -1071,11 +1109,17 @@ $code.=<<___ if ($addx); ...@@ -1071,11 +1109,17 @@ $code.=<<___ if ($addx);
___ ___
$code.=<<___; $code.=<<___;
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lpower5_prologue: .Lpower5_prologue:
shl \$3,${num}d # convert $num to bytes shl \$3,${num}d # convert $num to bytes
...@@ -1140,6 +1184,7 @@ $code.=<<___; ...@@ -1140,6 +1184,7 @@ $code.=<<___;
# #
mov $n0, 32(%rsp) mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lpower5_body: .Lpower5_body:
movq $rptr,%xmm1 # save $rptr, used in sqr8x movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr movq $nptr,%xmm2 # save $nptr
...@@ -1166,16 +1211,25 @@ $code.=<<___; ...@@ -1166,16 +1211,25 @@ $code.=<<___;
call mul4x_internal call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpower5_epilogue: .Lpower5_epilogue:
ret ret
.cfi_endproc
.size bn_power5,.-bn_power5 .size bn_power5,.-bn_power5
.globl bn_sqr8x_internal .globl bn_sqr8x_internal
...@@ -2055,14 +2109,22 @@ bn_from_montgomery: ...@@ -2055,14 +2109,22 @@ bn_from_montgomery:
.type bn_from_mont8x,\@function,6 .type bn_from_mont8x,\@function,6
.align 32 .align 32
bn_from_mont8x: bn_from_mont8x:
.cfi_startproc
.byte 0x67 .byte 0x67
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lfrom_prologue: .Lfrom_prologue:
shl \$3,${num}d # convert $num to bytes shl \$3,${num}d # convert $num to bytes
...@@ -2127,6 +2189,7 @@ bn_from_mont8x: ...@@ -2127,6 +2189,7 @@ bn_from_mont8x:
# #
mov $n0, 32(%rsp) mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lfrom_body: .Lfrom_body:
mov $num,%r11 mov $num,%r11
lea 48(%rsp),%rax lea 48(%rsp),%rax
...@@ -2170,7 +2233,6 @@ $code.=<<___ if ($addx); ...@@ -2170,7 +2233,6 @@ $code.=<<___ if ($addx);
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
lea 48(%rsp),%rax lea 48(%rsp),%rax
mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero jmp .Lfrom_mont_zero
.align 32 .align 32
...@@ -2182,11 +2244,12 @@ $code.=<<___; ...@@ -2182,11 +2244,12 @@ $code.=<<___;
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
lea 48(%rsp),%rax lea 48(%rsp),%rax
mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero jmp .Lfrom_mont_zero
.align 32 .align 32
.Lfrom_mont_zero: .Lfrom_mont_zero:
mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
movdqa %xmm0,16*0(%rax) movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax) movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax) movdqa %xmm0,16*2(%rax)
...@@ -2197,14 +2260,22 @@ $code.=<<___; ...@@ -2197,14 +2260,22 @@ $code.=<<___;
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lfrom_epilogue: .Lfrom_epilogue:
ret ret
.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x .size bn_from_mont8x,.-bn_from_mont8x
___ ___
} }
...@@ -2217,14 +2288,22 @@ $code.=<<___; ...@@ -2217,14 +2288,22 @@ $code.=<<___;
.type bn_mulx4x_mont_gather5,\@function,6 .type bn_mulx4x_mont_gather5,\@function,6
.align 32 .align 32
bn_mulx4x_mont_gather5: bn_mulx4x_mont_gather5:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter: .Lmulx4x_enter:
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lmulx4x_prologue: .Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes shl \$3,${num}d # convert $num to bytes
...@@ -2290,21 +2369,31 @@ bn_mulx4x_mont_gather5: ...@@ -2290,21 +2369,31 @@ bn_mulx4x_mont_gather5:
# #
mov $n0, 32(%rsp) # save *n0 mov $n0, 32(%rsp) # save *n0
mov %rax,40(%rsp) # save original %rsp mov %rax,40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lmulx4x_body: .Lmulx4x_body:
call mulx4x_internal call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue: .Lmulx4x_epilogue:
ret ret
.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,\@abi-omnipotent .type mulx4x_internal,\@abi-omnipotent
...@@ -2682,14 +2771,22 @@ $code.=<<___; ...@@ -2682,14 +2771,22 @@ $code.=<<___;
.type bn_powerx5,\@function,6 .type bn_powerx5,\@function,6
.align 32 .align 32
bn_powerx5: bn_powerx5:
.cfi_startproc
mov %rsp,%rax mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter: .Lpowerx5_enter:
push %rbx push %rbx
.cfi_push %rbx
push %rbp push %rbp
.cfi_push %rbp
push %r12 push %r12
.cfi_push %r12
push %r13 push %r13
.cfi_push %r13
push %r14 push %r14
.cfi_push %r14
push %r15 push %r15
.cfi_push %r15
.Lpowerx5_prologue: .Lpowerx5_prologue:
shl \$3,${num}d # convert $num to bytes shl \$3,${num}d # convert $num to bytes
...@@ -2761,6 +2858,7 @@ bn_powerx5: ...@@ -2761,6 +2858,7 @@ bn_powerx5:
movq $bptr,%xmm4 movq $bptr,%xmm4
mov $n0, 32(%rsp) mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lpowerx5_body: .Lpowerx5_body:
call __bn_sqrx8x_internal call __bn_sqrx8x_internal
...@@ -2783,17 +2881,26 @@ bn_powerx5: ...@@ -2783,17 +2881,26 @@ bn_powerx5:
call mulx4x_internal call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax mov \$1,%rax
mov -48(%rsi),%r15 mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14 mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13 mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12 mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue: .Lpowerx5_epilogue:
ret ret
.cfi_endproc
.size bn_powerx5,.-bn_powerx5 .size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal .globl bn_sqrx8x_internal
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册