OpenHarmony / Third Party Openssl · Commit 76e624a0
Commit 76e624a0
Authored Feb 08, 2017 by Andy Polyakov

bn/asm/x86_64*: add DWARF CFI directives.

Reviewed-by: Rich Salz <rsalz@openssl.org>
Parent: a3b5684f
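The patch threads DWARF call-frame information (CFI) through the perlasm of five bn modules: `.cfi_startproc`/`.cfi_endproc` bracket each function, `.cfi_def_cfa_register` re-anchors the canonical frame address (CFA) whenever the entry stack pointer is parked in a scratch register, and per-save annotations let an unwinder locate callee-saved registers at any instruction. A minimal sketch of the same idea in raw GAS, using only stock directives; `demo_add` is a made-up function, not part of the patch:

.text
.globl	demo_add
.type	demo_add,@function
demo_add:			# returns %rdi + %rsi
.cfi_startproc			# open the unwind entry; CFA = %rsp+8
	push	%rbx
.cfi_adjust_cfa_offset	8	# the push moved %rsp; CFA is now %rsp+16
.cfi_offset	%rbx,-16	# old %rbx saved at CFA-16
	mov	%rsi,%rbx
	lea	(%rdi,%rbx),%rax
	pop	%rbx
.cfi_adjust_cfa_offset	-8	# CFA back to %rsp+8
.cfi_restore	%rbx		# %rbx is live in its register again
	ret
.cfi_endproc
.size	demo_add,.-demo_add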
Showing 5 changed files with 324 additions and 2 deletions (+324 −2)
crypto/bn/asm/rsaz-avx2.pl      +40  −0
crypto/bn/asm/rsaz-x86_64.pl    +85  −0
crypto/bn/asm/x86_64-gf2m.pl    +18  −0
crypto/bn/asm/x86_64-mont.pl    +72  −0
crypto/bn/asm/x86_64-mont5.pl   +109 −2
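A note on the directives below: `.cfi_startproc`, `.cfi_def_cfa`, `.cfi_adjust_cfa_offset` and friends are standard GNU assembler directives, but `.cfi_push` and `.cfi_cfa_expression` are, to the best of my reading, perlasm shorthands expanded by crypto/perlasm/x86_64-xlate.pl rather than passed through verbatim. The assumed expansion of a `.cfi_push` (offsets shown for the first push in a function; the translator tracks the real ones):

	push	%rbx
.cfi_adjust_cfa_offset	8	# the push moved %rsp, so CFA is 8 further away
.cfi_offset	%rbx,-16	# and the saved %rbx sits at CFA-16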
crypto/bn/asm/rsaz-avx2.pl
@@ -168,13 +168,21 @@ $code.=<<___;
 .type	rsaz_1024_sqr_avx2,\@function,5
 .align	64
 rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -193,6 +201,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	mov	%rdx, $np			# reassigned argument
 	sub	\$$FrameSize, %rsp
 	mov	$np, $tmp
@@ -825,6 +834,7 @@ $code.=<<___;
 	vzeroall
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
 .Lsqr_1024_in_tail:
@@ -841,14 +851,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
 ___
 }
@@ -901,13 +919,21 @@ $code.=<<___;
 .type	rsaz_1024_mul_avx2,\@function,5
 .align	64
 rsaz_1024_mul_avx2:
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	vzeroupper
@@ -926,6 +952,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	vzeroall
 	mov	%rdx, $bp	# reassigned argument
 	sub	\$64,%rsp
@@ -1459,6 +1486,7 @@ $code.=<<___;
 	vzeroupper
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
 .Lmul_1024_in_tail:
@@ -1475,14 +1503,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
 ___
 }
@@ -1601,8 +1637,10 @@ rsaz_1024_scatter5_avx2:
 .type	rsaz_1024_gather5_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_gather5_avx2:
+.cfi_startproc
 	vzeroupper
 	mov	%rsp,%r11
+.cfi_def_cfa_register	%r11
 ___
 $code.=<<___ if ($win64);
 	lea	-0x88(%rsp),%rax
@@ -1743,7 +1781,9 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 	ret
+.cfi_endproc
 .LSEH_end_rsaz_1024_gather5:
 .size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
 ___
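A recurring pattern above: the AVX2 code snapshots the entry %rsp into %rax (`lea (%rsp),%rax`), immediately re-anchors the CFA to that register, and only re-anchors back to %rsp in the epilogue. That keeps the unwind info valid while the body realigns or grows the stack by data-dependent amounts. A standalone sketch in plain GAS, with a hypothetical `demo_frame`:

demo_frame:
.cfi_startproc
	mov	%rsp,%rax	# snapshot the incoming stack pointer
.cfi_def_cfa_register	%rax	# CFA = %rax+8 from here on
	sub	$256,%rsp	# allocate scratch
	and	$-64,%rsp	# align it; %rsp is no longer statically known
	movdqa	%xmm0,(%rsp)	# use the scratch area
	lea	(%rax),%rsp	# restore the entry %rsp
.cfi_def_cfa_register	%rsp	# CFA = %rsp+8 again
	ret
.cfi_endproc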
crypto/bn/asm/rsaz-x86_64.pl
@@ -138,14 +138,22 @@ $code.=<<___;
 .type	rsaz_512_sqr,\@function,5
 .align	32
 rsaz_512_sqr:				# 25-29% faster than rsaz_512_mul
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lsqr_body:
 	movq	$mod, %rbp		# common argument
 	movq	($inp), %rdx
@@ -800,15 +808,24 @@ ___
 $code.=<<___;
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_sqr,.-rsaz_512_sqr
 ___
 }
@@ -819,14 +836,22 @@ $code.=<<___;
 .type	rsaz_512_mul,\@function,5
 .align	32
 rsaz_512_mul:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_body:
 	movq	$out, %xmm0		# off-load arguments
 	movq	$mod, %xmm1
@@ -896,15 +921,24 @@ $code.=<<___;
 	call	__rsaz_512_subtract
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul,.-rsaz_512_mul
 ___
 }
@@ -915,14 +949,22 @@ $code.=<<___;
 .type	rsaz_512_mul_gather4,\@function,6
 .align	32
 rsaz_512_mul_gather4:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	subq	\$`128+24+($win64?0xb0:0)`, %rsp
+.cfi_adjust_cfa_offset	`128+24+($win64?0xb0:0)`
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,0xa0(%rsp)
@@ -1348,15 +1390,24 @@ $code.=<<___ if ($win64);
 	lea	0xb0(%rax),%rax
 ___
 $code.=<<___;
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_gather4_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
 ___
 }
@@ -1367,15 +1418,23 @@ $code.=<<___;
 .type	rsaz_512_mul_scatter4,\@function,6
 .align	32
 rsaz_512_mul_scatter4:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	mov	$pwr, $pwr
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_scatter4_body:
 	leaq	($tbl,$pwr,8), $tbl
 	movq	$out, %xmm0		# off-load arguments
@@ -1458,15 +1517,24 @@ $code.=<<___;
 	movq	%r15, 128*7($inp)
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_scatter4_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
 ___
 }
@@ -1477,14 +1545,22 @@ $code.=<<___;
 .type	rsaz_512_mul_by_one,\@function,4
 .align	32
 rsaz_512_mul_by_one:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_by_one_body:
 ___
 $code.=<<___ if ($addx);
@@ -1539,15 +1615,24 @@ $code.=<<___;
 	movq	%r15, 56($out)
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_by_one_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
 ___
 }
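The `.cfi_def_cfa %rax,8` in these epilogues is worth unpacking. On entry CFA = %rsp+8; six pushes leave %rsp at CFA-56, and `subq \$128+24,%rsp` at CFA-208, so `leaq 128+24+48(%rsp),%rax` computes %rax = CFA-8, and the directive merely restates the same CFA in %rax-relative terms before the saved registers are reloaded. Spelled out as a hypothetical `demo_epilogue` mirroring the rsaz_512_* frame (the patch also emits a `.cfi_restore` per reload, omitted here for brevity):

demo_epilogue:
.cfi_startproc				# CFA = %rsp+8
	push	%rbx
.cfi_adjust_cfa_offset	8
	push	%rbp
.cfi_adjust_cfa_offset	8
	push	%r12
.cfi_adjust_cfa_offset	8
	push	%r13
.cfi_adjust_cfa_offset	8
	push	%r14
.cfi_adjust_cfa_offset	8
	push	%r15
.cfi_adjust_cfa_offset	8		# six pushes: %rsp = CFA-56
	subq	$128+24, %rsp
.cfi_adjust_cfa_offset	128+24		# frame: %rsp = CFA-208
	leaq	128+24+48(%rsp), %rax	# %rax = CFA-208+200 = CFA-8
.cfi_def_cfa	%rax,8			# same CFA, now %rax-relative
	movq	-48(%rax), %r15		# saved registers sit at CFA-56..CFA-16
	movq	-40(%rax), %r14
	movq	-32(%rax), %r13
	movq	-24(%rax), %r12
	movq	-16(%rax), %rbp
	movq	-8(%rax), %rbx
	leaq	(%rax), %rsp		# %rsp = CFA-8, return address on top
.cfi_def_cfa_register	%rsp		# CFA = %rsp+8 once more
	ret
.cfi_endproc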
crypto/bn/asm/x86_64-gf2m.pl
@@ -54,7 +54,9 @@ $code.=<<___;
 .type	_mul_1x1,\@abi-omnipotent
 .align	16
 _mul_1x1:
+.cfi_startproc
 	sub	\$128+8,%rsp
+.cfi_adjust_cfa_offset	128+8
 	mov	\$-1,$a1
 	lea	($a,$a),$i0
 	shr	\$3,$a1
@@ -160,8 +162,10 @@ $code.=<<___;
 	xor	$i1,$hi
 	add	\$128+8,%rsp
+.cfi_adjust_cfa_offset	-128-8
 	ret
 .Lend_mul_1x1:
+.cfi_endproc
 .size	_mul_1x1,.-_mul_1x1
 ___
@@ -174,6 +178,7 @@ $code.=<<___;
 .type	bn_GF2m_mul_2x2,\@abi-omnipotent
 .align	16
 bn_GF2m_mul_2x2:
+.cfi_startproc
 	mov	%rsp,%rax
 	mov	OPENSSL_ia32cap_P(%rip),%r10
 	bt	\$33,%r10
@@ -211,6 +216,7 @@ $code.=<<___;
 .align	16
 .Lvanilla_mul_2x2:
 	lea	-8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	8*17
 ___
 $code.=<<___ if ($win64);
 	mov	`8*17+40`(%rsp),$b0
@@ -219,10 +225,15 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%r14,8*10(%rsp)
+.cfi_rel_offset	%r14,8*10
 	mov	%r13,8*11(%rsp)
+.cfi_rel_offset	%r13,8*11
 	mov	%r12,8*12(%rsp)
+.cfi_rel_offset	%r12,8*12
 	mov	%rbp,8*13(%rsp)
+.cfi_rel_offset	%rbp,8*13
 	mov	%rbx,8*14(%rsp)
+.cfi_rel_offset	%rbx,8*14
 .Lbody_mul_2x2:
 	mov	$rp,32(%rsp)		# save the arguments
 	mov	$a1,40(%rsp)
@@ -270,10 +281,15 @@ $code.=<<___;
 	mov	$lo,8(%rbp)
 	mov	8*10(%rsp),%r14
+.cfi_restore	%r14
 	mov	8*11(%rsp),%r13
+.cfi_restore	%r13
 	mov	8*12(%rsp),%r12
+.cfi_restore	%r12
 	mov	8*13(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	8*14(%rsp),%rbx
+.cfi_restore	%rbx
 ___
 $code.=<<___ if ($win64);
 	mov	8*15(%rsp),%rdi
@@ -281,9 +297,11 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	lea	8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	-8*17
 .Lepilogue_mul_2x2:
 	ret
 .Lend_mul_2x2:
+.cfi_endproc
 .size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
 .asciz	"GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 .align	16
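x86_64-gf2m.pl saves callee-saved registers with plain stores into a pre-sized frame instead of pushes, so the annotation of record is `.cfi_rel_offset reg,off`: the register is saved at offset `off` from the current CFA register (%rsp here), which the assembler folds into a CFA-relative rule. With the `lea -8*17(%rsp),%rsp` frame, CFA = %rsp + 8*17 + 8, so %r14 at `8*10(%rsp)` lands at CFA-64. A compilable sketch; `demo_relsave` is hypothetical:

demo_relsave:
.cfi_startproc			# CFA = %rsp+8
	lea	-8*17(%rsp),%rsp
.cfi_adjust_cfa_offset	8*17	# CFA = %rsp + 8*17 + 8
	mov	%r14,8*10(%rsp)	# save by store rather than push
.cfi_rel_offset	%r14,8*10	# i.e. %r14 saved at CFA-64
	xor	%r14,%r14	# clobber it
	mov	8*10(%rsp),%r14
.cfi_restore	%r14		# back in its register
	lea	8*17(%rsp),%rsp
.cfi_adjust_cfa_offset	-8*17
	ret
.cfi_endproc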
crypto/bn/asm/x86_64-mont.pl
@@ -104,8 +104,10 @@ $code=<<___;
 .type	bn_mul_mont,\@function,6
 .align	16
 bn_mul_mont:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	test	\$3,${num}d
 	jnz	.Lmul_enter
 	cmp	\$8,${num}d
@@ -124,11 +126,17 @@ $code.=<<___;
 .align	16
 .Lmul_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	neg	$num
 	mov	%rsp,%r11
@@ -161,6 +169,7 @@ $code.=<<___;
 .Lmul_page_walk_done:
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul_body:
 	mov	$bp,%r12		# reassign $bp
 ___
@@ -331,16 +340,25 @@ $code.=<<___;
 	jnz	.Lcopy
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul_mont,.-bn_mul_mont
 ___
 {{{
@@ -350,8 +368,10 @@ $code.=<<___;
 .type	bn_mul4x_mont,\@function,6
 .align	16
 bn_mul4x_mont:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -361,11 +381,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	neg	$num
 	mov	%rsp,%r11
@@ -389,6 +415,7 @@ $code.=<<___;
 .Lmul4x_page_walk_done:
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul4x_body:
 	mov	$rp,16(%rsp,$num,8)	# tp[num+2]=$rp
 	mov	%rdx,%r12		# reassign $bp
@@ -767,16 +794,25 @@ ___
 }
 $code.=<<___;
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi, 8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul4x_mont,.-bn_mul4x_mont
 ___
 }}}
@@ -804,14 +840,22 @@ $code.=<<___;
 .type	bn_sqr8x_mont,\@function,6
 .align	32
 bn_sqr8x_mont:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lsqr8x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lsqr8x_prologue:
 	mov	${num}d,%r10d
@@ -867,6 +911,7 @@ bn_sqr8x_mont:
 	mov	$n0, 32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lsqr8x_body:
 	movq	$nptr, %xmm2		# save pointer to modulus
@@ -936,6 +981,7 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	jmp	.Lsqr8x_cond_copy
 .align	32
@@ -965,14 +1011,22 @@ $code.=<<___;
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr8x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_sqr8x_mont,.-bn_sqr8x_mont
 ___
 }}}
@@ -984,14 +1038,22 @@ $code.=<<___;
 .type	bn_mulx4x_mont,\@function,6
 .align	32
 bn_mulx4x_mont:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmulx4x_prologue:
 	shl	\$3,${num}d		# convert $num to bytes
@@ -1037,6 +1099,7 @@ bn_mulx4x_mont:
 	mov	$n0, 24(%rsp)		# save *n0
 	mov	$rp, 32(%rsp)		# save $rp
 	mov	%rax,40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 	mov	$num,48(%rsp)		# inner counter
 	jmp	.Lmulx4x_body
@@ -1286,6 +1349,7 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	jmp	.Lmulx4x_cond_copy
 .align	32
@@ -1315,14 +1379,22 @@ $code.=<<___;
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mulx4x_mont,.-bn_mulx4x_mont
 ___
 }}}
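The Montgomery routines go one step further: the saved %rsp is itself spilled into the frame (at `8(%rsp,$num,8)` or `40(%rsp)`), and %rsp then moves by amounts an unwinder cannot track, so no register+offset rule can describe the frame. `.cfi_cfa_expression` instead defines the CFA by a DWARF expression that loads the pointer from memory: `%rsp+40,deref,+8` reads as CFA = *(%rsp+40) + 8, and `%rsp+8,$num,8,mul,plus,deref,+8` as CFA = *(%rsp + 8 + $num*8) + 8. The expression form appears to be a perlasm extension compiled down to raw DWARF. A sketch of the equivalent via the stock GAS escape hatch, with the byte encoding decoded by hand (verify against the translator's real output); `demo_cfa_expr` is hypothetical:

demo_cfa_expr:
.cfi_startproc
	mov	%rsp,%rax	# keep the entry %rsp
	sub	$64,%rsp
	and	$-32,%rsp	# %rsp is now alignment dependent
	mov	%rax,40(%rsp)	# spill the original %rsp into the frame
	# DW_CFA_def_cfa_expression (0x0f), 5 expression bytes:
	# DW_OP_breg7 40 (0x77 0x28); DW_OP_deref (0x06);
	# DW_OP_plus_uconst 8 (0x23 0x08)  =>  CFA = *(%rsp+40) + 8
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	mov	40(%rsp),%rsi	# reload the saved %rsp
.cfi_def_cfa	%rsi,8		# back to a register+offset rule
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
	ret
.cfi_endproc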
crypto/bn/asm/x86_64-mont5.pl
@@ -93,8 +93,10 @@ $code=<<___;
 .type	bn_mul_mont_gather5,\@function,6
 .align	64
 bn_mul_mont_gather5:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	test	\$7,${num}d
 	jnz	.Lmul_enter
 ___
@@ -108,11 +110,17 @@ $code.=<<___;
 .Lmul_enter:
 	movd	`($win64?56:8)`(%rsp),%xmm5	# load 7th argument
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	neg	$num
 	mov	%rsp,%r11
@@ -145,6 +153,7 @@ $code.=<<___;
 	lea	.Linc(%rip),%r10
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul_body:
 	lea	128($bp),%r12		# reassign $bp (+size optimization)
@@ -431,17 +440,26 @@ $code.=<<___;
 	jnz	.Lcopy
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
 ___
 {{{
@@ -451,8 +469,10 @@ $code.=<<___;
 .type	bn_mul4x_mont_gather5,\@function,6
 .align	32
 bn_mul4x_mont_gather5:
+.cfi_startproc
 	.byte	0x67
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -462,11 +482,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmul4x_prologue:
 	.byte	0x67
@@ -522,22 +548,32 @@ $code.=<<___;
 	neg	$num
 	mov	%rax,40(%rsp)
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmul4x_body:
 	call	mul4x_internal
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
 .type	mul4x_internal,\@abi-omnipotent
@@ -1061,7 +1097,9 @@ $code.=<<___;
 .type	bn_power5,\@function,6
 .align	32
 bn_power5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($addx);
 	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -1071,11 +1109,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lpower5_prologue:
 	shl	\$3,${num}d		# convert $num to bytes
@@ -1140,6 +1184,7 @@ $code.=<<___;
 #
 	mov	$n0, 32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpower5_body:
 	movq	$rptr,%xmm1		# save $rptr, used in sqr8x
 	movq	$nptr,%xmm2		# save $nptr
@@ -1166,16 +1211,25 @@ $code.=<<___;
 	call	mul4x_internal
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lpower5_epilogue:
 	ret
+.cfi_endproc
 .size	bn_power5,.-bn_power5
 .globl	bn_sqr8x_internal
@@ -2055,14 +2109,22 @@ bn_from_montgomery:
 .type	bn_from_mont8x,\@function,6
 .align	32
 bn_from_mont8x:
+.cfi_startproc
 	.byte	0x67
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lfrom_prologue:
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2127,6 +2189,7 @@ bn_from_mont8x:
 #
 	mov	$n0, 32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lfrom_body:
 	mov	$num,%r11
 	lea	48(%rsp),%rax
@@ -2170,7 +2233,6 @@ $code.=<<___ if ($addx);
 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
-	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero
 .align	32
@@ -2182,11 +2244,12 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
-	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero
 .align	32
 .Lfrom_mont_zero:
+	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	movdqa	%xmm0,16*0(%rax)
 	movdqa	%xmm0,16*1(%rax)
 	movdqa	%xmm0,16*2(%rax)
@@ -2197,14 +2260,22 @@ $code.=<<___;
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lfrom_epilogue:
 	ret
+.cfi_endproc
 .size	bn_from_mont8x,.-bn_from_mont8x
 ___
 }
@@ -2217,14 +2288,22 @@ $code.=<<___;
 .type	bn_mulx4x_mont_gather5,\@function,6
 .align	32
 bn_mulx4x_mont_gather5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmulx4x_prologue:
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2290,21 +2369,31 @@ bn_mulx4x_mont_gather5:
 #
 	mov	$n0, 32(%rsp)		# save *n0
 	mov	%rax,40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmulx4x_body:
 	call	mulx4x_internal
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
 .type	mulx4x_internal,\@abi-omnipotent
@@ -2682,14 +2771,22 @@ $code.=<<___;
 .type	bn_powerx5,\@function,6
 .align	32
 bn_powerx5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lpowerx5_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lpowerx5_prologue:
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2761,6 +2858,7 @@ bn_powerx5:
 	movq	$bptr,%xmm4
 	mov	$n0, 32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpowerx5_body:
 	call	__bn_sqrx8x_internal
@@ -2783,17 +2881,26 @@ bn_powerx5:
 	call	mulx4x_internal
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lpowerx5_epilogue:
 	ret
+.cfi_endproc
 .size	bn_powerx5,.-bn_powerx5
 .globl	bn_sqrx8x_internal