Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
b9064221
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
大约 1 年 前同步成功
通知
9
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b9064221
编写于
5月 16, 2011
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
x86[_64]cpuid.pl: handle new extensions.
上级
a3e07010
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
133 addition
and
56 deletion
+133
-56
crypto/x86_64cpuid.pl
crypto/x86_64cpuid.pl
+41
-16
crypto/x86cpuid.pl
crypto/x86cpuid.pl
+47
-16
doc/crypto/OPENSSL_ia32cap.pod
doc/crypto/OPENSSL_ia32cap.pod
+45
-24
未找到文件。
crypto/x86_64cpuid.pl
浏览文件 @
b9064221
...
...
@@ -47,7 +47,7 @@ OPENSSL_rdtsc:
.type OPENSSL_ia32_cpuid,\@abi-omnipotent
.align 16
OPENSSL_ia32_cpuid:
mov %rbx,%r8
mov %rbx,%r8
# save %rbx
xor %eax,%eax
cpuid
...
...
@@ -79,7 +79,15 @@ OPENSSL_ia32_cpuid:
# AMD specific
mov \$0x80000000,%eax
cpuid
cmp \$0x80000008,%eax
cmp \$0x80000001,%eax
jb .Lintel
mov %eax,%r10d
mov \$0x80000001,%eax
cpuid
or %ecx,%r9d
and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
cmp \$0x80000008,%r10d
jb .Lintel
mov \$0x80000008,%eax
...
...
@@ -90,12 +98,12 @@ OPENSSL_ia32_cpuid:
mov \$1,%eax
cpuid
bt \$28,%edx # test hyper-threading bit
jnc .L
done
jnc .L
generic
shr \$16,%ebx # number of logical processors
cmp %r10b,%bl
ja .L
done
ja .L
generic
and \$0xefffffff,%edx # ~(1<<28)
jmp .L
done
jmp .L
generic
.Lintel:
cmp \$4,%r11d
...
...
@@ -121,21 +129,38 @@ OPENSSL_ia32_cpuid:
or \$0x40000000,%edx # use reserved bit to skip unrolled loop
.Lnotintel:
bt \$28,%edx # test hyper-threading bit
jnc .L
done
jnc .L
generic
and \$0xefffffff,%edx # ~(1<<28)
cmp \$0,%r10d
je .L
done
je .L
generic
or \$0x10000000,%edx # 1<<28
shr \$16,%ebx
cmp \$1,%bl # see if cache is shared
ja .L
done
ja .L
generic
and \$0xefffffff,%edx # ~(1<<28)
.Ldone:
.Lgeneric:
and \$0x00000800,%r9d # isolate AMD XOP flag
and \$0xfffff7ff,%ecx
or %r9d,%ecx # merge AMD XOP flag
shl \$32,%rcx
mov %edx,%eax
mov %r8,%rbx
or %rcx,%rax
mov %edx,%ebx
or %rcx,%rbx # compose capability vector in %rbx
bt \$27+32,%rcx # check OSXSAVE bit
jnc .Lclear_avx
xor %ecx,%ecx # XCR0
.byte 0x0f,0x01,0xd0 # xgetbv
and \$6,%eax # isolate XMM and YMM state support
cmp \$6,%eax
je .Ldone
.Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
shl \$32,%rax
and %rax,%rbx # clear AVX, FMA and AMD XOP bits
.Ldone:
mov %rbx,%rax
mov %r8,%rbx # restore %rbx
ret
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
...
...
@@ -250,7 +275,7 @@ OPENSSL_instrument_bus:
mov %eax,$lasttick # lasttick = tick
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
lock
.byte 0xf0 #
lock
add $lastdiff,($out)
jmp .Loop
.align 16
...
...
@@ -260,7 +285,7 @@ OPENSSL_instrument_bus:
mov %edx,$lasttick
mov %eax,$lastdiff
clflush ($out)
lock
.byte 0xf0 #
lock
add %eax,($out)
lea 4($out),$out
sub \$1,$cnt
...
...
@@ -284,7 +309,7 @@ OPENSSL_instrument_bus2:
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
lock
.byte 0xf0 #
lock
add $lastdiff,($out)
rdtsc # collect 1st diff
...
...
@@ -294,7 +319,7 @@ OPENSSL_instrument_bus2:
mov %eax,$lastdiff # lastdiff = diff
.Loop2:
clflush ($out)
lock
.byte 0xf0 #
lock
add %eax,($out) # accumulate diff
sub \$1,$max
...
...
crypto/x86cpuid.pl
浏览文件 @
b9064221
...
...
@@ -20,7 +20,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&pop
("
eax
");
&xor
("
ecx
","
eax
");
&bt
("
ecx
",
21
);
&jnc
(
&label
("
done
"));
&jnc
(
&label
("
generic
"));
&xor
("
eax
","
eax
");
&cpuid
();
&mov
("
edi
","
eax
");
# max value for standard query level
...
...
@@ -51,7 +51,14 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
# AMD specific
&mov
("
eax
",
0x80000000
);
&cpuid
();
&cmp
("
eax
",
0x80000008
);
&cmp
("
eax
",
0x80000001
);
&jb
(
&label
("
intel
"));
&mov
("
esi
","
eax
");
&mov
("
eax
",
0x80000001
);
&cpuid
();
&or
("
ebp
","
ecx
");
&and
("
ebp
",
1
<<
11
|
1
);
# isolate XOP bit
&cmp
("
esi
",
0x80000008
);
&jb
(
&label
("
intel
"));
&mov
("
eax
",
0x80000008
);
...
...
@@ -62,13 +69,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&mov
("
eax
",
1
);
&cpuid
();
&bt
("
edx
",
28
);
&jnc
(
&label
("
done
"));
&jnc
(
&label
("
generic
"));
&shr
("
ebx
",
16
);
&and
("
ebx
",
0xff
);
&cmp
("
ebx
","
esi
");
&ja
(
&label
("
done
"));
&ja
(
&label
("
generic
"));
&and
("
edx
",
0xefffffff
);
# clear hyper-threading bit
&jmp
(
&label
("
done
"));
&jmp
(
&label
("
generic
"));
&set_label
("
intel
");
&cmp
("
edi
",
4
);
...
...
@@ -93,19 +100,42 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&or
("
edx
",
1
<<
20
);
# use reserved bit to engage RC4_CHAR
&set_label
("
notP4
");
&bt
("
edx
",
28
);
# test hyper-threading bit
&jnc
(
&label
("
done
"));
&jnc
(
&label
("
generic
"));
&and
("
edx
",
0xefffffff
);
&cmp
("
edi
",
0
);
&je
(
&label
("
done
"));
&je
(
&label
("
generic
"));
&or
("
edx
",
0x10000000
);
&shr
("
ebx
",
16
);
&cmp
(
&LB
("
ebx
"),
1
);
&ja
(
&label
("
done
"));
&ja
(
&label
("
generic
"));
&and
("
edx
",
0xefffffff
);
# clear hyper-threading bit if not
&set_label
("
generic
");
&and
("
ebp
",
1
<<
11
);
# isolate AMD XOP flag
&and
("
ecx
",
~
(
1
<<
11
));
&mov
("
esi
","
edx
");
&or
("
ebp
","
ecx
");
# merge AMD XOP flag
&bt
("
ecx
",
26
);
# check XSAVE bit
&jnc
(
&label
("
done
"));
&bt
("
ecx
",
27
);
# check OSXSAVE bit
&jnc
(
&label
("
clear_xmm
"));
&xor
("
ecx
","
ecx
");
&data_byte
(
0x0f
,
0x01
,
0xd0
);
# xgetbv
&and
("
eax
",
6
);
&cmp
("
eax
",
6
);
&je
(
&label
("
done
"));
&cmp
("
eax
",
2
);
&je
(
&label
("
clear_avx
"));
&set_label
("
clear_xmm
");
&and
("
ebp
",
~
(
1
<<
25
|
1
<<
1
));
# clear AESNI and PCLMULQDQ bits
&and
("
esi
",
~
(
1
<<
24
));
# clear FXSR
&set_label
("
clear_avx
");
&and
("
ebp
",
~
(
1
<<
28
|
1
<<
12
|
1
<<
11
));
# clear AVX, FMA and AMD XOP bits
&set_label
("
done
");
&mov
("
eax
","
e
dx
");
&mov
("
edx
","
e
cx
");
&mov
("
eax
","
e
si
");
&mov
("
edx
","
e
bp
");
&function_end
("
OPENSSL_ia32_cpuid
");
&external_label
("
OPENSSL_ia32cap_P
");
...
...
@@ -199,8 +229,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&bt
(
&DWP
(
0
,"
ecx
"),
1
);
&jnc
(
&label
("
no_x87
"));
if
(
$sse2
)
{
&bt
(
&DWP
(
0
,"
ecx
"),
26
);
&jnc
(
&label
("
no_sse2
"));
&and
("
ecx
",
1
<<
26
|
1
<<
24
);
# check SSE2 and FXSR bits
&cmp
("
ecx
",
1
<<
26
|
1
<<
24
);
&jne
(
&label
("
no_sse2
"));
&pxor
("
xmm0
","
xmm0
");
&pxor
("
xmm1
","
xmm1
");
&pxor
("
xmm2
","
xmm2
");
...
...
@@ -331,7 +362,7 @@ my $max = "ebp";
&mov
(
$lasttick
,"
eax
");
# lasttick = tick
&mov
(
$lastdiff
,
0
);
# lastdiff = 0
&clflush
(
&DWP
(
0
,
$out
));
&
lock
();
&
data_byte
(
0xf0
);
# lock
&add
(
&DWP
(
0
,
$out
),
$lastdiff
);
&jmp
(
&label
("
loop
"));
...
...
@@ -342,7 +373,7 @@ my $max = "ebp";
&mov
(
$lasttick
,"
edx
");
# lasttick = tick
&mov
(
$lastdiff
,"
eax
");
# lastdiff = diff
&clflush
(
&DWP
(
0
,
$out
));
&
lock
();
&
data_byte
(
0xf0
);
# lock
&add
(
&DWP
(
0
,
$out
),"
eax
");
# accumulate diff
&lea
(
$out
,
&DWP
(
4
,
$out
));
# ++$out
&sub
(
$cnt
,
1
);
# --$cnt
...
...
@@ -371,7 +402,7 @@ my $max = "ebp";
&mov
(
$lastdiff
,
0
);
# lastdiff = 0
&clflush
(
&DWP
(
0
,
$out
));
&
lock
();
&
data_byte
(
0xf0
);
# lock
&add
(
&DWP
(
0
,
$out
),
$lastdiff
);
&rdtsc
();
# collect 1st diff
...
...
@@ -383,7 +414,7 @@ my $max = "ebp";
&set_label
("
loop2
",
16
);
&clflush
(
&DWP
(
0
,
$out
));
&
lock
();
&
data_byte
(
0xf0
);
# lock
&add
(
&DWP
(
0
,
$out
),"
eax
");
# accumulate diff
&sub
(
$max
,
1
);
...
...
doc/crypto/OPENSSL_ia32cap.pod
浏览文件 @
b9064221
...
...
@@ -2,7 +2,7 @@
=head1 NAME
OPENSSL_ia32cap -
finding the IA-32 processor capabilities
OPENSSL_ia32cap -
the IA-32 processor capabilities vector
=head1 SYNOPSIS
...
...
@@ -18,30 +18,52 @@ input value (see Intel Application Note #241618). Naturally it's
meaningful on x86 and x86_64 platforms only. The variable is normally
set up automatically upon toolkit initialization, but can be
manipulated afterwards to modify crypto library behaviour. For the
moment of this writing
seven bits are significant, namely
:
moment of this writing
following bits are significant
:
1. bit #4 denoting presence of Time-Stamp Counter.
2. bit #20, reserved by Intel, is used to choose among RC4 code
paths;
3. bit #23 denoting MMX support;
4. bit #25 denoting SSE support;
5. bit #26 denoting SSE2 support;
6. bit #28 denoting Hyperthreading, which is used to distinguish
=item bit #4 denoting presence of Time-Stamp Counter.
=item bit #19 denoting availability of CLFLUSH instruction;
=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
=item bit #23 denoting MMX support;
=item bit #24, FXSR bit, denoting availability of XMM registers;
=item bit #25 denoting SSE support;
=item bit #26 denoting SSE2 support;
=item bit #28 denoting Hyperthreading, which is used to distinguish
cores with shared cache;
7. bit #30, reserved by Intel, is used to choose among RC4 code
=item bit #30, reserved by Intel, is used to choose among RC4 code
paths;
8. bit #57 denoting Intel AES instruction set extension;
=item bit #33 denoting availability of PCLMULQDQ instruction;
=item bit #41 denoting SSSE3, Supplemental SSE3, support;
=item bit #43 denoting AMD XOP support (forced to zero on Intel);
=item bit #57 denoting AES-NI instruction set extension;
=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
=item bit #60 denoting AVX extension;
For example, clearing bit #26 at run-time disables high-performance
SSE2 code present in the crypto library. You might have to do this if
target OpenSSL application is executed on SSE2 capable CPU, but under
control of OS which does not support SSE2 extensions. Even though you
can manipulate the value programmatically, you most likely will find it
more appropriate to set up an environment variable with the same name
prior to starting the target application, e.g. on Intel P4 processor 'env
OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect
without modifying the application source code. Alternatively you can
reconfigure the toolkit with no-sse2 option and recompile.
SSE2 code present in the crypto library, while clearing bit #24
disables SSE2 code operating on 128-bit XMM register bank. You might
have to do the latter if target OpenSSL application is executed on SSE2
capable CPU, but under control of OS that does not enable XMM
registers. Even though you can manipulate the value programmatically,
you most likely will find it more appropriate to set up an environment
variable with the same name prior to starting the target application, e.g. on
Intel P4 processor 'env OPENSSL_ia32cap=0x16980010 apps/openssl', to
achieve same effect without modifying the application source code.
Alternatively you can reconfigure the toolkit with no-sse2 option and
recompile.
Less intuitive is clearing bit #28. The truth is that it's not copied
from CPUID output verbatim, but is adjusted to reflect whether or not
...
...
@@ -49,4 +71,3 @@ the data cache is actually shared between logical cores. This in turn
affects the decision on whether or not expensive countermeasures
against cache-timing attacks are applied, most notably in AES assembler
module.
=cut
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录