提交 c5cd28bd 编写于 作者: A Andy Polyakov

Extend OPENSSL_ia32cap_P with extra word to accommodate AVX2 capability.

上级 b3aee265
...@@ -125,7 +125,7 @@ static double SSLeay_MSVC5_hack=0.0; /* and for VC1.5 */ ...@@ -125,7 +125,7 @@ static double SSLeay_MSVC5_hack=0.0; /* and for VC1.5 */
defined(__INTEL__) || \ defined(__INTEL__) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
extern unsigned int OPENSSL_ia32cap_P[2]; extern unsigned int OPENSSL_ia32cap_P[4];
unsigned int *OPENSSL_ia32cap_loc(void) { return OPENSSL_ia32cap_P; } unsigned int *OPENSSL_ia32cap_loc(void) { return OPENSSL_ia32cap_P; }
#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
...@@ -137,7 +137,7 @@ typedef unsigned long long IA32CAP; ...@@ -137,7 +137,7 @@ typedef unsigned long long IA32CAP;
#endif #endif
void OPENSSL_cpuid_setup(void) void OPENSSL_cpuid_setup(void)
{ static int trigger=0; { static int trigger=0;
IA32CAP OPENSSL_ia32_cpuid(void); IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
IA32CAP vec; IA32CAP vec;
char *env; char *env;
...@@ -151,10 +151,18 @@ void OPENSSL_cpuid_setup(void) ...@@ -151,10 +151,18 @@ void OPENSSL_cpuid_setup(void)
#else #else
if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
#endif #endif
if (off) vec = OPENSSL_ia32_cpuid()&~vec; if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec;
OPENSSL_ia32cap_P[2] = 0;
if ((env=strchr(env,':'))) {
off = (env[1]=='~')?2:1;
vec = strtoul(env+off,NULL,0);
if (off>1) OPENSSL_ia32cap_P[2] &= ~vec;
else OPENSSL_ia32cap_P[2] = vec;
}
} }
else else
vec = OPENSSL_ia32_cpuid(); vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
/* /*
* |(1<<10) sets a reserved bit to signal that variable * |(1<<10) sets a reserved bit to signal that variable
...@@ -165,7 +173,7 @@ void OPENSSL_cpuid_setup(void) ...@@ -165,7 +173,7 @@ void OPENSSL_cpuid_setup(void)
OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
} }
#else #else
unsigned int OPENSSL_ia32cap_P[2]; unsigned int OPENSSL_ia32cap_P[4];
#endif #endif
#else #else
...@@ -173,7 +181,7 @@ unsigned int *OPENSSL_ia32cap_loc(void) { return NULL; } ...@@ -173,7 +181,7 @@ unsigned int *OPENSSL_ia32cap_loc(void) { return NULL; }
#endif #endif
int OPENSSL_NONPIC_relocated = 0; int OPENSSL_NONPIC_relocated = 0;
#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ) #if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
void OPENSSL_cpuid_setup(void) {} void OPENSSL_cpuid_setup(unsigned int *) {}
#endif #endif
#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL) #if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL)
......
...@@ -131,6 +131,32 @@ sub ::rdrand ...@@ -131,6 +131,32 @@ sub ::rdrand
{ &::generic("rdrand",@_); } { &::generic("rdrand",@_); }
} }
# rxb(\@opcode,$dst,$src1,$src2,$rxb) - append one computed byte to the
# array referenced by the first argument.
#
# Bits 5..7 of $rxb are first forced to 1, then cleared selectively for
# every register number that is 8 or above:
#	0x04<<5 (bit 7) when $dst  >= 8,
#	0x01<<5 (bit 5) when $src1 >= 8,
#	0x02<<5 (bit 6) when $src2 >= 8.
# The low five bits of $rxb are passed through untouched. A negative
# register number (callers pass -1 for an unused operand) never clears
# anything.
# NOTE(review): presumably this is the inverted R/X/B byte that follows
# the 0x8f escape in the caller - confirm against the encoding manual.
#
# The array is reached through a lexical reference instead of aliasing
# the package-wide *opcode glob, so no global is touched and the routine
# also compiles under "use strict".
#
# Returns the value of push, i.e. the new number of array elements.
sub rxb {
    my ($opcode,$dst,$src1,$src2,$rxb)=@_;

    $rxb|=0x7<<5;			# start with all three top bits set
    $rxb&=~(0x04<<5) if($dst>=8);
    $rxb&=~(0x01<<5) if($src1>=8);
    $rxb&=~(0x02<<5) if($src2>=8);

    push @$opcode,$rxb;
}
# ::vprotd(@operands) - emit the vprotd instruction.
#
# The operands are joined with commas and matched against the
# "xmmN,xmmM,immediate" form with N and M in 0..7. On a match the
# instruction is assembled by hand as a byte sequence starting with 0x8f
# and handed to &::data_byte; any other operand combination is passed
# on to &::generic unchanged.
sub ::vprotd
{   my $args=join(',',@_);
    my ($dst,$src,$imm)=
	($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/);

    # third capture is mandatory, so undefined means "no match"
    return &::generic("vprotd",@_) if (!defined($imm));

    my @opcode=(0x8f);
    rxb(\@opcode,$dst,$src,-1,0x08);

    my $modrm=0xc0|($src&7)|(($dst&7)<<3);	# ModR/M
    # immediates with leading 0 (0x.., 0..) are converted with oct(),
    # everything else is pushed exactly as it was written
    my $byte=($imm=~/^0/)?oct($imm):$imm;

    push @opcode,0x78,0xc2,$modrm,$byte;
    &::data_byte(@opcode);
}
# label management # label management
$lbdecor="L"; # local label decoration, set by package $lbdecor="L"; # local label decoration, set by package
$label="000"; $label="000";
......
...@@ -70,6 +70,8 @@ sub ::DWP ...@@ -70,6 +70,8 @@ sub ::DWP
{ my($addr,$reg1,$reg2,$idx)=@_; { my($addr,$reg1,$reg2,$idx)=@_;
my $ret=""; my $ret="";
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
$addr =~ s/^\s+//; $addr =~ s/^\s+//;
# prepend global references with optional underscore # prepend global references with optional underscore
$addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
...@@ -157,7 +159,7 @@ sub ::file_end ...@@ -157,7 +159,7 @@ sub ::file_end
} }
} }
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
if ($::macosx) { push (@out,"$tmp,2\n"); } if ($::macosx) { push (@out,"$tmp,2\n"); }
elsif ($::elf) { push (@out,"$tmp,4\n"); } elsif ($::elf) { push (@out,"$tmp,4\n"); }
else { push (@out,"$tmp\n"); } else { push (@out,"$tmp\n"); }
......
...@@ -39,6 +39,8 @@ sub get_mem ...@@ -39,6 +39,8 @@ sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_; { my($size,$addr,$reg1,$reg2,$idx)=@_;
my($post,$ret); my($post,$ret);
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
$ret .= "$size PTR " if ($size ne ""); $ret .= "$size PTR " if ($size ne "");
$addr =~ s/^\s+//; $addr =~ s/^\s+//;
...@@ -133,7 +135,7 @@ ___ ...@@ -133,7 +135,7 @@ ___
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
{ my $comm=<<___; { my $comm=<<___;
.bss SEGMENT 'BSS' .bss SEGMENT 'BSS'
COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
.bss ENDS .bss ENDS
___ ___
# comment out OPENSSL_ia32cap_P declarations # comment out OPENSSL_ia32cap_P declarations
......
...@@ -36,6 +36,8 @@ sub get_mem ...@@ -36,6 +36,8 @@ sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_; { my($size,$addr,$reg1,$reg2,$idx)=@_;
my($post,$ret); my($post,$ret);
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
if ($size ne "") if ($size ne "")
{ $ret .= "$size"; { $ret .= "$size";
$ret .= " PTR" if ($::mwerks); $ret .= " PTR" if ($::mwerks);
...@@ -117,7 +119,7 @@ sub ::file_end ...@@ -117,7 +119,7 @@ sub ::file_end
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
{ my $comm=<<___; { my $comm=<<___;
${drdecor}segment .bss ${drdecor}segment .bss
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8 ${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
___ ___
# comment out OPENSSL_ia32cap_P declarations # comment out OPENSSL_ia32cap_P declarations
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
......
...@@ -23,7 +23,7 @@ print<<___; ...@@ -23,7 +23,7 @@ print<<___;
call OPENSSL_cpuid_setup call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,8,4 .comm OPENSSL_ia32cap_P,16,4
.text .text
...@@ -52,12 +52,13 @@ OPENSSL_rdtsc: ...@@ -52,12 +52,13 @@ OPENSSL_rdtsc:
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc .size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid .globl OPENSSL_ia32_cpuid
.type OPENSSL_ia32_cpuid,\@abi-omnipotent .type OPENSSL_ia32_cpuid,\@function,1
.align 16 .align 16
OPENSSL_ia32_cpuid: OPENSSL_ia32_cpuid:
mov %rbx,%r8 # save %rbx mov %rbx,%r8 # save %rbx
xor %eax,%eax xor %eax,%eax
mov %eax,8(%rdi) # clear 3rd word
cpuid cpuid
mov %eax,%r11d # max value for standard query level mov %eax,%r11d # max value for standard query level
...@@ -125,6 +126,14 @@ OPENSSL_ia32_cpuid: ...@@ -125,6 +126,14 @@ OPENSSL_ia32_cpuid:
shr \$14,%r10d shr \$14,%r10d
and \$0xfff,%r10d # number of cores -1 per L1D and \$0xfff,%r10d # number of cores -1 per L1D
cmp \$7,%r11d
jb .Lnocacheinfo
mov \$7,%eax
xor %ecx,%ecx
cpuid
mov %ebx,8(%rdi)
.Lnocacheinfo: .Lnocacheinfo:
mov \$1,%eax mov \$1,%eax
cpuid cpuid
...@@ -164,6 +173,7 @@ OPENSSL_ia32_cpuid: ...@@ -164,6 +173,7 @@ OPENSSL_ia32_cpuid:
.Lclear_avx: .Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits and %eax,%r9d # clear AVX, FMA and AMD XOP bits
andl \$0xffffffdf,8(%rdi) # cleax AVX2, ~(1<<5)
.Ldone: .Ldone:
shl \$32,%r9 shl \$32,%r9
mov %r10d,%eax mov %r10d,%eax
......
...@@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } ...@@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&xor ("eax","eax"); &xor ("eax","eax");
&bt ("ecx",21); &bt ("ecx",21);
&jnc (&label("nocpuid")); &jnc (&label("nocpuid"));
&mov ("esi",&wparam(0));
&mov (&DWP(8,"esi"),"eax"); # clear 3rd word
&cpuid (); &cpuid ();
&mov ("edi","eax"); # max value for standard query level &mov ("edi","eax"); # max value for standard query level
...@@ -89,6 +91,15 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } ...@@ -89,6 +91,15 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&shr ("edi",14); &shr ("edi",14);
&and ("edi",0xfff); # number of cores -1 per L1D &and ("edi",0xfff); # number of cores -1 per L1D
&cmp ("edi",7);
&jb (&label("nocacheinfo"));
&mov ("esi",&wparam(0));
&mov ("eax",7);
&xor ("ecx","ecx");
&cpuid ();
&mov (&DWP(8,"esi"),"ebx");
&set_label("nocacheinfo"); &set_label("nocacheinfo");
&mov ("eax",1); &mov ("eax",1);
&cpuid (); &cpuid ();
...@@ -133,6 +144,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } ...@@ -133,6 +144,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&and ("esi",0xfeffffff); # clear FXSR &and ("esi",0xfeffffff); # clear FXSR
&set_label("clear_avx"); &set_label("clear_avx");
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
&mov ("edi",&wparam(0));
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done"); &set_label("done");
&mov ("eax","esi"); &mov ("eax","esi");
&mov ("edx","ebp"); &mov ("edx","ebp");
......
...@@ -72,3 +72,17 @@ the data cache is actually shared between logical cores. This in turn ...@@ -72,3 +72,17 @@ the data cache is actually shared between logical cores. This in turn
affects the decision on whether or not expensive countermeasures affects the decision on whether or not expensive countermeasures
against cache-timing attacks are applied, most notably in AES assembler against cache-timing attacks are applied, most notably in AES assembler
module. module.
The vector is further extended with the EBX value returned by CPUID with
EAX=7 and ECX=0 as input. The following bits are significant:
=item bit #64+3 denoting availability of BMI1 instructions, e.g. ANDN;
=item bit #64+5 denoting availability of AVX2 instructions;
=item bit #64+8 denoting availability of BMI2 instructions, e.g. MULX
and RORX;
=item bit #64+18 denoting availability of RDSEED instruction;
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册