提交 14e21f86 编写于 作者: A Andy Polyakov

Add framework for yet another assembler module dubbed "cpuid." The idea

is to have a placeholder for small routines which can be written only
in assembler. In the IA-32 case this includes processor capability
identification and access to the Time-Stamp Counter. As discussed earlier,
OPENSSL_ia32cap is introduced to control the recently added SSE2 code
paths (see docs/crypto/OPENSSL_ia32cap.pod). For the moment the
code is operational on ELF platforms only. I haven't checked it yet,
but I have every reason to believe that the Windows build will fail to
link too. I'll be looking into it shortly...
上级 f10725a6
此差异已折叠。
...@@ -80,6 +80,13 @@ MAKEDEPPROG=makedepend ...@@ -80,6 +80,13 @@ MAKEDEPPROG=makedepend
AS=$(CC) -c AS=$(CC) -c
ASFLAG=$(CFLAG) ASFLAG=$(CFLAG)
# For x86 assembler: Set PROCESSOR to 386 if you want to support
# the 80386.
PROCESSOR=
# CPUID module collects small commonly used assembler snippets
CPUID_OBJ=
# Set BN_ASM to bn_asm.o if you want to use the C version # Set BN_ASM to bn_asm.o if you want to use the C version
BN_ASM= bn_asm.o BN_ASM= bn_asm.o
#BN_ASM= bn_asm.o #BN_ASM= bn_asm.o
...@@ -95,10 +102,6 @@ BN_ASM= bn_asm.o ...@@ -95,10 +102,6 @@ BN_ASM= bn_asm.o
#BN_ASM= asm/x86w16.o # 16 bit code for Windows 3.1/DOS #BN_ASM= asm/x86w16.o # 16 bit code for Windows 3.1/DOS
#BN_ASM= asm/x86w32.o # 32 bit code for Windows 3.1 #BN_ASM= asm/x86w32.o # 32 bit code for Windows 3.1
# For x86 assembler: Set PROCESSOR to 386 if you want to support
# the 80386.
PROCESSOR=
# Set DES_ENC to des_enc.o if you want to use the C version # Set DES_ENC to des_enc.o if you want to use the C version
#There are 4 x86 assember options. #There are 4 x86 assember options.
DES_ENC= asm/dx86-out.o asm/yx86-out.o DES_ENC= asm/dx86-out.o asm/yx86-out.o
...@@ -229,6 +232,7 @@ BUILDENV= PLATFORM='${PLATFORM}' PROCESSOR='${PROCESSOR}' \ ...@@ -229,6 +232,7 @@ BUILDENV= PLATFORM='${PLATFORM}' PROCESSOR='${PROCESSOR}' \
EXE_EXT='${EXE_EXT}' SHARED_LIBS='${SHARED_LIBS}' \ EXE_EXT='${EXE_EXT}' SHARED_LIBS='${SHARED_LIBS}' \
SHLIB_EXT='${SHLIB_EXT}' SHLIB_TARGET='${SHLIB_TARGET}' \ SHLIB_EXT='${SHLIB_EXT}' SHLIB_TARGET='${SHLIB_TARGET}' \
PEX_LIBS='${PEX_LIBS}' EX_LIBS='${EX_LIBS}' \ PEX_LIBS='${PEX_LIBS}' EX_LIBS='${EX_LIBS}' \
CPUID_OBJ='${CPUID_OBJ}' \
BN_ASM='${BN_ASM}' DES_ENC='${DES_ENC}' \ BN_ASM='${BN_ASM}' DES_ENC='${DES_ENC}' \
AES_ASM_OBJ='${AES_ASM_OBJ}' \ AES_ASM_OBJ='${AES_ASM_OBJ}' \
BF_ENC='${BF_ENC}' CAST_ENC='${CAST_ENC}' \ BF_ENC='${BF_ENC}' CAST_ENC='${CAST_ENC}' \
......
此差异已折叠。
...@@ -22,6 +22,7 @@ PEX_LIBS= ...@@ -22,6 +22,7 @@ PEX_LIBS=
EX_LIBS= EX_LIBS=
CFLAGS= $(INCLUDE) $(CFLAG) CFLAGS= $(INCLUDE) $(CFLAG)
ASFLAGS= $(INCLUDE) $(ASFLAG)
LIBS= LIBS=
...@@ -39,7 +40,7 @@ GENERAL=Makefile README crypto-lib.com install.com ...@@ -39,7 +40,7 @@ GENERAL=Makefile README crypto-lib.com install.com
LIB= $(TOP)/libcrypto.a LIB= $(TOP)/libcrypto.a
SHARED_LIB= libcrypto$(SHLIB_EXT) SHARED_LIB= libcrypto$(SHLIB_EXT)
LIBSRC= cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c o_dir.c LIBSRC= cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c o_dir.c
LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o $(CPUID_OBJ)
SRC= $(LIBSRC) SRC= $(LIBSRC)
...@@ -62,6 +63,13 @@ buildinf.h: ../Makefile.ssl ...@@ -62,6 +63,13 @@ buildinf.h: ../Makefile.ssl
echo " #define DATE \"`LC_ALL=C LC_TIME=C date`\""; \ echo " #define DATE \"`LC_ALL=C LC_TIME=C date`\""; \
echo '#endif' ) >buildinf.h echo '#endif' ) >buildinf.h
# Generated assembler sources for the CPUID module.
#
# IA-32 ELF flavour: x86cpuid.pl is run with "elf", the C flags and
# $(PROCESSOR) as arguments, and its standard output is redirected
# into the target.
x86cpuid-elf.s: x86cpuid.pl perlasm/x86asm.pl
$(PERL) x86cpuid.pl elf $(CFLAGS) $(PROCESSOR) > $@
# AMD64: the script is handed the target name as its only argument
# (no redirection here).
amd64cpuid.s: amd64cpuid.pl
$(PERL) amd64cpuid.pl $@
# IA-64: run ia64cpuid.S through the compiler driver with -E
# (preprocess only) and capture the result in the target.
ia64cpuid.s: ia64cpuid.S
$(CC) $(CFLAGS) -E ia64cpuid.S > $@
testapps: testapps:
if echo ${SDIRS} | fgrep ' des '; \ if echo ${SDIRS} | fgrep ' des '; \
then cd des && $(MAKE) CC='$(CC)' INCLUDES='${INCLUDES}' CFLAG='${CFLAG}' INSTALLTOP='${INSTALLTOP}' PEX_LIBS='${PEX_LIBS}' EX_LIBS='${EX_LIBS}' BN_ASM='${BN_ASM}' DES_ENC='${DES_ENC}' SHA1_ASM_OBJ='${SHA1_ASM_OBJ}' MD5_ASM_OBJ='${MD5_ASM_OBJ}' RMD160_ASM_OBJ='${RMD160_ASM_OBJ}' BF_ENC='${BF_ENC}' CAST_ENC='${CAST_ENC}' RC4_ENC='${RC4_ENC}' RC5_ENC='${RC5_ENC}' AR='${AR}' PROCESSOR='${PROCESSOR}' PERL='${PERL}' RANLIB='${RANLIB}' des; fi then cd des && $(MAKE) CC='$(CC)' INCLUDES='${INCLUDES}' CFLAG='${CFLAG}' INSTALLTOP='${INSTALLTOP}' PEX_LIBS='${PEX_LIBS}' EX_LIBS='${EX_LIBS}' BN_ASM='${BN_ASM}' DES_ENC='${DES_ENC}' SHA1_ASM_OBJ='${SHA1_ASM_OBJ}' MD5_ASM_OBJ='${MD5_ASM_OBJ}' RMD160_ASM_OBJ='${RMD160_ASM_OBJ}' BF_ENC='${BF_ENC}' CAST_ENC='${CAST_ENC}' RC4_ENC='${RC4_ENC}' RC5_ENC='${RC5_ENC}' AR='${AR}' PROCESSOR='${PROCESSOR}' PERL='${PERL}' RANLIB='${RANLIB}' des; fi
...@@ -148,7 +156,7 @@ depend: ...@@ -148,7 +156,7 @@ depend:
done; done;
clean: clean:
rm -f buildinf.h *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff rm -f buildinf.h *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
@for i in $(SDIRS) ;\ @for i in $(SDIRS) ;\
do \ do \
(cd $$i && echo "making clean in crypto/$$i..." && \ (cd $$i && echo "making clean in crypto/$$i..." && \
......
#!/usr/bin/env perl
#
# Generates the AMD64 flavour of the "cpuid" assembler module, which
# for the moment consists of a single routine, OPENSSL_rdtsc.
#
# Usage: perl amd64cpuid.pl <output-file>
#
# The output file name selects the assembler dialect: a name ending in
# "win64a.s" or "win64a.asm" produces the PROC/ENDP-style listing,
# any other name produces the AT&T-syntax listing.
#
# In both dialects OPENSSL_rdtsc executes rdtsc, shifts rdx left by 32
# and ORs it into rax before returning.

$output=shift;

# The extension test needs an alternation group. The former character
# class [s|asm] matched any ONE of the characters 's', '|', 'a', 'm'
# after the dot, so it accepted e.g. "win64a.m" as well.
$win64a=1 if ($output =~ /win64a\.(s|asm)$/);

# Use the low-precedence "or" so that die is tied to open()'s result.
# With "||" the test was applied to the ">$output" string, which is
# always true, so a failed open was silently ignored.
open(STDOUT,">$output") or die "can't open $output: $!";

# Win64 dialect.
print<<___ if(defined($win64a));
TEXT SEGMENT
PUBLIC OPENSSL_rdtsc
ALIGN 16
OPENSSL_rdtsc PROC NEAR
rdtsc
shl rdx,32
or rax,rdx
ret
OPENSSL_rdtsc ENDP
TEXT ENDS
END
___

# Every other platform: AT&T syntax.
print<<___ if(!defined($win64a));
.text
.globl OPENSSL_rdtsc
.align 16
OPENSSL_rdtsc:
rdtsc
shl \$32,%rdx
or %rdx,%rax
ret
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
___
...@@ -539,6 +539,38 @@ const char *CRYPTO_get_lock_name(int type) ...@@ -539,6 +539,38 @@ const char *CRYPTO_get_lock_name(int type)
return(sk_value(app_locks,type-CRYPTO_NUM_LOCKS)); return(sk_value(app_locks,type-CRYPTO_NUM_LOCKS));
} }
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)

/* IA-32 processor capability bit vector; zero until it is set up. */
unsigned long OPENSSL_ia32cap=0;

/* Accessor returning the address of OPENSSL_ia32cap. */
unsigned long *OPENSSL_ia32cap_loc() { return &OPENSSL_ia32cap; }

#if !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
#define OPENSSL_CPUID_SETUP
/*
 * One-shot initialisation of OPENSSL_ia32cap. Only the first call
 * does any work; later calls return immediately.
 *
 * The value is taken from the OPENSSL_ia32cap environment variable
 * when it is set (parsed by strtoul with base 0), otherwise from the
 * assembler routine OPENSSL_ia32_cpuid().
 *
 * In both cases |(1<<10) sets a reserved bit to signal that the
 * variable was initialized already... This is to avoid interference
 * with cpuid snippets in ELF .init segment.
 */
void OPENSSL_cpuid_setup()
	{
	static int done=0;
	unsigned long OPENSSL_ia32_cpuid();
	unsigned long caps;
	char *setting;

	if (done) return;
	done=1;

	setting=getenv("OPENSSL_ia32cap");
	if (setting)
		caps = strtoul(setting,NULL,0);
	else
		caps = OPENSSL_ia32_cpuid();

	OPENSSL_ia32cap = caps|(1<<10);
	}
#endif
#endif

/* No-op fallback for builds where the real set-up routine is absent. */
#if !defined(OPENSSL_CPUID_SETUP)
void OPENSSL_cpuid_setup() {}
#endif
#ifdef _DLL #ifdef _DLL
#ifdef OPENSSL_SYS_WIN32 #ifdef OPENSSL_SYS_WIN32
...@@ -551,6 +583,7 @@ BOOL WINAPI DLLEntryPoint(HINSTANCE hinstDLL, DWORD fdwReason, ...@@ -551,6 +583,7 @@ BOOL WINAPI DLLEntryPoint(HINSTANCE hinstDLL, DWORD fdwReason,
switch(fdwReason) switch(fdwReason)
{ {
case DLL_PROCESS_ATTACH: case DLL_PROCESS_ATTACH:
OPENSSL_cpuid_setup();
break; break;
case DLL_THREAD_ATTACH: case DLL_THREAD_ATTACH:
break; break;
......
...@@ -74,6 +74,13 @@ void OpenSSL_add_all_algorithms(void) ...@@ -74,6 +74,13 @@ void OpenSSL_add_all_algorithms(void)
void OPENSSL_add_all_algorithms_noconf(void) void OPENSSL_add_all_algorithms_noconf(void)
{ {
/*
* For the moment OPENSSL_cpuid_setup does something
* only on IA-32, but we reserve the option for all
* platforms...
*/
void OPENSSL_cpuid_setup();
OPENSSL_cpuid_setup();
OpenSSL_add_all_ciphers(); OpenSSL_add_all_ciphers();
OpenSSL_add_all_digests(); OpenSSL_add_all_digests();
#ifndef OPENSSL_NO_ENGINE #ifndef OPENSSL_NO_ENGINE
......
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
//
// OPENSSL_rdtsc: IA-64 counterpart of the routine of the same name in
// the other cpuid modules. It copies application register ar.itc into
// r8 and returns through b0.
// NOTE(review): r8 is presumably the integer return-value register and
// ar.itc the interval time counter -- confirm against the IA-64 ABI.
.text
.global OPENSSL_rdtsc#
.proc OPENSSL_rdtsc#
OPENSSL_rdtsc:
// r8 = current value of ar.itc
mov r8=ar.itc
// return to caller
br.ret b0
.endp OPENSSL_rdtsc#
...@@ -160,6 +160,8 @@ sub main'jne { &out1("jne",@_); } ...@@ -160,6 +160,8 @@ sub main'jne { &out1("jne",@_); }
sub main'jno { &out1("jno",@_); } sub main'jno { &out1("jno",@_); }
sub main'push { &out1("push",@_); $stack+=4; } sub main'push { &out1("push",@_); $stack+=4; }
sub main'pop { &out1("pop",@_); $stack-=4; } sub main'pop { &out1("pop",@_); $stack-=4; }
# pushf/popf take no operands: emit the bare instruction through out0
# and adjust $stack by 4, the same bookkeeping push/pop do.
sub main'pushf { &out0("pushf"); $stack+=4; }
sub main'popf { &out0("popf"); $stack-=4; }
sub main'bswap { &out1("bswap",@_); &using486(); } sub main'bswap { &out1("bswap",@_); &using486(); }
sub main'not { &out1("not",@_); } sub main'not { &out1("not",@_); }
sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); } sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); }
...@@ -168,6 +170,8 @@ sub main'nop { &out0("nop"); } ...@@ -168,6 +170,8 @@ sub main'nop { &out0("nop"); }
sub main'test { &out2("test",@_); } sub main'test { &out2("test",@_); }
sub main'bt { &out2("bt",@_); } sub main'bt { &out2("bt",@_); }
sub main'leave { &out0("leave"); } sub main'leave { &out0("leave"); }
# Operand-less instructions used by the cpuid module; emitted verbatim
# through out0.
sub main'cpuid { &out0("cpuid"); }
sub main'rdtsc { &out0("rdtsc"); }
# SSE2 # SSE2
sub main'emms { &out0("emms"); } sub main'emms { &out0("emms"); }
......
...@@ -169,6 +169,8 @@ sub main'jno { &out1("jno NEAR",@_); } ...@@ -169,6 +169,8 @@ sub main'jno { &out1("jno NEAR",@_); }
sub main'push { &out1("push",@_); $stack+=4; } sub main'push { &out1("push",@_); $stack+=4; }
sub main'pop { &out1("pop",@_); $stack-=4; } sub main'pop { &out1("pop",@_); $stack-=4; }
# pushf/popf take no operands: emit the bare instruction through out0
# and adjust $stack by 4, the same bookkeeping push/pop do.
sub main'pushf { &out0("pushf"); $stack+=4; }
sub main'popf { &out0("popf"); $stack-=4; }
sub main'bswap { &out1("bswap",@_); &using486(); } sub main'bswap { &out1("bswap",@_); &using486(); }
sub main'not { &out1("not",@_); } sub main'not { &out1("not",@_); }
sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); } sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); }
...@@ -177,6 +179,8 @@ sub main'nop { &out0("nop"); } ...@@ -177,6 +179,8 @@ sub main'nop { &out0("nop"); }
sub main'test { &out2("test",@_); } sub main'test { &out2("test",@_); }
sub main'bt { &out2("bt",@_); } sub main'bt { &out2("bt",@_); }
sub main'leave { &out0("leave"); } sub main'leave { &out0("leave"); }
# Operand-less instructions used by the cpuid module; emitted verbatim
# through out0.
sub main'cpuid { &out0("cpuid"); }
sub main'rdtsc { &out0("rdtsc"); }
# SSE2 # SSE2
sub main'emms { &out0("emms"); } sub main'emms { &out0("emms"); }
......
...@@ -199,6 +199,8 @@ sub main'nop { &out0("nop"); } ...@@ -199,6 +199,8 @@ sub main'nop { &out0("nop"); }
sub main'test { &out2("testl",@_); } sub main'test { &out2("testl",@_); }
sub main'bt { &out2("btl",@_); } sub main'bt { &out2("btl",@_); }
sub main'leave { &out0("leave"); } sub main'leave { &out0("leave"); }
# In this backend cpuid and rdtsc are emitted as raw ".word" data
# rather than as mnemonics -- presumably because some assemblers it
# targets do not recognise them (TODO confirm).
# NOTE(review): stored little-endian, 0xa20f and 0x310f give the byte
# pairs 0F A2 and 0F 31; verify these against the instruction encodings.
sub main'cpuid { &out0(".word\t0xa20f"); }
sub main'rdtsc { &out0(".word\t0x310f"); }
# SSE2 # SSE2
sub main'emms { &out0("emms"); } sub main'emms { &out0("emms"); }
...@@ -519,11 +521,14 @@ sub main'file_end ...@@ -519,11 +521,14 @@ sub main'file_end
# SSE/MMX module with this snippet... Well, it's 72 # SSE/MMX module with this snippet... Well, it's 72
# bytes long and for the moment we have two modules. # bytes long and for the moment we have two modules.
# Let's argue when we have 7 modules or so... # Let's argue when we have 7 modules or so...
#
# $1<<10 sets a reserved bit to signal that variable
# was initialized already...
&main'picmeup("edx","OPENSSL_ia32cap"); &main'picmeup("edx","OPENSSL_ia32cap");
$tmp=<<___; $tmp=<<___;
cmpl \$0,(%edx) cmpl \$0,(%edx)
jne 1f jne 1f
movl \$1,(%edx) movl \$1<<10,(%edx)
pushf pushf
popl %eax popl %eax
movl %eax,%ecx movl %eax,%ecx
...@@ -539,12 +544,13 @@ sub main'file_end ...@@ -539,12 +544,13 @@ sub main'file_end
pushl %ebx pushl %ebx
movl %edx,%edi movl %edx,%edi
movl \$1,%eax movl \$1,%eax
cpuid .word 0xa20f
orl \$1,%edx orl \$1<<10,%edx
movl %edx,0(%edi) movl %edx,0(%edi)
movl %ecx,4(%edi) movl %ecx,4(%edi)
popl %ebx popl %ebx
popl %edi popl %edi
.align 4
1: 1:
___ ___
push (@out,$tmp); push (@out,$tmp);
...@@ -675,3 +681,17 @@ ___ ...@@ -675,3 +681,17 @@ ___
} }
sub main'blindpop { &out1("popl",@_); } sub main'blindpop { &out1("popl",@_); }
# Queue a call to the named function in the .init section.
#
# Takes one argument, the function name, which is emitted with $under
# prepended. Three lines are appended to @out as a single element:
# .pushsection .init, the call, and .popsection. Nothing is emitted
# unless $main'elf is set.
sub main'initseg
	{
	local($func)=@_;

	if ($main'elf)
		{
		push(@out,join("\n",
			".pushsection .init",
			"call $under$func",
			".popsection",
			""));
		}
	}
#!/usr/bin/env perl
#
# Generates the IA-32 "cpuid" assembler module through the perlasm
# framework. Two routines are emitted, OPENSSL_ia32_cpuid and
# OPENSSL_rdtsc, plus (when $main'elf is set) an initseg request for
# OPENSSL_cpuid_setup.
#
# The first command-line argument is handed to asm_init() --
# presumably it selects the output assembler flavour (TODO confirm
# against x86asm.pl).
push(@INC,"perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],"x86cpuid");
# OPENSSL_ia32_cpuid: leaves in eax the edx value present after the
# cpuid path (0 if that path is skipped) and in edx whatever ecx holds
# at that point.
&function_begin("OPENSSL_ia32_cpuid");
# edx=0 is the result used when cpuid is not executed.
&xor ("edx","edx");
# Read the flags register into eax and keep a copy in ecx.
&pushf ();
&pop ("eax");
&mov ("ecx","eax");
# Flip bit 21 and try to write the modified flags back.
# NOTE(review): bit 21 is presumably the EFLAGS ID bit, which can be
# toggled only on processors that implement cpuid -- confirm.
&xor ("eax",1<<21);
&push ("eax");
&popf ();
# Re-read the flags; ecx then holds the bits that differ from the copy.
&pushf ();
&pop ("eax");
&xor ("ecx","eax");
# If bit 21 did not change, skip the cpuid instruction.
&bt ("ecx",21);
&jnc (&label("nocpuid"));
# Execute cpuid with eax=1.
&mov ("eax",1);
&cpuid ();
&set_label("nocpuid");
# Move the results into the return registers.
&mov ("eax","edx");
&mov ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
&external_label("OPENSSL_ia32cap");
# OPENSSL_rdtsc: executes rdtsc only when bit 4 of OPENSSL_ia32cap is
# set; otherwise eax and edx stay zero.
&function_begin_B("OPENSSL_rdtsc");
&xor ("eax","eax");
&xor ("edx","edx");
# ecx = address of OPENSSL_ia32cap (picmeup presumably handles
# position-independent addressing -- TODO confirm).
&picmeup("ecx","OPENSSL_ia32cap");
# Test bit 4 of the capability word; skip rdtsc when it is clear.
&bt (&DWP(0,"ecx"),4);
&jnc (&label("notsc"));
&rdtsc ();
&set_label("notsc");
&ret ();
&function_end_B("OPENSSL_rdtsc");
# Hand OPENSSL_cpuid_setup to initseg when $main'elf is set.
&initseg("OPENSSL_cpuid_setup") if ($main'elf);
&asm_finish();
=pod
=head1 NAME
OPENSSL_ia32cap
=head1 SYNOPSIS
extern unsigned long OPENSSL_ia32cap;
unsigned long *OPENSSL_ia32cap_loc();
=head1 DESCRIPTION
OPENSSL_ia32cap is a variable containing IA-32 processor capabilities
bit vector as it appears in EDX register after executing CPUID
instruction with EAX=1 input value (see Intel Application Note
#241618). Naturally it's defined/meaningful on IA-32 platforms only.
The variable is normally set up automatically upon toolkit
initialization and can be manipulated afterwards to modify crypto
library behaviour. At the time of this writing only two bits are
significant, namely bit #26 denoting SSE2 support, and bit #4 denoting
presence of the Time-Stamp Counter. Clearing bit #26 at run-time, for
example, disables the high-performance SSE2 code present in the crypto
library. You might have to do this if the target OpenSSL application is
executed on an SSE2-capable CPU, but under control of an OS which does not
support SSE2 extensions. Even though you can programmatically
manipulate the value, you will most likely find it more appropriate to
set up an environment variable with the same name prior to starting the
target application, e.g. 'env OPENSSL_ia32cap=0x10 apps/openssl', to
achieve the same effect without modifying the application source code.
Alternatively you can reconfigure the toolkit with the no-sse2 option and
recompile.
=cut
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册