Commit 127186bf authored by Andy Polyakov

e_padlock: add support for x86_64 gcc.

Parent f06d0072
@@ -101,7 +101,10 @@
compiler choice is limited to GCC and Microsoft C. */
#undef COMPILE_HW_PADLOCK
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
# if (defined(__GNUC__) && __GNUC__>=2 && \
(defined(__i386__) || defined(__i386) || \
defined(__x86_64__) || defined(__x86_64)) \
) || \
(defined(_MSC_VER) && defined(_M_IX86))
# define COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
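/* Note on the hunk above: the GCC arm of the test now admits both
   ia32 and x86_64 builds (for any GCC >= 2), while the Microsoft C
   arm still compiles the PadLock code for 32-bit (_M_IX86) only. */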
@@ -294,6 +297,7 @@ static volatile struct padlock_cipher_data *padlock_saved_context;
* =======================================================
*/
#if defined(__GNUC__) && __GNUC__>=2
#if defined(__i386__) || defined(__i386)
/*
* As for excessive "push %ebx"/"pop %ebx" found all over.
* When generating position-independent code GCC won't let
@@ -373,21 +377,6 @@
return padlock_use_ace + padlock_use_rng;
}
#ifndef OPENSSL_NO_AES
/* Our own htonl()/ntohl() */
static inline void
padlock_bswapl(AES_KEY *ks)
{
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
unsigned int *key = ks->rd_key;
while (i--) {
asm volatile ("bswapl %0" : "+r"(*key));
key++;
}
}
#endif
/* Force key reload from memory to the CPU microcode.
Loading EFLAGS from the stack clears EFLAGS[30]
which does the trick. */
@@ -445,12 +434,127 @@ static inline void *name(size_t cnt, \
: "edx", "cc", "memory"); \
return iv; \
}
#endif
#elif defined(__x86_64__) || defined(__x86_64)
/* Load supported features of the CPU to see if
the PadLock is available. */
static int
padlock_available(void)
{
char vendor_string[16];
unsigned int eax, edx;
/* Are we running on the Centaur (VIA) CPU? */
eax = 0x00000000;
vendor_string[12] = 0;
asm volatile (
"cpuid\n"
"movl %%ebx,(%1)\n"
"movl %%edx,4(%1)\n"
"movl %%ecx,8(%1)\n"
: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
if (strcmp(vendor_string, "CentaurHauls") != 0)
return 0;
/* Check for Centaur Extended Feature Flags presence */
eax = 0xC0000000;
asm volatile ("cpuid"
: "+a"(eax) : : "rbx", "rcx", "rdx");
if (eax < 0xC0000001)
return 0;
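/* Note: leaf 0xC0000000 reports the highest supported Centaur
   leaf in EAX; anything below 0xC0000001 means there are no
   extended feature flags to read. */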
/* Read the Centaur Extended Feature Flags */
eax = 0xC0000001;
asm volatile ("cpuid"
: "+a"(eax), "=d"(edx) : : "rbx", "rcx");
/* Fill up some flags */
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
return padlock_use_ace + padlock_use_rng;
}
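/* Note: each facility is reported as a pair of bits, "exists" and
   "enabled" (RNG in EDX bits 2-3, ACE in bits 6-7), which is why
   both tests mask two bits at once. */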
/* Force key reload from memory to the CPU microcode.
Loading EFLAGS from the stack clears EFLAGS[30]
which does the trick. */
static inline void
padlock_reload_key(void)
{
asm volatile ("pushfq; popfq");
}
#ifndef OPENSSL_NO_AES
/*
* This is heuristic key context tracing. At first one
* believes that one should use atomic swap instructions,
* but it's not actually necessary. Point is that if
* padlock_saved_context was changed by another thread
* after we've read it and before we compare it with cdata,
* our key *shall* be reloaded upon thread context switch
* and we are therefore set in either case...
*/
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
asm volatile (
"pushfq\n"
" btl $30,(%%rsp)\n"
" jnc 1f\n"
" cmpq %2,%1\n"
" je 1f\n"
" popfq\n"
" subq $8,%%rsp\n"
"1: addq $8,%%rsp\n"
" movq %2,%0"
:"+m"(padlock_saved_context)
: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
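/* A short walk through the asm above: the pushed EFLAGS copy is
   tested with btl; if bit 30 is already clear, or the saved context
   already matches cdata, the code jumps past the popfq and merely
   rebalances the stack at label 1. Otherwise popfq rewrites EFLAGS,
   clearing bit 30 and thereby forcing the key reload, before cdata
   is recorded as the current context. */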
/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * The offsets used with 'leaq' instructions
 * describe items of the 'padlock_cipher_data'
 * structure.
 */
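/* Concretely: %rax is left pointing at cdata->iv (offset 0),
   %rdx at cdata->cword (offset 16) and %rbx at cdata->ks
   (offset 32), the register assignment the xcrypt opcodes
   expect for the IV, control word and key schedule. */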
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
static inline void *name(size_t cnt, \
struct padlock_cipher_data *cdata, \
void *out, const void *inp) \
{ void *iv; \
asm volatile ( "leaq 16(%0),%%rdx\n" \
" leaq 32(%0),%%rbx\n" \
rep_xcrypt "\n" \
: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
: "rbx", "rdx", "cc", "memory"); \
return iv; \
}
#endif
#endif /* cpu */
#ifndef OPENSSL_NO_AES
/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
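/* Note: the opcodes are spelled as raw .byte sequences (0xf3 is the
   rep prefix) so that assemblers predating the PadLock mnemonics
   can still build this file. */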
/* Our own htonl()/ntohl() */
static inline void
padlock_bswapl(AES_KEY *ks)
{
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
unsigned int *key = ks->rd_key;
while (i--) {
asm volatile ("bswapl %0" : "+r"(*key));
key++;
}
}
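/* Note: the swap is applied after software AES_set_encrypt_key()
   expansion (the 192- and 256-bit key paths), whose word byte
   order differs from what the ACE consumes. */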
#endif
/* The RNG call itself */
@@ -481,8 +585,8 @@ padlock_xstore(void *addr, unsigned int edx_in)
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
long *d=dst;
const long *s=src;
size_t *d=dst;
const size_t *s=src;
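	/* size_t rather than long keeps the copy unit the size of a
	   machine word on LP64 and LLP64 targets alike (long remains
	   32 bits on Win64). */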
n /= sizeof(*d);
do { *d++ = *s++; } while (--n);