提交 8e7f966b 编写于 作者: U Ulf Möller

SHA-1 cleanups and performance enhancements.

Submitted by: Andy Polyakov <appro@fy.chalmers.se>
上级 744029c1
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
Changes between 0.9.2b and 0.9.3 Changes between 0.9.2b and 0.9.3
*) SHA-1 cleanups and performance enhancements.
[Andy Polyakov <appro@fy.chalmers.se>]
*) Sparc v8plus assembler for the bignum library. *) Sparc v8plus assembler for the bignum library.
[Andy Polyakov <appro@fy.chalmers.se>] [Andy Polyakov <appro@fy.chalmers.se>]
......
...@@ -587,6 +587,9 @@ while (<IN>) ...@@ -587,6 +587,9 @@ while (<IN>)
{ printf OUT "#define RC4_INT unsigned %s\n",$type[$rc4_int]; } { printf OUT "#define RC4_INT unsigned %s\n",$type[$rc4_int]; }
elsif (/^#((define)|(undef))\s+RC4_INDEX/) elsif (/^#((define)|(undef))\s+RC4_INDEX/)
{ printf OUT "#%s RC4_INDEX\n",($rc4_idx)?"define":"undef"; } { printf OUT "#%s RC4_INDEX\n",($rc4_idx)?"define":"undef"; }
elsif (/^#(define|undef)\s+I386_ONLY/)
{ printf OUT "#%s I386_ONLY\n", ($processor == 386)?
"define":"undef"; }
elsif (/^#define\s+MD2_INT\s/) elsif (/^#define\s+MD2_INT\s/)
{ printf OUT "#define MD2_INT unsigned %s\n",$type[$md2_int]; } { printf OUT "#define MD2_INT unsigned %s\n",$type[$md2_int]; }
elsif (/^#define\s+IDEA_INT\s/) elsif (/^#define\s+IDEA_INT\s/)
......
/* crypto/opensslconf.h */ /* crypto/opensslconf.h */
/* WARNING: This file is autogenerated by Configure */ /* WARNING: This file is autogenerated by Configure */
/* Generate 80386 code? */
#undef I386_ONLY
#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) #if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
#define OPENSSLDIR "/usr/local/ssl" #define OPENSSLDIR "/usr/local/ssl"
#endif #endif
...@@ -34,7 +37,7 @@ ...@@ -34,7 +37,7 @@
#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) #if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
#define CONFIG_HEADER_BN_H #define CONFIG_HEADER_BN_H
#define BN_LLONG #undef BN_LLONG
/* Should we define BN_DIV2W here? */ /* Should we define BN_DIV2W here? */
...@@ -53,7 +56,7 @@ ...@@ -53,7 +56,7 @@
#define CONFIG_HEADER_RC4_LOCL_H #define CONFIG_HEADER_RC4_LOCL_H
/* if this is defined data[i] is used instead of *data, this is a %20 /* if this is defined data[i] is used instead of *data, this is a %20
* speedup on x86 */ * speedup on x86 */
#define RC4_INDEX #undef RC4_INDEX
#endif #endif
#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) #if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
...@@ -67,14 +70,14 @@ ...@@ -67,14 +70,14 @@
/* the following is tweaked from a config script, that is why it is a /* the following is tweaked from a config script, that is why it is a
* protected undef/define */ * protected undef/define */
#ifndef DES_PTR #ifndef DES_PTR
#define DES_PTR #undef DES_PTR
#endif #endif
/* This helps C compiler generate the correct code for multiple functional /* This helps C compiler generate the correct code for multiple functional
* units. It reduces register dependencies at the expense of 2 more * units. It reduces register dependencies at the expense of 2 more
* registers */ * registers */
#ifndef DES_RISC1 #ifndef DES_RISC1
#define DES_RISC1 #undef DES_RISC1
#endif #endif
#ifndef DES_RISC2 #ifndef DES_RISC2
...@@ -88,7 +91,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! ...@@ -88,7 +91,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
/* Unroll the inner loop, this sometimes helps, sometimes hinders. /* Unroll the inner loop, this sometimes helps, sometimes hinders.
* Very much CPU dependent */ * Very much CPU dependent */
#ifndef DES_UNROLL #ifndef DES_UNROLL
#define DES_UNROLL #undef DES_UNROLL
#endif #endif
/* These default values were supplied by /* These default values were supplied by
......
...@@ -67,19 +67,29 @@ extern "C" { ...@@ -67,19 +67,29 @@ extern "C" {
#error SHA is disabled. #error SHA is disabled.
#endif #endif
#define SHA_CBLOCK 64 /*
#define SHA_LBLOCK 16 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#define SHA_BLOCK 16 * ! SHA_LONG has to be at least 32 bits wide. If it's wider, then !
#define SHA_LAST_BLOCK 56 * ! SHA_LONG_LOG2 has to be defined along. !
#define SHA_LENGTH_BLOCK 8 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#define SHA_DIGEST_LENGTH 20 */
#ifdef WIN16 #if defined(WIN16) || defined(__LP32__)
#define SHA_LONG unsigned long
#elif defined(_CRAY) || defined(__ILP64__)
#define SHA_LONG unsigned long #define SHA_LONG unsigned long
#define SHA_LONG_LOG2 3
#else #else
#define SHA_LONG unsigned int #define SHA_LONG unsigned int
#endif #endif
#define SHA_LBLOCK 16
#define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a
* contiguous array of 32 bit
* wide big-endian values. */
#define SHA_LAST_BLOCK (SHA_CBLOCK-8)
#define SHA_DIGEST_LENGTH 20
typedef struct SHAstate_st typedef struct SHAstate_st
{ {
SHA_LONG h0,h1,h2,h3,h4; SHA_LONG h0,h1,h2,h3,h4;
......
...@@ -81,14 +81,14 @@ char *SHA1_version="SHA1" OPENSSL_VERSION_PTEXT; ...@@ -81,14 +81,14 @@ char *SHA1_version="SHA1" OPENSSL_VERSION_PTEXT;
#define K_40_59 0x8f1bbcdcUL #define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL #define K_60_79 0xca62c1d6UL
# ifdef SHA1_ASM #ifdef SHA1_ASM
void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num); void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
# define sha1_block sha1_block_x86 # define sha1_block(c,p,n) sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
# else #else
void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num); static void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
# endif #endif
#if defined(L_ENDIAN) && defined(SHA1_ASM) #if !defined(B_ENDIAN) && defined(SHA1_ASM)
# define M_c2nl c2l # define M_c2nl c2l
# define M_p_c2nl p_c2l # define M_p_c2nl p_c2l
# define M_c2nl_p c2l_p # define M_c2nl_p c2l_p
...@@ -147,7 +147,7 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -147,7 +147,7 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
} }
len-=(SHA_CBLOCK-c->num); len-=(SHA_CBLOCK-c->num);
sha1_block(c,p,64); sha1_block(c,p,1);
c->num=0; c->num=0;
/* drop through and do the rest */ /* drop through and do the rest */
} }
...@@ -184,15 +184,15 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -184,15 +184,15 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
* copies it to a local array. I should be able to do this for * copies it to a local array. I should be able to do this for
* the C version as well.... * the C version as well....
*/ */
#if 1 #if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA1_ASM) #if defined(B_ENDIAN) || defined(SHA1_ASM)
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0) if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{ {
sw=len/SHA_CBLOCK; sw=len/SHA_CBLOCK;
if (sw) if (sw)
{ {
sw*=SHA_CBLOCK;
sha1_block(c,(SHA_LONG *)data,sw); sha1_block(c,(SHA_LONG *)data,sw);
sw*=SHA_CBLOCK;
data+=sw; data+=sw;
len-=sw; len-=sw;
} }
...@@ -204,35 +204,61 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -204,35 +204,61 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
p=c->data; p=c->data;
while (len >= SHA_CBLOCK) while (len >= SHA_CBLOCK)
{ {
#if defined(B_ENDIAN) || defined(L_ENDIAN) #if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA1_ASM)
#define SHA_NO_TAIL_CODE
/*
* Basically we get here only when data happens
* to be unaligned.
*/
if (p != (SHA_LONG *)data) if (p != (SHA_LONG *)data)
memcpy(p,data,SHA_CBLOCK); memcpy(p,data,SHA_CBLOCK);
data+=SHA_CBLOCK; data+=SHA_CBLOCK;
# ifdef L_ENDIAN sha1_block(c,p=c->data,1);
# ifndef SHA1_ASM /* Will not happen */ len-=SHA_CBLOCK;
#else /* little-endian */
#define BE_COPY(dst,src,i) { \
l = ((SHA_LONG *)src)[i]; \
Endian_Reverse32(l); \
dst[i] = l; \
}
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
for (sw=(SHA_LBLOCK/4); sw; sw--) for (sw=(SHA_LBLOCK/4); sw; sw--)
{ {
Endian_Reverse32(p[0]); BE_COPY(p,data,0);
Endian_Reverse32(p[1]); BE_COPY(p,data,1);
Endian_Reverse32(p[2]); BE_COPY(p,data,2);
Endian_Reverse32(p[3]); BE_COPY(p,data,3);
p+=4; p+=4;
data += 4*sizeof(SHA_LONG);
}
sha1_block(c,p=c->data,1);
len-=SHA_CBLOCK;
continue;
} }
#endif
#endif
#ifndef SHA_NO_TAIL_CODE
/*
* In addition to "sizeof(SHA_LONG)!= 4" case the
* following code covers unaligned access cases on
* little-endian machines.
* <appro@fy.chalmers.se>
*/
p=c->data; p=c->data;
# endif for (sw=(SHA_LBLOCK/4); sw; sw--)
# endif
#else
for (sw=(SHA_BLOCK/4); sw; sw--)
{ {
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[0]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[1]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[2]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[3]=l;
p+=4;
} }
p=c->data; p=c->data;
#endif sha1_block(c,p,1);
sha1_block(c,p,64);
len-=SHA_CBLOCK; len-=SHA_CBLOCK;
#endif
} }
ec=(int)len; ec=(int)len;
c->num=ec; c->num=ec;
...@@ -247,26 +273,35 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -247,26 +273,35 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
void SHA1_Transform(SHA_CTX *c, unsigned char *b) void SHA1_Transform(SHA_CTX *c, unsigned char *b)
{ {
SHA_LONG p[16]; SHA_LONG p[SHA_LBLOCK];
#ifndef B_ENDIAN
SHA_LONG *q; SHA_LONG *q;
int i; int i;
#endif
#if defined(B_ENDIAN) || defined(L_ENDIAN) #if SHA_LONG_LOG2==2
memcpy(p,b,64); #if defined(B_ENDIAN) || defined(SHA1_ASM)
#ifdef L_ENDIAN memcpy(p,b,SHA_CBLOCK);
sha1_block(c,p,1);
return;
#else
if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
{
q=p; q=p;
for (i=(SHA_LBLOCK/4); i; i--) for (i=(SHA_LBLOCK/4); i; i--)
{ {
Endian_Reverse32(q[0]); unsigned long l;
Endian_Reverse32(q[1]); BE_COPY(q,b,0); /* BE_COPY was defined above */
Endian_Reverse32(q[2]); BE_COPY(q,b,1);
Endian_Reverse32(q[3]); BE_COPY(q,b,2);
BE_COPY(q,b,3);
q+=4; q+=4;
b+=4*sizeof(SHA_LONG);
}
sha1_block(c,p,1);
return;
} }
#endif #endif
#else #endif
#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
q=p; q=p;
for (i=(SHA_LBLOCK/4); i; i--) for (i=(SHA_LBLOCK/4); i; i--)
{ {
...@@ -276,16 +311,15 @@ void SHA1_Transform(SHA_CTX *c, unsigned char *b) ...@@ -276,16 +311,15 @@ void SHA1_Transform(SHA_CTX *c, unsigned char *b)
c2nl(b,l); *(q++)=l; c2nl(b,l); *(q++)=l;
c2nl(b,l); *(q++)=l; c2nl(b,l); *(q++)=l;
} }
sha1_block(c,p,1);
#endif #endif
sha1_block(c,p,64);
} }
#ifndef SHA1_ASM #ifndef SHA1_ASM
static void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
{ {
register SHA_LONG A,B,C,D,E,T; register SHA_LONG A,B,C,D,E,T;
SHA_LONG X[16]; SHA_LONG X[SHA_LBLOCK];
A=c->h0; A=c->h0;
B=c->h1; B=c->h1;
...@@ -385,8 +419,7 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num) ...@@ -385,8 +419,7 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
c->h3=(c->h3+B)&0xffffffffL; c->h3=(c->h3+B)&0xffffffffL;
c->h4=(c->h4+C)&0xffffffffL; c->h4=(c->h4+C)&0xffffffffL;
num-=64; if (--num <= 0) break;
if (num <= 0) break;
A=c->h0; A=c->h0;
B=c->h1; B=c->h1;
...@@ -394,7 +427,12 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num) ...@@ -394,7 +427,12 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
D=c->h3; D=c->h3;
E=c->h4; E=c->h4;
W+=16; W+=SHA_LBLOCK; /* Note! This can happen only when sizeof(SHA_LONG)
* is 4. Whenever it's not the actual case this
* function is never called with num larger than 1
* and we never advance down here.
* <appro@fy.chalmers.se>
*/
} }
} }
#endif #endif
...@@ -423,18 +461,20 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c) ...@@ -423,18 +461,20 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
{ {
for (; i<SHA_LBLOCK; i++) for (; i<SHA_LBLOCK; i++)
p[i]=0; p[i]=0;
sha1_block(c,p,64); sha1_block(c,p,1);
i=0; i=0;
} }
for (; i<(SHA_LBLOCK-2); i++) for (; i<(SHA_LBLOCK-2); i++)
p[i]=0; p[i]=0;
p[SHA_LBLOCK-2]=c->Nh; p[SHA_LBLOCK-2]=c->Nh;
p[SHA_LBLOCK-1]=c->Nl; p[SHA_LBLOCK-1]=c->Nl;
#if defined(L_ENDIAN) && defined(SHA1_ASM) #if SHA_LONG_LOG2==2
#if !defined(B_ENDIAN) && defined(SHA1_ASM)
Endian_Reverse32(p[SHA_LBLOCK-2]); Endian_Reverse32(p[SHA_LBLOCK-2]);
Endian_Reverse32(p[SHA_LBLOCK-1]); Endian_Reverse32(p[SHA_LBLOCK-1]);
#endif #endif
sha1_block(c,p,64); #endif
sha1_block(c,p,1);
cp=md; cp=md;
l=c->h0; nl2c(l,cp); l=c->h0; nl2c(l,cp);
l=c->h1; nl2c(l,cp); l=c->h1; nl2c(l,cp);
...@@ -442,10 +482,11 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c) ...@@ -442,10 +482,11 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
l=c->h3; nl2c(l,cp); l=c->h3; nl2c(l,cp);
l=c->h4; nl2c(l,cp); l=c->h4; nl2c(l,cp);
/* clear stuff, sha1_block may be leaving some stuff on the stack
* but I'm not worried :-) */
c->num=0; c->num=0;
/* memset((char *)&c,0,sizeof(c));*/ /* sha_block may be leaving some stuff on the stack
* but I'm not worried :-)
memset((void *)c,0,sizeof(SHA_CTX));
*/
} }
#endif #endif
...@@ -81,12 +81,21 @@ char *SHA_version="SHA" OPENSSL_VERSION_PTEXT; ...@@ -81,12 +81,21 @@ char *SHA_version="SHA" OPENSSL_VERSION_PTEXT;
#define K_40_59 0x8f1bbcdcUL #define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL #define K_60_79 0xca62c1d6UL
void sha_block(SHA_CTX *c, register SHA_LONG *p, int num); static void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
#define M_c2nl c2nl
#define M_p_c2nl p_c2nl #if !defined(B_ENDIAN) && defined(SHA_ASM)
#define M_c2nl_p c2nl_p # define M_c2nl c2l
#define M_p_c2nl_p p_c2nl_p # define M_p_c2nl p_c2l
#define M_nl2c nl2c # define M_c2nl_p c2l_p
# define M_p_c2nl_p p_c2l_p
# define M_nl2c l2c
#else
# define M_c2nl c2nl
# define M_p_c2nl p_c2nl
# define M_c2nl_p c2nl_p
# define M_p_c2nl_p p_c2nl_p
# define M_nl2c nl2c
#endif
void SHA_Init(SHA_CTX *c) void SHA_Init(SHA_CTX *c)
{ {
...@@ -133,7 +142,7 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -133,7 +142,7 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
} }
len-=(SHA_CBLOCK-c->num); len-=(SHA_CBLOCK-c->num);
sha_block(c,p,64); sha_block(c,p,1);
c->num=0; c->num=0;
/* drop through and do the rest */ /* drop through and do the rest */
} }
...@@ -170,15 +179,15 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -170,15 +179,15 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
* copies it to a local array. I should be able to do this for * copies it to a local array. I should be able to do this for
* the C version as well.... * the C version as well....
*/ */
#if 1 #if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA_ASM) #if defined(B_ENDIAN) || defined(SHA_ASM)
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0) if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{ {
sw=len/SHA_CBLOCK; sw=len/SHA_CBLOCK;
if (sw) if (sw)
{ {
sw*=SHA_CBLOCK;
sha_block(c,(SHA_LONG *)data,sw); sha_block(c,(SHA_LONG *)data,sw);
sw*=SHA_CBLOCK;
data+=sw; data+=sw;
len-=sw; len-=sw;
} }
...@@ -190,35 +199,61 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -190,35 +199,61 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
p=c->data; p=c->data;
while (len >= SHA_CBLOCK) while (len >= SHA_CBLOCK)
{ {
#if defined(B_ENDIAN) || defined(L_ENDIAN) #if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA_ASM)
#define SHA_NO_TAIL_CODE
/*
* Basically we get here only when data happens
* to be unaligned.
*/
if (p != (SHA_LONG *)data) if (p != (SHA_LONG *)data)
memcpy(p,data,SHA_CBLOCK); memcpy(p,data,SHA_CBLOCK);
data+=SHA_CBLOCK; data+=SHA_CBLOCK;
# ifdef L_ENDIAN sha_block(c,p=c->data,1);
# ifndef SHA_ASM /* Will not happen */ len-=SHA_CBLOCK;
#else /* little-endian */
#define BE_COPY(dst,src,i) { \
l = ((SHA_LONG *)src)[i]; \
Endian_Reverse32(l); \
dst[i] = l; \
}
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
for (sw=(SHA_LBLOCK/4); sw; sw--) for (sw=(SHA_LBLOCK/4); sw; sw--)
{ {
Endian_Reverse32(p[0]); BE_COPY(p,data,0);
Endian_Reverse32(p[1]); BE_COPY(p,data,1);
Endian_Reverse32(p[2]); BE_COPY(p,data,2);
Endian_Reverse32(p[3]); BE_COPY(p,data,3);
p+=4; p+=4;
data += 4*sizeof(SHA_LONG);
} }
sha_block(c,p=c->data,1);
len-=SHA_CBLOCK;
continue;
}
#endif
#endif
#ifndef SHA_NO_TAIL_CODE
/*
* In addition to "sizeof(SHA_LONG)!= 4" case the
* following code covers unaligned access cases on
* little-endian machines.
* <appro@fy.chalmers.se>
*/
p=c->data; p=c->data;
# endif for (sw=(SHA_LBLOCK/4); sw; sw--)
# endif
#else
for (sw=(SHA_BLOCK/4); sw; sw--)
{ {
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[0]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[1]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[2]=l;
M_c2nl(data,l); *(p++)=l; M_c2nl(data,l); p[3]=l;
p+=4;
} }
p=c->data; p=c->data;
#endif sha_block(c,p,1);
sha_block(c,p,64);
len-=SHA_CBLOCK; len-=SHA_CBLOCK;
#endif
} }
ec=(int)len; ec=(int)len;
c->num=ec; c->num=ec;
...@@ -233,26 +268,35 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data, ...@@ -233,26 +268,35 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
void SHA_Transform(SHA_CTX *c, unsigned char *b) void SHA_Transform(SHA_CTX *c, unsigned char *b)
{ {
SHA_LONG p[16]; SHA_LONG p[SHA_LBLOCK];
#if !defined(B_ENDIAN)
SHA_LONG *q; SHA_LONG *q;
int i; int i;
#endif
#if defined(B_ENDIAN) || defined(L_ENDIAN) #if SHA_LONG_LOG2==2
memcpy(p,b,64); #if defined(B_ENDIAN) || defined(SHA_ASM)
#ifdef L_ENDIAN memcpy(p,b,SHA_CBLOCK);
sha_block(c,p,1);
return;
#else
if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
{
q=p; q=p;
for (i=(SHA_LBLOCK/4); i; i--) for (i=(SHA_LBLOCK/4); i; i--)
{ {
Endian_Reverse32(q[0]); unsigned long l;
Endian_Reverse32(q[1]); BE_COPY(q,b,0); /* BE_COPY was defined above */
Endian_Reverse32(q[2]); BE_COPY(q,b,1);
Endian_Reverse32(q[3]); BE_COPY(q,b,2);
BE_COPY(q,b,3);
q+=4; q+=4;
b+=4*sizeof(SHA_LONG);
}
sha_block(c,p,1);
return;
} }
#endif #endif
#else #endif
#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
q=p; q=p;
for (i=(SHA_LBLOCK/4); i; i--) for (i=(SHA_LBLOCK/4); i; i--)
{ {
...@@ -262,14 +306,15 @@ void SHA_Transform(SHA_CTX *c, unsigned char *b) ...@@ -262,14 +306,15 @@ void SHA_Transform(SHA_CTX *c, unsigned char *b)
c2nl(b,l); *(q++)=l; c2nl(b,l); *(q++)=l;
c2nl(b,l); *(q++)=l; c2nl(b,l); *(q++)=l;
} }
sha_block(c,p,1);
#endif #endif
sha_block(c,p,64);
} }
void sha_block(SHA_CTX *c, register SHA_LONG *W, int num) #ifndef SHA_ASM
static void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
{ {
register SHA_LONG A,B,C,D,E,T; register SHA_LONG A,B,C,D,E,T;
SHA_LONG X[16]; SHA_LONG X[SHA_LBLOCK];
A=c->h0; A=c->h0;
B=c->h1; B=c->h1;
...@@ -369,8 +414,7 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num) ...@@ -369,8 +414,7 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
c->h3=(c->h3+B)&0xffffffffL; c->h3=(c->h3+B)&0xffffffffL;
c->h4=(c->h4+C)&0xffffffffL; c->h4=(c->h4+C)&0xffffffffL;
num-=64; if (--num <= 0) break;
if (num <= 0) break;
A=c->h0; A=c->h0;
B=c->h1; B=c->h1;
...@@ -378,9 +422,15 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num) ...@@ -378,9 +422,15 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
D=c->h3; D=c->h3;
E=c->h4; E=c->h4;
W+=16; W+=SHA_LBLOCK; /* Note! This can happen only when sizeof(SHA_LONG)
* is 4. Whenever it's not the actual case this
* function is never called with num larger than 1
* and we never advance down here.
* <appro@fy.chalmers.se>
*/
} }
} }
#endif
void SHA_Final(unsigned char *md, SHA_CTX *c) void SHA_Final(unsigned char *md, SHA_CTX *c)
{ {
...@@ -406,14 +456,20 @@ void SHA_Final(unsigned char *md, SHA_CTX *c) ...@@ -406,14 +456,20 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
{ {
for (; i<SHA_LBLOCK; i++) for (; i<SHA_LBLOCK; i++)
p[i]=0; p[i]=0;
sha_block(c,p,64); sha_block(c,p,1);
i=0; i=0;
} }
for (; i<(SHA_LBLOCK-2); i++) for (; i<(SHA_LBLOCK-2); i++)
p[i]=0; p[i]=0;
p[SHA_LBLOCK-2]=c->Nh; p[SHA_LBLOCK-2]=c->Nh;
p[SHA_LBLOCK-1]=c->Nl; p[SHA_LBLOCK-1]=c->Nl;
sha_block(c,p,64); #if SHA_LONG_LOG2==2
#if !defined(B_ENDIAN) && defined(SHA_ASM)
Endian_Reverse32(p[SHA_LBLOCK-2]);
Endian_Reverse32(p[SHA_LBLOCK-1]);
#endif
#endif
sha_block(c,p,1);
cp=md; cp=md;
l=c->h0; nl2c(l,cp); l=c->h0; nl2c(l,cp);
l=c->h1; nl2c(l,cp); l=c->h1; nl2c(l,cp);
...@@ -421,9 +477,10 @@ void SHA_Final(unsigned char *md, SHA_CTX *c) ...@@ -421,9 +477,10 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
l=c->h3; nl2c(l,cp); l=c->h3; nl2c(l,cp);
l=c->h4; nl2c(l,cp); l=c->h4; nl2c(l,cp);
/* clear stuff, sha_block may be leaving some stuff on the stack
* but I'm not worried :-) */
c->num=0; c->num=0;
/* memset((char *)&c,0,sizeof(c));*/ /* sha_block may be leaving some stuff on the stack
* but I'm not worried :-)
memset((void *)c,0,sizeof(SHA_CTX));
*/
} }
#endif #endif
...@@ -158,30 +158,79 @@ ...@@ -158,30 +158,79 @@
*((c)++)=(unsigned char)(((l)>>16)&0xff), \ *((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>>24)&0xff)) *((c)++)=(unsigned char)(((l)>>24)&0xff))
#ifndef SHA_LONG_LOG2
#define SHA_LONG_LOG2 2 /* default to 32 bits */
#endif
#undef ROTATE #undef ROTATE
#undef Endian_Reverse32
#if defined(WIN32) #if defined(WIN32)
#define ROTATE(a,n) _lrotl(a,n) #define ROTATE(a,n) _lrotl(a,n)
#else #elif defined(__GNUC__)
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) /* some inline assembler templates by <appro@fy.chalmers.se> */
#if defined(__i386)
#define ROTATE(a,n) ({ register unsigned int ret; \
asm ("roll %1,%0" \
: "=r"(ret) \
: "I"(n), "0"(a) \
: "cc"); \
ret; \
})
#ifndef I386_ONLY
#define Endian_Reverse32(a) \
{ register unsigned int l=(a); \
asm ("bswapl %0" \
: "=r"(l) : "0"(l)); \
(a)=l; \
}
#endif
#elif defined(__powerpc)
#define ROTATE(a,n) ({ register unsigned int ret; \
asm ("rlwinm %0,%1,%2,0,31" \
: "=r"(ret) \
: "r"(a), "I"(n)); \
ret; \
})
/* Endian_Reverse32 is not needed for PowerPC */
#endif
#endif #endif
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */ /* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
#if defined(WIN32) #ifdef ROTATE
#ifndef Endian_Reverse32
/* 5 instructions with rotate instruction, else 9 */ /* 5 instructions with rotate instruction, else 9 */
#define Endian_Reverse32(a) \ #define Endian_Reverse32(a) \
{ \ { \
unsigned long l=(a); \ unsigned long t=(a); \
(a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \ (a)=((ROTATE(t,8)&0x00FF00FF)|(ROTATE((t&0x00FF00FF),24))); \
} }
#endif
#else #else
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#ifndef Endian_Reverse32
/* 6 instructions with rotate instruction, else 8 */ /* 6 instructions with rotate instruction, else 8 */
#define Endian_Reverse32(a) \ #define Endian_Reverse32(a) \
{ \ { \
unsigned long l=(a); \ unsigned long t=(a); \
l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \ t=(((t>>8)&0x00FF00FF)|((t&0x00FF00FF)<<8)); \
(a)=ROTATE(l,16L); \ (a)=ROTATE(t,16); \
} }
#endif #endif
/*
* Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
* It's rewritten as above for two reasons:
* - RISCs aren't good at long constants and have to explicitly
* compose 'em with several (well, usually 2) instructions in a
* register before performing the actual operation and (as you
* already realized:-) having same constant should inspire the
* compiler to permanently allocate the only register for it;
* - most modern CPUs have two ALUs, but usually only one has
* circuitry for shifts:-( this minor tweak inspires compiler
* to schedule shift instructions in a better way...
*
* <appro@fy.chalmers.se>
*/
#endif
/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be /* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
* simplified to the code in F_00_19. Wei attributes these optimisations * simplified to the code in F_00_19. Wei attributes these optimisations
...@@ -195,13 +244,12 @@ ...@@ -195,13 +244,12 @@
#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) #define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d)))
#define F_60_79(b,c,d) F_20_39(b,c,d) #define F_60_79(b,c,d) F_20_39(b,c,d)
#ifdef SHA_0
#undef Xupdate #undef Xupdate
#ifdef SHA_0
#define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\ #define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]); (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
#endif #endif
#ifdef SHA_1 #ifdef SHA_1
#undef Xupdate
#define Xupdate(a,i,ia,ib,ic,id) (a)=\ #define Xupdate(a,i,ia,ib,ic,id) (a)=\
(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\ (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
X[(i)&0x0f]=(a)=ROTATE((a),1); X[(i)&0x0f]=(a)=ROTATE((a),1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册