diff --git a/crypto/Makefile b/crypto/Makefile
index 7595cb4578a9029d7ede4d96db5f4a77c9ef2a04..4147d2d6384c883b1a9d38e4432c25ddd30b9fe3 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -7,7 +7,7 @@ TOP=		..
 CC=		cc
 INCLUDE=	-I. -I$(TOP) -I../include $(ZLIB_INCLUDE)
 # INCLUDES targets sudbirs!
-INCLUDES=	-I.. -I../.. -I../asn1 -I../evp -I../../include $(ZLIB_INCLUDE)
+INCLUDES=	-I.. -I../.. -I../modes -I../asn1 -I../evp -I../../include $(ZLIB_INCLUDE)
 CFLAG=		-g
 MAKEDEPPROG=	makedepend
 MAKEDEPEND=	$(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG)
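The Makefile hunk is what makes the header change below work: sources in the crypto/ subdirectories (crypto/evp, crypto/asn1, ...) are compiled with INCLUDES rather than INCLUDE (the comment's "sudbirs" is the pre-existing upstream typo for "subdirs"), so adding -I../modes puts the private crypto/modes headers on their search path. A compile-only illustration, assuming an OpenSSL source tree, of what a file under crypto/evp can now do; gcm_example is just a placeholder name:

	/* resolves to crypto/modes/modes_lcl.h thanks to -I../modes */
	#include "modes_lcl.h"

	static GCM128_CONTEXT gcm_example;	/* complete type, so it can be embedded */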
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index 2f937af0ba905d49e2e9ec8286f21a66191cce79..b1a701b65d6221e0933fb8766a8d1fe40f594725 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -58,7 +58,7 @@
 #include <assert.h>
 #include <openssl/aes.h>
 #include "evp_locl.h"
-#include <openssl/modes.h>
+#include "modes_lcl.h"
 #include <openssl/rand.h>
 
 static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
@@ -196,8 +196,7 @@ typedef struct
 	int key_set;
 	/* Set if an iv is set */
 	int iv_set;
-	/* Pointer to GCM128_CTX: FIXME actual structure later */
-	GCM128_CONTEXT *gcm;
+	GCM128_CONTEXT gcm;
 	/* Temporary IV store */
 	unsigned char *iv;
 	/* IV length */
@@ -212,8 +211,7 @@ typedef struct
 static int aes_gcm_cleanup(EVP_CIPHER_CTX *c)
 	{
 	EVP_AES_GCM_CTX *gctx = c->cipher_data;
-	if (gctx->gcm)
-		CRYPTO_gcm128_release(gctx->gcm);
+	OPENSSL_cleanse(&gctx->gcm, sizeof(gctx->gcm));
 	if (gctx->iv != c->iv)
 		OPENSSL_free(gctx->iv);
 	return 1;
@@ -239,7 +237,6 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
 	switch (type)
 		{
 	case EVP_CTRL_INIT:
-		gctx->gcm = NULL;
 		gctx->key_set = 0;
 		gctx->iv_set = 0;
 		gctx->ivlen = c->cipher->iv_len;
@@ -304,7 +301,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
 	case EVP_CTRL_GCM_IV_GEN:
 		if (gctx->iv_gen == 0 || gctx->key_set == 0)
 			return 0;
-		CRYPTO_gcm128_setiv(gctx->gcm, gctx->iv, gctx->ivlen);
+		CRYPTO_gcm128_setiv(&gctx->gcm, gctx->iv, gctx->ivlen);
 		memcpy(ptr, gctx->iv, gctx->ivlen);
 		/* Invocation field will be at least 8 bytes in size and
 		 * so no need to check wrap around or increment more than
@@ -329,15 +326,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 	if (key)
 		{
 		AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
-		if (!gctx->gcm)
-			{
-			gctx->gcm =
-				CRYPTO_gcm128_new(&gctx->ks, (block128_f)AES_encrypt);
-			if (!gctx->gcm)
-				return 0;
-			}
-		else
-			CRYPTO_gcm128_init(gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);
+		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);
 		/* If we have an iv can set it directly, otherwise use
 		 * saved IV.
 		 */
@@ -345,7 +334,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 			iv = gctx->iv;
 		if (iv)
 			{
-			CRYPTO_gcm128_setiv(gctx->gcm, iv, gctx->ivlen);
+			CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
 			gctx->iv_set = 1;
 			}
 		gctx->key_set = 1;
@@ -354,7 +343,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 		{
 		/* If key set use IV, otherwise copy */
 		if (gctx->key_set)
-			CRYPTO_gcm128_setiv(gctx->gcm, iv, gctx->ivlen);
+			CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
 		else
 			memcpy(gctx->iv, iv, gctx->ivlen);
 		gctx->iv_set = 1;
@@ -376,17 +365,17 @@ static int aes_gcm(EVP_CIPHER_CTX *ctx, unsigned char *out,
 		{
 		if (out == NULL)
 			{
-			if (CRYPTO_gcm128_aad(gctx->gcm, in, len))
+			if (CRYPTO_gcm128_aad(&gctx->gcm, in, len))
 				return -1;
 			}
 		else if (ctx->encrypt)
 			{
-			if (CRYPTO_gcm128_encrypt(gctx->gcm, in, out, len))
+			if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len))
 				return -1;
 			}
 		else
 			{
-			if (CRYPTO_gcm128_decrypt(gctx->gcm, in, out, len))
+			if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len))
 				return -1;
 			}
 		return len;
@@ -395,13 +384,13 @@ static int aes_gcm(EVP_CIPHER_CTX *ctx, unsigned char *out,
 		{
 		if (!ctx->encrypt)
 			{
-			if (CRYPTO_gcm128_finish(gctx->gcm,
+			if (CRYPTO_gcm128_finish(&gctx->gcm,
 					gctx->tag, gctx->taglen) != 0)
 				return -1;
 			gctx->iv_set = 0;
 			return 0;
 			}
-		CRYPTO_gcm128_tag(gctx->gcm, gctx->tag, 16);
+		CRYPTO_gcm128_tag(&gctx->gcm, gctx->tag, 16);
 		gctx->taglen = 16;
 		/* Don't reuse the IV */
 		gctx->iv_set = 0;
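Taken together, the e_aes.c hunks change the GCM context from a lazily heap-allocated object (CRYPTO_gcm128_new() in init, CRYPTO_gcm128_release() in cleanup, NULL checks in between) to a member embedded by value: CRYPTO_gcm128_init() re-keys it in place, and cleanup becomes an OPENSSL_cleanse() of the storage, which also scrubs the key-derived material (H and its Htable) that a bare free would have left behind. A minimal sketch of the resulting lifecycle, not taken from the patch: gcm_seal_sketch() is a hypothetical helper and all error checking is omitted.

	#include <stddef.h>
	#include <openssl/aes.h>
	#include <openssl/crypto.h>	/* OPENSSL_cleanse */
	#include "modes_lcl.h"		/* GCM128_CONTEXT, block128_f */

	static void gcm_seal_sketch(const unsigned char key[16],
				    const unsigned char *iv, size_t ivlen,
				    const unsigned char *in, unsigned char *out,
				    size_t len, unsigned char tag[16])
		{
		AES_KEY ks;
		GCM128_CONTEXT gcm;	/* by value: no allocation to fail or leak */

		AES_set_encrypt_key(key, 128, &ks);
		CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt);
		CRYPTO_gcm128_setiv(&gcm, iv, ivlen);
		CRYPTO_gcm128_encrypt(&gcm, in, out, len);
		CRYPTO_gcm128_tag(&gcm, tag, 16);
		OPENSSL_cleanse(&gcm, sizeof(gcm));	/* as aes_gcm_cleanup now does */
		}

One consequence visible in the diff: aes_gcm_ctrl() no longer needs to NULL the pointer on EVP_CTRL_INIT, and aes_gcm_init_key() loses its allocate-or-reinit branch.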
"All-round" refers to things like: - * - * - shorter setup time effectively improves overall timing for - * handling short messages; - * - larger table allocation can become unbearable because of VM - * subsystem penalties (for example on Windows large enough free - * results in VM working set trimming, meaning that consequent - * malloc would immediately incur working set expansion); - * - larger table has larger cache footprint, which can affect - * performance of other code paths (not necessarily even from same - * thread in Hyper-Threading world); - */ -#define TABLE_BITS 4 - #if TABLE_BITS==8 static void gcm_init_8bit(u128 Htable[256], u64 H[2]) @@ -647,23 +608,6 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) #endif -struct gcm128_context { - /* Following 6 names follow names in GCM specification */ - union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0, - Xi,H,len; - /* Pre-computed table used by gcm_gmult_* */ -#if TABLE_BITS==8 - u128 Htable[256]; -#else - u128 Htable[16]; - void (*gmult)(u64 Xi[2],const u128 Htable[16]); - void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); -#endif - unsigned int mres, ares; - block128_f block; - void *key; -}; - #if TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h index 12368fb039ef46f6328cae733656af2d82058d52..201a69115e86b58368e2b5e4f8f7d730de25184d 100644 --- a/crypto/modes/modes_lcl.h +++ b/crypto/modes/modes_lcl.h @@ -73,3 +73,61 @@ typedef unsigned char u8; #define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) #define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) #endif + +/* GCM definitions */ + +typedef struct { u64 hi,lo; } u128; + +#ifdef TABLE_BITS +#undef TABLE_BITS +#endif +/* + * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should + * never be set to 8. 8 is effectively reserved for testing purposes. + * TABLE_BITS>1 are lookup-table-driven implementations referred to as + * "Shoup's" in GCM specification. In other words OpenSSL does not cover + * whole spectrum of possible table driven implementations. Why? In + * non-"Shoup's" case memory access pattern is segmented in such manner, + * that it's trivial to see that cache timing information can reveal + * fair portion of intermediate hash value. Given that ciphertext is + * always available to attacker, it's possible for him to attempt to + * deduce secret parameter H and if successful, tamper with messages + * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's + * not as trivial, but there is no reason to believe that it's resistant + * to cache-timing attack. And the thing about "8-bit" implementation is + * that it consumes 16 (sixteen) times more memory, 4KB per individual + * key + 1KB shared. Well, on pros side it should be twice as fast as + * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version + * was observed to run ~75% faster, closer to 100% for commercial + * compilers... Yet "4-bit" procedure is preferred, because it's + * believed to provide better security-performance balance and adequate + * all-round performance. 
"All-round" refers to things like: + * + * - shorter setup time effectively improves overall timing for + * handling short messages; + * - larger table allocation can become unbearable because of VM + * subsystem penalties (for example on Windows large enough free + * results in VM working set trimming, meaning that consequent + * malloc would immediately incur working set expansion); + * - larger table has larger cache footprint, which can affect + * performance of other code paths (not necessarily even from same + * thread in Hyper-Threading world); + */ +#define TABLE_BITS 4 + +struct gcm128_context { + /* Following 6 names follow names in GCM specification */ + union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0, + Xi,H,len; + /* Pre-computed table used by gcm_gmult_* */ +#if TABLE_BITS==8 + u128 Htable[256]; +#else + u128 Htable[16]; + void (*gmult)(u64 Xi[2],const u128 Htable[16]); + void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +#endif + unsigned int mres, ares; + block128_f block; + void *key; +};