From 81f3d6323dcda6a18b06c718600d6a4739e83263 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 30 Nov 2015 23:07:38 +0100 Subject: [PATCH] modes/ocb128.c: split fixed block xors to aligned and misaligned. Main goal was to improve performance on RISC platforms, e.g. 10% was measured on MIPS, POWER8... Reviewed-by: Matt Caswell --- crypto/modes/modes_lcl.h | 25 +++++++++-------- crypto/modes/ocb128.c | 58 ++++++++++------------------------------ 2 files changed, 26 insertions(+), 57 deletions(-) diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h index 0fd11ce6c4..2f61afe5dc 100644 --- a/crypto/modes/modes_lcl.h +++ b/crypto/modes/modes_lcl.h @@ -144,20 +144,19 @@ struct ccm128_context { #ifndef OPENSSL_NO_OCB -# ifdef STRICT_ALIGNMENT -typedef struct { - unsigned char a[16]; +typedef union { + u64 a[2]; + unsigned char c[16]; } OCB_BLOCK; -# define ocb_block16_xor(in1,in2,out) \ - ocb_block_xor((in1)->a,(in2)->a,16,(out)->a) -# else /* STRICT_ALIGNMENT */ -typedef struct { - u64 a; - u64 b; -} OCB_BLOCK; -# define ocb_block16_xor(in1,in2,out) \ - (out)->a=(in1)->a^(in2)->a; (out)->b=(in1)->b^(in2)->b; -# endif /* STRICT_ALIGNMENT */ +# define ocb_block16_xor(in1,in2,out) \ + ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \ + (out)->a[1]=(in1)->a[1]^(in2)->a[1] ) +# if STRICT_ALIGNMENT +# define ocb_block16_xor_misaligned(in1,in2,out) \ + ocb_block_xor((in1)->c,(in2)->c,16,(out)->c) +# else +# define ocb_block16_xor_misaligned ocb_block16_xor +# endif struct ocb128_context { /* Need both encrypt and decrypt key schedules for decryption */ diff --git a/crypto/modes/ocb128.c b/crypto/modes/ocb128.c index 5408d50df1..d49aa6ede9 100644 --- a/crypto/modes/ocb128.c +++ b/crypto/modes/ocb128.c @@ -53,11 +53,6 @@ #ifndef OPENSSL_NO_OCB -union ublock { - unsigned char *chrblk; - OCB_BLOCK *ocbblk; -}; - /* * Calculate the number of binary trailing zero's in any given number */ @@ -88,23 +83,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out) unsigned char shift_mask; int i; unsigned char mask[15]; - union ublock locin; - union ublock locout; - - locin.ocbblk = in; - locout.ocbblk = out; shift_mask = 0xff; shift_mask <<= (8 - shift); for (i = 15; i >= 0; i--) { if (i > 0) { - mask[i - 1] = locin.chrblk[i] & shift_mask; + mask[i - 1] = in->c[i] & shift_mask; mask[i - 1] >>= 8 - shift; } - locout.chrblk[i] = locin.chrblk[i] << shift; + out->c[i] = in->c[i] << shift; if (i != 15) { - locout.chrblk[i] ^= mask[i]; + out->c[i] ^= mask[i]; } } } @@ -115,23 +105,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out) static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out) { unsigned char mask; - union ublock locin; - union ublock locout; - - locin.ocbblk = in; - locout.ocbblk = out; /* * Calculate the mask based on the most significant bit. There are more * efficient ways to do this - but this way is constant time */ - mask = locin.chrblk[0] & 0x80; + mask = in->c[0] & 0x80; mask >>= 7; mask *= 135; ocb_block_lshift(in, 1, out); - locout.chrblk[15] ^= mask; + out->c[15] ^= mask; } /* @@ -191,13 +176,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx) static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out, void *keyenc) { - union ublock locin; - union ublock locout; - - locin.ocbblk = in; - locout.ocbblk = out; - - ctx->encrypt(locin.chrblk, locout.chrblk, keyenc); + ctx->encrypt(in->c, out->c, keyenc); } /* @@ -206,13 +185,7 @@ static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out, static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out, void *keydec) { - union ublock locin; - union ublock locout; - - locin.ocbblk = in; - locout.ocbblk = out; - - ctx->decrypt(locin.chrblk, locout.chrblk, keydec); + ctx->decrypt(in->c, out->c, keydec); } /* @@ -305,9 +278,6 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv, unsigned char ktop[16], tmp[16], mask; unsigned char stretch[24], nonce[16]; size_t bottom, shift; - union ublock offset; - - offset.ocbblk = &ctx->offset; /* * Spec says IV is 120 bits or fewer - it allows non byte aligned lengths. @@ -341,7 +311,7 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv, &ctx->offset); mask = 0xff; mask <<= 8 - shift; - offset.chrblk[15] |= + ctx->offset.c[15] |= (*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift); return 1; @@ -444,13 +414,13 @@ int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx, /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16)); - ocb_block16_xor(&ctx->offset, inblock, &tmp1); + ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1); /* Checksum_i = Checksum_{i-1} xor P_i */ - ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum); + ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum); ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc); outblock = (OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16)); - ocb_block16_xor(&ctx->offset, &tmp2, outblock); + ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock); } @@ -517,14 +487,14 @@ int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx, /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16)); - ocb_block16_xor(&ctx->offset, inblock, &tmp1); + ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1); ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec); outblock = (OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16)); - ocb_block16_xor(&ctx->offset, &tmp2, outblock); + ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock); /* Checksum_i = Checksum_{i-1} xor P_i */ - ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum); + ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum); } /* -- GitLab