modes/ocb128.c: split fixed-size block XORs into aligned and misaligned variants.

The main goal was to improve performance on RISC platforms; e.g. a 10%
improvement was measured on MIPS, POWER8, and others.

Reviewed-by: Matt Caswell <matt@openssl.org>
This commit is contained in:
Andy Polyakov 2015-11-30 23:07:38 +01:00
parent b9e3d7e0f6
commit 81f3d6323d
2 changed files with 26 additions and 57 deletions

View File

@ -144,20 +144,19 @@ struct ccm128_context {
#ifndef OPENSSL_NO_OCB #ifndef OPENSSL_NO_OCB
# ifdef STRICT_ALIGNMENT typedef union {
typedef struct { u64 a[2];
unsigned char a[16]; unsigned char c[16];
} OCB_BLOCK; } OCB_BLOCK;
# define ocb_block16_xor(in1,in2,out) \ # define ocb_block16_xor(in1,in2,out) \
ocb_block_xor((in1)->a,(in2)->a,16,(out)->a) ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
# else /* STRICT_ALIGNMENT */ (out)->a[1]=(in1)->a[1]^(in2)->a[1] )
typedef struct { # if STRICT_ALIGNMENT
u64 a; # define ocb_block16_xor_misaligned(in1,in2,out) \
u64 b; ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
} OCB_BLOCK; # else
# define ocb_block16_xor(in1,in2,out) \ # define ocb_block16_xor_misaligned ocb_block16_xor
(out)->a=(in1)->a^(in2)->a; (out)->b=(in1)->b^(in2)->b; # endif
# endif /* STRICT_ALIGNMENT */
struct ocb128_context { struct ocb128_context {
/* Need both encrypt and decrypt key schedules for decryption */ /* Need both encrypt and decrypt key schedules for decryption */

View File

@ -53,11 +53,6 @@
#ifndef OPENSSL_NO_OCB #ifndef OPENSSL_NO_OCB
union ublock {
unsigned char *chrblk;
OCB_BLOCK *ocbblk;
};
/* /*
* Calculate the number of binary trailing zeros in any given number * Calculate the number of binary trailing zeros in any given number
*/ */
@ -88,23 +83,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
unsigned char shift_mask; unsigned char shift_mask;
int i; int i;
unsigned char mask[15]; unsigned char mask[15];
union ublock locin;
union ublock locout;
locin.ocbblk = in;
locout.ocbblk = out;
shift_mask = 0xff; shift_mask = 0xff;
shift_mask <<= (8 - shift); shift_mask <<= (8 - shift);
for (i = 15; i >= 0; i--) { for (i = 15; i >= 0; i--) {
if (i > 0) { if (i > 0) {
mask[i - 1] = locin.chrblk[i] & shift_mask; mask[i - 1] = in->c[i] & shift_mask;
mask[i - 1] >>= 8 - shift; mask[i - 1] >>= 8 - shift;
} }
locout.chrblk[i] = locin.chrblk[i] << shift; out->c[i] = in->c[i] << shift;
if (i != 15) { if (i != 15) {
locout.chrblk[i] ^= mask[i]; out->c[i] ^= mask[i];
} }
} }
} }
@ -115,23 +105,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out) static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
{ {
unsigned char mask; unsigned char mask;
union ublock locin;
union ublock locout;
locin.ocbblk = in;
locout.ocbblk = out;
/* /*
* Calculate the mask based on the most significant bit. There are more * Calculate the mask based on the most significant bit. There are more
* efficient ways to do this - but this way is constant time * efficient ways to do this - but this way is constant time
*/ */
mask = locin.chrblk[0] & 0x80; mask = in->c[0] & 0x80;
mask >>= 7; mask >>= 7;
mask *= 135; mask *= 135;
ocb_block_lshift(in, 1, out); ocb_block_lshift(in, 1, out);
locout.chrblk[15] ^= mask; out->c[15] ^= mask;
} }
/* /*
@ -191,13 +176,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out, static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keyenc) void *keyenc)
{ {
union ublock locin; ctx->encrypt(in->c, out->c, keyenc);
union ublock locout;
locin.ocbblk = in;
locout.ocbblk = out;
ctx->encrypt(locin.chrblk, locout.chrblk, keyenc);
} }
/* /*
@ -206,13 +185,7 @@ static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out, static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keydec) void *keydec)
{ {
union ublock locin; ctx->decrypt(in->c, out->c, keydec);
union ublock locout;
locin.ocbblk = in;
locout.ocbblk = out;
ctx->decrypt(locin.chrblk, locout.chrblk, keydec);
} }
/* /*
@ -305,9 +278,6 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
unsigned char ktop[16], tmp[16], mask; unsigned char ktop[16], tmp[16], mask;
unsigned char stretch[24], nonce[16]; unsigned char stretch[24], nonce[16];
size_t bottom, shift; size_t bottom, shift;
union ublock offset;
offset.ocbblk = &ctx->offset;
/* /*
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths. * Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
@ -341,7 +311,7 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
&ctx->offset); &ctx->offset);
mask = 0xff; mask = 0xff;
mask <<= 8 - shift; mask <<= 8 - shift;
offset.chrblk[15] |= ctx->offset.c[15] |=
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift); (*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
return 1; return 1;
@ -444,13 +414,13 @@ int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16)); inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
ocb_block16_xor(&ctx->offset, inblock, &tmp1); ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
/* Checksum_i = Checksum_{i-1} xor P_i */ /* Checksum_i = Checksum_{i-1} xor P_i */
ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum); ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc); ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
outblock = outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16)); (OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
ocb_block16_xor(&ctx->offset, &tmp2, outblock); ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
} }
@ -517,14 +487,14 @@ int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16)); inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
ocb_block16_xor(&ctx->offset, inblock, &tmp1); ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec); ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
outblock = outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16)); (OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
ocb_block16_xor(&ctx->offset, &tmp2, outblock); ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
/* Checksum_i = Checksum_{i-1} xor P_i */ /* Checksum_i = Checksum_{i-1} xor P_i */
ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum); ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
} }
/* /*