modes/ocb128.c: split fixed block xors into aligned and misaligned variants.
The main goal was to improve performance on RISC platforms; e.g. a 10% improvement was measured on MIPS, POWER8...
Reviewed-by: Matt Caswell <matt@openssl.org>
This commit is contained in:
parent
b9e3d7e0f6
commit
81f3d6323d
@ -144,20 +144,19 @@ struct ccm128_context {
|
||||
|
||||
#ifndef OPENSSL_NO_OCB
|
||||
|
||||
# ifdef STRICT_ALIGNMENT
|
||||
typedef struct {
|
||||
unsigned char a[16];
|
||||
typedef union {
|
||||
u64 a[2];
|
||||
unsigned char c[16];
|
||||
} OCB_BLOCK;
|
||||
# define ocb_block16_xor(in1,in2,out) \
|
||||
ocb_block_xor((in1)->a,(in2)->a,16,(out)->a)
|
||||
# else /* STRICT_ALIGNMENT */
|
||||
typedef struct {
|
||||
u64 a;
|
||||
u64 b;
|
||||
} OCB_BLOCK;
|
||||
# define ocb_block16_xor(in1,in2,out) \
|
||||
(out)->a=(in1)->a^(in2)->a; (out)->b=(in1)->b^(in2)->b;
|
||||
# endif /* STRICT_ALIGNMENT */
|
||||
# define ocb_block16_xor(in1,in2,out) \
|
||||
( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
|
||||
(out)->a[1]=(in1)->a[1]^(in2)->a[1] )
|
||||
# if STRICT_ALIGNMENT
|
||||
# define ocb_block16_xor_misaligned(in1,in2,out) \
|
||||
ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
|
||||
# else
|
||||
# define ocb_block16_xor_misaligned ocb_block16_xor
|
||||
# endif
|
||||
|
||||
struct ocb128_context {
|
||||
/* Need both encrypt and decrypt key schedules for decryption */
|
||||
|
@ -53,11 +53,6 @@
|
||||
|
||||
#ifndef OPENSSL_NO_OCB
|
||||
|
||||
union ublock {
|
||||
unsigned char *chrblk;
|
||||
OCB_BLOCK *ocbblk;
|
||||
};
|
||||
|
||||
/*
|
||||
* Calculate the number of trailing zero bits in any given number
|
||||
*/
|
||||
@ -88,23 +83,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
|
||||
unsigned char shift_mask;
|
||||
int i;
|
||||
unsigned char mask[15];
|
||||
union ublock locin;
|
||||
union ublock locout;
|
||||
|
||||
locin.ocbblk = in;
|
||||
locout.ocbblk = out;
|
||||
|
||||
shift_mask = 0xff;
|
||||
shift_mask <<= (8 - shift);
|
||||
for (i = 15; i >= 0; i--) {
|
||||
if (i > 0) {
|
||||
mask[i - 1] = locin.chrblk[i] & shift_mask;
|
||||
mask[i - 1] = in->c[i] & shift_mask;
|
||||
mask[i - 1] >>= 8 - shift;
|
||||
}
|
||||
locout.chrblk[i] = locin.chrblk[i] << shift;
|
||||
out->c[i] = in->c[i] << shift;
|
||||
|
||||
if (i != 15) {
|
||||
locout.chrblk[i] ^= mask[i];
|
||||
out->c[i] ^= mask[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -115,23 +105,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
|
||||
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
|
||||
{
|
||||
unsigned char mask;
|
||||
union ublock locin;
|
||||
union ublock locout;
|
||||
|
||||
locin.ocbblk = in;
|
||||
locout.ocbblk = out;
|
||||
|
||||
/*
|
||||
* Calculate the mask based on the most significant bit. There are more
|
||||
* efficient ways to do this - but this way is constant time
|
||||
*/
|
||||
mask = locin.chrblk[0] & 0x80;
|
||||
mask = in->c[0] & 0x80;
|
||||
mask >>= 7;
|
||||
mask *= 135;
|
||||
|
||||
ocb_block_lshift(in, 1, out);
|
||||
|
||||
locout.chrblk[15] ^= mask;
|
||||
out->c[15] ^= mask;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -191,13 +176,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
|
||||
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
||||
void *keyenc)
|
||||
{
|
||||
union ublock locin;
|
||||
union ublock locout;
|
||||
|
||||
locin.ocbblk = in;
|
||||
locout.ocbblk = out;
|
||||
|
||||
ctx->encrypt(locin.chrblk, locout.chrblk, keyenc);
|
||||
ctx->encrypt(in->c, out->c, keyenc);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -206,13 +185,7 @@ static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
||||
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
||||
void *keydec)
|
||||
{
|
||||
union ublock locin;
|
||||
union ublock locout;
|
||||
|
||||
locin.ocbblk = in;
|
||||
locout.ocbblk = out;
|
||||
|
||||
ctx->decrypt(locin.chrblk, locout.chrblk, keydec);
|
||||
ctx->decrypt(in->c, out->c, keydec);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -305,9 +278,6 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
|
||||
unsigned char ktop[16], tmp[16], mask;
|
||||
unsigned char stretch[24], nonce[16];
|
||||
size_t bottom, shift;
|
||||
union ublock offset;
|
||||
|
||||
offset.ocbblk = &ctx->offset;
|
||||
|
||||
/*
|
||||
* Spec says IV is 120 bits or fewer - it allows non-byte-aligned lengths.
|
||||
@ -341,7 +311,7 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
|
||||
&ctx->offset);
|
||||
mask = 0xff;
|
||||
mask <<= 8 - shift;
|
||||
offset.chrblk[15] |=
|
||||
ctx->offset.c[15] |=
|
||||
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
|
||||
|
||||
return 1;
|
||||
@ -444,13 +414,13 @@ int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
|
||||
|
||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
|
||||
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor(&ctx->offset, inblock, &tmp1);
|
||||
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||
ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum);
|
||||
ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
|
||||
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
|
||||
outblock =
|
||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor(&ctx->offset, &tmp2, outblock);
|
||||
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||
|
||||
}
|
||||
|
||||
@ -517,14 +487,14 @@ int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
|
||||
|
||||
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
|
||||
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor(&ctx->offset, inblock, &tmp1);
|
||||
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
|
||||
outblock =
|
||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor(&ctx->offset, &tmp2, outblock);
|
||||
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||
|
||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||
ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum);
|
||||
ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user