modes/ocb128.c: split fixed-block XORs into aligned and misaligned variants.
The main goal was to improve performance on RISC platforms; e.g. a 10% improvement was measured on MIPS, POWER8...
Reviewed-by: Matt Caswell <matt@openssl.org>
This commit is contained in:
parent
b9e3d7e0f6
commit
81f3d6323d
@ -144,20 +144,19 @@ struct ccm128_context {
|
|||||||
|
|
||||||
#ifndef OPENSSL_NO_OCB
|
#ifndef OPENSSL_NO_OCB
|
||||||
|
|
||||||
# ifdef STRICT_ALIGNMENT
|
typedef union {
|
||||||
typedef struct {
|
u64 a[2];
|
||||||
unsigned char a[16];
|
unsigned char c[16];
|
||||||
} OCB_BLOCK;
|
} OCB_BLOCK;
|
||||||
# define ocb_block16_xor(in1,in2,out) \
|
# define ocb_block16_xor(in1,in2,out) \
|
||||||
ocb_block_xor((in1)->a,(in2)->a,16,(out)->a)
|
( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
|
||||||
# else /* STRICT_ALIGNMENT */
|
(out)->a[1]=(in1)->a[1]^(in2)->a[1] )
|
||||||
typedef struct {
|
# if STRICT_ALIGNMENT
|
||||||
u64 a;
|
# define ocb_block16_xor_misaligned(in1,in2,out) \
|
||||||
u64 b;
|
ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
|
||||||
} OCB_BLOCK;
|
# else
|
||||||
# define ocb_block16_xor(in1,in2,out) \
|
# define ocb_block16_xor_misaligned ocb_block16_xor
|
||||||
(out)->a=(in1)->a^(in2)->a; (out)->b=(in1)->b^(in2)->b;
|
# endif
|
||||||
# endif /* STRICT_ALIGNMENT */
|
|
||||||
|
|
||||||
struct ocb128_context {
|
struct ocb128_context {
|
||||||
/* Need both encrypt and decrypt key schedules for decryption */
|
/* Need both encrypt and decrypt key schedules for decryption */
|
||||||
|
@ -53,11 +53,6 @@
|
|||||||
|
|
||||||
#ifndef OPENSSL_NO_OCB
|
#ifndef OPENSSL_NO_OCB
|
||||||
|
|
||||||
union ublock {
|
|
||||||
unsigned char *chrblk;
|
|
||||||
OCB_BLOCK *ocbblk;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculate the number of binary trailing zero's in any given number
|
* Calculate the number of binary trailing zero's in any given number
|
||||||
*/
|
*/
|
||||||
@ -88,23 +83,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
|
|||||||
unsigned char shift_mask;
|
unsigned char shift_mask;
|
||||||
int i;
|
int i;
|
||||||
unsigned char mask[15];
|
unsigned char mask[15];
|
||||||
union ublock locin;
|
|
||||||
union ublock locout;
|
|
||||||
|
|
||||||
locin.ocbblk = in;
|
|
||||||
locout.ocbblk = out;
|
|
||||||
|
|
||||||
shift_mask = 0xff;
|
shift_mask = 0xff;
|
||||||
shift_mask <<= (8 - shift);
|
shift_mask <<= (8 - shift);
|
||||||
for (i = 15; i >= 0; i--) {
|
for (i = 15; i >= 0; i--) {
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
mask[i - 1] = locin.chrblk[i] & shift_mask;
|
mask[i - 1] = in->c[i] & shift_mask;
|
||||||
mask[i - 1] >>= 8 - shift;
|
mask[i - 1] >>= 8 - shift;
|
||||||
}
|
}
|
||||||
locout.chrblk[i] = locin.chrblk[i] << shift;
|
out->c[i] = in->c[i] << shift;
|
||||||
|
|
||||||
if (i != 15) {
|
if (i != 15) {
|
||||||
locout.chrblk[i] ^= mask[i];
|
out->c[i] ^= mask[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -115,23 +105,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
|
|||||||
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
|
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
|
||||||
{
|
{
|
||||||
unsigned char mask;
|
unsigned char mask;
|
||||||
union ublock locin;
|
|
||||||
union ublock locout;
|
|
||||||
|
|
||||||
locin.ocbblk = in;
|
|
||||||
locout.ocbblk = out;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculate the mask based on the most significant bit. There are more
|
* Calculate the mask based on the most significant bit. There are more
|
||||||
* efficient ways to do this - but this way is constant time
|
* efficient ways to do this - but this way is constant time
|
||||||
*/
|
*/
|
||||||
mask = locin.chrblk[0] & 0x80;
|
mask = in->c[0] & 0x80;
|
||||||
mask >>= 7;
|
mask >>= 7;
|
||||||
mask *= 135;
|
mask *= 135;
|
||||||
|
|
||||||
ocb_block_lshift(in, 1, out);
|
ocb_block_lshift(in, 1, out);
|
||||||
|
|
||||||
locout.chrblk[15] ^= mask;
|
out->c[15] ^= mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -191,13 +176,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
|
|||||||
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
||||||
void *keyenc)
|
void *keyenc)
|
||||||
{
|
{
|
||||||
union ublock locin;
|
ctx->encrypt(in->c, out->c, keyenc);
|
||||||
union ublock locout;
|
|
||||||
|
|
||||||
locin.ocbblk = in;
|
|
||||||
locout.ocbblk = out;
|
|
||||||
|
|
||||||
ctx->encrypt(locin.chrblk, locout.chrblk, keyenc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -206,13 +185,7 @@ static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
|||||||
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
|
||||||
void *keydec)
|
void *keydec)
|
||||||
{
|
{
|
||||||
union ublock locin;
|
ctx->decrypt(in->c, out->c, keydec);
|
||||||
union ublock locout;
|
|
||||||
|
|
||||||
locin.ocbblk = in;
|
|
||||||
locout.ocbblk = out;
|
|
||||||
|
|
||||||
ctx->decrypt(locin.chrblk, locout.chrblk, keydec);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -305,9 +278,6 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
|
|||||||
unsigned char ktop[16], tmp[16], mask;
|
unsigned char ktop[16], tmp[16], mask;
|
||||||
unsigned char stretch[24], nonce[16];
|
unsigned char stretch[24], nonce[16];
|
||||||
size_t bottom, shift;
|
size_t bottom, shift;
|
||||||
union ublock offset;
|
|
||||||
|
|
||||||
offset.ocbblk = &ctx->offset;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
|
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
|
||||||
@ -341,7 +311,7 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
|
|||||||
&ctx->offset);
|
&ctx->offset);
|
||||||
mask = 0xff;
|
mask = 0xff;
|
||||||
mask <<= 8 - shift;
|
mask <<= 8 - shift;
|
||||||
offset.chrblk[15] |=
|
ctx->offset.c[15] |=
|
||||||
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
|
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -444,13 +414,13 @@ int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
|
|||||||
|
|
||||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
|
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
|
||||||
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||||
ocb_block16_xor(&ctx->offset, inblock, &tmp1);
|
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||||
ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum);
|
ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
|
||||||
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
|
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
|
||||||
outblock =
|
outblock =
|
||||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||||
ocb_block16_xor(&ctx->offset, &tmp2, outblock);
|
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -517,14 +487,14 @@ int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
|
|||||||
|
|
||||||
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
|
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
|
||||||
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||||
ocb_block16_xor(&ctx->offset, inblock, &tmp1);
|
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||||
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
|
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
|
||||||
outblock =
|
outblock =
|
||||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||||
ocb_block16_xor(&ctx->offset, &tmp2, outblock);
|
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||||
|
|
||||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||||
ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum);
|
ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user