Expand expspeed.c to make BN_kronecker timings.
This caused a segmentation fault in calls to malloc, so I cleaned up bn_lib.c a little so that it is easier to see what is going on. The bug turned out to be an off-by-one error in BN_bin2bn.
This commit is contained in:
parent
bdec3c5323
commit
a08bcccc67
3
CHANGES
3
CHANGES
@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
Changes between 0.9.6 and 0.9.7 [xx XXX 2000]
|
Changes between 0.9.6 and 0.9.7 [xx XXX 2000]
|
||||||
|
|
||||||
|
*) BN_bin2bn bugfix (off-by-one error).
|
||||||
|
[Bodo Moeller]
|
||||||
|
|
||||||
*) Make BN_mod_inverse faster by explicitly handling small quotients
|
*) Make BN_mod_inverse faster by explicitly handling small quotients
|
||||||
in the Euclid loop. (Speed gain about 20% for small moduli [256 or
|
in the Euclid loop. (Speed gain about 20% for small moduli [256 or
|
||||||
512 bits], about 30% for larger ones [1024 or 2048 bits].)
|
512 bits], about 30% for larger ones [1024 or 2048 bits].)
|
||||||
|
@ -510,6 +510,7 @@ void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n);
|
|||||||
#define BN_F_BN_CTX_NEW 106
|
#define BN_F_BN_CTX_NEW 106
|
||||||
#define BN_F_BN_DIV 107
|
#define BN_F_BN_DIV 107
|
||||||
#define BN_F_BN_EXPAND2 108
|
#define BN_F_BN_EXPAND2 108
|
||||||
|
#define BN_F_BN_EXPAND_INTERNAL 120
|
||||||
#define BN_F_BN_MOD_EXP2_MONT 118
|
#define BN_F_BN_MOD_EXP2_MONT 118
|
||||||
#define BN_F_BN_MOD_EXP_MONT 109
|
#define BN_F_BN_MOD_EXP_MONT 109
|
||||||
#define BN_F_BN_MOD_EXP_MONT_WORD 117
|
#define BN_F_BN_MOD_EXP_MONT_WORD 117
|
||||||
|
@ -76,6 +76,7 @@ static ERR_STRING_DATA BN_str_functs[]=
|
|||||||
{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"},
|
{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"},
|
||||||
{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"},
|
{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"},
|
||||||
{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"},
|
{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"},
|
||||||
|
{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0), "BN_EXPAND_INTERNAL"},
|
||||||
{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"},
|
{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"},
|
||||||
{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"},
|
{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"},
|
||||||
{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"},
|
{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"},
|
||||||
|
@ -306,7 +306,7 @@ BIGNUM *BN_new(void)
|
|||||||
|
|
||||||
/* This is used both by bn_expand2() and bn_dup_expand() */
|
/* This is used both by bn_expand2() and bn_dup_expand() */
|
||||||
/* The caller MUST check that words > b->dmax before calling this */
|
/* The caller MUST check that words > b->dmax before calling this */
|
||||||
static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
|
static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
|
||||||
{
|
{
|
||||||
BN_ULONG *A,*a = NULL;
|
BN_ULONG *A,*a = NULL;
|
||||||
const BN_ULONG *B;
|
const BN_ULONG *B;
|
||||||
@ -315,13 +315,13 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
|
|||||||
bn_check_top(b);
|
bn_check_top(b);
|
||||||
if (BN_get_flags(b,BN_FLG_STATIC_DATA))
|
if (BN_get_flags(b,BN_FLG_STATIC_DATA))
|
||||||
{
|
{
|
||||||
BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
|
BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
|
a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
|
||||||
if (A == NULL)
|
if (A == NULL)
|
||||||
{
|
{
|
||||||
BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
|
BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
#if 1
|
#if 1
|
||||||
@ -329,70 +329,6 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
|
|||||||
/* Check if the previous number needs to be copied */
|
/* Check if the previous number needs to be copied */
|
||||||
if (B != NULL)
|
if (B != NULL)
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
/* This lot is an unrolled loop to copy b->top
|
|
||||||
* BN_ULONGs from B to A
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* I have nothing against unrolling but it's usually done for
|
|
||||||
* several reasons, namely:
|
|
||||||
* - minimize percentage of decision making code, i.e. branches;
|
|
||||||
* - avoid cache trashing;
|
|
||||||
* - make it possible to schedule loads earlier;
|
|
||||||
* Now let's examine the code below. The cornerstone of C is
|
|
||||||
* "programmer is always right" and that's what we love it for:-)
|
|
||||||
* For this very reason C compilers have to be paranoid when it
|
|
||||||
* comes to data aliasing and assume the worst. Yeah, but what
|
|
||||||
* does it mean in real life? This means that loop body below will
|
|
||||||
* be compiled to sequence of loads immediately followed by stores
|
|
||||||
* as compiler assumes the worst, something in A==B+1 style. As a
|
|
||||||
* result CPU pipeline is going to starve for incoming data. Secondly
|
|
||||||
* if A and B happen to share same cache line such code is going to
|
|
||||||
* cause severe cache trashing. Both factors have severe impact on
|
|
||||||
* performance of modern CPUs and this is the reason why this
|
|
||||||
* particular piece of code is #ifdefed away and replaced by more
|
|
||||||
* "friendly" version found in #else section below. This comment
|
|
||||||
* also applies to BN_copy function.
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
for (i=b->top&(~7); i>0; i-=8)
|
|
||||||
{
|
|
||||||
A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
|
|
||||||
A[4]=B[4]; A[5]=B[5]; A[6]=B[6]; A[7]=B[7];
|
|
||||||
A+=8;
|
|
||||||
B+=8;
|
|
||||||
}
|
|
||||||
switch (b->top&7)
|
|
||||||
{
|
|
||||||
case 7:
|
|
||||||
A[6]=B[6];
|
|
||||||
case 6:
|
|
||||||
A[5]=B[5];
|
|
||||||
case 5:
|
|
||||||
A[4]=B[4];
|
|
||||||
case 4:
|
|
||||||
A[3]=B[3];
|
|
||||||
case 3:
|
|
||||||
A[2]=B[2];
|
|
||||||
case 2:
|
|
||||||
A[1]=B[1];
|
|
||||||
case 1:
|
|
||||||
A[0]=B[0];
|
|
||||||
case 0:
|
|
||||||
/* I need the 'case 0' entry for utrix cc.
|
|
||||||
* If the optimizer is turned on, it does the
|
|
||||||
* switch table by doing
|
|
||||||
* a=top&7
|
|
||||||
* a--;
|
|
||||||
* goto jump_table[a];
|
|
||||||
* If top is 0, this makes us jump to 0xffffffc
|
|
||||||
* which is rather bad :-(.
|
|
||||||
* eric 23-Apr-1998
|
|
||||||
*/
|
|
||||||
;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
|
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -413,9 +349,11 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
|
|||||||
case 3: A[2]=B[2];
|
case 3: A[2]=B[2];
|
||||||
case 2: A[1]=B[1];
|
case 2: A[1]=B[1];
|
||||||
case 1: A[0]=B[0];
|
case 1: A[0]=B[0];
|
||||||
case 0: ; /* ultrix cc workaround, see above */
|
case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
|
||||||
|
* the switch table by doing a=top&3; a--; goto jump_table[a];
|
||||||
|
* which fails for top== 0 */
|
||||||
|
;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now need to zero any data between b->top and b->max */
|
/* Now need to zero any data between b->top and b->max */
|
||||||
@ -453,7 +391,7 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
|
|||||||
|
|
||||||
if (words > b->dmax)
|
if (words > b->dmax)
|
||||||
{
|
{
|
||||||
BN_ULONG *a = internal_bn_expand(b, words);
|
BN_ULONG *a = bn_expand_internal(b, words);
|
||||||
|
|
||||||
if (a)
|
if (a)
|
||||||
{
|
{
|
||||||
@ -472,7 +410,7 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* If a == NULL, there was an error in allocation in
|
/* If a == NULL, there was an error in allocation in
|
||||||
internal_bn_expand(), and NULL should be returned */
|
bn_expand_internal(), and NULL should be returned */
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -491,11 +429,12 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
|
|||||||
{
|
{
|
||||||
if (words > b->dmax)
|
if (words > b->dmax)
|
||||||
{
|
{
|
||||||
BN_ULONG *a = internal_bn_expand(b, words);
|
BN_ULONG *a = bn_expand_internal(b, words);
|
||||||
|
|
||||||
if (a)
|
if (a)
|
||||||
{
|
{
|
||||||
OPENSSL_free(b->d);
|
if (b->d)
|
||||||
|
OPENSSL_free(b->d);
|
||||||
b->d=a;
|
b->d=a;
|
||||||
b->dmax=words;
|
b->dmax=words;
|
||||||
}
|
}
|
||||||
@ -547,7 +486,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
|
|||||||
case 3: A[2]=B[2];
|
case 3: A[2]=B[2];
|
||||||
case 2: A[1]=B[1];
|
case 2: A[1]=B[1];
|
||||||
case 1: A[0]=B[0];
|
case 1: A[0]=B[0];
|
||||||
case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
|
case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
|
memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
|
||||||
@ -666,7 +605,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
|
|||||||
return(NULL);
|
return(NULL);
|
||||||
i=((n-1)/BN_BYTES)+1;
|
i=((n-1)/BN_BYTES)+1;
|
||||||
m=((n-1)%(BN_BYTES));
|
m=((n-1)%(BN_BYTES));
|
||||||
ret->top=i;
|
ret->top=i-1;
|
||||||
while (n-- > 0)
|
while (n-- > 0)
|
||||||
{
|
{
|
||||||
l=(l<<8L)| *(s++);
|
l=(l<<8L)| *(s++);
|
||||||
|
@ -67,6 +67,7 @@
|
|||||||
/* determine timings for modexp, gcd, or modular inverse */
|
/* determine timings for modexp, gcd, or modular inverse */
|
||||||
#define TEST_EXP
|
#define TEST_EXP
|
||||||
#undef TEST_GCD
|
#undef TEST_GCD
|
||||||
|
#undef TEST_KRON
|
||||||
#undef TEST_INV
|
#undef TEST_INV
|
||||||
|
|
||||||
|
|
||||||
@ -220,7 +221,7 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
|
|||||||
double tm;
|
double tm;
|
||||||
long num;
|
long num;
|
||||||
|
|
||||||
#if defined(TEST_EXP) + defined(TEST_GCD) + defined(TEST_INV) != 1
|
#if defined(TEST_EXP) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) != 1
|
||||||
# error "choose one test"
|
# error "choose one test"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -253,7 +254,11 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
|
|||||||
#elif defined(TEST_GCD)
|
#elif defined(TEST_GCD)
|
||||||
if (!BN_gcd(r,a,b,ctx)) goto err;
|
if (!BN_gcd(r,a,b,ctx)) goto err;
|
||||||
if (!BN_gcd(r,b,c,ctx)) goto err;
|
if (!BN_gcd(r,b,c,ctx)) goto err;
|
||||||
if (!BN_gcd(r,b,c,ctx)) goto err;
|
if (!BN_gcd(r,c,a,ctx)) goto err;
|
||||||
|
#elif defined(TEST_KRON)
|
||||||
|
if (-2 == BN_kronecker(a,b,ctx)) goto err;
|
||||||
|
if (-2 == BN_kronecker(b,c,ctx)) goto err;
|
||||||
|
if (-2 == BN_kronecker(c,a,ctx)) goto err;
|
||||||
#else /* TEST_INV */
|
#else /* TEST_INV */
|
||||||
if (!BN_mod_inverse(r,a,c,ctx)) goto err;
|
if (!BN_mod_inverse(r,a,c,ctx)) goto err;
|
||||||
if (!BN_mod_inverse(r,b,c,ctx)) goto err;
|
if (!BN_mod_inverse(r,b,c,ctx)) goto err;
|
||||||
@ -265,6 +270,8 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
|
|||||||
"modexp %4d ^ %4d %% %4d"
|
"modexp %4d ^ %4d %% %4d"
|
||||||
#elif defined(TEST_GCD)
|
#elif defined(TEST_GCD)
|
||||||
"3*gcd %4d %4d %4d"
|
"3*gcd %4d %4d %4d"
|
||||||
|
#elif defined(TEST_KRON)
|
||||||
|
"3*kronecker %4d %4d %4d"
|
||||||
#else /* TEST_INV */
|
#else /* TEST_INV */
|
||||||
"2*inv %4d %4d mod %4d"
|
"2*inv %4d %4d mod %4d"
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user