Bignum division tune-up. The idea is to move the multiplications in front of
the loop body and replace them with addition/subtraction inside the loop.
This commit is contained in:
parent
a40f6dce87
commit
0dd25e3606
@ -1,5 +1,5 @@
|
|||||||
.rdata
|
.rdata
|
||||||
.asciiz "mips3.s, Version 1.0 (prerelease)"
|
.asciiz "mips3.s, Version 1.0"
|
||||||
.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
|
.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -19,19 +19,26 @@
|
|||||||
* a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
|
* a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
|
||||||
* module. For updates see http://fy.chalmers.se/~appro/hpe/.
|
* module. For updates see http://fy.chalmers.se/~appro/hpe/.
|
||||||
*
|
*
|
||||||
* The module is designed to work with "new" IRIX ABI(5), namely
|
* The module is designed to work with either of the "new" MIPS ABI(5),
|
||||||
* N32 and N64. But it was tested only with MIPSpro 7.2.x assembler,
|
* namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
|
||||||
* i.e. depends on preprocessor options set up by MIPSspro 7.2.x
|
* IRIX 5.x not only because it doesn't support new ABIs but also
|
||||||
* driver. Another neat gadget offered by MIPSpro 7.2.x assembler is
|
* because 5.x kernels put R4x00 CPU into 32-bit mode and all those
|
||||||
* an peep-hole(?) optimization pass. This gave me the opportunity
|
* 64-bit instructions (daddu, dmultu, etc.) found below will only
|
||||||
* to make the code looking more regular as all those architecture
|
* cause illegal instruction exception:-(
|
||||||
* dependent(!) instruction rescheduling details were left to the
|
*
|
||||||
* assembler. Cool, huh? Do note that I have no idea if GNU assembler
|
* In addition the code depends on preprocessor flags set up by MIPSpro
|
||||||
* does anything similar nor how GNU C will do with this module.
|
* compiler driver (either as or cc) and therefore (probably?) can't be
|
||||||
* Feedback on the matter is therefore very much appreciated:-)
|
* compiled by the GNU assembler. GNU C driver manages fine though...
|
||||||
|
* I mean as long as -mmips-as is specified or is the default option,
|
||||||
|
* because then it simply invokes /usr/bin/as which in turn takes
|
||||||
|
* perfect care of the preprocessor definitions. Another neat feature
|
||||||
|
* offered by the MIPSpro assembler is an optimization pass. This gave
|
||||||
|
* me the opportunity to have the code looking more regular as all those
|
||||||
|
* architecture dependent instruction rescheduling details were left to
|
||||||
|
* the assembler. Cool, huh?
|
||||||
*
|
*
|
||||||
* Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
|
* Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
|
||||||
* exhibits 3-3.5-3.7 times improvement!
|
* goes way over 3 times faster!
|
||||||
*
|
*
|
||||||
* <appro@fy.chalmers.se>
|
* <appro@fy.chalmers.se>
|
||||||
*/
|
*/
|
||||||
@ -56,8 +63,8 @@
|
|||||||
|
|
||||||
#define MINUS4 v1
|
#define MINUS4 v1
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_mul_add_words)
|
LEAF(bn_mul_add_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bgtzl a2,.L_bn_mul_add_words_proceed
|
bgtzl a2,.L_bn_mul_add_words_proceed
|
||||||
ld t0,0(a1)
|
ld t0,0(a1)
|
||||||
@ -185,8 +192,8 @@ LEAF(bn_mul_add_words)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_mul_add_words)
|
END(bn_mul_add_words)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_mul_words)
|
LEAF(bn_mul_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bgtzl a2,.L_bn_mul_words_proceed
|
bgtzl a2,.L_bn_mul_words_proceed
|
||||||
ld t0,0(a1)
|
ld t0,0(a1)
|
||||||
@ -284,8 +291,8 @@ LEAF(bn_mul_words)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_mul_words)
|
END(bn_mul_words)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_sqr_words)
|
LEAF(bn_sqr_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bgtzl a2,.L_bn_sqr_words_proceed
|
bgtzl a2,.L_bn_sqr_words_proceed
|
||||||
ld t0,0(a1)
|
ld t0,0(a1)
|
||||||
@ -371,8 +378,8 @@ LEAF(bn_sqr_words)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_sqr_words)
|
END(bn_sqr_words)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_add_words)
|
LEAF(bn_add_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bgtzl a3,.L_bn_add_words_proceed
|
bgtzl a3,.L_bn_add_words_proceed
|
||||||
ld t0,0(a1)
|
ld t0,0(a1)
|
||||||
@ -471,8 +478,8 @@ LEAF(bn_add_words)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_add_words)
|
END(bn_add_words)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_sub_words)
|
LEAF(bn_sub_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bgtzl a3,.L_bn_sub_words_proceed
|
bgtzl a3,.L_bn_sub_words_proceed
|
||||||
ld t0,0(a1)
|
ld t0,0(a1)
|
||||||
@ -567,24 +574,24 @@ END(bn_sub_words)
|
|||||||
|
|
||||||
#undef MINUS4
|
#undef MINUS4
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_div_words)
|
LEAF(bn_div_words)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bnezl a2,.L_bn_div_words_proceed
|
bnezl a2,.L_bn_div_words_proceed
|
||||||
move t0,zero
|
move v1,zero
|
||||||
jr ra
|
jr ra
|
||||||
li v0,-1 /* I'd rather signal div-by-zero
|
li v0,-1 /* I'd rather signal div-by-zero
|
||||||
* which can be done with 'break 7' */
|
* which can be done with 'break 7' */
|
||||||
.set reorder
|
|
||||||
|
|
||||||
.L_bn_div_words_proceed:
|
.L_bn_div_words_proceed:
|
||||||
bltz a2,.L_bn_div_words_body
|
bltz a2,.L_bn_div_words_body
|
||||||
.set noreorder
|
move t9,v1
|
||||||
dsll a2,1
|
dsll a2,1
|
||||||
bgtz a2,.-4
|
bgtz a2,.-4
|
||||||
addu t0,1
|
addu t9,1
|
||||||
|
|
||||||
.set reorder
|
.set reorder
|
||||||
negu t1,t0
|
negu t1,t9
|
||||||
li t2,-1
|
li t2,-1
|
||||||
dsll t2,t1
|
dsll t2,t1
|
||||||
and t2,a0
|
and t2,a0
|
||||||
@ -593,65 +600,135 @@ LEAF(bn_div_words)
|
|||||||
bnezl t2,.+8
|
bnezl t2,.+8
|
||||||
break 6 /* signal overflow */
|
break 6 /* signal overflow */
|
||||||
.set reorder
|
.set reorder
|
||||||
dsll a0,t0
|
dsll a0,t9
|
||||||
dsll a1,t0
|
dsll a1,t9
|
||||||
or a0,AT
|
or a0,AT
|
||||||
|
|
||||||
#define QT ta0
|
#define QT ta0
|
||||||
#define DH ta1
|
#define HH ta1
|
||||||
#define HH ta2
|
#define DH v1
|
||||||
#define MINUS1 ta3
|
|
||||||
.L_bn_div_words_body:
|
.L_bn_div_words_body:
|
||||||
dsrl DH,a2,32
|
dsrl DH,a2,32
|
||||||
li v1,2
|
|
||||||
sgeu AT,a0,a2
|
sgeu AT,a0,a2
|
||||||
li MINUS1,-1
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bnezl AT,.+8
|
bnezl AT,.+8
|
||||||
dsubu a0,a2
|
dsubu a0,a2
|
||||||
.set reorder
|
.set reorder
|
||||||
|
|
||||||
.L_bn_div_words_outer_loop:
|
li QT,-1
|
||||||
dsrl HH,a0,32
|
dsrl HH,a0,32
|
||||||
subu v1,1
|
dsrl QT,32 /* q=0xffffffff */
|
||||||
dsrl QT,MINUS1,32 /* q=0xffffffff */
|
beq DH,HH,.L_bn_div_words_skip_div1
|
||||||
beq DH,HH,.L_bn_div_words_inner_loop
|
|
||||||
ddivu zero,a0,DH
|
ddivu zero,a0,DH
|
||||||
mflo QT
|
mflo QT
|
||||||
.L_bn_div_words_inner_loop:
|
.L_bn_div_words_skip_div1:
|
||||||
dmultu a2,QT
|
dmultu a2,QT
|
||||||
dsll t3,a0,32
|
dsll t3,a0,32
|
||||||
dsrl AT,a1,32
|
dsrl AT,a1,32
|
||||||
or t3,AT
|
or t3,AT
|
||||||
mflo t0
|
mflo t0
|
||||||
mfhi t1
|
mfhi t1
|
||||||
|
.L_bn_div_words_inner_loop1:
|
||||||
sltu t2,t3,t0
|
sltu t2,t3,t0
|
||||||
seq t8,HH,t1
|
seq t8,HH,t1
|
||||||
sltu AT,HH,t1
|
sltu AT,HH,t1
|
||||||
and t2,t8
|
and t2,t8
|
||||||
or AT,t2
|
or AT,t2
|
||||||
.set noreorder
|
.set noreorder
|
||||||
bnezl AT,.L_bn_div_words_inner_loop
|
beqz AT,.L_bn_div_words_inner_loop1_done
|
||||||
dsubu QT,1
|
sltu t2,t0,a2
|
||||||
.set reorder
|
.set reorder
|
||||||
|
dsubu QT,1
|
||||||
dsubu a0,t3,t0
|
dsubu t0,a2
|
||||||
beqz v1,.L_bn_div_words_outer_loop_done
|
dsubu t1,t2
|
||||||
|
b .L_bn_div_words_inner_loop1
|
||||||
|
.L_bn_div_words_inner_loop1_done:
|
||||||
|
|
||||||
dsll a1,32
|
dsll a1,32
|
||||||
|
dsubu a0,t3,t0
|
||||||
dsll v0,QT,32
|
dsll v0,QT,32
|
||||||
b .L_bn_div_words_outer_loop
|
|
||||||
|
|
||||||
.L_bn_div_words_outer_loop_done:
|
li QT,-1
|
||||||
|
dsrl HH,a0,32
|
||||||
|
dsrl QT,32 /* q=0xffffffff */
|
||||||
|
beq DH,HH,.L_bn_div_words_skip_div2
|
||||||
|
ddivu zero,a0,DH
|
||||||
|
mflo QT
|
||||||
|
.L_bn_div_words_skip_div2:
|
||||||
|
dmultu a2,QT
|
||||||
|
dsll t3,a0,32
|
||||||
|
dsrl AT,a1,32
|
||||||
|
or t3,AT
|
||||||
|
mflo t0
|
||||||
|
mfhi t1
|
||||||
|
.L_bn_div_words_inner_loop2:
|
||||||
|
sltu t2,t3,t0
|
||||||
|
seq t8,HH,t1
|
||||||
|
sltu AT,HH,t1
|
||||||
|
and t2,t8
|
||||||
|
or AT,t2
|
||||||
|
.set noreorder
|
||||||
|
beqz AT,.L_bn_div_words_inner_loop2_done
|
||||||
|
sltu t2,t0,a2
|
||||||
|
.set reorder
|
||||||
|
dsubu QT,1
|
||||||
|
dsubu t0,a2
|
||||||
|
dsubu t1,t2
|
||||||
|
b .L_bn_div_words_inner_loop2
|
||||||
|
.L_bn_div_words_inner_loop2_done:
|
||||||
|
|
||||||
|
dsubu a0,t3,t0
|
||||||
or v0,QT
|
or v0,QT
|
||||||
move v1,a0 /* v1 contains remainder if one wants it */
|
dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
|
||||||
|
dsrl a2,t9 /* restore a2 */
|
||||||
jr ra
|
jr ra
|
||||||
#undef MINUS1
|
|
||||||
#undef HH
|
#undef HH
|
||||||
#undef DH
|
#undef DH
|
||||||
#undef QT
|
#undef QT
|
||||||
END(bn_div_words)
|
END(bn_div_words)
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
LEAF(bn_div_3_words)
|
||||||
|
.set reorder
|
||||||
|
move a3,a0 /* we know that bn_div_words doesn't
|
||||||
|
* touch a3, ta2, ta3 and preserves a2
|
||||||
|
* so that we can save two arguments
|
||||||
|
* and return address in registers
|
||||||
|
* instead of stack:-)
|
||||||
|
*/
|
||||||
|
ld a0,(a3)
|
||||||
|
move ta2,a2
|
||||||
|
move a2,a1
|
||||||
|
ld a1,-8(a3)
|
||||||
|
move ta3,ra
|
||||||
|
move v1,zero
|
||||||
|
li v0,-1
|
||||||
|
beq a0,a2,.L_bn_div_3_words_skip_div
|
||||||
|
jal bn_div_words
|
||||||
|
move ra,ta3
|
||||||
|
.L_bn_div_3_words_skip_div:
|
||||||
|
dmultu ta2,v0
|
||||||
|
ld t2,-16(a3)
|
||||||
|
mflo t0
|
||||||
|
mfhi t1
|
||||||
|
.L_bn_div_3_words_inner_loop:
|
||||||
|
sgeu AT,t2,t0
|
||||||
|
seq t9,t1,v1
|
||||||
|
sltu t8,t1,v1
|
||||||
|
and AT,t9
|
||||||
|
or AT,t8
|
||||||
|
bnez AT,.L_bn_div_3_words_inner_loop_done
|
||||||
|
daddu v1,a2
|
||||||
|
sltu t3,t0,ta2
|
||||||
|
sltu AT,v1,a2
|
||||||
|
dsubu v0,1
|
||||||
|
dsubu t0,ta2
|
||||||
|
dsubu t1,t3
|
||||||
|
beqz AT,.L_bn_div_3_words_inner_loop
|
||||||
|
.L_bn_div_3_words_inner_loop_done:
|
||||||
|
jr ra
|
||||||
|
END(bn_div_3_words)
|
||||||
|
|
||||||
#define a_0 t0
|
#define a_0 t0
|
||||||
#define a_1 t1
|
#define a_1 t1
|
||||||
#define a_2 t2
|
#define a_2 t2
|
||||||
@ -679,20 +756,19 @@ END(bn_div_words)
|
|||||||
|
|
||||||
#define FRAME_SIZE 48
|
#define FRAME_SIZE 48
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_mul_comba8)
|
LEAF(bn_mul_comba8)
|
||||||
.align 5
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
PTR_SUB sp,FRAME_SIZE
|
PTR_SUB sp,FRAME_SIZE
|
||||||
.frame sp,64,ra
|
.frame sp,64,ra
|
||||||
.set reorder
|
.set reorder
|
||||||
ld a_0,0(a1) /* If compiled with -mips3 options
|
ld a_0,0(a1) /* If compiled with -mips3 option on
|
||||||
* assembler barks on this line with
|
* R5000 box assembler barks on this
|
||||||
* "shouldn't have mult/div as last
|
* line with "shouldn't have mult/div
|
||||||
* instruction in bb (R10K bug)"
|
* as last instruction in bb (R10K
|
||||||
* warning. If anybody out there has
|
* bug)" warning. If anybody out there
|
||||||
* a clue on what does "bb" mean and
|
* has a clue about how to circumvent
|
||||||
* how to circumvent this do send me
|
* this do send me a note.
|
||||||
* a note.
|
|
||||||
* <appro@fy.chalmers.se>
|
* <appro@fy.chalmers.se>
|
||||||
*/
|
*/
|
||||||
ld b_0,0(a2)
|
ld b_0,0(a2)
|
||||||
@ -1286,8 +1362,8 @@ LEAF(bn_mul_comba8)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_mul_comba8)
|
END(bn_mul_comba8)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_mul_comba4)
|
LEAF(bn_mul_comba4)
|
||||||
.align 5
|
|
||||||
.set reorder
|
.set reorder
|
||||||
ld a_0,0(a1)
|
ld a_0,0(a1)
|
||||||
ld b_0,0(a2)
|
ld b_0,0(a2)
|
||||||
@ -1444,8 +1520,8 @@ END(bn_mul_comba4)
|
|||||||
#define a_6 b_2
|
#define a_6 b_2
|
||||||
#define a_7 b_3
|
#define a_7 b_3
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_sqr_comba8)
|
LEAF(bn_sqr_comba8)
|
||||||
.align 5
|
|
||||||
.set reorder
|
.set reorder
|
||||||
ld a_0,0(a1)
|
ld a_0,0(a1)
|
||||||
ld a_1,8(a1)
|
ld a_1,8(a1)
|
||||||
@ -1934,8 +2010,8 @@ LEAF(bn_sqr_comba8)
|
|||||||
jr ra
|
jr ra
|
||||||
END(bn_sqr_comba8)
|
END(bn_sqr_comba8)
|
||||||
|
|
||||||
|
.align 5
|
||||||
LEAF(bn_sqr_comba4)
|
LEAF(bn_sqr_comba4)
|
||||||
.align 5
|
|
||||||
.set reorder
|
.set reorder
|
||||||
ld a_0,0(a1)
|
ld a_0,0(a1)
|
||||||
ld a_1,8(a1)
|
ld a_1,8(a1)
|
||||||
|
@ -264,18 +264,20 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
|
|||||||
else
|
else
|
||||||
q=h/dh;
|
q=h/dh;
|
||||||
|
|
||||||
|
th=q*dh;
|
||||||
|
tl=dl*q;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
t=(h-(th=q*dh));
|
t=h-th;
|
||||||
tl=BN_MASK2;
|
|
||||||
if ((t&BN_MASK2h) ||
|
if ((t&BN_MASK2h) ||
|
||||||
((tl=dl*q) <= (
|
((tl) <= (
|
||||||
(t<<BN_BITS4)|
|
(t<<BN_BITS4)|
|
||||||
((l&BN_MASK2h)>>BN_BITS4))))
|
((l&BN_MASK2h)>>BN_BITS4))))
|
||||||
break;
|
break;
|
||||||
q--;
|
q--;
|
||||||
|
th-=dh;
|
||||||
|
tl-=dl;
|
||||||
}
|
}
|
||||||
if (tl==BN_MASK2) tl=q*dl;
|
|
||||||
t=(tl>>BN_BITS4);
|
t=(tl>>BN_BITS4);
|
||||||
tl=(tl<<BN_BITS4)&BN_MASK2h;
|
tl=(tl<<BN_BITS4)&BN_MASK2h;
|
||||||
th+=t;
|
th+=t;
|
||||||
|
@ -200,56 +200,69 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|||||||
|
|
||||||
for (i=0; i<loop-1; i++)
|
for (i=0; i<loop-1; i++)
|
||||||
{
|
{
|
||||||
BN_ULONG q,n0,n1;
|
BN_ULONG q,l0;
|
||||||
BN_ULONG l0;
|
#ifdef BN_DIV3W
|
||||||
|
q=bn_div_3_words(wnump,d0,d1);
|
||||||
|
#else
|
||||||
|
BN_ULONG n0,n1,rem;
|
||||||
|
|
||||||
wnum.d--; wnum.top++;
|
|
||||||
n0=wnump[0];
|
n0=wnump[0];
|
||||||
n1=wnump[-1];
|
n1=wnump[-1];
|
||||||
if (n0 == d0)
|
if (n0 == d0)
|
||||||
q=BN_MASK2;
|
q=BN_MASK2;
|
||||||
else
|
else
|
||||||
|
#if defined(BN_LLONG) && defined(BN_DIV2W)
|
||||||
|
q=((((BN_ULLONG)n0)<<BN_BITS2)|n1)/((BN_ULLONG)d0);
|
||||||
|
#else
|
||||||
q=bn_div_words(n0,n1,d0);
|
q=bn_div_words(n0,n1,d0);
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef BN_LLONG
|
#ifdef BN_LLONG
|
||||||
BN_ULLONG t1,t2,rem;
|
BN_ULLONG t2;
|
||||||
t1=((BN_ULLONG)n0<<BN_BITS2)|n1;
|
|
||||||
|
/*
|
||||||
|
* rem doesn't have to be BN_ULLONG. The least we
|
||||||
|
* know it's less than d0, isn't it?
|
||||||
|
*/
|
||||||
|
rem=(n1-q*d0)&BN_MASK2;
|
||||||
|
|
||||||
|
t2=(BN_ULLONG)d1*q;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
rem=t1-(BN_ULLONG)q*d0;
|
if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
|
||||||
t2=(BN_ULLONG)d1*q;
|
|
||||||
if ((rem>>BN_BITS2) ||
|
|
||||||
(t2 <= ((rem<<BN_BITS2)|wnump[-2])))
|
|
||||||
break;
|
break;
|
||||||
q--;
|
q--;
|
||||||
|
rem += d0;
|
||||||
|
if (rem < d0) break; /* don't let rem overflow */
|
||||||
|
t2 -= d1;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
BN_ULONG t1l,t1h,t2l,t2h,t3l,t3h,ql,qh,t3t;
|
BN_ULONG t2l,t2h,ql,qh;
|
||||||
t1h=n0;
|
|
||||||
t1l=n1;
|
/*
|
||||||
|
* A single multiplication is more than enough here.
|
||||||
|
* See the comment above in BN_LLONG section...
|
||||||
|
*/
|
||||||
|
rem=(n1-q*d0)&BN_MASK2;
|
||||||
|
|
||||||
|
t2l=LBITS(d1); t2h=HBITS(d1);
|
||||||
|
ql =LBITS(q); qh =HBITS(q);
|
||||||
|
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
t2l=LBITS(d1); t2h=HBITS(d1);
|
if ((t2h < rem) ||
|
||||||
ql =LBITS(q); qh =HBITS(q);
|
((t2h == rem) && (t2l <= wnump[-2])))
|
||||||
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
|
break;
|
||||||
|
|
||||||
t3t=LBITS(d0); t3h=HBITS(d0);
|
|
||||||
mul64(t3t,t3h,ql,qh); /* t3=t1-(BN_ULLONG)q*d0; */
|
|
||||||
t3l=(t1l-t3t)&BN_MASK2;
|
|
||||||
if (t3l > t1l) t3h++;
|
|
||||||
t3h=(t1h-t3h)&BN_MASK2;
|
|
||||||
|
|
||||||
/*if ((t3>>BN_BITS2) ||
|
|
||||||
(t2 <= ((t3<<BN_BITS2)+wnump[-2])))
|
|
||||||
break; */
|
|
||||||
if (t3h) break;
|
|
||||||
if (t2h < t3l) break;
|
|
||||||
if ((t2h == t3l) && (t2l <= wnump[-2])) break;
|
|
||||||
|
|
||||||
q--;
|
q--;
|
||||||
|
rem += d0;
|
||||||
|
if (rem < d0) break; /* don't let rem overflow */
|
||||||
|
if (t2l < d1) t2h--; t2l -= d1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif /* BN_DIV3W */
|
||||||
|
wnum.d--; wnum.top++;
|
||||||
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
|
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
|
||||||
tmp->d[div_n]=l0;
|
tmp->d[div_n]=l0;
|
||||||
for (j=div_n+1; j>0; j--)
|
for (j=div_n+1; j>0; j--)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user