572 lines
11 KiB
Perl
572 lines
11 KiB
Perl
#!/usr/local/bin/perl
|
|
# I have this in perl so I can use more useful register names and then convert
|
|
# them into alpha registers.
|
|
#
|
|
|
|
# Symbolic register names used in the assembler template returned by data(),
# mapped to the numeric Alpha register each one stands for.
my %reg_for = (
    CC => 0,
    R1 => 1,  R2 => 2,  R3 => 3,  R4 => 4,
    L1 => 5,  L2 => 6,  L3 => 7,  L4 => 8,
    O1 => 22, O2 => 23, O3 => 24, O4 => 25,
    A1 => 20, A2 => 21, A3 => 27, A4 => 28,
);

# Build one alternation of all symbolic names.  The substitution below is
# anchored on the leading '$' and a trailing word boundary, so only whole
# register tokens such as "$A1" are rewritten; the same two letters appearing
# inside a comment, label or symbol name are left untouched.
my $names = join '|', sort keys %reg_for;

my $d = data();
$d =~ s/\$($names)\b/\$$reg_for{$1}/g;

print $d;
|
|
|
|
# data()
#
# Returns the DEC Alpha assembler template as a single string.  The template
# is a non-interpolating here-doc, so every '$' in it is literal.  Registers
# are written with symbolic names ($CC, $R1-$R4, $L1-$L4, $O1-$O4, $A1-$A4)
# next to plain numeric ones; turning the symbolic names into numbers is the
# caller's job.
#
# Routines contained in the template: bn_mul_add_words, bn_mul_words,
# bn_sqr_words, bn_add_words, bn_sub_words and bn_div_words.
#
# Notes for maintainers:
#  * bn_add_words, one-word tail loop ($945): the "cmpult $R1,$L1,$O1" that
#    detects overflow of a+b must run BEFORE the incoming carry is added to
#    $R1.  Doing it afterwards loses the carry-out when the carry-in is 1 and
#    a+b+1 wraps to a value that is not below a (for example a=5, b=2^64-1).
#    The tail loop here uses the same add / compare / add-carry / compare
#    order as the 4-way unrolled loop ($901).
#  * bn_sub_words branches unconditionally to $800, so its 4-way unrolled
#    loop ($801) is never executed; the body of that loop performs additions
#    (addq), so it must not be enabled as it stands.
#  * bn_sqr_words contains a two-operand "subq $18,4"; presumably the
#    assembler defaults the destination to the first source register -
#    TODO confirm for the assembler in use.
sub data
{
    return <<'EOF';

# DEC Alpha assember
# The bn_div_words is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div_words.
# I've gone back and re-done most of routines.
# The key thing to remeber for the 164 CPU is that while a
# multiply operation takes 8 cycles, another one can only be issued
# after 4 cycles have elapsed. I've done modification to help
# improve this. Also, normally, a ld instruction will not be available
# for about 3 cycles.
.file 1 "bn_asm.c"
.set noat
gcc2_compiled.:
__gnu_compiled_c:
.text
.align 3
.globl bn_mul_add_words
.ent bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$CC
blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
ldq $R1,0($16) # 1 1
.align 3
$42:
mulq $A1,$19,$L1 # 1 2 1 ######
ldq $A2,8($17) # 2 1
ldq $R2,8($16) # 2 1
umulh $A1,$19,$A1 # 1 2 ######
ldq $A3,16($17) # 3 1
ldq $R3,16($16) # 3 1
mulq $A2,$19,$L2 # 2 2 1 ######
ldq $A4,24($17) # 4 1
addq $R1,$L1,$R1 # 1 2 2
ldq $R4,24($16) # 4 1
umulh $A2,$19,$A2 # 2 2 ######
cmpult $R1,$L1,$O1 # 1 2 3 1
addq $A1,$O1,$A1 # 1 3 1
addq $R1,$CC,$R1 # 1 2 3 1
mulq $A3,$19,$L3 # 3 2 1 ######
cmpult $R1,$CC,$CC # 1 2 3 2
addq $R2,$L2,$R2 # 2 2 2
addq $A1,$CC,$CC # 1 3 2
cmpult $R2,$L2,$O2 # 2 2 3 1
addq $A2,$O2,$A2 # 2 3 1
umulh $A3,$19,$A3 # 3 2 ######
addq $R2,$CC,$R2 # 2 2 3 1
cmpult $R2,$CC,$CC # 2 2 3 2
subq $18,4,$18
mulq $A4,$19,$L4 # 4 2 1 ######
addq $A2,$CC,$CC # 2 3 2
addq $R3,$L3,$R3 # 3 2 2
addq $16,32,$16
cmpult $R3,$L3,$O3 # 3 2 3 1
stq $R1,-32($16) # 1 2 4
umulh $A4,$19,$A4 # 4 2 ######
addq $A3,$O3,$A3 # 3 3 1
addq $R3,$CC,$R3 # 3 2 3 1
stq $R2,-24($16) # 2 2 4
cmpult $R3,$CC,$CC # 3 2 3 2
stq $R3,-16($16) # 3 2 4
addq $R4,$L4,$R4 # 4 2 2
addq $A3,$CC,$CC # 3 3 2
cmpult $R4,$L4,$O4 # 4 2 3 1
addq $17,32,$17
addq $A4,$O4,$A4 # 4 3 1
addq $R4,$CC,$R4 # 4 2 3 1
cmpult $R4,$CC,$CC # 4 2 3 2
stq $R4,-8($16) # 4 2 4
addq $A4,$CC,$CC # 4 3 2
blt $18,$43

ldq $A1,0($17) # 1 1
ldq $R1,0($16) # 1 1

br $42

.align 4
$45:
ldq $A1,0($17) # 4 1
ldq $R1,0($16) # 4 1
mulq $A1,$19,$L1 # 4 2 1
subq $18,1,$18
addq $16,8,$16
addq $17,8,$17
umulh $A1,$19,$A1 # 4 2
addq $R1,$L1,$R1 # 4 2 2
cmpult $R1,$L1,$O1 # 4 2 3 1
addq $A1,$O1,$A1 # 4 3 1
addq $R1,$CC,$R1 # 4 2 3 1
cmpult $R1,$CC,$CC # 4 2 3 2
addq $A1,$CC,$CC # 4 3 2
stq $R1,-8($16) # 4 2 4
bgt $18,$45
ret $31,($26),1 # else exit

.align 4
$43:
addq $18,4,$18
bgt $18,$45 # goto tail code
ret $31,($26),1 # else exit

.end bn_mul_add_words
.align 3
.globl bn_mul_words
.ent bn_mul_words
bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$CC
blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
.align 3
$142:

mulq $A1,$19,$L1 # 1 2 1 #####
ldq $A2,8($17) # 2 1
ldq $A3,16($17) # 3 1
umulh $A1,$19,$A1 # 1 2 #####
ldq $A4,24($17) # 4 1
mulq $A2,$19,$L2 # 2 2 1 #####
addq $L1,$CC,$L1 # 1 2 3 1
subq $18,4,$18
cmpult $L1,$CC,$CC # 1 2 3 2
umulh $A2,$19,$A2 # 2 2 #####
addq $A1,$CC,$CC # 1 3 2
addq $17,32,$17
addq $L2,$CC,$L2 # 2 2 3 1
mulq $A3,$19,$L3 # 3 2 1 #####
cmpult $L2,$CC,$CC # 2 2 3 2
addq $A2,$CC,$CC # 2 3 2
addq $16,32,$16
umulh $A3,$19,$A3 # 3 2 #####
stq $L1,-32($16) # 1 2 4
mulq $A4,$19,$L4 # 4 2 1 #####
addq $L3,$CC,$L3 # 3 2 3 1
stq $L2,-24($16) # 2 2 4
cmpult $L3,$CC,$CC # 3 2 3 2
umulh $A4,$19,$A4 # 4 2 #####
addq $A3,$CC,$CC # 3 3 2
stq $L3,-16($16) # 3 2 4
addq $L4,$CC,$L4 # 4 2 3 1
cmpult $L4,$CC,$CC # 4 2 3 2

addq $A4,$CC,$CC # 4 3 2

stq $L4,-8($16) # 4 2 4

blt $18,$143

ldq $A1,0($17) # 1 1

br $142

.align 4
$145:
ldq $A1,0($17) # 4 1
mulq $A1,$19,$L1 # 4 2 1
subq $18,1,$18
umulh $A1,$19,$A1 # 4 2
addq $L1,$CC,$L1 # 4 2 3 1
addq $16,8,$16
cmpult $L1,$CC,$CC # 4 2 3 2
addq $17,8,$17
addq $A1,$CC,$CC # 4 3 2
stq $L1,-8($16) # 4 2 4

bgt $18,$145
ret $31,($26),1 # else exit

.align 4
$143:
addq $18,4,$18
bgt $18,$145 # goto tail code
ret $31,($26),1 # else exit

.end bn_mul_words
.align 3
.globl bn_sqr_words
.ent bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $18,4,$18
blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
.align 3
$542:
mulq $A1,$A1,$L1 ######
ldq $A2,8($17) # 1 1
subq $18,4
umulh $A1,$A1,$R1 ######
ldq $A3,16($17) # 1 1
mulq $A2,$A2,$L2 ######
ldq $A4,24($17) # 1 1
stq $L1,0($16) # r[0]
umulh $A2,$A2,$R2 ######
stq $R1,8($16) # r[1]
mulq $A3,$A3,$L3 ######
stq $L2,16($16) # r[0]
umulh $A3,$A3,$R3 ######
stq $R2,24($16) # r[1]
mulq $A4,$A4,$L4 ######
stq $L3,32($16) # r[0]
umulh $A4,$A4,$R4 ######
stq $R3,40($16) # r[1]

addq $16,64,$16
addq $17,32,$17
stq $L4,-16($16) # r[0]
stq $R4,-8($16) # r[1]

blt $18,$543
ldq $A1,0($17) # 1 1
br $542

$442:
ldq $A1,0($17) # a[0]
mulq $A1,$A1,$L1 # a[0]*w low part r2
addq $16,16,$16
addq $17,8,$17
subq $18,1,$18
umulh $A1,$A1,$R1 # a[0]*w high part r3
stq $L1,-16($16) # r[0]
stq $R1,-8($16) # r[1]

bgt $18,$442
ret $31,($26),1 # else exit

.align 4
$543:
addq $18,4,$18
bgt $18,$442 # goto tail code
ret $31,($26),1 # else exit
.end bn_sqr_words

.align 3
.globl bn_add_words
.ent bn_add_words
bn_add_words:
bn_add_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $19,4,$19
bis $31,$31,$CC # carry = 0
blt $19,$900
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
.align 3
$901:
addq $R1,$L1,$R1 # r=a+b;
ldq $L2,8($17) # a[1]
cmpult $R1,$L1,$O1 # did we overflow?
ldq $R2,8($18) # b[1]
addq $R1,$CC,$R1 # c+= overflow
ldq $L3,16($17) # a[2]
cmpult $R1,$CC,$CC # overflow?
ldq $R3,16($18) # b[2]
addq $CC,$O1,$CC
ldq $L4,24($17) # a[3]
addq $R2,$L2,$R2 # r=a+b;
ldq $R4,24($18) # b[3]
cmpult $R2,$L2,$O2 # did we overflow?
addq $R3,$L3,$R3 # r=a+b;
addq $R2,$CC,$R2 # c+= overflow
cmpult $R3,$L3,$O3 # did we overflow?
cmpult $R2,$CC,$CC # overflow?
addq $R4,$L4,$R4 # r=a+b;
addq $CC,$O2,$CC
cmpult $R4,$L4,$O4 # did we overflow?
addq $R3,$CC,$R3 # c+= overflow
stq $R1,0($16) # r[0]=c
cmpult $R3,$CC,$CC # overflow?
stq $R2,8($16) # r[1]=c
addq $CC,$O3,$CC
stq $R3,16($16) # r[2]=c
addq $R4,$CC,$R4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $R4,$CC,$CC # overflow?
addq $17,32,$17 # a++
addq $CC,$O4,$CC
stq $R4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++

blt $19,$900
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
br $901
.align 4
$945:
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
addq $R1,$L1,$R1 # r=a+b;
subq $19,1,$19 # loop--
cmpult $R1,$L1,$O1 # did we overflow?
addq $17,8,$17 # a++
addq $R1,$CC,$R1 # c+= overflow
cmpult $R1,$CC,$CC # overflow?
addq $18,8,$18 # b++
stq $R1,0($16) # r[0]=c
addq $CC,$O1,$CC
addq $16,8,$16 # r++

bgt $19,$945
ret $31,($26),1 # else exit

$900:
addq $19,4,$19
bgt $19,$945 # goto tail code
ret $31,($26),1 # else exit
.end bn_add_words

.align 3
.globl bn_sub_words
.ent bn_sub_words
bn_sub_words:
bn_sub_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $19,4,$19
bis $31,$31,$CC # carry = 0
br $800
blt $19,$800
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
.align 3
$801:
addq $R1,$L1,$R1 # r=a+b;
ldq $L2,8($17) # a[1]
cmpult $R1,$L1,$O1 # did we overflow?
ldq $R2,8($18) # b[1]
addq $R1,$CC,$R1 # c+= overflow
ldq $L3,16($17) # a[2]
cmpult $R1,$CC,$CC # overflow?
ldq $R3,16($18) # b[2]
addq $CC,$O1,$CC
ldq $L4,24($17) # a[3]
addq $R2,$L2,$R2 # r=a+b;
ldq $R4,24($18) # b[3]
cmpult $R2,$L2,$O2 # did we overflow?
addq $R3,$L3,$R3 # r=a+b;
addq $R2,$CC,$R2 # c+= overflow
cmpult $R3,$L3,$O3 # did we overflow?
cmpult $R2,$CC,$CC # overflow?
addq $R4,$L4,$R4 # r=a+b;
addq $CC,$O2,$CC
cmpult $R4,$L4,$O4 # did we overflow?
addq $R3,$CC,$R3 # c+= overflow
stq $R1,0($16) # r[0]=c
cmpult $R3,$CC,$CC # overflow?
stq $R2,8($16) # r[1]=c
addq $CC,$O3,$CC
stq $R3,16($16) # r[2]=c
addq $R4,$CC,$R4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $R4,$CC,$CC # overflow?
addq $17,32,$17 # a++
addq $CC,$O4,$CC
stq $R4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++

blt $19,$800
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
br $801
.align 4
$845:
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
cmpult $L1,$R1,$O1 # will we borrow?
subq $L1,$R1,$R1 # r=a-b;
subq $19,1,$19 # loop--
cmpult $R1,$CC,$O2 # will we borrow?
subq $R1,$CC,$R1 # c+= overflow
addq $17,8,$17 # a++
addq $18,8,$18 # b++
stq $R1,0($16) # r[0]=c
addq $O2,$O1,$CC
addq $16,8,$16 # r++

bgt $19,$845
ret $31,($26),1 # else exit

$800:
addq $19,4,$19
bgt $19,$845 # goto tail code
ret $31,($26),1 # else exit
.end bn_sub_words

#
# What follows was taken directly from the C compiler with a few
# hacks to redo the lables.
#
.text
.align 3
.globl bn_div_words
.ent bn_div_words
bn_div_words:
ldgp $29,0($27)
bn_div_words..ng:
lda $30,-48($30)
.frame $30,48,$26,0
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
.mask 0x4003e00,-48
.prologue 1
bis $16,$16,$9
bis $17,$17,$10
bis $18,$18,$11
bis $31,$31,$13
bis $31,2,$12
bne $11,$119
lda $0,-1
br $31,$136
.align 4
$119:
bis $11,$11,$16
jsr $26,BN_num_bits_word
ldgp $29,0($26)
subq $0,64,$1
beq $1,$120
bis $31,1,$1
sll $1,$0,$1
cmpule $9,$1,$1
bne $1,$120
# lda $16,_IO_stderr_
# lda $17,$C32
# bis $0,$0,$18
# jsr $26,fprintf
# ldgp $29,0($26)
jsr $26,abort
ldgp $29,0($26)
.align 4
$120:
bis $31,64,$3
cmpult $9,$11,$2
subq $3,$0,$1
addl $1,$31,$0
subq $9,$11,$1
cmoveq $2,$1,$9
beq $0,$122
zapnot $0,15,$2
subq $3,$0,$1
sll $11,$2,$11
sll $9,$2,$3
srl $10,$1,$1
sll $10,$2,$10
bis $3,$1,$9
$122:
srl $11,32,$5
zapnot $11,15,$6
lda $7,-1
.align 5
$123:
srl $9,32,$1
subq $1,$5,$1
bne $1,$126
zapnot $7,15,$27
br $31,$127
.align 4
$126:
bis $9,$9,$24
bis $5,$5,$25
divqu $24,$25,$27
$127:
srl $10,32,$4
.align 5
$128:
mulq $27,$5,$1
subq $9,$1,$3
zapnot $3,240,$1
bne $1,$129
mulq $6,$27,$2
sll $3,32,$1
addq $1,$4,$1
cmpule $2,$1,$2
bne $2,$129
subq $27,1,$27
br $31,$128
.align 4
$129:
mulq $27,$6,$1
mulq $27,$5,$4
srl $1,32,$3
sll $1,32,$1
addq $4,$3,$4
cmpult $10,$1,$2
subq $10,$1,$10
addq $2,$4,$2
cmpult $9,$2,$1
bis $2,$2,$4
beq $1,$134
addq $9,$11,$9
subq $27,1,$27
$134:
subl $12,1,$12
subq $9,$4,$9
beq $12,$124
sll $27,32,$13
sll $9,32,$2
srl $10,32,$1
sll $10,32,$10
bis $2,$1,$9
br $31,$123
.align 4
$124:
bis $13,$27,$0
$136:
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
addq $30,48,$30
ret $31,($26),1
.end bn_div_words
EOF
}
|
|
|