2
# The bn_div_words is actually gcc output but the other parts are hand done.
3
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
5
# I've gone back and re-done most of the routines.
6
# The key thing to remember for the 164 CPU is that while a
7
# multiply operation takes 8 cycles, another one can only be issued
8
# after 4 cycles have elapsed. I've made modifications to help
9
# improve this. Also, normally, a ld instruction will not be available
17
# bn_mul_add_words: multiply words by the 64-bit value in $19 and add the
# products, plus a running carry, into result words stored through $16.
#
# NOTE(review): only part of the routine is present here -- the entry label,
# the loads, the "word += low product" adds, the pointer/counter updates and
# the local labels ($43, $45) are not shown, so the notes below describe only
# what the visible instructions do.
#
# Register roles that can be read off the visible instructions:
#   $16              base register of every result store (stq)
#   $18              signed counter tested by blt/bgt
#   $19              multiplier (second operand of every mulq/umulh)
#   $0               running carry word
#   $20,$21,$27,$28  multiplicands; each is overwritten by umulh with the
#                    high 64 bits of its product
#   $5..$8           low 64 bits of each product (mulq)
#   $1..$4           result words written back with stq
#   $22..$25         0/1 flags from cmpult (unsigned "less than")
#
# Visible per-word pattern (shown for word 1):
#   cmpult $1,$5,$22   flag = 1 if $1 <u low product (presumably a carry out
#                      of an earlier "addq $1,$5,$1" that is not shown)
#   addq   $20,$22,$20 high half += flag
#   addq   $1,$0,$1    word += incoming carry
#   cmpult $1,$0,$0    1 if that addition wrapped
#   addq   $20,$0,$0   next carry = high half + wrap flag
# The four words are interleaved so the multiplies of later words are issued
# between the carry arithmetic of earlier ones.
.globl bn_mul_add_words
26
# Negative counter: skip the four-word body and go to the code at $43.
blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
31
# ---- four-word unrolled body (lines tagged ###### are the multiplies) ----
mulq $20,$19,$5 # 1 2 1 ######
34
umulh $20,$19,$20 # 1 2 ######
37
mulq $21,$19,$6 # 2 2 1 ######
41
umulh $21,$19,$21 # 2 2 ######
42
cmpult $1,$5,$22 # 1 2 3 1
43
addq $20,$22,$20 # 1 3 1
44
addq $1,$0,$1 # 1 2 3 1
45
mulq $27,$19,$7 # 3 2 1 ######
46
cmpult $1,$0,$0 # 1 2 3 2
48
addq $20,$0,$0 # 1 3 2
49
cmpult $2,$6,$23 # 2 2 3 1
50
addq $21,$23,$21 # 2 3 1
51
umulh $27,$19,$27 # 3 2 ######
52
addq $2,$0,$2 # 2 2 3 1
53
cmpult $2,$0,$0 # 2 2 3 2
55
mulq $28,$19,$8 # 4 2 1 ######
56
addq $21,$0,$0 # 2 3 2
59
cmpult $3,$7,$24 # 3 2 3 1
60
# Stores use negative offsets from $16; presumably $16 has already been
# advanced past these four words (the update is not shown) -- TODO confirm.
stq $1,-32($16) # 1 2 4
61
umulh $28,$19,$28 # 4 2 ######
62
addq $27,$24,$27 # 3 3 1
63
addq $3,$0,$3 # 3 2 3 1
64
stq $2,-24($16) # 2 2 4
65
cmpult $3,$0,$0 # 3 2 3 2
66
stq $3,-16($16) # 3 2 4
68
addq $27,$0,$0 # 3 3 2
69
cmpult $4,$8,$25 # 4 2 3 1
71
addq $28,$25,$28 # 4 3 1
72
addq $4,$0,$4 # 4 2 3 1
73
cmpult $4,$0,$0 # 4 2 3 2
74
stq $4,-8($16) # 4 2 4
75
addq $28,$0,$0 # 4 3 2
87
# ---- single-word step: same carry sequence as above, one word per pass ----
mulq $20,$19,$5 # 4 2 1
91
umulh $20,$19,$20 # 4 2
93
cmpult $1,$5,$22 # 4 2 3 1
94
addq $20,$22,$20 # 4 3 1
95
addq $1,$0,$1 # 4 2 3 1
96
cmpult $1,$0,$0 # 4 2 3 2
97
addq $20,$0,$0 # 4 3 2
98
stq $1,-8($16) # 4 2 4
100
# Return through the address in $26; $0 holds the final carry word.
ret $31,($26),1 # else exit
105
# Counter still positive: run the single-word code at $45.
bgt $18,$45 # goto tail code
106
ret $31,($26),1 # else exit
108
.end bn_mul_add_words
119
# Multiply-by-word fragment: each word is multiplied by $19, the running
# carry in $0 is added to the low half, and the result is stored through $16.
# Unlike the multiply-add code, nothing previously held in a result register
# is added in: the stored value is "low product + carry".
#
# NOTE(review): the routine's name/label, the first loads, the pointer and
# counter updates and the local labels ($143, $145) are not part of this
# excerpt (presumably this is bn_mul_words -- confirm against the full file).
#
# Register roles visible here:
#   $16              base register of the result stores
#   $17              base register of the source loads (ldq)
#   $18              signed counter tested by blt/bgt
#   $19              multiplier
#   $0               running carry word
#   $20,$21,$27,$28  source words, replaced by the high product half (umulh)
#   $5..$8           low product halves, which become the stored results
#
# Per-word pattern (word 1):
#   addq   $5,$0,$5    low half += carry
#   cmpult $5,$0,$0    1 if that addition wrapped
#   addq   $20,$0,$0   next carry = high half + wrap flag
# Negative counter: skip the four-word body and go to the code at $143.
blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
124
# ---- four-word unrolled body (lines tagged ##### are the multiplies) ----
mulq $20,$19,$5 # 1 2 1 #####
126
ldq $27,16($17) # 3 1
127
umulh $20,$19,$20 # 1 2 #####
128
ldq $28,24($17) # 4 1
129
mulq $21,$19,$6 # 2 2 1 #####
130
addq $5,$0,$5 # 1 2 3 1
132
cmpult $5,$0,$0 # 1 2 3 2
133
umulh $21,$19,$21 # 2 2 #####
134
addq $20,$0,$0 # 1 3 2
136
addq $6,$0,$6 # 2 2 3 1
137
mulq $27,$19,$7 # 3 2 1 #####
138
cmpult $6,$0,$0 # 2 2 3 2
139
addq $21,$0,$0 # 2 3 2
141
umulh $27,$19,$27 # 3 2 #####
142
# Negative store offsets: presumably $16 was already advanced past these
# four words (update not shown) -- TODO confirm.
stq $5,-32($16) # 1 2 4
143
mulq $28,$19,$8 # 4 2 1 #####
144
addq $7,$0,$7 # 3 2 3 1
145
stq $6,-24($16) # 2 2 4
146
cmpult $7,$0,$0 # 3 2 3 2
147
umulh $28,$19,$28 # 4 2 #####
148
addq $27,$0,$0 # 3 3 2
149
stq $7,-16($16) # 3 2 4
150
addq $8,$0,$8 # 4 2 3 1
151
cmpult $8,$0,$0 # 4 2 3 2
153
addq $28,$0,$0 # 4 3 2
155
stq $8,-8($16) # 4 2 4
166
# ---- single-word step: same carry sequence, one word per pass ----
mulq $20,$19,$5 # 4 2 1
168
umulh $20,$19,$20 # 4 2
169
addq $5,$0,$5 # 4 2 3 1
171
cmpult $5,$0,$0 # 4 2 3 2
173
addq $20,$0,$0 # 4 3 2
174
stq $5,-8($16) # 4 2 4
177
# Return through the address in $26; $0 holds the final carry word.
ret $31,($26),1 # else exit
182
# Counter still positive: run the single-word code at $145.
bgt $18,$145 # goto tail code
183
ret $31,($26),1 # else exit
195
# Squaring fragment: every source word is multiplied by itself and the
# 128-bit square is stored as two consecutive quadwords (low half first)
# through $16. No carry is propagated between words.
#
# NOTE(review): the routine's name/label, the first two loads, the stores of
# the first square, the pointer/counter updates and the local labels
# ($543, $442) are not part of this excerpt (presumably this is
# bn_sqr_words -- confirm against the full file).
#
# Register roles visible here:
#   $16              base register of the result stores
#   $17              base register of the source loads
#   $18              signed counter tested by blt/bgt
#   $20,$21,$27,$28  source words
#   $5..$8           low 64 bits of each square (mulq)
#   $1..$4           high 64 bits of each square (umulh)
# Negative counter: skip the four-word body and go to the code at $543.
blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
199
# ---- four-word unrolled body (lines tagged ###### are the multiplies) ----
mulq $20,$20,$5 ######
202
umulh $20,$20,$1 ######
203
ldq $27,16($17) # load source quadword at offset 16 (a[2])
204
mulq $21,$21,$6 ######
205
ldq $28,24($17) # load source quadword at offset 24 (a[3])
207
umulh $21,$21,$2 ######
209
mulq $27,$27,$7 ######
210
stq $6,16($16) # r[2] = low half of $21 squared
211
umulh $27,$27,$3 ######
212
stq $2,24($16) # r[3] = high half of $21 squared
213
mulq $28,$28,$8 ######
214
stq $7,32($16) # r[4] = low half of $27 squared
215
umulh $28,$28,$4 ######
216
stq $3,40($16) # r[5] = high half of $27 squared
220
# The last pair uses negative offsets; presumably $16 has been advanced by
# 64 bytes in between (update not shown), making these r[6] and r[7].
stq $8,-16($16) # low half of $28 squared
221
stq $4,-8($16) # high half of $28 squared
228
# ---- single-word step: square one word per pass ----
ldq $20,0($17) # a[0]
229
mulq $20,$20,$5 # a[0]*a[0] low 64 bits
233
umulh $20,$20,$1 # a[0]*a[0] high 64 bits
234
# Negative offsets: presumably $16 was advanced by 16 before these stores
# (update not shown) -- TODO confirm.
stq $5,-16($16) # low half of the square
235
stq $1,-8($16) # high half of the square
238
# Return through the address in $26.
ret $31,($26),1 # else exit
243
# Counter still positive: run the single-word code at $442.
bgt $18,$442 # goto tail code
244
ret $31,($26),1 # else exit
256
# Multi-word addition fragment: r[i] = a[i] + b[i] + carry, with a from $17,
# b from $18, results stored through $16 and the carry kept in $0.
#
# NOTE(review): the routine's name/label, the first loads, the instructions
# that fold the $22..$25 flags into $0 and the local labels ($945, ...) are
# not part of this excerpt (presumably this is bn_add_words -- confirm
# against the full file).
#
# Register roles visible here:
#   $16        base register of the result stores
#   $17, $18   base registers of the two source arrays (a and b)
#   $19        signed word counter (decremented by 4 in the unrolled body,
#              by 1 in the single-word step)
#   $0         running carry
#   $1..$4     b words, overwritten with the sums that get stored
#   $5..$8     a words
#   $22..$25   1 if "a + b" wrapped for word 1..4 (cmpult sum,a)
#
# Per-word pattern:
#   addq   sum,a,sum     sum = a + b
#   cmpult sum,a,flag    carry out of a + b; must be sampled BEFORE the
#                        carry-in is added
#   addq   sum,$0,sum    sum += carry-in
#   cmpult sum,$0,$0     1 if adding the carry-in wrapped
bis $31,$31,$0 # carry = 0 ($31 always reads as zero)
262
# ---- four-word unrolled body ----
addq $1,$5,$1 # r=a+b;
264
cmpult $1,$5,$22 # did a+b wrap?
266
addq $1,$0,$1 # r += carry-in
267
ldq $7,16($17) # a[2]
268
cmpult $1,$0,$0 # did adding the carry-in wrap?
269
ldq $3,16($18) # b[2]
271
ldq $8,24($17) # a[3]
272
addq $2,$6,$2 # r=a+b;
273
ldq $4,24($18) # b[3]
274
cmpult $2,$6,$23 # did a+b wrap?
275
addq $3,$7,$3 # r=a+b;
276
addq $2,$0,$2 # r += carry-in
277
cmpult $3,$7,$24 # did a+b wrap?
278
cmpult $2,$0,$0 # did adding the carry-in wrap?
279
addq $4,$8,$4 # r=a+b;
281
cmpult $4,$8,$25 # did a+b wrap?
282
addq $3,$0,$3 # r += carry-in
283
stq $1,0($16) # r[0]=sum
284
cmpult $3,$0,$0 # did adding the carry-in wrap?
285
stq $2,8($16) # r[1]=sum
287
stq $3,16($16) # r[2]=sum
288
addq $4,$0,$4 # r += carry-in
289
subq $19,4,$19 # four words done
290
cmpult $4,$0,$0 # did adding the carry-in wrap?
291
addq $17,32,$17 # a += 4 words
293
stq $4,24($16) # r[3]=sum
294
addq $18,32,$18 # b += 4 words
295
addq $16,32,$16 # r += 4 words
305
# ---- single-word step ----
# The carry out of a+b is sampled before the carry-in is added, matching the
# unrolled body. Sampling it afterwards gives wrong flags: with $5 = 5, the
# other operand = 2^64-1 and carry-in = 1 the final sum is 5, so both
# compares yield 0 and the carry is lost; with $5 = 1, the other operand =
# 2^64-2 and carry-in = 1 the final sum is 0 and both compares yield 1.
addq $1,$5,$1 # r=a+b;
306
subq $19,1,$19 # one word done
307
cmpult $1,$5,$22 # did a+b wrap? (before the carry-in is added)
309
addq $1,$0,$1 # r += carry-in
310
cmpult $1,$0,$0 # did adding the carry-in wrap?
312
stq $1,0($16) # r[0]=sum
317
# Return through the address in $26; $0 holds the final carry.
ret $31,($26),1 # else exit
321
# Counter still positive: run the single-word code at $945.
bgt $19,$945 # goto tail code
322
ret $31,($26),1 # else exit
326
# What follows was taken directly from the C compiler with a few
327
# hacks to redo the labels.
357
# Call the external routine BN_num_bits_word; jsr leaves the return address
# in $26. NOTE(review): the argument setup and the use of the result are not
# part of this excerpt.
jsr $26,BN_num_bits_word
365
# lda $16,_IO_stderr_
2562
.globl bn_sqr_comba4
2702
.globl bn_sqr_comba8
2732
cmpult $22, $24, $21
2733
cmpult $23, $25, $20
2742
cmpult $23, $19, $17
2756
cmpult $23, $28, $20
2773
cmpult $22, $17, $28
2787
cmpult $22, $20, $18
2796
cmpult $22, $17, $28
2797
cmpult $23, $21, $19
2810
cmpult $22, $27, $18
2811
cmpult $23, $24, $17
2824
cmpult $22, $21, $25
2825
cmpult $23, $28, $27
2840
cmpult $23, $24, $20
2854
cmpult $23, $28, $19
2868
cmpult $23, $18, $17
2879
cmpult $22, $19, $21
2893
cmpult $22, $18, $25
2907
cmpult $22, $24, $27
2921
cmpult $22, $28, $19
2936
cmpult $22, $24, $21
2937
cmpult $23, $17, $18
2950
cmpult $22, $28, $25
2951
cmpult $23, $20, $24
2964
cmpult $22, $17, $27
2965
cmpult $23, $21, $28
2978
cmpult $22, $20, $19
2979
cmpult $23, $25, $17
2988
cmpult $23, $21, $28
3002
cmpult $23, $18, $17
3016
cmpult $23, $27, $19
3030
cmpult $23, $20, $25
3047
cmpult $22, $19, $20
3061
cmpult $22, $25, $28
3075
cmpult $22, $24, $17
3084
cmpult $22, $25, $28
3085
cmpult $23, $21, $20
3098
cmpult $22, $27, $17
3099
cmpult $23, $19, $25
3112
cmpult $22, $21, $18
3113
cmpult $23, $28, $27
3128
cmpult $23, $19, $24
3142
cmpult $23, $28, $20
3153
cmpult $22, $24, $27
3167
cmpult $22, $28, $17
3182
cmpult $22, $24, $20
3183
cmpult $23, $21, $25
3192
cmpult $23, $28, $17