1
dnl HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
2
dnl add the result to a second limb vector.
4
dnl Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
6
dnl This file is part of the GNU MP Library.
8
dnl The GNU MP Library is free software; you can redistribute it and/or modify
9
dnl it under the terms of the GNU Lesser General Public License as published
10
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11
dnl your option) any later version.
13
dnl The GNU MP Library is distributed in the hope that it will be useful, but
14
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16
dnl License for more details.
18
dnl You should have received a copy of the GNU Lesser General Public License
19
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21
dnl MA 02111-1307, USA.
24
dnl This approaches 7.0 cycles/limb on PA8000 and 6.375 cycles/limb on PA8500
25
dnl for huge operands. It should be possible to do 6.0 cycles/limb with the
26
dnl current instructions and unrolling level. It is unknown why the code runs
29
dnl The feed-in and wind-down code has not yet been scheduled. Many cycles
30
dnl could be saved there per call.
33
dnl The main loop "BIG" is 4-way unrolled, mainly to allow
34
dnl effective use of ADD,DC. Delays in moving data via the cache from the FP
35
dnl registers to the IU registers have demanded a deep software pipeline, and
36
dnl a lot of stack slots for partial products in flight.
39
dnl save-some-registers
40
dnl do 0, 1, 2, or 3 limbs
41
dnl if done, restore-some-regs and return
47
dnl HP-PA stack grows upwards. We could allocate 8 fewer slots by using the
48
dnl slots marked FREE, as well as some slots in the caller's "frame marker".
85
dnl -38/-138 vlimb home slot. For 2.0N, the vlimb arg will arrive here.
88
include(`../config.m4')
91
dnl  Symbolic names for the registers %r26..%r23.  vlimb and climb are both
dnl  %r23: vlimb is moved to %fr8 through its stack home slot at function
dnl  entry, after which %r23 is cleared and reused as climb.
define(`rp',`%r26') dnl
92
define(`up',`%r25') dnl
93
define(`n',`%r24') dnl
94
define(`vlimb',`%r23') dnl
96
define(`climb',`%r23') dnl
98
ifdef(`HAVE_ABI_2_0w',
102
dnl  Entry sequence.  Under HAVE_ABI_2_0w vlimb is first stored to its home
dnl  slot at -0x38(%r30).  std,ma then saves %r3 at 0(%r30) and advances %r30
dnl  by 0x100, so the same slot is addressed as -0x138(%r30) when it is
dnl  loaded into %fr8.  climb is set to zero.
PROLOGUE(mpn_addmul_1)
104
ifdef(`HAVE_ABI_2_0w',
105
` std vlimb, -0x38(%r30) C store vlimb into "home" slot
107
std,ma %r3, 0x100(%r30)
110
ldo 0(%r0), climb C clear climb
111
fldd -0x138(%r30), %fr8 C put vlimb in fp register
113
dnl  Integer register roles for the code that handles the n mod 4 leftover
dnl  limbs: p* receive partial products reloaded from the stack, m* hold the
dnl  sum of the two mid products and its carry, ma* hold that sum realigned
dnl  by 32 bits, s000 is the running sum.  m032 and ma064 are both %r20, and
dnl  r000 is %r3.  These names are redefined for the 4-way unrolled code.
define(`p032a1',`%r1') dnl
114
define(`p032a2',`%r19') dnl
116
define(`m032',`%r20') dnl
117
define(`m096',`%r21') dnl
119
define(`p000a',`%r22') dnl
120
define(`p064a',`%r29') dnl
122
define(`s000',`%r31') dnl
124
define(`ma000',`%r4') dnl
125
define(`ma064',`%r20') dnl
127
define(`r000',`%r3') dnl
129
dnl  %r5 = n mod 4 (the low two bits of n).  When it is zero there are no
dnl  leftover limbs and control goes directly to L(BIG).
extrd,u n, 63, 2, %r5
130
cmpb,= %r5, %r0, L(BIG)
135
dnl  Form the four 32x32->64 partial products of %fr8 (vlimb) and %fr4 and
dnl  spill them to the stack: two cross (mid) products, the low product and
dnl  the high product.
xmpyu %fr8R, %fr4L, %fr22
136
xmpyu %fr8L, %fr4R, %fr23
137
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
138
xmpyu %fr8R, %fr4R, %fr24
139
xmpyu %fr8L, %fr4L, %fr25
140
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
141
fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
142
dnl  Decrement the leftover count; a nonzero result continues at
dnl  L(two_or_more).  The fstd after the branch is in its delay slot.
addib,<> -1, %r5, L(two_or_more)
143
fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
145
dnl  Reload the four partial products into integer registers.
ldd -0x78(%r30), p032a1
146
ldd -0x70(%r30), p032a2
147
ldd -0x80(%r30), p000a
149
ldd -0x68(%r30), p064a
154
dnl  Another set of four partial products of %fr8 and %fr4.  Each ldd picks
dnl  up the previously spilled product from its stack slot immediately
dnl  before the following fstd overwrites that slot with the new product.
xmpyu %fr8R, %fr4L, %fr22
155
xmpyu %fr8L, %fr4R, %fr23
156
ldd -0x78(%r30), p032a1
157
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
158
xmpyu %fr8R, %fr4R, %fr24
159
xmpyu %fr8L, %fr4L, %fr25
160
ldd -0x70(%r30), p032a2
161
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
162
ldd -0x80(%r30), p000a
163
fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
164
ldd -0x68(%r30), p064a
165
dnl  Decrement the leftover count again; a nonzero result continues at
dnl  L(three_or_more).  The final fstd is in the branch delay slot.
addib,<> -1, %r5, L(three_or_more)
166
fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
168
dnl  Add the two mid products: m032 = sum, m096 = carry out.  Then realign
dnl  by 32 bits: ma000 = low half of m032 placed in the upper half of a
dnl  zeroed word; ma064 = upper half of m032 in its low half, with the low
dnl  half of m096 deposited into its upper half.  m032 and ma064 are the
dnl  same register, so ma000 must be formed before ma064.
add p032a1, p032a2, m032
169
add,dc %r0, %r0, m096
170
depd,z m032, 31, 32, ma000
171
extrd,u m032, 31, 32, ma064
174
depd m096, 31, 32, ma064
178
dnl  Sum the two reloaded mid products (m032, carry in m096) and realign the
dnl  sum by 32 bits into ma000/ma064.  Every line below that begins with dnl
dnl  is disabled code (a loop over L(oop0)); it is not assembled.
add p032a1, p032a2, m032
179
add,dc %r0, %r0, m096
180
depd,z m032, 31, 32, ma000
181
extrd,u m032, 31, 32, ma064
183
dnl addib,= -1, %r5, L(0_out)
184
depd m096, 31, 32, ma064
186
dnl xmpyu %fr8R, %fr4L, %fr22
187
dnl xmpyu %fr8L, %fr4R, %fr23
188
dnl ldd -0x78(%r30), p032a1
189
dnl fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
191
dnl xmpyu %fr8R, %fr4R, %fr24
192
dnl xmpyu %fr8L, %fr4L, %fr25
193
dnl ldd -0x70(%r30), p032a2
194
dnl fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
197
dnl add climb, p000a, s000
198
dnl ldd -0x80(%r30), p000a
199
dnl fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
201
dnl add,dc p064a, %r0, climb
203
dnl ldd -0x68(%r30), p064a
204
dnl fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
206
dnl add ma000, s000, s000
207
dnl add,dc ma064, climb, climb
210
dnl add r000, s000, s000
211
dnl add,dc %r0, climb, climb
214
dnl add p032a1, p032a2, m032
215
dnl add,dc %r0, %r0, m096
217
dnl depd,z m032, 31, 32, ma000
218
dnl extrd,u m032, 31, 32, ma064
220
dnl addib,<> -1, %r5, L(oop0)
221
dnl depd m096, 31, 32, ma064
224
dnl  Pipelined step: new partial products of %fr8 and %fr4 are computed and
dnl  spilled while the previously spilled ones are reloaded and consumed.
dnl  s000 = climb + low product; climb then collects the high product, the
dnl  realigned mid sum (ma000 into s000, ma064 into climb) and the carries.
xmpyu %fr8R, %fr4L, %fr22
225
xmpyu %fr8L, %fr4R, %fr23
226
ldd -0x78(%r30), p032a1
227
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
228
xmpyu %fr8R, %fr4R, %fr24
229
xmpyu %fr8L, %fr4L, %fr25
230
ldd -0x70(%r30), p032a2
231
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
233
add climb, p000a, s000
234
ldd -0x80(%r30), p000a
235
fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
236
add,dc p064a, %r0, climb
237
ldd -0x68(%r30), p064a
238
fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
239
add ma000, s000, s000
240
add,dc ma064, climb, climb
242
add,dc %r0, climb, climb
244
dnl  Sum and realign the mid products reloaded above.
add p032a1, p032a2, m032
245
add,dc %r0, %r0, m096
246
depd,z m032, 31, 32, ma000
247
extrd,u m032, 31, 32, ma064
249
depd m096, 31, 32, ma064
251
dnl  Wind-down step with no new multiply: reload the last spilled partial
dnl  products, fold the low/high products and the realigned mid sum already
dnl  in ma000/ma064 into s000 and climb, then sum and realign the mid
dnl  products just reloaded.
ldd -0x78(%r30), p032a1
252
ldd -0x70(%r30), p032a2
254
add climb, p000a, s000
255
ldd -0x80(%r30), p000a
256
add,dc p064a, %r0, climb
257
ldd -0x68(%r30), p064a
258
add ma000, s000, s000
259
add,dc ma064, climb, climb
261
add,dc %r0, climb, climb
264
add p032a1, p032a2, m032
265
add,dc %r0, %r0, m096
266
depd,z m032, 31, 32, ma000
267
extrd,u m032, 31, 32, ma064
269
depd m096, 31, 32, ma064
271
dnl  Last accumulate of the leftover code: s000 = climb + low product +
dnl  ma000, and climb = high product + ma064 + carries.
add climb, p000a, s000
272
add,dc p064a, %r0, climb
273
add ma000, s000, s000
274
add,dc ma064, climb, climb
276
add,dc %r0, climb, climb
279
dnl  Branch to L(done) when 4 >= n.
cmpib,>= 4, n, L(done)
282
dnl 4-way unrolled code.
dnl  The register names are redefined here for four limbs per iteration.
dnl  Several names share a register, so their live ranges must not overlap:
dnl  p032a1/p000a/r000 are all %r1, p032a2/p064a/r064 all %r19,
dnl  p096b1/p064b/r128 all %r20, p096b2/p128b/r192 all %r21,
dnl  p160c1/p128c %r22, p160c2/p192c %r29, p224d1/p192d %r31,
dnl  p224d2/p256d %r3, and m032..m224 coincide with ma064..ma256
dnl  (%r4..%r7).  m288 is %r8, ma000 is %r9, s000..s192 are %r10..%r13.
286
define(`p032a1',`%r1') dnl
287
define(`p032a2',`%r19') dnl
288
define(`p096b1',`%r20') dnl
289
define(`p096b2',`%r21') dnl
290
define(`p160c1',`%r22') dnl
291
define(`p160c2',`%r29') dnl
292
define(`p224d1',`%r31') dnl
293
define(`p224d2',`%r3') dnl
295
define(`m032',`%r4') dnl
296
define(`m096',`%r5') dnl
297
define(`m160',`%r6') dnl
298
define(`m224',`%r7') dnl
299
define(`m288',`%r8') dnl
301
define(`p000a',`%r1') dnl
302
define(`p064a',`%r19') dnl
303
define(`p064b',`%r20') dnl
304
define(`p128b',`%r21') dnl
305
define(`p128c',`%r22') dnl
306
define(`p192c',`%r29') dnl
307
define(`p192d',`%r31') dnl
308
define(`p256d',`%r3') dnl
310
define(`s000',`%r10') dnl
311
define(`s064',`%r11') dnl
312
define(`s128',`%r12') dnl
313
define(`s192',`%r13') dnl
315
define(`ma000',`%r9') dnl
316
define(`ma064',`%r4') dnl
317
define(`ma128',`%r5') dnl
318
define(`ma192',`%r6') dnl
319
define(`ma256',`%r7') dnl
321
define(`r000',`%r1') dnl
322
define(`r064',`%r19') dnl
323
define(`r128',`%r20') dnl
324
define(`r192',`%r21') dnl
330
dnl  Save %r10..%r13 in the frame.  They serve as s000..s192 in the unrolled
dnl  code and are reloaded from these same slots before the epilogue.
std %r10, -0xc8(%r30)
331
std %r11, -0xc0(%r30)
332
std %r12, -0xb8(%r30)
333
std %r13, -0xb0(%r30)
dnl  n is shifted right by 2, giving the number of 4-limb groups.
335
ifdef(`HAVE_ABI_2_0w',
336
` extrd,u n, 61, 62, n C right shift 2
337
',` extrd,u n, 61, 30, n C right shift 2, zero extend
345
dnl  Feed-in for the unrolled code: all sixteen 32x32->64 partial products
dnl  of %fr8 with %fr4..%fr7.  The mid products in %fr22..%fr27 are spilled
dnl  to the stack before those registers are reused for the low and high
dnl  products of %fr5..%fr7.
xmpyu %fr8R, %fr4L, %fr22
346
xmpyu %fr8L, %fr4R, %fr23
347
xmpyu %fr8R, %fr5L, %fr24
348
xmpyu %fr8L, %fr5R, %fr25
349
xmpyu %fr8R, %fr6L, %fr26
350
xmpyu %fr8L, %fr6R, %fr27
351
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
352
xmpyu %fr8R, %fr7L, %fr28
353
xmpyu %fr8L, %fr7R, %fr29
354
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
355
xmpyu %fr8R, %fr4R, %fr30
356
xmpyu %fr8L, %fr4L, %fr31
357
fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
358
xmpyu %fr8R, %fr5R, %fr22
359
xmpyu %fr8L, %fr5L, %fr23
360
fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
361
xmpyu %fr8R, %fr6R, %fr24
362
xmpyu %fr8L, %fr6L, %fr25
363
fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
364
xmpyu %fr8R, %fr7R, %fr26
365
fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
366
dnl  Decrement the group counter; a nonzero result continues at
dnl  L(8_or_more).  The xmpyu after the branch is in its delay slot.
addib,<> -1, n, L(8_or_more)
367
xmpyu %fr8L, %fr7L, %fr27
368
dnl  Fall-through path: spill the remaining products, then reload the eight
dnl  mid products into integer registers.
fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
369
fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
370
fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
371
fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
372
fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
373
fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
374
fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
375
fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
376
fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
377
fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
378
ldd -0x78(%r30), p032a1
379
ldd -0x70(%r30), p032a2
380
ldd -0x38(%r30), p096b1
381
ldd -0x30(%r30), p096b2
382
ldd -0x58(%r30), p160c1
383
ldd -0x50(%r30), p160c2
384
ldd -0x18(%r30), p224d1
385
ldd -0x10(%r30), p224d2
390
dnl  Spill the two outstanding mid products (%fr28, %fr29) and the eight
dnl  low/high products (%fr30, %fr31, %fr22..%fr27) to their stack slots.
fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
391
fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
393
fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
394
fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
395
fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
396
fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
397
fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
398
fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
399
fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
400
fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
405
dnl  Multiply another group (%fr4..%fr7 by %fr8) while reloading the mid
dnl  products spilled earlier.  Each ldd reads a stack slot before the fstd
dnl  that refills it.
xmpyu %fr8R, %fr4L, %fr22
406
ldd -0x78(%r30), p032a1
407
xmpyu %fr8L, %fr4R, %fr23
408
xmpyu %fr8R, %fr5L, %fr24
409
ldd -0x70(%r30), p032a2
410
xmpyu %fr8L, %fr5R, %fr25
411
xmpyu %fr8R, %fr6L, %fr26
412
ldd -0x38(%r30), p096b1
413
xmpyu %fr8L, %fr6R, %fr27
414
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
415
xmpyu %fr8R, %fr7L, %fr28
416
ldd -0x30(%r30), p096b2
417
xmpyu %fr8L, %fr7R, %fr29
418
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
419
xmpyu %fr8R, %fr4R, %fr30
420
ldd -0x58(%r30), p160c1
421
xmpyu %fr8L, %fr4L, %fr31
422
fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
423
xmpyu %fr8R, %fr5R, %fr22
424
ldd -0x50(%r30), p160c2
425
xmpyu %fr8L, %fr5L, %fr23
426
fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
427
xmpyu %fr8R, %fr6R, %fr24
428
ldd -0x18(%r30), p224d1
429
xmpyu %fr8L, %fr6L, %fr25
430
fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
431
xmpyu %fr8R, %fr7R, %fr26
432
ldd -0x10(%r30), p224d2
433
fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
434
dnl  Decrement the group counter and leave for L(end2) when it reaches
dnl  zero.  The last xmpyu is in the branch delay slot.
addib,= -1, n, L(end2)
435
xmpyu %fr8L, %fr7L, %fr27
437
dnl  Integer half of the unrolled loop.  The four pairs of mid products are
dnl  summed with a chained carry (m032..m224, final carry in m288) and
dnl  realigned by 32 bits into ma000..ma256.  The low and high products are
dnl  then added pairwise together with climb into s000..s192, followed by
dnl  the realigned mid sums and r000..r192; the carry out of each chain
dnl  ends up in climb.  Each ldd of a low or high product precedes the fstd
dnl  that refills the same stack slot with a newly computed product.
add p032a1, p032a2, m032
438
ldd -0x80(%r30), p000a
439
add,dc p096b1, p096b2, m096
440
fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
442
add,dc p160c1, p160c2, m160
443
ldd -0x68(%r30), p064a
444
add,dc p224d1, p224d2, m224
445
fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
447
add,dc %r0, %r0, m288
448
ldd -0x40(%r30), p064b
450
fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
452
depd,z m032, 31, 32, ma000
453
ldd -0x28(%r30), p128b
454
extrd,u m032, 31, 32, ma064
455
fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
457
depd m096, 31, 32, ma064
458
ldd -0x60(%r30), p128c
459
extrd,u m096, 31, 32, ma128
460
fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
462
depd m160, 31, 32, ma128
463
ldd -0x48(%r30), p192c
464
extrd,u m160, 31, 32, ma192
465
fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
467
depd m224, 31, 32, ma192
468
ldd -0x20(%r30), p192d
469
extrd,u m224, 31, 32, ma256
470
fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
472
depd m288, 31, 32, ma256
473
ldd -0x88(%r30), p256d
474
add climb, p000a, s000
475
fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
477
add,dc p064a, p064b, s064
479
add,dc p128b, p128c, s128
480
fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
482
add,dc p192c, p192d, s192
484
add,dc p256d, %r0, climb
485
fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
488
add ma000, s000, s000 C accum mid 0
490
add,dc ma064, s064, s064 C accum mid 1
492
add,dc ma128, s128, s128 C accum mid 2
494
add,dc ma192, s192, s192 C accum mid 3
497
add,dc ma256, climb, climb
499
add r000, s000, s000 C accum rlimb 0
502
add,dc r064, s064, s064 C accum rlimb 1
503
add,dc r128, s128, s128 C accum rlimb 2
506
add,dc r192, s192, s192 C accum rlimb 3
507
add,dc %r0, climb, climb
510
dnl  Multiply half of the unrolled loop: sixteen partial products of %fr8
dnl  with %fr4..%fr7, interleaved with reloads of the mid products spilled
dnl  earlier and with spills of the new mid products in %fr22..%fr27.
xmpyu %fr8R, %fr4L, %fr22
511
ldd -0x78(%r30), p032a1
512
xmpyu %fr8L, %fr4R, %fr23
515
xmpyu %fr8R, %fr5L, %fr24
516
ldd -0x70(%r30), p032a2
517
xmpyu %fr8L, %fr5R, %fr25
520
xmpyu %fr8R, %fr6L, %fr26
521
ldd -0x38(%r30), p096b1
522
xmpyu %fr8L, %fr6R, %fr27
523
fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
525
xmpyu %fr8R, %fr7L, %fr28
526
ldd -0x30(%r30), p096b2
527
xmpyu %fr8L, %fr7R, %fr29
528
fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
530
xmpyu %fr8R, %fr4R, %fr30
531
ldd -0x58(%r30), p160c1
532
xmpyu %fr8L, %fr4L, %fr31
533
fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
535
xmpyu %fr8R, %fr5R, %fr22
536
ldd -0x50(%r30), p160c2
537
xmpyu %fr8L, %fr5L, %fr23
538
fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
540
xmpyu %fr8R, %fr6R, %fr24
541
ldd -0x18(%r30), p224d1
542
xmpyu %fr8L, %fr6L, %fr25
543
fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
545
xmpyu %fr8R, %fr7R, %fr26
546
ldd -0x10(%r30), p224d2
547
fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
548
xmpyu %fr8L, %fr7L, %fr27
550
dnl  Decrement the group counter and loop back to L(oop) while it is
dnl  nonzero.
addib,<> -1, n, L(oop)
554
dnl  Same integer sequence as the loop body, outside the loop: sum and
dnl  realign the mid products, add the low/high products with climb into
dnl  s000..s192, accumulate the realigned mid sums and r000..r192.  The
dnl  interleaved fstd instructions spill the products still held in
dnl  %fr22..%fr31.
add p032a1, p032a2, m032
555
ldd -0x80(%r30), p000a
556
add,dc p096b1, p096b2, m096
557
fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
558
add,dc p160c1, p160c2, m160
559
ldd -0x68(%r30), p064a
560
add,dc p224d1, p224d2, m224
561
fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
562
add,dc %r0, %r0, m288
563
ldd -0x40(%r30), p064b
564
fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
565
depd,z m032, 31, 32, ma000
566
ldd -0x28(%r30), p128b
567
extrd,u m032, 31, 32, ma064
568
fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
569
depd m096, 31, 32, ma064
570
ldd -0x60(%r30), p128c
571
extrd,u m096, 31, 32, ma128
572
fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
573
depd m160, 31, 32, ma128
574
ldd -0x48(%r30), p192c
575
extrd,u m160, 31, 32, ma192
576
fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
577
depd m224, 31, 32, ma192
578
ldd -0x20(%r30), p192d
579
extrd,u m224, 31, 32, ma256
580
fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
581
depd m288, 31, 32, ma256
582
ldd -0x88(%r30), p256d
583
add climb, p000a, s000
584
fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
585
add,dc p064a, p064b, s064
587
add,dc p128b, p128c, s128
588
fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
589
add,dc p192c, p192d, s192
591
add,dc p256d, %r0, climb
592
fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
594
add ma000, s000, s000 C accum mid 0
596
add,dc ma064, s064, s064 C accum mid 1
597
add,dc ma128, s128, s128 C accum mid 2
598
add,dc ma192, s192, s192 C accum mid 3
599
add,dc ma256, climb, climb
600
add r000, s000, s000 C accum rlimb 0
601
add,dc r064, s064, s064 C accum rlimb 1
602
add,dc r128, s128, s128 C accum rlimb 2
604
add,dc r192, s192, s192 C accum rlimb 3
605
add,dc %r0, climb, climb
607
dnl  Final wind-down.  Reload the eight mid products from the stack, then
dnl  run the integer sequence one last time with no further multiplies or
dnl  spills: sum and realign the mid products, add the low/high products
dnl  with climb into s000..s192, accumulate the realigned mid sums and
dnl  r000..r192, and leave the carry out in climb.
ldd -0x78(%r30), p032a1
609
ldd -0x70(%r30), p032a2
611
ldd -0x38(%r30), p096b1
612
ldd -0x30(%r30), p096b2
613
ldd -0x58(%r30), p160c1
614
ldd -0x50(%r30), p160c2
615
ldd -0x18(%r30), p224d1
616
ldd -0x10(%r30), p224d2
620
add p032a1, p032a2, m032
621
ldd -0x80(%r30), p000a
622
add,dc p096b1, p096b2, m096
623
add,dc p160c1, p160c2, m160
624
ldd -0x68(%r30), p064a
625
add,dc p224d1, p224d2, m224
626
add,dc %r0, %r0, m288
627
ldd -0x40(%r30), p064b
628
depd,z m032, 31, 32, ma000
629
ldd -0x28(%r30), p128b
630
extrd,u m032, 31, 32, ma064
631
depd m096, 31, 32, ma064
632
ldd -0x60(%r30), p128c
633
extrd,u m096, 31, 32, ma128
634
depd m160, 31, 32, ma128
635
ldd -0x48(%r30), p192c
636
extrd,u m160, 31, 32, ma192
637
depd m224, 31, 32, ma192
638
ldd -0x20(%r30), p192d
639
extrd,u m224, 31, 32, ma256
640
depd m288, 31, 32, ma256
641
ldd -0x88(%r30), p256d
642
add climb, p000a, s000
643
add,dc p064a, p064b, s064
645
add,dc p128b, p128c, s128
646
add,dc p192c, p192d, s192
648
add,dc p256d, %r0, climb
650
add ma000, s000, s000 C accum mid 0
652
add,dc ma064, s064, s064 C accum mid 1
653
add,dc ma128, s128, s128 C accum mid 2
654
add,dc ma192, s192, s192 C accum mid 3
655
add,dc ma256, climb, climb
656
add r000, s000, s000 C accum rlimb 0
657
add,dc r064, s064, s064 C accum rlimb 1
658
add,dc r128, s128, s128 C accum rlimb 2
660
add,dc r192, s192, s192 C accum rlimb 3
661
add,dc %r0, climb, climb
666
dnl  Restore %r10..%r13 from the frame slots they were saved to.  In the
dnl  branch taken when HAVE_ABI_2_0w is not defined, climb is split into
dnl  %r29 (its low 32 bits) and %r28 (its high 32 bits).  ldd,mb first
dnl  subtracts 0x100 from %r30 and then reloads %r3 from 0(%r30), undoing
dnl  the std,ma at function entry.
ldd -0xb0(%r30), %r13
667
ldd -0xb8(%r30), %r12
668
ldd -0xc0(%r30), %r11
669
ldd -0xc8(%r30), %r10
675
ifdef(`HAVE_ABI_2_0w',
677
',` extrd,u climb, 63, 32, %r29
678
extrd,u climb, 31, 32, %r28
683
ldd,mb -0x100(%r30), %r3
684
EPILOGUE(mpn_addmul_1)