2
* Automatically Tuned Linear Algebra Software v3.10.1
3
* Copyright (C) 2010 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the ATLAS group or the names of its contributers may
14
* not be used to endorse or promote products derived from this
15
* software without specific written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
21
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
* POSSIBILITY OF SUCH DAMAGE.
30
#include "atlas_misc.h"
31
#include "atlas_lvl2.h"
32
#include "atlas_lvl3.h"
33
#include "atlas_reflevel2.h"
37
/*
 * Single-row kernel: the visible code updates only Y[0].
 * The declarations and the code that computes y0 are not visible in this
 * view; presumably y0 accumulates A(0,j)*X[j] over the N columns -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : tested first; its body is not visible in this view
 *   beta == ATL_rone  : Y[0] += y0*alpha
 *   otherwise         : Y[0] = y0*alpha + beta*Y[0]
 */
static void ATL_mvn_Meq1
38
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
39
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
52
if (beta == ATL_rzero)
56
else if (beta == ATL_rone)
58
Y[0*incY] += y0*alpha;
62
Y[0*incY] = y0*alpha + beta*Y[0*incY];
65
/*
 * Two-row kernel: the visible code updates Y[0] and Y[1] (stride incY).
 * x0, y0 and y1 are declared with the register keyword.  The code that
 * computes y0/y1 is not visible in this view; presumably they accumulate
 * A(i,j)*X[j] over the N columns -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : tested first; its body is not visible in this view
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq2
66
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
67
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
70
register TYPE x0, y0, y1;
82
if (beta == ATL_rzero)
87
else if (beta == ATL_rone)
89
Y[0*incY] += y0*alpha;
90
Y[1*incY] += y1*alpha;
94
Y[0*incY] = y0*alpha + beta*Y[0*incY];
95
Y[1*incY] = y1*alpha + beta*Y[1*incY];
98
/*
 * Three-row kernel: writes exactly Y[0..2] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y2 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq3
99
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
100
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
103
register TYPE x0, y0, y1, y2;
108
for (j=0; j < N; j++)
117
if (beta == ATL_rzero)
119
Y[0*incY] = y0*alpha;
120
Y[1*incY] = y1*alpha;
121
Y[2*incY] = y2*alpha;
123
else if (beta == ATL_rone)
125
Y[0*incY] += y0*alpha;
126
Y[1*incY] += y1*alpha;
127
Y[2*incY] += y2*alpha;
131
Y[0*incY] = y0*alpha + beta*Y[0*incY];
132
Y[1*incY] = y1*alpha + beta*Y[1*incY];
133
Y[2*incY] = y2*alpha + beta*Y[2*incY];
136
/*
 * Four-row kernel: writes exactly Y[0..3] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y3 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq4
137
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
138
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
141
register TYPE x0, y0, y1, y2, y3;
147
for (j=0; j < N; j++)
157
if (beta == ATL_rzero)
159
Y[0*incY] = y0*alpha;
160
Y[1*incY] = y1*alpha;
161
Y[2*incY] = y2*alpha;
162
Y[3*incY] = y3*alpha;
164
else if (beta == ATL_rone)
166
Y[0*incY] += y0*alpha;
167
Y[1*incY] += y1*alpha;
168
Y[2*incY] += y2*alpha;
169
Y[3*incY] += y3*alpha;
173
Y[0*incY] = y0*alpha + beta*Y[0*incY];
174
Y[1*incY] = y1*alpha + beta*Y[1*incY];
175
Y[2*incY] = y2*alpha + beta*Y[2*incY];
176
Y[3*incY] = y3*alpha + beta*Y[3*incY];
179
/*
 * Five-row kernel: writes exactly Y[0..4] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y4 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq5
180
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
181
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
184
register TYPE x0, y0, y1, y2, y3, y4;
191
for (j=0; j < N; j++)
202
if (beta == ATL_rzero)
204
Y[0*incY] = y0*alpha;
205
Y[1*incY] = y1*alpha;
206
Y[2*incY] = y2*alpha;
207
Y[3*incY] = y3*alpha;
208
Y[4*incY] = y4*alpha;
210
else if (beta == ATL_rone)
212
Y[0*incY] += y0*alpha;
213
Y[1*incY] += y1*alpha;
214
Y[2*incY] += y2*alpha;
215
Y[3*incY] += y3*alpha;
216
Y[4*incY] += y4*alpha;
220
Y[0*incY] = y0*alpha + beta*Y[0*incY];
221
Y[1*incY] = y1*alpha + beta*Y[1*incY];
222
Y[2*incY] = y2*alpha + beta*Y[2*incY];
223
Y[3*incY] = y3*alpha + beta*Y[3*incY];
224
Y[4*incY] = y4*alpha + beta*Y[4*incY];
227
/*
 * Six-row kernel: writes exactly Y[0..5] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y5 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq6
228
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
229
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
232
register TYPE x0, y0, y1, y2, y3, y4, y5;
240
for (j=0; j < N; j++)
252
if (beta == ATL_rzero)
254
Y[0*incY] = y0*alpha;
255
Y[1*incY] = y1*alpha;
256
Y[2*incY] = y2*alpha;
257
Y[3*incY] = y3*alpha;
258
Y[4*incY] = y4*alpha;
259
Y[5*incY] = y5*alpha;
261
else if (beta == ATL_rone)
263
Y[0*incY] += y0*alpha;
264
Y[1*incY] += y1*alpha;
265
Y[2*incY] += y2*alpha;
266
Y[3*incY] += y3*alpha;
267
Y[4*incY] += y4*alpha;
268
Y[5*incY] += y5*alpha;
272
Y[0*incY] = y0*alpha + beta*Y[0*incY];
273
Y[1*incY] = y1*alpha + beta*Y[1*incY];
274
Y[2*incY] = y2*alpha + beta*Y[2*incY];
275
Y[3*incY] = y3*alpha + beta*Y[3*incY];
276
Y[4*incY] = y4*alpha + beta*Y[4*incY];
277
Y[5*incY] = y5*alpha + beta*Y[5*incY];
280
/*
 * Seven-row kernel: writes exactly Y[0..6] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y6 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq7
281
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
282
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
285
register TYPE x0, y0, y1, y2, y3, y4, y5, y6;
294
for (j=0; j < N; j++)
307
if (beta == ATL_rzero)
309
Y[0*incY] = y0*alpha;
310
Y[1*incY] = y1*alpha;
311
Y[2*incY] = y2*alpha;
312
Y[3*incY] = y3*alpha;
313
Y[4*incY] = y4*alpha;
314
Y[5*incY] = y5*alpha;
315
Y[6*incY] = y6*alpha;
317
else if (beta == ATL_rone)
319
Y[0*incY] += y0*alpha;
320
Y[1*incY] += y1*alpha;
321
Y[2*incY] += y2*alpha;
322
Y[3*incY] += y3*alpha;
323
Y[4*incY] += y4*alpha;
324
Y[5*incY] += y5*alpha;
325
Y[6*incY] += y6*alpha;
329
Y[0*incY] = y0*alpha + beta*Y[0*incY];
330
Y[1*incY] = y1*alpha + beta*Y[1*incY];
331
Y[2*incY] = y2*alpha + beta*Y[2*incY];
332
Y[3*incY] = y3*alpha + beta*Y[3*incY];
333
Y[4*incY] = y4*alpha + beta*Y[4*incY];
334
Y[5*incY] = y5*alpha + beta*Y[5*incY];
335
Y[6*incY] = y6*alpha + beta*Y[6*incY];
338
/*
 * Eight-row kernel: writes exactly Y[0..7] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y7 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq8
339
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
340
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
343
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7;
353
for (j=0; j < N; j++)
367
if (beta == ATL_rzero)
369
Y[0*incY] = y0*alpha;
370
Y[1*incY] = y1*alpha;
371
Y[2*incY] = y2*alpha;
372
Y[3*incY] = y3*alpha;
373
Y[4*incY] = y4*alpha;
374
Y[5*incY] = y5*alpha;
375
Y[6*incY] = y6*alpha;
376
Y[7*incY] = y7*alpha;
378
else if (beta == ATL_rone)
380
Y[0*incY] += y0*alpha;
381
Y[1*incY] += y1*alpha;
382
Y[2*incY] += y2*alpha;
383
Y[3*incY] += y3*alpha;
384
Y[4*incY] += y4*alpha;
385
Y[5*incY] += y5*alpha;
386
Y[6*incY] += y6*alpha;
387
Y[7*incY] += y7*alpha;
391
Y[0*incY] = y0*alpha + beta*Y[0*incY];
392
Y[1*incY] = y1*alpha + beta*Y[1*incY];
393
Y[2*incY] = y2*alpha + beta*Y[2*incY];
394
Y[3*incY] = y3*alpha + beta*Y[3*incY];
395
Y[4*incY] = y4*alpha + beta*Y[4*incY];
396
Y[5*incY] = y5*alpha + beta*Y[5*incY];
397
Y[6*incY] = y6*alpha + beta*Y[6*incY];
398
Y[7*incY] = y7*alpha + beta*Y[7*incY];
401
/*
 * Nine-row kernel: writes exactly Y[0..8] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y8 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq9
402
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
403
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
406
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8;
417
for (j=0; j < N; j++)
432
if (beta == ATL_rzero)
434
Y[0*incY] = y0*alpha;
435
Y[1*incY] = y1*alpha;
436
Y[2*incY] = y2*alpha;
437
Y[3*incY] = y3*alpha;
438
Y[4*incY] = y4*alpha;
439
Y[5*incY] = y5*alpha;
440
Y[6*incY] = y6*alpha;
441
Y[7*incY] = y7*alpha;
442
Y[8*incY] = y8*alpha;
444
else if (beta == ATL_rone)
446
Y[0*incY] += y0*alpha;
447
Y[1*incY] += y1*alpha;
448
Y[2*incY] += y2*alpha;
449
Y[3*incY] += y3*alpha;
450
Y[4*incY] += y4*alpha;
451
Y[5*incY] += y5*alpha;
452
Y[6*incY] += y6*alpha;
453
Y[7*incY] += y7*alpha;
454
Y[8*incY] += y8*alpha;
458
Y[0*incY] = y0*alpha + beta*Y[0*incY];
459
Y[1*incY] = y1*alpha + beta*Y[1*incY];
460
Y[2*incY] = y2*alpha + beta*Y[2*incY];
461
Y[3*incY] = y3*alpha + beta*Y[3*incY];
462
Y[4*incY] = y4*alpha + beta*Y[4*incY];
463
Y[5*incY] = y5*alpha + beta*Y[5*incY];
464
Y[6*incY] = y6*alpha + beta*Y[6*incY];
465
Y[7*incY] = y7*alpha + beta*Y[7*incY];
466
Y[8*incY] = y8*alpha + beta*Y[8*incY];
469
/*
 * Ten-row kernel: writes exactly Y[0..9] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y9 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq10
470
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
471
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
474
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
486
for (j=0; j < N; j++)
502
if (beta == ATL_rzero)
504
Y[0*incY] = y0*alpha;
505
Y[1*incY] = y1*alpha;
506
Y[2*incY] = y2*alpha;
507
Y[3*incY] = y3*alpha;
508
Y[4*incY] = y4*alpha;
509
Y[5*incY] = y5*alpha;
510
Y[6*incY] = y6*alpha;
511
Y[7*incY] = y7*alpha;
512
Y[8*incY] = y8*alpha;
513
Y[9*incY] = y9*alpha;
515
else if (beta == ATL_rone)
517
Y[0*incY] += y0*alpha;
518
Y[1*incY] += y1*alpha;
519
Y[2*incY] += y2*alpha;
520
Y[3*incY] += y3*alpha;
521
Y[4*incY] += y4*alpha;
522
Y[5*incY] += y5*alpha;
523
Y[6*incY] += y6*alpha;
524
Y[7*incY] += y7*alpha;
525
Y[8*incY] += y8*alpha;
526
Y[9*incY] += y9*alpha;
530
Y[0*incY] = y0*alpha + beta*Y[0*incY];
531
Y[1*incY] = y1*alpha + beta*Y[1*incY];
532
Y[2*incY] = y2*alpha + beta*Y[2*incY];
533
Y[3*incY] = y3*alpha + beta*Y[3*incY];
534
Y[4*incY] = y4*alpha + beta*Y[4*incY];
535
Y[5*incY] = y5*alpha + beta*Y[5*incY];
536
Y[6*incY] = y6*alpha + beta*Y[6*incY];
537
Y[7*incY] = y7*alpha + beta*Y[7*incY];
538
Y[8*incY] = y8*alpha + beta*Y[8*incY];
539
Y[9*incY] = y9*alpha + beta*Y[9*incY];
542
/*
 * Eleven-row kernel: writes exactly Y[0..10] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y10 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq11
543
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
544
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
547
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10;
560
for (j=0; j < N; j++)
577
if (beta == ATL_rzero)
579
Y[0*incY] = y0*alpha;
580
Y[1*incY] = y1*alpha;
581
Y[2*incY] = y2*alpha;
582
Y[3*incY] = y3*alpha;
583
Y[4*incY] = y4*alpha;
584
Y[5*incY] = y5*alpha;
585
Y[6*incY] = y6*alpha;
586
Y[7*incY] = y7*alpha;
587
Y[8*incY] = y8*alpha;
588
Y[9*incY] = y9*alpha;
589
Y[10*incY] = y10*alpha;
591
else if (beta == ATL_rone)
593
Y[0*incY] += y0*alpha;
594
Y[1*incY] += y1*alpha;
595
Y[2*incY] += y2*alpha;
596
Y[3*incY] += y3*alpha;
597
Y[4*incY] += y4*alpha;
598
Y[5*incY] += y5*alpha;
599
Y[6*incY] += y6*alpha;
600
Y[7*incY] += y7*alpha;
601
Y[8*incY] += y8*alpha;
602
Y[9*incY] += y9*alpha;
603
Y[10*incY] += y10*alpha;
607
Y[0*incY] = y0*alpha + beta*Y[0*incY];
608
Y[1*incY] = y1*alpha + beta*Y[1*incY];
609
Y[2*incY] = y2*alpha + beta*Y[2*incY];
610
Y[3*incY] = y3*alpha + beta*Y[3*incY];
611
Y[4*incY] = y4*alpha + beta*Y[4*incY];
612
Y[5*incY] = y5*alpha + beta*Y[5*incY];
613
Y[6*incY] = y6*alpha + beta*Y[6*incY];
614
Y[7*incY] = y7*alpha + beta*Y[7*incY];
615
Y[8*incY] = y8*alpha + beta*Y[8*incY];
616
Y[9*incY] = y9*alpha + beta*Y[9*incY];
617
Y[10*incY] = y10*alpha + beta*Y[10*incY];
620
/*
 * Twelve-row kernel: writes exactly Y[0..11] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y11 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq12
621
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
622
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
625
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11;
639
for (j=0; j < N; j++)
657
if (beta == ATL_rzero)
659
Y[0*incY] = y0*alpha;
660
Y[1*incY] = y1*alpha;
661
Y[2*incY] = y2*alpha;
662
Y[3*incY] = y3*alpha;
663
Y[4*incY] = y4*alpha;
664
Y[5*incY] = y5*alpha;
665
Y[6*incY] = y6*alpha;
666
Y[7*incY] = y7*alpha;
667
Y[8*incY] = y8*alpha;
668
Y[9*incY] = y9*alpha;
669
Y[10*incY] = y10*alpha;
670
Y[11*incY] = y11*alpha;
672
else if (beta == ATL_rone)
674
Y[0*incY] += y0*alpha;
675
Y[1*incY] += y1*alpha;
676
Y[2*incY] += y2*alpha;
677
Y[3*incY] += y3*alpha;
678
Y[4*incY] += y4*alpha;
679
Y[5*incY] += y5*alpha;
680
Y[6*incY] += y6*alpha;
681
Y[7*incY] += y7*alpha;
682
Y[8*incY] += y8*alpha;
683
Y[9*incY] += y9*alpha;
684
Y[10*incY] += y10*alpha;
685
Y[11*incY] += y11*alpha;
689
Y[0*incY] = y0*alpha + beta*Y[0*incY];
690
Y[1*incY] = y1*alpha + beta*Y[1*incY];
691
Y[2*incY] = y2*alpha + beta*Y[2*incY];
692
Y[3*incY] = y3*alpha + beta*Y[3*incY];
693
Y[4*incY] = y4*alpha + beta*Y[4*incY];
694
Y[5*incY] = y5*alpha + beta*Y[5*incY];
695
Y[6*incY] = y6*alpha + beta*Y[6*incY];
696
Y[7*incY] = y7*alpha + beta*Y[7*incY];
697
Y[8*incY] = y8*alpha + beta*Y[8*incY];
698
Y[9*incY] = y9*alpha + beta*Y[9*incY];
699
Y[10*incY] = y10*alpha + beta*Y[10*incY];
700
Y[11*incY] = y11*alpha + beta*Y[11*incY];
703
/*
 * Thirteen-row kernel: writes exactly Y[0..12] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y12 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq13
704
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
705
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
708
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12;
723
for (j=0; j < N; j++)
742
if (beta == ATL_rzero)
744
Y[0*incY] = y0*alpha;
745
Y[1*incY] = y1*alpha;
746
Y[2*incY] = y2*alpha;
747
Y[3*incY] = y3*alpha;
748
Y[4*incY] = y4*alpha;
749
Y[5*incY] = y5*alpha;
750
Y[6*incY] = y6*alpha;
751
Y[7*incY] = y7*alpha;
752
Y[8*incY] = y8*alpha;
753
Y[9*incY] = y9*alpha;
754
Y[10*incY] = y10*alpha;
755
Y[11*incY] = y11*alpha;
756
Y[12*incY] = y12*alpha;
758
else if (beta == ATL_rone)
760
Y[0*incY] += y0*alpha;
761
Y[1*incY] += y1*alpha;
762
Y[2*incY] += y2*alpha;
763
Y[3*incY] += y3*alpha;
764
Y[4*incY] += y4*alpha;
765
Y[5*incY] += y5*alpha;
766
Y[6*incY] += y6*alpha;
767
Y[7*incY] += y7*alpha;
768
Y[8*incY] += y8*alpha;
769
Y[9*incY] += y9*alpha;
770
Y[10*incY] += y10*alpha;
771
Y[11*incY] += y11*alpha;
772
Y[12*incY] += y12*alpha;
776
Y[0*incY] = y0*alpha + beta*Y[0*incY];
777
Y[1*incY] = y1*alpha + beta*Y[1*incY];
778
Y[2*incY] = y2*alpha + beta*Y[2*incY];
779
Y[3*incY] = y3*alpha + beta*Y[3*incY];
780
Y[4*incY] = y4*alpha + beta*Y[4*incY];
781
Y[5*incY] = y5*alpha + beta*Y[5*incY];
782
Y[6*incY] = y6*alpha + beta*Y[6*incY];
783
Y[7*incY] = y7*alpha + beta*Y[7*incY];
784
Y[8*incY] = y8*alpha + beta*Y[8*incY];
785
Y[9*incY] = y9*alpha + beta*Y[9*incY];
786
Y[10*incY] = y10*alpha + beta*Y[10*incY];
787
Y[11*incY] = y11*alpha + beta*Y[11*incY];
788
Y[12*incY] = y12*alpha + beta*Y[12*incY];
791
/*
 * Fourteen-row kernel: writes exactly Y[0..13] (stride incY).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y13 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 * NOTE(review): y13 is used in all three store branches, but the only
 * declaration visible here stops at y12.  Presumably y13 is declared on a
 * following line that is not shown in this view -- confirm before building.
 */
static void ATL_mvn_Meq14
792
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
793
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
796
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12;
813
for (j=0; j < N; j++)
833
if (beta == ATL_rzero)
835
Y[0*incY] = y0*alpha;
836
Y[1*incY] = y1*alpha;
837
Y[2*incY] = y2*alpha;
838
Y[3*incY] = y3*alpha;
839
Y[4*incY] = y4*alpha;
840
Y[5*incY] = y5*alpha;
841
Y[6*incY] = y6*alpha;
842
Y[7*incY] = y7*alpha;
843
Y[8*incY] = y8*alpha;
844
Y[9*incY] = y9*alpha;
845
Y[10*incY] = y10*alpha;
846
Y[11*incY] = y11*alpha;
847
Y[12*incY] = y12*alpha;
848
Y[13*incY] = y13*alpha;
850
else if (beta == ATL_rone)
852
Y[0*incY] += y0*alpha;
853
Y[1*incY] += y1*alpha;
854
Y[2*incY] += y2*alpha;
855
Y[3*incY] += y3*alpha;
856
Y[4*incY] += y4*alpha;
857
Y[5*incY] += y5*alpha;
858
Y[6*incY] += y6*alpha;
859
Y[7*incY] += y7*alpha;
860
Y[8*incY] += y8*alpha;
861
Y[9*incY] += y9*alpha;
862
Y[10*incY] += y10*alpha;
863
Y[11*incY] += y11*alpha;
864
Y[12*incY] += y12*alpha;
865
Y[13*incY] += y13*alpha;
869
Y[0*incY] = y0*alpha + beta*Y[0*incY];
870
Y[1*incY] = y1*alpha + beta*Y[1*incY];
871
Y[2*incY] = y2*alpha + beta*Y[2*incY];
872
Y[3*incY] = y3*alpha + beta*Y[3*incY];
873
Y[4*incY] = y4*alpha + beta*Y[4*incY];
874
Y[5*incY] = y5*alpha + beta*Y[5*incY];
875
Y[6*incY] = y6*alpha + beta*Y[6*incY];
876
Y[7*incY] = y7*alpha + beta*Y[7*incY];
877
Y[8*incY] = y8*alpha + beta*Y[8*incY];
878
Y[9*incY] = y9*alpha + beta*Y[9*incY];
879
Y[10*incY] = y10*alpha + beta*Y[10*incY];
880
Y[11*incY] = y11*alpha + beta*Y[11*incY];
881
Y[12*incY] = y12*alpha + beta*Y[12*incY];
882
Y[13*incY] = y13*alpha + beta*Y[13*incY];
885
/*
 * Fifteen-row kernel: writes exactly Y[0..14] (stride incY).
 * The accumulators are declared across two register declarations
 * (x0, y0..y12, then y13, y14).
 * The loop runs j over the N columns; its body is not visible in this view,
 * but presumably accumulates A(i,j)*X[j] into y0..y14 via x0 -- confirm.
 * The final store is selected by beta:
 *   beta == ATL_rzero : Y[i] = yi*alpha            (Y is not read)
 *   beta == ATL_rone  : Y[i] += yi*alpha
 *   otherwise         : Y[i] = yi*alpha + beta*Y[i]
 */
static void ATL_mvn_Meq15
886
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
887
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
890
register TYPE x0, y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12;
891
register TYPE y13, y14;
908
for (j=0; j < N; j++)
929
if (beta == ATL_rzero)
931
Y[0*incY] = y0*alpha;
932
Y[1*incY] = y1*alpha;
933
Y[2*incY] = y2*alpha;
934
Y[3*incY] = y3*alpha;
935
Y[4*incY] = y4*alpha;
936
Y[5*incY] = y5*alpha;
937
Y[6*incY] = y6*alpha;
938
Y[7*incY] = y7*alpha;
939
Y[8*incY] = y8*alpha;
940
Y[9*incY] = y9*alpha;
941
Y[10*incY] = y10*alpha;
942
Y[11*incY] = y11*alpha;
943
Y[12*incY] = y12*alpha;
944
Y[13*incY] = y13*alpha;
945
Y[14*incY] = y14*alpha;
947
else if (beta == ATL_rone)
949
Y[0*incY] += y0*alpha;
950
Y[1*incY] += y1*alpha;
951
Y[2*incY] += y2*alpha;
952
Y[3*incY] += y3*alpha;
953
Y[4*incY] += y4*alpha;
954
Y[5*incY] += y5*alpha;
955
Y[6*incY] += y6*alpha;
956
Y[7*incY] += y7*alpha;
957
Y[8*incY] += y8*alpha;
958
Y[9*incY] += y9*alpha;
959
Y[10*incY] += y10*alpha;
960
Y[11*incY] += y11*alpha;
961
Y[12*incY] += y12*alpha;
962
Y[13*incY] += y13*alpha;
963
Y[14*incY] += y14*alpha;
967
Y[0*incY] = y0*alpha + beta*Y[0*incY];
968
Y[1*incY] = y1*alpha + beta*Y[1*incY];
969
Y[2*incY] = y2*alpha + beta*Y[2*incY];
970
Y[3*incY] = y3*alpha + beta*Y[3*incY];
971
Y[4*incY] = y4*alpha + beta*Y[4*incY];
972
Y[5*incY] = y5*alpha + beta*Y[5*incY];
973
Y[6*incY] = y6*alpha + beta*Y[6*incY];
974
Y[7*incY] = y7*alpha + beta*Y[7*incY];
975
Y[8*incY] = y8*alpha + beta*Y[8*incY];
976
Y[9*incY] = y9*alpha + beta*Y[9*incY];
977
Y[10*incY] = y10*alpha + beta*Y[10*incY];
978
Y[11*incY] = y11*alpha + beta*Y[11*incY];
979
Y[12*incY] = y12*alpha + beta*Y[12*incY];
980
Y[13*incY] = y13*alpha + beta*Y[13*incY];
981
Y[14*incY] = y14*alpha + beta*Y[14*incY];
984
/*
 * Pointer-to-kernel type.  Its parameter list is identical to that of the
 * static ATL_mvn_Meq* kernels, and it is the element type of the mvfunc
 * dispatch table.
 */
typedef void (*ATL_MVFUNC)
985
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
986
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY);
989
/*
 * Entry point for the small-M no-transpose matrix-vector update; the
 * operation is the one stated in the body's own comment.
 * What is visible of the logic in this view:
 *   - mvfunc is a 15-entry table of ATL_MVFUNC kernels whose first entry is
 *     ATL_mvn_Meq1; it is invoked as mvfunc[M-1](...).
 *   - A guard tests M < 1, N < 1, or SCALAR_IS_ZERO(alpha) together with
 *     SCALAR_IS_ONE(beta).  The action taken is on a line not shown here;
 *     presumably an early return -- confirm.
 *   - ATL_GAS_x8664 / ATL_GAS_x8632 preprocessor branches relate to the
 *     register-count-based unrolling limit mentioned in the body.
 *   - Other paths call Mjoin(PATL,mvnk_smallN) with the unchanged argument
 *     list, or Mjoin(PATL,refgemv) with AtlasNoTrans.  The conditions that
 *     select between these paths are not visible in this view.
 * NOTE(review): mvfunc[M-1] is only in bounds for 1 <= M <= 15; confirm the
 * conditions on the lines not shown guarantee that range.
 */
void Mjoin(PATL,mvnk_Mlt16)
990
(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda,
991
const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY)
993
* y = alpha*A*x + beta*y
997
const static ATL_MVFUNC mvfunc[15] = {ATL_mvn_Meq1,
1014
if ( M < 1 || N < 1 || (SCALAR_IS_ZERO(alpha) && SCALAR_IS_ONE(beta)) )
1017
* Base max unrolling we use on how many regs we think we have
1019
#ifdef ATL_GAS_x8664
1021
#elif defined(ATL_GAS_x8632)
1027
Mjoin(PATL,mvnk_smallN)(M, N, alpha, A, lda, X, incX, beta, Y, incY);
1030
mvfunc[M-1](M, N, alpha, A, lda, X, incX, beta, Y, incY);
1034
Mjoin(PATL,refgemv)(AtlasNoTrans, M, N, alpha, A, lda, X, incX,
1038
Mjoin(PATL,mvnk_smallN)(M, N, alpha, A, lda, X, incX, beta, Y, incY);