2
* Automatically Tuned Linear Algebra Software v3.8.4
3
* (C) Copyright 1999 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the ATLAS group or the names of its contributers may
14
* not be used to endorse or promote products derived from this
15
* software without specific written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
21
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
* POSSIBILITY OF SUCH DAMAGE.
31
#include "atlas_misc.h"
32
#include "atlas_level2.h"
33
#include "atlas_level1.h"
44
/*
 * gemv0 is the name of the public routine defined at the bottom of this
 * file (it is used there as Mjoin(PATL,gemv0)).  Two alternative spellings
 * are given; they differ only in the leading component: gemvNc vs. gemvN.
 * NOTE(review): the preprocessor conditional that selects between the two
 * definitions is not visible in this listing; presumably the gemvNc form
 * belongs to the conjugate build -- confirm against the full file.
 */
#define gemv0 Mjoin(Mjoin(Mjoin(Mjoin(gemvNc,NM),_x1),BNM),_y1)
46
#define gemv0 Mjoin(Mjoin(Mjoin(Mjoin(gemvN,NM),_x1),BNM),_y1)
49
/*
 * gemvMlt4: file-local helper that walks M entries of Y, one per loop
 * iteration, calling a dot-product routine (dotc_sub or dotu_sub) with
 * arguments (N, A, lda, X, 1, Y) and then advancing both A and Y by two
 * TYPE elements.
 *
 * Parameters as used in the visible code:
 *   M    - loop trip count (the loop counts i down from M to 0).
 *   N    - length passed to the dot-product routine.
 *   A    - first vector passed to the dot product, accessed with stride lda.
 *   lda  - stride passed for A.
 *   X    - second vector passed to the dot product, accessed with stride 1.
 *   beta - read as *beta and beta[1] (named rbeta / ibeta below).
 *   Y    - destination passed to the dot-product routine.
 *
 * NOTE(review): this listing is missing lines (braces, the declaration of
 * i, and most #if/#else/#endif directives), so which of the paired calls
 * and declarations is compiled for which BETA / conjugate configuration
 * cannot be determined from here -- confirm against the full file.
 */
static void gemvMlt4(const int M, const int N, const TYPE *A, const int lda,
50
const TYPE *X, const SCALAR beta, TYPE *Y)
53
/* Temporaries for the scaled Y value; declared only for these BETA cases. */
#if defined(BETAX) || defined(BETA1) || defined(BETAXI0)
54
register TYPE yr1, yi1;
57
/* Only the first element of beta is read here. */
const register TYPE rbeta = *beta;
60
register TYPE yr0, yi0;
61
/* Both elements of beta are read: presumably real and imaginary parts. */
const register TYPE rbeta = *beta, ibeta = beta[1];
64
/* A and Y step by 2 TYPE elements per iteration (presumably one complex value). */
for(i=M; i; i--, A += 2, Y += 2)
68
/* dotc_sub / dotu_sub: presumably conjugated / unconjugated dot product -- confirm in atlas_level1.h. */
Mjoin(PATL,dotc_sub)(N, A, lda, X, 1, Y);
70
Mjoin(PATL,dotu_sub)(N, A, lda, X, 1, Y);
76
/* Complex product (yr0 + i*yi0) * (rbeta + i*ibeta): real part, then imaginary part. */
yr1 = yr0 * rbeta - yi0 * ibeta;
77
yi1 = yr0 * ibeta + yi0 * rbeta;
87
/* Second dotc/dotu pair; its enclosing conditional branch is not visible in this listing. */
Mjoin(PATL,dotc_sub)(N, A, lda, X, 1, Y);
89
Mjoin(PATL,dotu_sub)(N, A, lda, X, 1, Y);
97
/*
 * gemvNlt2: file-local helper whose visible body consists of a single
 * Level-1 call chosen at compile time.  Every alternative passes
 * (M, X, A, 1, ..., Y, 1): X sits in the second argument position, A is
 * passed with stride 1, and Y is the output with stride 1.  The axpby
 * variants additionally pass beta.
 *
 * N and lda are not referenced in any of the visible lines.
 *
 * NOTE(review): presumably this handles the single-column case, where the
 * product reduces to scaling column A by the scalar at X (optionally
 * conjugating A) and combining with Y according to beta -- confirm against
 * the full file.  Most #if/#else/#endif lines and the braces are missing
 * from this listing.
 */
static void gemvNlt2(const int M, const int N, const TYPE *A, const int lda,
98
const TYPE *X, const SCALAR beta, TYPE *Y)
104
/* First pair (moveConj / cpsc): no beta argument; its guarding conditional is not visible. */
Mjoin(PATL,moveConj)(M, X, A, 1, Y, 1);
106
Mjoin(PATL,cpsc)(M, X, A, 1, Y, 1);
108
/* General-beta cases: the axpby variants receive beta explicitly. */
#elif defined(BETAX) || defined(BETAXI0)
110
Mjoin(PATL,axpbyConj)(M, X, A, 1, beta, Y, 1);
112
Mjoin(PATL,axpby)(M, X, A, 1, beta, Y, 1);
116
/* Last pair (axpyConj / axpy): no beta argument; presumably the beta == 1 case -- confirm. */
Mjoin(PATL,axpyConj)(M, X, A, 1, Y, 1);
118
Mjoin(PATL,axpy)(M, X, A, 1, Y, 1);
124
/*
 * Public kernel (name assembled from the gemv0 macro).  Visible behavior:
 *   1. One axpby-style call, axpbyConj or axpby, with
 *      (M, X, A, 1, beta, Y, 1); this is the only visible use of beta.
 *   2. A loop of N-1 iterations calling axpycj(M, X, A, 1, Y, 1), advancing
 *      A by lda2 (= 2*lda) and X by 2 TYPE elements after each iteration.
 *      axpycj is defined as axpyConj or axpy to match the call in step 1.
 *
 * alpha, incX, incY and the local N2 are not referenced in the visible
 * lines.
 *
 * NOTE(review): lines are missing from this listing (braces, the
 * declaration of j, the #if/#else/#endif around the two axpycj
 * definitions, and whatever sits between the axpby call and the loop).
 * Presumably A and X are advanced past the first column before the loop
 * starts -- confirm against the full file.
 */
void Mjoin(PATL,gemv0)
125
(const int M, const int N, const SCALAR alpha,
126
const TYPE *A, const int lda, const TYPE *X, const int incX,
127
const SCALAR beta, TYPE *Y, const int incY)
130
/* Doubled lda and N: presumably strides/lengths in TYPE units for interleaved complex data. */
const int lda2 = lda<<1, N2 = N<<1;
133
/* First alternative: both the loop kernel and the initial call use the Conj variants. */
#define axpycj Mjoin(PATL,axpyConj)
134
Mjoin(PATL,axpbyConj)(M, X, A, 1, beta, Y, 1);
136
/* Second alternative: both use the plain (non-Conj) variants. */
#define axpycj Mjoin(PATL,axpy)
137
Mjoin(PATL,axpby)(M, X, A, 1, beta, Y, 1);
143
/*
 * Runs N-1 times for N >= 1.
 * NOTE(review): for N < 1, j starts negative and the test "j" never becomes
 * zero by decrementing -- callers presumably guarantee N >= 1; verify.
 */
for (j=N-1; j; j--, A += lda2, X += 2) axpycj(M, X, A, 1, Y, 1);