124
124
if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY );
128
if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) )
130
vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );
131
ATL_assert( vx ); x = ATL_AlignPtr( vx );
132
Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 );
135
else { x = (TYPE *)(X); alphaY = ALPHA; }
137
if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) )
139
vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) );
140
ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy );
143
else { y00 = y = (TYPE *)(Y); beta0 = BETA; }
145
ATL_GetPartSPMV( A, N, &mb, &nb );
147
mb1 = N - ( ( N - 1 ) / mb ) * mb; incXY1 = (nb SHIFT);
149
if( UPLO == AtlasUpper )
151
if( SCALAR_IS_ZERO( beta0 ) )
152
gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b0_y1 );
153
else if( SCALAR_IS_ONE ( beta0 ) )
154
gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );
156
gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_bX_y1 );
157
gpmv1 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );
158
gpmvN = Mjoin( PATL, gpmvUN_a1_x1_b1_y1 );
160
lda = 1; lda0 = lda; A0 = (TYPE *)(A); MUpnext( mb, A0, lda0 );
161
incXY = (mb SHIFT); x0 = x + incXY; y0 = y + incXY;
163
for( n = N - mb; n > 0; n -= mb, x0 += incXY, x += incXY,
164
y0 += incXY, y += incXY )
166
Mjoin( PATL, hpmvU )( mb, A, lda, x, beta0, y );
168
for( j = 0, lda1 = lda0, A1 = A0 - (mb SHIFT), x1 = x0, y1 = y0; j < n;
169
j += nb, x1 += incXY1, y1 += incXY1 )
171
jb = n - j; jb = Mmin( jb, nb );
172
gpmv0( jb, mb, one, A1, lda1, x, 1, beta0, y1, 1 );
173
gpmvN( mb, jb, one, A1, lda1, x1, 1, one, y, 1 );
174
MUpnext( jb, A1, lda1 ); A1 -= (jb SHIFT);
176
beta0 = one; gpmv0 = gpmv1; lda = lda0; A = A0; MUpnext( mb, A0, lda0 );
178
Mjoin( PATL, hpmvU )( mb1, A, lda, x, beta0, y );
182
if( SCALAR_IS_ZERO( beta0 ) )
183
gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b0_y1 );
184
else if( SCALAR_IS_ONE ( beta0 ) )
185
gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 );
187
gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_bX_y1 );
188
gpmv1 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 );
189
gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 );
191
lda = N; lda0 = lda; A0 = (TYPE *)(A); MLpnext( N, A, lda );
192
incXY = (mb SHIFT); x0 = x; y0 = y;
194
for( n = N - mb, x += ((N-mb) SHIFT), y += ((N-mb) SHIFT); n > 0;
195
n -= mb, x -= incXY, y -= incXY )
197
MLpprev( mb, A, lda );
198
Mjoin( PATL, hpmvL )( mb, A, lda, x, beta0, y );
200
for( j = 0, lda1 = lda0, A1 = A0 + (n SHIFT), x1 = x0, y1 = y0; j < n;
201
j += nb, x1 += incXY1, y1 += incXY1 )
203
jb = n - j; jb = Mmin( jb, nb );
204
gpmv0( jb, mb, one, A1, lda1, x, 1, beta0, y1, 1 );
205
gpmvN( mb, jb, one, A1, lda1, x1, 1, one, y, 1 );
206
MLpnext( jb, A1, lda1 ); A1 -= (jb SHIFT);
208
beta0 = one; gpmv0 = gpmv1;
210
Mjoin( PATL, hpmvL )( mb1, A0, lda0, x0, beta0, y0 );
215
{ Mjoin( PATL, axpby )( N, alphaY, y00, 1, BETA, Y, INCY ); free( vy ); }
127
Mjoin(PATL,refhpmv)(UPLO, N, ALPHA, A, X, INCX, BETA, Y, INCY);
217
130
* End of Mjoin( PATL, hpmv )