132
132
if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY );
136
if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) )
138
vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );
139
ATL_assert( vx ); x = ATL_AlignPtr( vx );
140
Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 );
143
else { x = (TYPE *)(X); alphaY = ALPHA; }
145
if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) )
147
vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) );
148
ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy );
151
else { y00 = y = (TYPE *)(Y); beta0 = BETA; }
153
if( SCALAR_IS_ZERO( beta0 ) ) gbmv0 = Mjoin( PATL, gbmvC_a1_x1_b0_y1 );
154
else if( SCALAR_IS_ONE ( beta0 ) ) gbmv0 = Mjoin( PATL, gbmvC_a1_x1_b1_y1 );
155
else gbmv0 = Mjoin( PATL, gbmvC_a1_x1_bX_y1 );
156
gbmv1 = Mjoin( PATL, gbmvC_a1_x1_b1_y1 );
157
gbmvN = Mjoin( PATL, gbmvN_a1_x1_b1_y1 );
159
ATL_GetPartSBMV( A, LDA, &mb, &nb );
161
mb1 = N - ( ( N - 1 ) / mb ) * mb;
163
if( UPLO == AtlasUpper )
165
for( n = N - mb, j = 0; n > 0; n -= mb, j += mb )
167
Mjoin( PATL, hbmvU )( mb, K, A+j*lda2, LDA, x+(j SHIFT),
168
beta0, y+(j SHIFT) );
169
na = N - j - mb; na = Mmin( na, K );
170
for( k = 0; k < na; k += nb )
172
kb = na - k; kb = Mmin( kb, nb );
173
ian = j + mb + k; ia = mb - K + k; ia = j + Mmax( ia, 0 );
174
ma = ian - ia - k; kl = ma - 1; kl = Mmax( kl, 0 );
175
ku = K - k - 1 - kl; ku = Mmax( ku, 0 );
176
gbmv0( kb, ma, kl, ku, one, A+ian*lda2, LDA, x+(ia SHIFT), 1,
177
beta0, y+(ian SHIFT), 1 );
178
gbmvN( ma, kb, kl, ku, one, A+ian*lda2, LDA, x+(ian SHIFT), 1,
179
one, y+(ia SHIFT), 1 );
181
if( !( SCALAR_IS_ONE( beta0 ) ) && ( n > na ) )
182
Mjoin( PATL, scal )( n-na, beta0, y+((j+mb+na) SHIFT), 1 );
183
beta0 = one; gbmv0 = gbmv1;
185
Mjoin( PATL, hbmvU )( mb1, K, A+j*lda2, LDA, x+(j SHIFT), beta0,
190
for( jan = N - mb; jan > 0; jan -= mb )
192
Mjoin( PATL, hbmvL )( mb, K, A+jan*lda2, LDA, x+(jan SHIFT),
193
beta0, y+(jan SHIFT) );
194
ja = jan - K; na = jan - ( ja = Mmax( ja, 0 ) );
195
if( !( SCALAR_IS_ONE( beta0 ) ) && ( ja > 0 ) )
196
Mjoin( PATL, scal )( ja, beta0, y, 1 );
198
for( k = 0; k < na; k += nb )
200
kb = na - k; kb = Mmin( kb, nb );
201
ja = jan - K; ja = k + Mmax( ja, 0 ); ku = jan - ja; kl = K - ku;
202
kl = Mmax( kl, 0 ); ma = k + kl + kb; ma = Mmin( ma, mb );
203
gbmv0( kb, ma, kl, ku, one, A+ja*lda2, LDA, x+(jan SHIFT), 1,
204
beta0, y+(ja SHIFT), 1 );
205
gbmvN( ma, kb, kl, ku, one, A+ja*lda2, LDA, x+(ja SHIFT), 1,
206
one, y+(jan SHIFT), 1 );
208
beta0 = one; gbmv0 = gbmv1;
210
Mjoin( PATL, hbmvL )( mb1, K, A, LDA, x, beta0, y );
215
{ Mjoin( PATL, axpby )( N, alphaY, y00, 1, BETA, Y, INCY ); free( vy ); }
135
Mjoin(PATL,refhbmv)(UPLO, N, K, ALPHA, A, LDA, X, INCX, BETA, Y, INCY);
217
138
* End of Mjoin( PATL, hbmv )