151
158
void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
153
160
//! Perform the operation: \c y \c <- \c y+alpha*x.
154
void AXPY(const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
161
template <typename alpha_type, typename x_type>
162
void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
156
164
//! Sum the absolute values of the entries of \c x.
157
165
typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
159
167
//! Form the dot product of the vectors \c x and \c y.
160
ScalarType DOT(const OrdinalType n, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy) const;
168
template <typename x_type, typename y_type>
169
ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
162
171
//! Compute the 2-norm of the std::vector \c x.
163
172
typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
173
182
//! Performs the matrix-std::vector operation: \c y \c <- \c alpha*A*x+beta*y or \c y \c <- \c alpha*A'*x+beta*y where \c A is a general \c m by \c n matrix.
174
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A,
175
const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const;
183
template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
184
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
185
const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
177
187
//! Performs the matrix-std::vector operation: \c x \c <- \c A*x or \c x \c <- \c A'*x where \c A is a unit/non-unit \c n by \c n upper/lower triangular matrix.
178
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A,
188
template <typename A_type>
189
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
179
190
const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
181
//! Performs the rank 1 operation: \c A \c <- \c alpha*x*y'+A.
182
void GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx,
183
const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
192
//! \brief Performs the rank 1 operation: \c A \c <- \c alpha*x*y'+A.
193
template <typename alpha_type, typename x_type, typename y_type>
194
void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
195
const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
186
198
//! @name Level 3 BLAS Routines.
189
201
//! Performs the matrix-matrix operation: \c C \c <- \c alpha*op(A)*op(B)+beta*C where \c op(A) is either \c A or \c A', \c op(B) is either \c B or \c B', and C is an \c m by \c k matrix.
190
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
202
template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
203
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
192
205
//! Performs the matrix-matrix operation: \c C \c <- \c alpha*A*B+beta*C or \c C \c <- \c alpha*B*A+beta*C where \c A is an \c m by \c m or \c n by \c n symmetric matrix and \c B is a general matrix.
193
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
206
template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
207
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
195
209
//! Performs the matrix-matrix operation: \c C \c <- \c alpha*op(A)*B+beta*C or \c C \c <- \c alpha*B*op(A)+beta*C where \c op(A) is an unit/non-unit, upper/lower triangular matrix and \c B is a general matrix.
210
template <typename alpha_type, typename A_type>
196
211
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
197
const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
212
const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
199
214
//! Solves the matrix equations: \c op(A)*X=alpha*B or \c X*op(A)=alpha*B where \c X and \c B are \c m by \c n matrices, \c A is a unit/non-unit, upper/lower triangular matrix and \c op(A) is \c A or \c A'. The matrix \c X is overwritten on \c B.
215
template <typename alpha_type, typename A_type>
200
216
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
201
const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
217
const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
221
template<typename OrdinalType, typename ScalarType>
222
class BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
225
typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
228
//! @name Constructor/Destructor.
231
//! Default constructor.
234
//! Copy constructor.
235
inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
238
inline virtual ~BLAS(void) {}
207
244
//------------------------------------------------------------------------------------------
209
246
template<typename OrdinalType, typename ScalarType>
210
void BLAS<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
247
void DefaultBLASImpl<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
213
ScalarType roe = ScalarTraits<ScalarType>::zero();
249
ScalarType roe, alpha;
214
250
ScalarType zero = ScalarTraits<ScalarType>::zero();
215
251
ScalarType one = ScalarTraits<ScalarType>::one();
252
MagnitudeType scale, norm;
253
MagnitudeType m_one = ScalarTraits<MagnitudeType>::one();
254
MagnitudeType m_zero = ScalarTraits<MagnitudeType>::zero();
217
257
if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
218
258
scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
219
if ( scale == zero ) // There is nothing to do.
259
if ( scale == m_zero ) // There is nothing to do.
223
263
*da = zero; *db = zero;
224
} else { // Compute the Givens rotation.
225
r = scale*ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) );
226
if ( roe < zero ) { r *= -one; }
229
*db = ScalarTraits<ScalarType>::one();
265
else if ( *da == zero ) // Still nothing to do.
269
*da = *db; *db = zero;
272
{ // Compute the Givens rotation.
273
norm = scale*ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) ) );
274
alpha = roe / ScalarTraits<ScalarType>::magnitude(roe);
275
*c = ScalarTraits<ScalarType>::magnitude(*da) / norm;
276
*s = alpha * ScalarTraits<ScalarType>::conjugate(*db) / norm;
230
278
if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
231
279
if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
232
*c != ScalarTraits<ScalarType>::zero() ) { *db = one / *c; }
280
*c != ScalarTraits<MagnitudeType>::zero() ) { *db = one / *c; }
237
285
template<typename OrdinalType, typename ScalarType>
238
void BLAS<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
286
void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
240
288
// ToDo: Implement this.
243
291
template<typename OrdinalType, typename ScalarType>
244
void BLAS<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
292
void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
246
294
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
247
295
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
395
446
//------------------------------------------------------------------------------------------
396
447
// LEVEL 2 BLAS ROUTINES
397
448
//------------------------------------------------------------------------------------------
399
449
template<typename OrdinalType, typename ScalarType>
400
void BLAS<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const
450
template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
451
void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
402
453
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
403
454
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
404
ScalarType zero = ScalarTraits<ScalarType>::zero();
405
ScalarType one = ScalarTraits<ScalarType>::one();
455
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
456
beta_type beta_zero = ScalarTraits<beta_type>::zero();
457
x_type x_zero = ScalarTraits<x_type>::zero();
458
ScalarType y_zero = ScalarTraits<ScalarType>::zero();
459
beta_type beta_one = ScalarTraits<beta_type>::one();
406
460
bool BadArgument = false;
462
TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
463
"Teuchos::BLAS::GEMV() does not currently support CONJ_TRANS for complex data types.");
408
465
// Quick return if there is nothing to do!
409
if( m == izero || n == izero || ( alpha == zero && beta == one ) ){ return; }
466
if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
411
468
// Otherwise, we need to check the argument list.
412
469
if( m < izero ) {
531
588
template<typename OrdinalType, typename ScalarType>
532
void BLAS<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
589
template <typename A_type>
590
void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
534
592
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
535
593
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
536
594
ScalarType zero = ScalarTraits<ScalarType>::zero();
537
595
bool BadArgument = false;
597
TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
598
"Teuchos::BLAS::TRMV() does not currently support CONJ_TRANS for complex data types.");
539
600
// Quick return if there is nothing to do!
540
601
if( n == izero ){ return; }
687
748
template<typename OrdinalType, typename ScalarType>
688
void BLAS<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
749
template <typename alpha_type, typename x_type, typename y_type>
750
void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
690
752
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
691
753
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
692
ScalarType zero = ScalarTraits<ScalarType>::zero();
754
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
755
y_type y_zero = ScalarTraits<y_type>::zero();
693
756
bool BadArgument = false;
758
TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
759
"Teuchos::BLAS::GER() does not currently support complex data types.");
695
761
// Quick return if there is nothing to do!
696
if( m == izero || n == izero || alpha == zero ){ return; }
762
if( m == izero || n == izero || alpha == alpha_zero ){ return; }
698
764
// Otherwise, we need to check the argument list.
699
765
if( m < izero ) {
758
824
//------------------------------------------------------------------------------------------
760
826
template<typename OrdinalType, typename ScalarType>
761
void BLAS<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
827
template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
828
void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
831
typedef TypeNameTraits<OrdinalType> OTNT;
832
typedef TypeNameTraits<ScalarType> STNT;
763
834
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
764
ScalarType zero = ScalarTraits<ScalarType>::zero();
765
ScalarType one = ScalarTraits<ScalarType>::one();
835
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
836
beta_type beta_zero = ScalarTraits<beta_type>::zero();
837
B_type B_zero = ScalarTraits<B_type>::zero();
838
ScalarType C_zero = ScalarTraits<ScalarType>::zero();
839
beta_type beta_one = ScalarTraits<beta_type>::one();
766
840
OrdinalType i, j, p;
767
841
OrdinalType NRowA = m, NRowB = k;
769
843
bool BadArgument = false;
846
Teuchos::ScalarTraits<ScalarType>::isComplex
847
&& (transa == CONJ_TRANS || transb == CONJ_TRANS),
849
"Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::GEMM()"
850
" does not currently support CONJ_TRANS for complex data types."
771
853
// Change dimensions of matrix if either matrix is transposed
772
854
if( !(ETranspChar[transa]=='N') ) {
909
991
template<typename OrdinalType, typename ScalarType>
910
void BLAS<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
992
template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
993
void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
912
995
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
913
996
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
914
ScalarType zero = ScalarTraits<ScalarType>::zero();
915
ScalarType one = ScalarTraits<ScalarType>::one();
997
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
998
beta_type beta_zero = ScalarTraits<beta_type>::zero();
999
ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1000
beta_type beta_one = ScalarTraits<beta_type>::one();
916
1001
OrdinalType i, j, k, NRowA = m;
917
1002
ScalarType temp1, temp2;
918
1003
bool BadArgument = false;
919
1004
bool Upper = (EUploChar[uplo] == 'U');
920
1005
if (ESideChar[side] == 'R') { NRowA = n; }
1007
TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
1008
"Teuchos::BLAS::SYMM() does not currently support complex data types.");
922
1010
// Quick return.
923
if ( (m==izero) || (n==izero) || ( (alpha==zero)&&(beta==one) ) ) { return; }
1011
if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
925
1013
std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
926
1014
BadArgument = true; }
1036
1124
template<typename OrdinalType, typename ScalarType>
1037
void BLAS<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1125
template <typename alpha_type, typename A_type>
1126
void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1039
1128
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1040
1129
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1041
ScalarType zero = ScalarTraits<ScalarType>::zero();
1130
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1131
A_type A_zero = ScalarTraits<A_type>::zero();
1132
ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1042
1133
ScalarType one = ScalarTraits<ScalarType>::one();
1043
1134
OrdinalType i, j, k, NRowA = m;
1044
1135
ScalarType temp;
1235
1329
template<typename OrdinalType, typename ScalarType>
1236
void BLAS<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1330
template <typename alpha_type, typename A_type>
1331
void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1238
1333
OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1239
1334
OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1240
ScalarType zero = ScalarTraits<ScalarType>::zero();
1241
ScalarType one = ScalarTraits<ScalarType>::one();
1335
alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1336
A_type A_zero = ScalarTraits<A_type>::zero();
1337
ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1338
alpha_type alpha_one = ScalarTraits<alpha_type>::one();
1339
ScalarType B_one = ScalarTraits<ScalarType>::one();
1242
1340
ScalarType temp;
1243
1341
OrdinalType NRowA = m;
1244
1342
bool BadArgument = false;
1245
1343
bool NoUnit = (EDiagChar[diag]=='N');
1345
TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error,
1346
"Teuchos::BLAS::TRSM() does not currently support CONJ_TRANS for complex data types.");
1247
1348
if (!(ESideChar[side] == 'L')) { NRowA = n; }
1249
1350
// Quick return.