1
#include "atlas_misc.h"
2
#include "atlas_threads.h"
3
#include "atlas_tlvl3.h"
4
/*
 * Reports whether a TRSM work structure has been filled in.
 * vp is cast to ATL_TTRSM_t*; the result is nonzero when that structure's
 * B pointer is non-NULL, and zero when it is NULL.
 * NOTE(review): the bare integer line below and the missing braces look like
 * extraction residue -- confirm against the pristine source file.
 */
int Mjoin(PATL,StructIsInitTRSM)(void *vp)
6
return(((ATL_TTRSM_t*)vp)->B != NULL);
10
/*
 * Worker routine: runs one TRSM sub-problem described by an ATL_TTRSM_t.
 * lp->opstruct is treated as an array of ATL_TTRSM_t and indexed by thp->rank
 * to pick the entry to execute.
 * NOTE(review): thp is not declared in the visible lines; presumably it is
 * obtained from vp -- confirm against the pristine source file.
 * NOTE(review): bare integer lines and missing braces look like extraction
 * residue.
 */
void Mjoin(PATL,DoWorkTRSM)(ATL_LAUNCHSTRUCT_t *lp, void *vp)
13
/* Select the work descriptor at offset thp->rank in the opstruct array. */
ATL_TTRSM_t *tp=((ATL_TTRSM_t*)lp->opstruct) + thp->rank;
14
/* Forward every field of the descriptor to the trsm routine; alpha is */
/* stored as a pointer in the descriptor and is passed through SVVAL.  */
Mjoin(PATL,trsm)(tp->side, tp->uplo, tp->TA, tp->diag, tp->M, tp->N,
15
SVVAL((TYPE*)tp->alpha), tp->A, tp->lda, tp->B, tp->ldb);
18
/*
 * Default for ATL_TTRSM_XOVER, applied only when it is not already defined.
 * ttrsm divides its block count by this value when choosing how many
 * workers to use.
 * NOTE(review): the matching #endif is not in the visible lines -- confirm
 * it exists in the pristine source file.
 */
#ifndef ATL_TTRSM_XOVER
19
#define ATL_TTRSM_XOVER 4 /* want 4 total blocks before adding proc */
21
/*
 * Threaded TRSM driver.  Takes the same arguments that it forwards to the
 * trsm routine (side, uplo, TA, diag, M, N, alpha, A, lda, B, ldb), fills an
 * array of per-thread ATL_TTRSM_t descriptors, and launches the workers with
 * ATL_goparallel.
 * NOTE(review): this listing is incomplete -- braces, loop headers and the
 * declarations of nb and tblks are not visible, and the bare integer lines
 * look like extraction residue.  The comments below describe only the
 * statements that are present; confirm details against the pristine file.
 */
void Mjoin(PATL,ttrsm)(const enum ATLAS_SIDE side, const enum ATLAS_UPLO uplo,
22
const enum ATLAS_TRANS TA, const enum ATLAS_DIAG diag,
23
ATL_CINT M, ATL_CINT N, const SCALAR alpha,
24
const TYPE *A, ATL_CINT lda, TYPE *B, ATL_CINT ldb)
26
/* One work descriptor slot for each of ATL_NTHREADS possible workers. */
ATL_TTRSM_t trsms[ATL_NTHREADS];
28
ATL_INT n, nblks, minblks;
30
int nr, p, i, j, extrablks;
35
/* alpha == 0: gezero is called on B with dimensions M x N and stride ldb. */
/* NOTE(review): whatever follows this call (e.g. a return) is not visible. */
if (SCALAR_IS_ZERO(alpha))
37
Mjoin(PATL,gezero)(M, N, B, ldb);
41
* Distribute RHS over the processors
43
/* nb is taken from GetNB() when it is still zero. */
/* NOTE(review): the declaration of nb is not visible -- confirm its storage. */
if (!nb) nb = Mjoin(PATL,GetNB)();
44
if (side == AtlasLeft)
48
/* tblks = (M*N) / (nb*nb), evaluated in double precision. */
tblks = ((double)(M*N)) / ( (double)nb * nb );
49
/* Worker count: tblks over ATL_TTRSM_XOVER, biased upward by XOVER-1, */
/* then capped at ATL_NTHREADS.                                        */
p = (tblks+ATL_TTRSM_XOVER-1)/ATL_TTRSM_XOVER;
50
p = Mmin(p, ATL_NTHREADS);
55
/* Blocks remaining after minblks*p are accounted for; presumably handed */
/* out one apiece to the first extrablks workers -- TODO confirm.        */
extrablks = nblks - minblks*p;
60
else if (i == extrablks)
70
/* SADD alpha is stored in the descriptor; DoWorkTRSM reads it back as a */
/* TYPE pointer via SVVAL.                                               */
trsms[i].alpha = SADD alpha;
75
/* n is scaled by ldb shifted left by the type's shift value; presumably */
/* this turns a column count into an offset into B -- TODO confirm.      */
n *= (ldb << Mjoin(PATL,shift));
79
else /* Side == AtlasRight */
84
/* Same worker-count computation as in the AtlasLeft branch. */
p = (tblks+ATL_TTRSM_XOVER-1)/ATL_TTRSM_XOVER;
85
p = Mmin(p, ATL_NTHREADS);
90
extrablks = nblks - minblks*p;
95
else if (i == extrablks)
105
/* Copy the caller's arguments into descriptor i. */
trsms[i].alpha = SADD alpha;
106
trsms[i].side = side;
107
trsms[i].uplo = uplo;
109
trsms[i].diag = diag;
110
/* Here n is only shifted by the type's shift value (no ldb factor). */
n <<= Mjoin(PATL,shift);
116
/* Direct trsm call with the caller's original arguments. */
/* NOTE(review): the condition guarding this call is not visible; */
/* presumably it is the case where threading is not worthwhile.   */
Mjoin(PATL,trsm)(side, uplo, TA, diag, M, N, alpha, A, lda, B, ldb);
119
/* NOTE(review): the loop body is not visible; StructIsInitTRSM tests */
/* the B field, so presumably B is set to NULL here -- TODO confirm.  */
for (; i < ATL_NTHREADS; i++) /* flag rest of struct as uninitialized */
121
/* Launch p workers, each running DoWorkTRSM over the trsms array. */
ATL_goparallel(p, Mjoin(PATL,DoWorkTRSM), trsms, NULL);