17
@ROUT l3cmp.tex repsys.tex repf77.tex repat2.tex
18
@BEGINPROC typecharts scl chname chname2 label
20
\begin{minipage}[t]{3.0in}
21
@define lab @(d) Double precision complex@
22
@define lab @(c) Single precision complex@
23
@define lab @(b) Double precision real@
24
@define lab @(a) Single precision real@
26
\includegraphics[scale=@(scl),trim=0 20 0 0]{charts/@(pre)@(chname)}
34
\begin{minipage}[t]{3.0in}
35
@define lab @(h) Double precision complex@
36
@define lab @(g) Single precision complex@
37
@define lab @(f) Double precision real@
38
@define lab @(e) Single precision real@
40
\includegraphics[scale=@(scl),trim=0 20 0 0]{charts/@(pre)@(chname2)}
54
\documentclass[11pt]{article}
58
%\usepackage{stfloats}
61
\usepackage[caption=false,font=footnotesize]{subfig}
63
\newcommand{\Wskip}[1]{ }
64
\newcommand{\Wceil}[1]{\lceil #1 \rceil}
65
\newcommand{\Wfloor}[1]{\lfloor #1 \rfloor}
67
\newenvironment{routdef}[1]
71
\setlength{\parsep}{0in}
72
\setlength{\itemsep}{.01in}
73
\setlength{\partopsep}{0in}
74
\setlength{\topsep}{0.1in}
75
\setlength{\labelsep}{0in}
76
\setlength{\labelwidth}{#1in}
77
\setlength{\leftmargin}{#1in}
80
\newcommand{\rditem}[2]{\item[#1\hfill(~]#2 )}
99
\title{ATLAS timing report}
103
R. Clint Whaley \thanks { {\tt rwhaley@users.sourceforge.net},
104
{\tt www.cs.utsa.edu/$\sim$whaley}}
109
\fancypagestyle{plain}{}
112
This is an auto-generated timing report skeleton, made to be filled out
113
with commentary after an ATLAS install.
117
\rhead{\footnotesize CONTENTS}
127
\rhead{{\footnotesize\bf L3Time Report}~~~~~~\thepage}
129
\section{Symmetric Routines (SYMM, SYRK, SYR2K, HERK, HER2K)}
131
@define caption @ATLAS vs @(sys) SYMM, Side=Left, Uplo=Lower, serial (left) parallel (right).@
132
@CALLPROC typecharts .45 symm_LLN_mlr_@(sf).eps symm_LLN_mlr_@(sf)_pt.eps fig-symm
134
@define caption @Serial SYMM variants as \% of GEMM, ATLAS (left) @(sys) (right)@
135
@CALLPROC typecharts .45 pcmm_symm_atl.eps pcmm_symm_@(lib).eps fig-symmV
137
@define caption @Parallel SYMM variants as \% of GEMM, ATLAS (left) @(sys) (right)@
138
@CALLPROC typecharts .45 pcmm_symm_atl_pt.eps pcmm_symm_@(lib)_pt.eps fig-symmV_pt
141
@define caption @Serial SYRK variants as \% of GEMM, ATLAS (left) @(sys) (right)@
142
@CALLPROC typecharts .45 pcmm_syrk_atl.eps pcmm_syrk_@(lib).eps fig-syrkV
143
@define caption @Parallel SYRK variants as \% of GEMM, ATLAS (left) @(sys) (right)@
144
@CALLPROC typecharts .45 pcmm_syrk_atl_pt.eps pcmm_syrk_@(lib)_pt.eps fig-syrkV_pt
149
@CALLPROC typecharts .45 pcmm_syr2k_atl.eps pcmm_syr2k_@(lib).eps fig-syr2kV
150
@define caption @Parallel SYR2K variants as \% of GEMM, ATLAS (left) @(sys) (right)@
151
@CALLPROC typecharts .45 pcmm_syr2k_atl_pt.eps pcmm_syr2k_@(lib)_pt.eps fig-syr2kV_pt
154
\section{Triangular Routines (TRMM, TRSM)}
156
% TRMM_N variants as a percentage of GEMM: serial chart pair, then parallel.
% typecharts arguments are: scale, left chart, right chart, figure label.
@define caption @Serial TRMM\_N variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trmm_N_atl.eps pcmm_trmm_N_@(lib).eps fig-trmmNV
@define caption @Parallel TRMM\_N variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trmm_N_atl_pt.eps pcmm_trmm_N_@(lib)_pt.eps fig-trmmNV_pt
161
% TRMM_T variants as a percentage of GEMM: serial chart pair, then parallel.
% typecharts arguments are: scale, left chart, right chart, figure label.
% The labels carry "TV" so they stay distinct from the TRMM_N figures.
@define caption @Serial TRMM\_T variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trmm_T_atl.eps pcmm_trmm_T_@(lib).eps fig-trmmTV
@define caption @Parallel TRMM\_T variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trmm_T_atl_pt.eps pcmm_trmm_T_@(lib)_pt.eps fig-trmmTV_pt
166
% TRSM_N variants as a percentage of GEMM: serial chart pair, then parallel.
% typecharts arguments are: scale, left chart, right chart, figure label.
@define caption @Serial TRSM\_N variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trsm_N_atl.eps pcmm_trsm_N_@(lib).eps fig-trsmNV
@define caption @Parallel TRSM\_N variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trsm_N_atl_pt.eps pcmm_trsm_N_@(lib)_pt.eps fig-trsmNV_pt
171
% TRSM_T variants as a percentage of GEMM: serial chart pair, then parallel.
% typecharts arguments are: scale, left chart, right chart, figure label.
% The labels carry "TV" so they stay distinct from the TRSM_N figures.
@define caption @Serial TRSM\_T variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trsm_T_atl.eps pcmm_trsm_T_@(lib).eps fig-trsmTV
@define caption @Parallel TRSM\_T variants as \% of GEMM, ATLAS (left) @(sys) (right)@
@CALLPROC typecharts .45 pcmm_trsm_T_atl_pt.eps pcmm_trsm_T_@(lib)_pt.eps fig-trsmTV_pt
176
@ROUT repsys.tex repf77.tex repat2.tex
177
\rhead{{\footnotesize\bf Time Report}~~~~~~\thepage}
178
\section{BLAS performance}
180
%\subsection{Level 1 BLAS}
181
%Not yet autotimed, not sure worth it anyway.
183
%\subsection{Level 2 BLAS}
187
%Figure~\ref{fig-mmsq} (Figure~\ref{fig-mmsq_tiny}) contrasts ATLAS and
188
%@up@(sys) square GEMM performance for midrange (tiny) problems,
189
%while Figure~\ref{fig-mmrk} (Figure~\ref{fig-mmrk_tiny}) does the same
194
@define caption @ATLAS vs @(sys) square GEMM, serial (left) parallel (right).@
195
@CALLPROC typecharts .5 mmsq_NN_mlr_@(sf).eps mmsq_NN_mlr_@(sf)_pt.eps fig-mmsq
196
@define caption @ATLAS vs @(sys) square GEMM, tiny range serial (left) parallel (right).@
197
@CALLPROC typecharts .5 mmsq_NN_tin_@(sf).eps mmsq_NN_tin_@(sf)_pt.eps fig-mmsq_tiny
198
@define caption @ATLAS vs @(sys) rank-K GEMM, serial (left) parallel (right).@
199
@CALLPROC typecharts .5 mmrk_NN_mlr_@(sf).eps mmrk_NN_mlr_@(sf)_pt.eps fig-mmrk
200
@define caption @ATLAS vs @(sys) rank-K GEMM, tiny range serial (left) parallel (right).@
201
@CALLPROC typecharts .5 mmrk_NN_tin_@(sf).eps mmrk_NN_tin_@(sf)_pt.eps fig-mmrk_tiny
204
\subsection{Other Level 3 BLAS}
205
%\ref{fig-sy,fig-tr,fig-sy_pt,fig-tr_pt}
206
\chead{\bf Symmetric Summary}
208
@define caption @Serial Symmetric Summary, Si=Left, Up=Lower, ATLAS (left) @(sys) (right).@
209
@CALLPROC typecharts .45 pcmm_l3sy_LLN_atl.eps pcmm_l3sy_LLN_@(lib).eps fig-sy
210
@define caption @Parallel Symmetric Summary, Si=Left, Up=Lower, ATLAS (left) @(sys) (right).@
211
@CALLPROC typecharts .45 pcmm_l3sy_LLN_atl_pt.eps pcmm_l3sy_LLN_@(lib)_pt.eps fig-sy_pt
213
\chead{\bf Triangular Summary}
215
@define caption @Serial Triangular Summary, Si=Left, Up=Lower, TA=NoT, ATLAS (left) @(sys) (right).@
216
@CALLPROC typecharts .45 pcmm_l3tr_LLN_atl.eps pcmm_l3tr_LLN_@(lib).eps fig-tr
217
@define caption @Parallel Triangular Summary, Si=Left, Up=Lower, TA=NoT, ATLAS (left) @(sys) (right).@
218
@CALLPROC typecharts .45 pcmm_l3tr_LLN_atl_pt.eps pcmm_l3tr_LLN_@(lib)_pt.eps fig-tr_pt
222
%\ref{fig-symm,fig-symmV,fig-symmV_pt}
225
@define caption @ATLAS vs @(sys) SYMM, Side=Left, Uplo=Lower, serial (left) parallel (right).@
226
@CALLPROC typecharts .5 symm_LLN_mlr_@(sf).eps symm_LLN_mlr_@(sf)_pt.eps fig-symm
232
%\ref{fig-syrk,fig-syrkV,fig-syrkV_pt}
233
@define caption @ATLAS vs @(sys) SYRK, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
234
@CALLPROC typecharts .5 syrk_LLN_mlr_@(sf).eps syrk_LLN_mlr_@(sf)_pt.eps fig-syrk
237
\subsubsection{SYR2K}
240
%\ref{fig-syr2k,fig-syr2kV,fig-syr2kV_pt}
241
@define caption @ATLAS vs @(sys) SYR2K, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
242
@CALLPROC typecharts .5 syr2k_LLN_mlr_@(sf).eps syr2k_LLN_mlr_@(sf)_pt.eps fig-syr2k
243
@define caption @Serial SYR2K variants as \% of GEMM, ATLAS (left) @(sys) (right)@
249
%\ref{fig-trmm,fig-trmmNV,fig-trmmNV_pt,fig-trmmTV,fig-trmmTV_pt}
250
@define caption @ATLAS vs @(sys) TRMM, Side=Left, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
251
@CALLPROC typecharts .5 trmm_LLN_mlr_@(sf).eps trmm_LLN_mlr_@(sf)_pt.eps fig-trmm
252
@define caption @ATLAS vs @(sys) TRMM, tiny problems, Side=Left, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
253
@CALLPROC typecharts .5 trmm_LLN_tin_@(sf).eps trmm_LLN_tin_@(sf)_pt.eps fig-trmm-tiny
257
%\ref{fig-trsm,fig-trsmNV,fig-trsmNV_pt,fig-trsmTV,fig-trsmTV_pt}
260
@define caption @ATLAS vs @(sys) TRSM, Side=Left, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
261
@CALLPROC typecharts .5 trsm_LLN_mlr_@(sf).eps trsm_LLN_mlr_@(sf)_pt.eps fig-trsm
262
@define caption @ATLAS vs @(sys) TRSM, tiny problems, Side=Left, Uplo=Lower, TA=NoTrans serial (left) parallel (right).@
263
@CALLPROC typecharts .5 trsm_LLN_tin_@(sf).eps trsm_LLN_tin_@(sf)_pt.eps fig-trsm-tiny
265
%\subsubsection{HERK}
268
%\subsubsection{HER2K}
271
\section{LAPACK performance}
273
%Figure~\ref{fig-mmla} shows the MFLOPS achieved for serial square GEMM,
274
%and the LU, QR and Cholesky factorizations, while Figure~\ref{fig-mmla_pt}
275
%charts the same data for parallel operations.
276
%Figures~\ref{fig-pcmm} and~\ref{fig-pcmm_pt} show this same data, where
277
\chead{\bf LAPACK Summary}
279
@define caption @Serial Factorization Summary for ATLAS (left) and @(sys) (right) in MFLOP.@
280
@CALLPROC typecharts 0.40 factor_cmb_atl.eps factor_cmb_@(lib).eps fig-mmla
281
@define caption @Parallel Factorization Summary for ATLAS (left) and @(sys) (right) in MFLOP.@
282
@CALLPROC typecharts 0.40 factor_cmb_atl_pt.eps factor_cmb_@(lib)_pt.eps fig-mmla_pt
284
@define caption @Serial Factorization as a percentage of square GEMM speed for ATLAS (left) and @(sys) (right).@
285
@CALLPROC typecharts 0.4 pcmm_factor_cmb_atl.eps pcmm_factor_cmb_@(lib).eps fig-pcmmla
286
@define caption @Parallel Factorization as a percentage of square tGEMM speed for ATLAS (left) and @(sys) (right).@
287
@CALLPROC typecharts 0.4 pcmm_factor_cmb_atl_pt.eps pcmm_factor_cmb_@(lib)_pt.eps fig-pcmmla_pt
290
\subsection{LU in detail}
291
%Figure~\ref{fig-lalu}~(\ref{fig-lalu_tiny}) contrasts ATLAS and
292
%@up@(sys) performance for midrange (tiny) problems.
294
\chead{\bf LU Factorization}
296
@define caption @ATLAS vs @(sys) LU, serial (left) parallel (right).@
297
@CALLPROC typecharts 0.5 getrf_LLN_mlr_@(sf).eps getrf_LLN_mlr_@(sf)_pt.eps fig-lalu
298
@define caption @ATLAS vs @(sys) LU, tiny range serial (left) parallel (right).@
299
@CALLPROC typecharts 0.5 getrf_LLN_tin_@(sf).eps getrf_LLN_tin_@(sf)_pt.eps fig-lalu_tiny
302
\subsection{QR in detail}
303
%Figure~\ref{fig-laqrs}~(\ref{fig-laqrs_pt}) contrasts ATLAS and
304
%@up@(sys) performance for all serial (parallel) QR variants.
305
\chead{\bf QR Factorization}
307
@define caption @Serial QR variant overview for ATLAS (left) and @(sys) (right)@
308
@CALLPROC typecharts .4 qrvar_cmb_atl.eps qrvar_cmb_@(lib).eps fig-laqrs
309
@define caption @Parallel QR variant overview for ATLAS (left) and @(sys) (right)@
310
@CALLPROC typecharts .4 qrvar_cmb_atl_pt.eps qrvar_cmb_@(lib)_pt.eps fig-laqrs_pt
312
%Figure~\ref{fig-laqr}~(\ref{fig-laqr_tiny}) contrasts ATLAS and
313
%@up@(sys) performance for midrange (tiny) QR problems, respectively,
314
%while figures~\ref{fig-lalq} and~\ref{fig-lalq_tiny} do the same for LQ.
316
@define caption @ATLAS vs @(sys) QR, serial (left) parallel (right).@
317
@CALLPROC typecharts 0.50 geqrf_RUN_mlr_@(sf).eps geqrf_RUN_mlr_@(sf)_pt.eps fig-laqr
318
@define caption @ATLAS vs @(sys) QR, tiny range serial (left) parallel (right).@
319
@CALLPROC typecharts 0.50 geqrf_RUN_tin_@(sf).eps geqrf_RUN_tin_@(sf)_pt.eps fig-laqr_tiny
321
@define caption @ATLAS vs @(sys) LQ, serial (left) parallel (right).@
322
@CALLPROC typecharts 0.50 geqrf_LLN_mlr_@(sf).eps geqrf_LLN_mlr_@(sf)_pt.eps fig-lalq
323
@define caption @ATLAS vs @(sys) LQ, tiny range serial (left) parallel (right).@
324
@CALLPROC typecharts 0.50 geqrf_LLN_tin_@(sf).eps geqrf_LLN_tin_@(sf)_pt.eps fig-lalq_tiny
327
\subsection{Cholesky in detail}
328
%Figure~\ref{fig-laut}~(\ref{fig-laut_tiny}) contrasts ATLAS and
329
%@up@(sys) performance for midrange (tiny) problems for
330
%the Upper case, while figure~\ref{fig-lalt}~(\ref{fig-lalt_tiny})
331
%does the same for the Lower.
332
\chead{\bf Cholesky Factorization}
334
@define caption @ATLAS vs @(sys) $U^HU$, serial (left) parallel (right).@
335
@CALLPROC typecharts .5 potrf_LUN_mlr_@(sf).eps potrf_LUN_mlr_@(sf)_pt.eps fig-laut
336
@define caption @ATLAS vs @(sys) $U^HU$, tiny range serial (left) parallel (right).@
337
@CALLPROC typecharts .5 potrf_LUN_tin_@(sf).eps potrf_LUN_tin_@(sf)_pt.eps fig-laut_tiny
339
@define caption @ATLAS vs @(sys) $LL^H$, serial (left) parallel (right).@
340
@CALLPROC typecharts .5 potrf_LLN_mlr_@(sf).eps potrf_LLN_mlr_@(sf)_pt.eps fig-lalt
341
@define caption @ATLAS vs @(sys) $LL^H$, tiny range serial (left) parallel (right).@
342
@CALLPROC typecharts .5 potrf_LLN_tin_@(sf).eps potrf_LLN_tin_@(sf)_pt.eps fig-lalt_tiny
344
@ROUT pclinechart linechart
349
@define tmp @@(flds)@
351
@define flds @@(tmp),@(j)@
381
@ROUT pclinechart ` yaxis.stubs: incremental 10`
383
yautorange datafield=@(flds) lowfix=0
400
@ROUT pclinechart linechart
403
pentagon leftriangle righttriangle circle downtriangle triangle diamond square
406
claret powderblue orange purple black red blue green
408
@whiledef tmp @(labs)
417
linedetails: color=@(clr)
418
pointsymbol: shape=@(shp) style=fill fillcolor=@(clr) radius=0.08
427
location: min+@(xoff) min+0.25
430
@ROUT Make.res_lapack
433
# List of macros that we need
435
pre=d # precision [s,d,c,z]
436
rt=getrf # routine [getrf,geqrf,potrf]
439
pt= # if _pt, then do threaded, if blank do serial
440
lib=atl # library [atl,at2,sys,f77]
441
AT2dir= # path to 2nd ATLAS install to compare against
443
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec :
444
$(MAKE) -f Make.l3blas tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
445
@multidef lpr a s f 2
446
@whiledef lib atl sys f77 at2
447
# The serial (slatime) and threaded (tlatime) timer executables are built in
# $(BINdir); the local x$(pre)latime_<lib>[_pt] names are symlinks to them.
# The executable suffix is formed from the lpr multidef value, used for both
# the "l" and "b" parts of the name.
# NOTE(review): lpr values are presumably paired one-to-one with the lib
# values of the enclosing whiledef -- confirm against the multidef ordering.
$(BINdir)/x$(pre)slatime_@(lpr)l_@(lpr)b :
	cd $(BINdir) ; $(MAKE) x$(pre)slatime_@(lpr)l_@(lpr)b
$(BINdir)/x$(pre)tlatime_@(lpr)l_@(lpr)b :
	cd $(BINdir) ; $(MAKE) x$(pre)tlatime_@(lpr)l_@(lpr)b
x$(pre)latime_@(lib) : $(BINdir)/x$(pre)slatime_@(lpr)l_@(lpr)b
	ln -s $(BINdir)/x$(pre)slatime_@(lpr)l_@(lpr)b $@
x$(pre)latime_@(lib)_pt : $(BINdir)/x$(pre)tlatime_@(lpr)l_@(lpr)b
	ln -s $(BINdir)/x$(pre)tlatime_@(lpr)l_@(lpr)b $@
458
@multidef szargs -N@^10@^100@^10 -N@^200@^2000@^200 -N@^2400@^4000@^400
460
@whiledef sz tin med lrg
461
# Run the timer selected by the make macros pre/lib/pt and capture its raw
# output.  The problem-size arguments and repetition count come from the
# enclosing extract loop; routine, side and uplo come from make macros.
# The prerequisite line must not end in a continuation, otherwise the recipe
# is parsed as additional prerequisites.
raw/$(pre)$(rt)_$(sd)$(up)_@(sz)_$(lib)$(pt).out : x$(pre)latime_$(lib)$(pt)
	./x$(pre)latime_$(lib)$(pt) -F 40 -R 1 $(rt) -S 1 $(sd) \
           -U 1 $(up) @(szargs) -# @(rp) > $@
464
# Convert the raw timer output into a tvec file and pipe it through
# xreducetvec.  The whole pipeline is one logical recipe line, so every
# physical line except the last ends in a continuation.
tvecs/$(pre)$(rt)_$(sd)$(up)_@(sz)_$(lib)$(pt).tvec : xatl2tvec \
           xreducetvec raw/$(pre)$(rt)_$(sd)$(up)_@(sz)_$(lib)$(pt).out
	./xatl2tvec -# @(rp) -H 2 N MFLOP \
           -i raw/$(pre)$(rt)_$(sd)$(up)_@(sz)_$(lib)$(pt).out | \
           ./xreducetvec -o $@ -R 1 N -C 1 MFLOP
473
# Merge medium and large runs into one combined range
475
# The mlr tvec is the med and lrg tvecs merged by xmergetvecs.
# The last prerequisite carries no continuation so that the xmergetvecs
# invocation is parsed as the recipe rather than as more prerequisites.
tvecs/$(pre)$(rt)_$(sd)$(up)_mlr_$(lib)$(pt).tvec : xmergetvecs \
           tvecs/$(pre)$(rt)_$(sd)$(up)_med_$(lib)$(pt).tvec \
           tvecs/$(pre)$(rt)_$(sd)$(up)_lrg_$(lib)$(pt).tvec
	./xmergetvecs -r N -C 2 N MFLOP_avg -o $@ \
           -i1 tvecs/$(pre)$(rt)_$(sd)$(up)_med_$(lib)$(pt).tvec \
           -i2 tvecs/$(pre)$(rt)_$(sd)$(up)_lrg_$(lib)$(pt).tvec
482
# Merge tiny, medium, and large runs into one combined range
484
# The cmb tvec is built in two steps: tin and med are merged first, and that
# result is piped into a second xmergetvecs that adds lrg.
# NOTE(review): the second xmergetvecs is given only -i2; presumably its
# first input is read from the pipe -- confirm against xmergetvecs usage.
# The lrg file read by the recipe is the same file listed as a prerequisite.
tvecs/$(pre)$(rt)_$(sd)$(up)_cmb_$(lib)$(pt).tvec : xmergetvecs \
           tvecs/$(pre)$(rt)_$(sd)$(up)_tin_$(lib)$(pt).tvec \
           tvecs/$(pre)$(rt)_$(sd)$(up)_med_$(lib)$(pt).tvec \
           tvecs/$(pre)$(rt)_$(sd)$(up)_lrg_$(lib)$(pt).tvec
	./xmergetvecs -r N -C 2 N MFLOP_avg \
           -i1 tvecs/$(pre)$(rt)_$(sd)$(up)_tin_$(lib)$(pt).tvec \
           -i2 tvecs/$(pre)$(rt)_$(sd)$(up)_med_$(lib)$(pt).tvec | \
           ./xmergetvecs -r N -C 2 N MFLOP_avg -o $@ \
           -i2 tvecs/$(pre)$(rt)_$(sd)$(up)_lrg_$(lib)$(pt).tvec
493
# Square-GEMM cmb tvec: tin and med are merged first, and that result is
# piped into a second xmergetvecs that adds lrg.
# NOTE(review): this target name also has a rule near the top of
# Make.res_lapack that delegates to Make.l3blas; if both rules end up in the
# same generated makefile, make will use only the later recipe -- confirm
# which one is intended.
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec : xmergetvecs \
           tvecs/$(pre)mmsq_tin_$(lib)$(pt).tvec \
           tvecs/$(pre)mmsq_med_$(lib)$(pt).tvec \
           tvecs/$(pre)mmsq_lrg_$(lib)$(pt).tvec
	./xmergetvecs -r N -C 2 N MFLOP_avg \
           -i1 tvecs/$(pre)mmsq_tin_$(lib)$(pt).tvec \
           -i2 tvecs/$(pre)mmsq_med_$(lib)$(pt).tvec | \
           ./xmergetvecs -r N -C 2 N MFLOP_avg -o $@ \
           -i2 tvecs/$(pre)mmsq_lrg_$(lib)$(pt).tvec
503
# Create master file with all factorizations, in this order:
504
# MM,LU,UU,LL,QR,QL,RQ,LQ
506
tvecs/$(pre)factor_cmb_$(lib)$(pt).tvec : xcattvecs force_build \
507
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
508
$(MAKE) tvecs/$(pre)getrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
509
rt=getrf pre=$(pre) pt="$(pt)" lib=$(lib)
510
$(MAKE) tvecs/$(pre)potrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
511
rt=potrf pre=$(pre) pt="$(pt)" lib=$(lib)
512
$(MAKE) tvecs/$(pre)potrf_LL_cmb_$(lib)$(pt).tvec sd=L up=L \
513
rt=potrf pre=$(pre) pt="$(pt)" lib=$(lib)
514
$(MAKE) tvecs/$(pre)geqrf_RU_cmb_$(lib)$(pt).tvec sd=R up=U \
515
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
516
$(MAKE) tvecs/$(pre)geqrf_RL_cmb_$(lib)$(pt).tvec sd=R up=L \
517
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
518
$(MAKE) tvecs/$(pre)geqrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
519
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
520
$(MAKE) tvecs/$(pre)geqrf_LL_cmb_$(lib)$(pt).tvec sd=L up=L \
521
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
522
cat tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec \
523
tvecs/$(pre)getrf_LU_cmb_$(lib)$(pt).tvec \
524
tvecs/$(pre)potrf_LU_cmb_$(lib)$(pt).tvec \
525
tvecs/$(pre)potrf_LL_cmb_$(lib)$(pt).tvec \
526
tvecs/$(pre)geqrf_RU_cmb_$(lib)$(pt).tvec \
527
tvecs/$(pre)geqrf_RL_cmb_$(lib)$(pt).tvec \
528
tvecs/$(pre)geqrf_LU_cmb_$(lib)$(pt).tvec \
529
tvecs/$(pre)geqrf_LL_cmb_$(lib)$(pt).tvec | \
530
./xcattvecs -# 8 -o $@ -C 2 N MFLOP_avg
532
# Create triangular-factorization variant file: square GEMM, LU, Cholesky Upper, Cholesky Lower
534
tvecs/$(pre)trvar_cmb_$(lib)$(pt).tvec : xcattvecs force_build \
535
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
536
$(MAKE) tvecs/$(pre)getrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
537
rt=getrf pre=$(pre) pt="$(pt)" lib=$(lib)
538
$(MAKE) tvecs/$(pre)potrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
539
rt=potrf pre=$(pre) pt="$(pt)" lib=$(lib)
540
$(MAKE) tvecs/$(pre)potrf_LL_cmb_$(lib)$(pt).tvec sd=L up=L \
541
rt=potrf pre=$(pre) pt="$(pt)" lib=$(lib)
542
cat tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec \
543
tvecs/$(pre)getrf_LU_cmb_$(lib)$(pt).tvec \
544
tvecs/$(pre)potrf_LU_cmb_$(lib)$(pt).tvec \
545
tvecs/$(pre)potrf_LL_cmb_$(lib)$(pt).tvec | \
546
./xcattvecs -# 4 -o $@ -C 2 N MFLOP_avg
549
# Create QR variant combined file
551
tvecs/$(pre)qrvar_cmb_$(lib)$(pt).tvec : xcattvecs force_build \
552
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
553
$(MAKE) tvecs/$(pre)geqrf_RU_cmb_$(lib)$(pt).tvec sd=R up=U \
554
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
555
$(MAKE) tvecs/$(pre)geqrf_RL_cmb_$(lib)$(pt).tvec sd=R up=L \
556
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
557
$(MAKE) tvecs/$(pre)geqrf_LU_cmb_$(lib)$(pt).tvec sd=L up=U \
558
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
559
$(MAKE) tvecs/$(pre)geqrf_LL_cmb_$(lib)$(pt).tvec sd=L up=L \
560
rt=geqrf pre=$(pre) pt="$(pt)" lib=$(lib)
561
cat tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec \
562
tvecs/$(pre)geqrf_RU_cmb_$(lib)$(pt).tvec \
563
tvecs/$(pre)geqrf_RL_cmb_$(lib)$(pt).tvec \
564
tvecs/$(pre)geqrf_LU_cmb_$(lib)$(pt).tvec \
565
tvecs/$(pre)geqrf_LL_cmb_$(lib)$(pt).tvec | \
566
./xcattvecs -# 5 -o $@ -C 2 N MFLOP_avg
568
# Cat combined Uplo&Trans variants together with sqmm
570
tvecs/$(pre)$(rt)_UpTr_cmb_$(lib)$(pt).tvec : xcattvecs force_build \
571
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
572
$(MAKE) tvecs/$(pre)$(rt)_LUN_cmb_$(lib)$(pt).tvec up=U ta=N \
573
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
574
$(MAKE) tvecs/$(pre)$(rt)_LUT_cmb_$(lib)$(pt).tvec up=U ta=T \
575
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
576
$(MAKE) tvecs/$(pre)$(rt)_LLN_cmb_$(lib)$(pt).tvec up=L ta=N \
577
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
578
$(MAKE) tvecs/$(pre)$(rt)_LLT_cmb_$(lib)$(pt).tvec up=L ta=T \
579
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
580
cat tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec \
581
tvecs/$(pre)$(rt)_LUN_cmb_$(lib)$(pt).tvec \
582
tvecs/$(pre)$(rt)_LUT_cmb_$(lib)$(pt).tvec \
583
tvecs/$(pre)$(rt)_LLN_cmb_$(lib)$(pt).tvec \
584
tvecs/$(pre)$(rt)_LLT_cmb_$(lib)$(pt).tvec | \
585
./xcattvecs -# 5 -o $@ -C 2 N MFLOP_avg
587
# Cat combined Side,Uplo,Trans, variants together with sqmm
589
tvecs/$(pre)$(rt)_SdUpTr_cmb_$(lib)$(pt).tvec : xcattvecs force_build \
590
tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec
591
$(MAKE) tvecs/$(pre)$(rt)_LUN_cmb_$(lib)$(pt).tvec sd=L up=U ta=N \
592
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
593
$(MAKE) tvecs/$(pre)$(rt)_LUT_cmb_$(lib)$(pt).tvec sd=L up=U ta=T \
594
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
595
$(MAKE) tvecs/$(pre)$(rt)_LLN_cmb_$(lib)$(pt).tvec sd=L up=L ta=N \
596
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
597
$(MAKE) tvecs/$(pre)$(rt)_LLT_cmb_$(lib)$(pt).tvec sd=L up=L ta=T \
598
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
599
$(MAKE) tvecs/$(pre)$(rt)_RUN_cmb_$(lib)$(pt).tvec sd=R up=U ta=N \
600
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
601
$(MAKE) tvecs/$(pre)$(rt)_RUT_cmb_$(lib)$(pt).tvec sd=R up=U ta=T \
602
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
603
$(MAKE) tvecs/$(pre)$(rt)_RLN_cmb_$(lib)$(pt).tvec sd=R up=L ta=N \
604
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
605
$(MAKE) tvecs/$(pre)$(rt)_RLT_cmb_$(lib)$(pt).tvec sd=R up=L ta=T \
606
rt=$(rt) pre=$(pre) pt="$(pt)" lib=$(lib)
607
cat tvecs/$(pre)mmsq_cmb_$(lib)$(pt).tvec \
608
tvecs/$(pre)$(rt)_LUN_cmb_$(lib)$(pt).tvec \
609
tvecs/$(pre)$(rt)_LUT_cmb_$(lib)$(pt).tvec \
610
tvecs/$(pre)$(rt)_LLN_cmb_$(lib)$(pt).tvec \
611
tvecs/$(pre)$(rt)_LLT_cmb_$(lib)$(pt).tvec \
612
tvecs/$(pre)$(rt)_RUN_cmb_$(lib)$(pt).tvec \
613
tvecs/$(pre)$(rt)_RUT_cmb_$(lib)$(pt).tvec \
614
tvecs/$(pre)$(rt)_RLN_cmb_$(lib)$(pt).tvec \
615
tvecs/$(pre)$(rt)_RLT_cmb_$(lib)$(pt).tvec | \
616
./xcattvecs -# 9 -o $@ -C 2 N MFLOP_avg
618
# Change to percentage of GEMM speed
620
# Each pcmm_* tvec is produced from the combined tvec of the same variant
# family by a recursive make of pcmm4 or pcmm8, with inf naming the input
# file and outf the output.  inf is always the rule's own prerequisite.
# NOTE(review): pcmm4/pcmm8 are defined outside this view; presumably the
# digit is the number of non-GEMM columns to convert -- confirm.
tvecs/$(pre)pcmm_$(rt)_SdUp_cmb_$(lib)$(pt).tvec : \
           tvecs/$(pre)$(rt)_SdUp_cmb_$(lib)$(pt).tvec
	$(MAKE) pcmm4 inf=tvecs/$(pre)$(rt)_SdUp_cmb_$(lib)$(pt).tvec outf=$@
tvecs/$(pre)pcmm_$(rt)_UpTr_cmb_$(lib)$(pt).tvec : \
           tvecs/$(pre)$(rt)_UpTr_cmb_$(lib)$(pt).tvec
	$(MAKE) pcmm4 inf=tvecs/$(pre)$(rt)_UpTr_cmb_$(lib)$(pt).tvec outf=$@
tvecs/$(pre)pcmm_$(rt)_SdUpTr_cmb_$(lib)$(pt).tvec : \
           tvecs/$(pre)$(rt)_SdUpTr_cmb_$(lib)$(pt).tvec
	$(MAKE) pcmm8 inf=tvecs/$(pre)$(rt)_SdUpTr_cmb_$(lib)$(pt).tvec outf=$@