2
# In this file, any line beginning with a '#' is ignored, but the # must be in
3
# column 0. Any run of whitespace is reduced to one space (i.e. used only
4
# to distinguish where words begin/end). Lines may be extended by putting '\'
5
# as the *last* character of line.
7
# The file has the following format:
8
# ROUT='routine name' AUTH='author names' COMP='compiler name' CFLAGS='flags'
9
# ID=<id> NU=<nu> MU=<mu> minN=<#> minM=<#> alignX=<#> alignY=<#> alignA=<#>
10
# SSE=[0,1,2,3] X87=[0,1] PREF[a,x,y]=[DIST,INSTDIST, INST] LDAMUL=<#>
11
# ALLALIGNXY=[0,1] GEMMBASED=[0,1] CONJDEF=[0,1] FNU=[0,1]
12
# ASM=[asmlist], e.g., asmlist is "GAS_x8664,GAS_x8632" or "GAS_SPARC"
13
# ASM defaults to no assembly dialect required.
14
# If NU/MU is negative, then the routine can only handle multiples of NU/MU.
16
# Assuming M is the length of X, and N is the length of Y, all routines
17
# are assumed to handle any runtime value of N >= minN, M >= minM.
18
# They must respect the compile-time macros BETA0, BETA1, & BETAX.
19
# Some less-obvious keywords:
20
# LDAMUL : Kernel will only work if lda is a multiple of # (in bytes)
21
# PFTUNEx : Kernel uses pref_x(mem) macro for each op=x (A,y,x). prefetch
22
# inst can be varied with this macro, as can fetch distance.
23
# If set to INSTDIST, tune both distance and instruction type;
24
# If set to INST, tune instruction type only
25
# If set to DIST, tune distance only
26
# FNU : if set, kernel can only handle N where N%NU == 0
27
# Kernel entries: one record per logical line, written as keyword=value pairs
# in the format described in the header comments of this file.
ID=1 MU=16 NU=1 AUTH='R. Clint Whaley' ROUT='ATL_gerk_axpy.c'
28
ID=2 MU=4 NU=4 AUTH='R. Clint Whaley' ROUT='ATL_gerk_4x4_1.c'
29
ID=3 MU=1 NU=4 AUTH='R. Clint Whaley' ROUT='ATL_gerk_1x4_0.c'
30
ID=4 MU=8 NU=4 AUTH='R. Clint Whaley' ROUT='ATL_gerk_8x4_0.c'
31
# The next record is continued onto a second line with a trailing backslash.
# NOTE(review): ALIGNX2A is not among the keywords listed in the header
# comments; confirm its meaning against the index-file parser.
ID=6 MU=8 NU=4 SSE=3 LDAMUL=16 ALIGNX2A=1 \
32
AUTH='R. Clint Whaley' ROUT='ATL_dgerk_4x8_sse.c'
33
ID=7 ROUT='ATL_gerk_vx4_vsx.c' AUTH='IBM' MU=2 NU=4