2
# In this file, any line beginning with a '#' is ignored, but the # must be in
3
# column 0. Any run of whitespace is reduced to one space (i.e. used only
4
# to distinguish where words begin/end). Lines may be extended by putting '\'
5
# as the *last* character of the line.
7
# The file has the following format:
8
# ROUT='routine name' AUTH='author names' COMP='compiler name' CFLAGS='flags'
9
# ID=<id> MU=<mu> NU=<nu> minY=<#> minX=<#> alignX=<#> alignY=<#> alignA=<#>,
10
# SSE=[0,1,2,3] X87=[0,1] PREF[a,x,y]=[DIST,INSTDIST, INST] LDAMUL=<#>
11
# ALLALIGNXY=[0,1] GEMMBASED=[0,1] CONJDEF=[0,1] FNU=[0,1]
12
# ASM=[asmlist], eg., asmlist is "GAS_x8664,GAS_x8632" or "GAS_SPARC"
13
# ASM defaults to no assembly dialect required.
14
# If MU/NU is negative, then the routine can only handle multiples of MU/NU.
16
# Some less-obvious keywords:
17
# LDAMUL : Kernel will only work if lda is a multiple of # (in bytes)
18
# PFTUNEx : Kernel uses pref_x(mem) macro for each op=x (A,y,x). Prefetch
19
# inst can be varied with this macro, as can fetch distance.
20
# If set to INSTDIST, tune both distance and instruction type;
21
# If set to INST, tune instruction type only
22
# If set to DIST, tune distance only
23
# FNU : if set, kernel can only handle N where N%NU == 0
24
ID=1 TA='N' NU=16 MU=1 AUTH='R. Clint Whaley', ROUT='ATL_gemvN_axpy.c' \
26
ID=2 TA='N' NU=4 MU=8 AUTH='R. Clint Whaley', ROUT='ATL_sgemvN_8x4_sse.c' \
27
AXPYBASED=1 SSE=1 ALIGNX2A=1 alignY=16 LDAMUL=16
28
ID=3 TA='N' MU=24 NU=8 AUTH='IBM', ROUT='ATL_gemvN_v6x8_vsx.c' \
29
AXPYBASED=1 COMP='gcc' CFLAGS='-O3 -mvsx'