# This Makefile.am specifies a set of codelets, efficient transforms
# of small sizes, that are used as building blocks (kernels) by FFTW
# to build up large transforms, as well as the options for generating
# them.
#
# You can customize FFTW for special needs, e.g. to handle certain
# sizes more efficiently, by adding new codelets to the lists of those
# included by default. If you change the list of codelets, any new
# ones you added will be automatically generated when you run the
# bootstrap script (see "Generating your own code" in the FFTW
# manual).

###########################################################################
# Preprocessor include paths used for every source in this directory:
# the kernel, dft, dft/simd and simd directories under $(top_srcdir).
AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \
  -I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd
# Extra compiler flags: whatever $(SIMD_CFLAGS) expands to (it is not
# defined in the visible part of this file).
AM_CFLAGS = $(SIMD_CFLAGS)
# Libtool library built from the codelets; the noinst_ prefix means it is
# built but not installed.
noinst_LTLIBRARIES = libdft_codelets_simd.la

###########################################################################
# n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD
# (each file is produced by the n1fv_%.c pattern rule in this file)
N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \
  n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \
  n1fv_16.c n1fv_32.c n1fv_64.c

# as above, with restricted input vector stride
# (each file is produced by the n2fv_%.c pattern rule in this file)
N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \
  n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c

# as above, but FFTW_BACKWARD
# (each file is produced by the n1bv_%.c pattern rule, which passes -sign 1)
N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \
  n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \
  n1bv_16.c n1bv_32.c n1bv_64.c

# n2bv_* counterparts of the n2fv_* list; the n2bv_%.c pattern rule
# generates them with -sign 1.
N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \
  n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c

# split-complex codelets
# (each file is produced by the n2sv_%.c pattern rule in this file)
N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c

###########################################################################
# t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step
# for an FFTW_FORWARD transform, using SIMD
T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \
  t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c

# same as t1fv_*, but with different twiddle storage scheme
T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c
# t3fv_* are generated by the t3fv_%.c rule, which adds $(FLAGS_T3)
T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c

# as above, but FFTW_BACKWARD
T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \
  t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c

# same as t1bv_*, but with different twiddle storage scheme
T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c
# t3bv_* are generated by the t3bv_%.c rule, which adds $(FLAGS_T3)
T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c

# split-complex codelets
# Size 64 is commented out of both lists (t1sv_64.c, t2sv_64.c), so those
# two files are not part of the build. The note lives on its own line so the
# variable values carry no trailing whitespace.
T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c
T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c

###########################################################################
# q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step),
# where the output is transposed, using SIMD. This is used for
# in-place transposes in sizes that are divisible by <r>^2. These
# codelets have size ~ <r>^2, so you should probably not use <r>
# bigger than 8 or so.
Q1F = q1fv_2.c q1fv_4.c q1fv_8.c

# as above, but FFTW_BACKWARD
Q1B = q1bv_2.c q1bv_4.c q1bv_8.c

###########################################################################
# Union of every codelet list defined above; this is the complete set of
# generated SIMD sources.
SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1F) $(T2F) \
  $(T3F) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B)

# Codelets compiled into the library: the full SIMD set.
ALL_CODELETS = $(SIMD_CODELETS)

# Every SIMD codelet source is shipped in the distribution.
EXTRA_DIST = $(SIMD_CODELETS)
# Sources that must be generated before compilation starts. $(CODLIST) is
# not defined in the visible part of this file (presumably it comes from the
# included Makefile.codelets -- confirm).
BUILT_SOURCES= $(ALL_CODELETS) $(CODLIST)

# The library consists of exactly the generated sources listed above.
libdft_codelets_simd_la_SOURCES = $(BUILT_SOURCES)

# Name given to the solver table for these codelets (presumably consumed by
# the included Makefile.codelets -- confirm there).
SOLVTAB_NAME = X(solvtab_dft_simd)

# special rules for regenerating codelets.
include $(top_srcdir)/support/Makefile.codelets

# The generator flags and regeneration rules from here to the closing
# "endif # MAINTAINER_MODE" at the end of the file are maintainer-only.
# This "if" is the opener that the closing endif requires.
if MAINTAINER_MODE
# Flags passed to every generator invocation in the rules that follow.
# $(FLAGS_COMMON) is not defined in the visible part of this file.
GFLAGS = -simd $(FLAGS_COMMON) -pipeline-latency 8
# Additional flags used only by the t2sv_%.c rule.
FLAGS_T2S=-twiddle-log3 -precompute-twiddles
# Additional flags used only by the t3fv_%.c and t3bv_%.c rules.
FLAGS_T3=-twiddle-log3 -precompute-twiddles -no-generate-bytw

# Generate n1fv_<n>.c: run $(GEN_NOTW_C) with -n set to the pattern stem,
# then pipe the output through $(ADD_DATE) and $(INDENT) into the target.
n1fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n1fv_$* -include "n1f.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate n2fv_<n>.c: like the n1fv rule, but with -with-ostride 2 and
# -store-multiple 2, and including "n2f.h".
n2fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n2fv_$* -with-ostride 2 -include "n2f.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@

# Generate n1bv_<n>.c: same generator as n1fv, invoked with -sign 1 and
# including "n1b.h".
n1bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n1bv_$* -include "n1b.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate n2bv_<n>.c: same as the n2fv rule, invoked with -sign 1 and
# including "n2b.h".
n2bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n2bv_$* -with-ostride 2 -include "n2b.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@

# Generate n2sv_<n>.c (split-complex): uses $(GEN_NOTW) rather than
# $(GEN_NOTW_C), with -with-ostride 1 and -store-multiple 4.
n2sv_%.c: $(CODELET_DEPS) $(GEN_NOTW)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW) $(GFLAGS) -n $* -name n2sv_$* -with-ostride 1 -include "n2s.h" -store-multiple 4) | $(ADD_DATE) | $(INDENT) >$@

# Generate t1fv_<r>.c: run $(GEN_TWIDDLE_C) with -n set to the pattern stem,
# then pipe the output through $(ADD_DATE) and $(INDENT) into the target.
t1fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1fv_$* -include "t1f.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate t2fv_<r>.c: same invocation as the t1fv rule except for the
# codelet name and the included header "t2f.h".
t2fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2fv_$* -include "t2f.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate t3fv_<r>.c: $(GEN_TWIDDLE_C) with the extra $(FLAGS_T3) options,
# including "t3f.h".
t3fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3fv_$* -include "t3f.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate t1bv_<r>.c: same as the t1fv rule, invoked with -sign 1 and
# including "t1b.h".
t1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1bv_$* -include "t1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@

# Generate t2bv_<r>.c: same as the t2fv rule, invoked with -sign 1 and
# including "t2b.h".
t2bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2bv_$* -include "t2b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@

# Generate t3bv_<r>.c: same as the t3fv rule (with $(FLAGS_T3)), invoked
# with -sign 1 and including "t3b.h".
t3bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3bv_$* -include "t3b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@

# Generate t1sv_<r>.c (split-complex): uses $(GEN_TWIDDLE) rather than
# $(GEN_TWIDDLE_C), including "ts.h".
t1sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) -n $* -name t1sv_$* -include "ts.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate t2sv_<r>.c (split-complex): same as the t1sv rule plus the
# extra $(FLAGS_T2S) options.
t2sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) $(FLAGS_T2S) -n $* -name t2sv_$* -include "ts.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate q1fv_<r>.c: run $(GEN_TWIDSQ_C) with -dif and -n set to the
# pattern stem, including "q1f.h".
q1fv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1fv_$* -include "q1f.h") | $(ADD_DATE) | $(INDENT) >$@

# Generate q1bv_<r>.c: same as the q1fv rule, invoked with -sign 1 and
# including "q1b.h".
q1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C)
	($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1bv_$* -include "q1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@

endif # MAINTAINER_MODE