29
29
#include "fftw_cilk.cilkh"
31
31
static cilk void parallel_twiddle_codelet(int a, int b,
32
fftw_twiddle_codelet *codelet,
32
fftw_twiddle_codelet *codelet,
40
(*codelet)(out + dist * a, W + ntwiddle * a, ostride, b - a, dist);
40
(*codelet)(out + dist * a, W + ntwiddle * a, ostride, b - a, dist);
43
spawn parallel_twiddle_codelet(a, ab, codelet, W, out, ostride,
45
spawn parallel_twiddle_codelet(ab, b, codelet, W, out, ostride,
43
spawn parallel_twiddle_codelet(a, ab, codelet, W, out, ostride,
45
spawn parallel_twiddle_codelet(ab, b, codelet, W, out, ostride,
51
51
static cilk void executor_simple_cilk(int n, const fftw_complex *in,
59
(*p->nodeu.notw.codelet)(in, out, istride, ostride);
64
int r = p->nodeu.twiddle.size;
67
fftw_twiddle_codelet *codelet;
71
fftw_executor_simple(n, in, out, p,
76
for (i = 0; i < r; ++i)
77
spawn executor_simple_cilk(m, in + i * istride,
78
out + i * (m * ostride),
79
p->nodeu.twiddle.recurse,
80
istride * r, ostride);
83
codelet = p->nodeu.twiddle.codelet;
84
W = p->nodeu.twiddle.tw->twarray;
86
spawn parallel_twiddle_codelet(0, m,
90
p->nodeu.twiddle.codelet_desc->ntwiddle);
98
int r = p->nodeu.generic.size;
101
fftw_generic_codelet *codelet;
104
for (i = 0; i < r; ++i)
105
spawn executor_simple_cilk(m, in + i * istride,
106
out + i * (m * ostride),
107
p->nodeu.generic.recurse,
108
istride * r, ostride);
112
codelet = p->nodeu.generic.codelet;
113
W = p->nodeu.generic.tw->twarray;
115
(*codelet)(out, W, m, r, n, ostride);
122
int r = p->nodeu.rader.size;
125
fftw_rader_codelet *codelet;
128
for (i = 0; i < r; ++i) {
129
spawn executor_simple_cilk(m, in + i * istride,
130
out + i * (m * ostride),
131
p->nodeu.rader.recurse,
132
istride * r, ostride);
136
codelet = p->nodeu.rader.codelet;
137
W = p->nodeu.rader.tw->twarray;
138
(*codelet)(out, W, m, r, ostride,
139
p->nodeu.rader.rader_data);
145
fftw_die("BUG in executor: illegal plan\n");
59
(*p->nodeu.notw.codelet)(in, out, istride, ostride);
64
int r = p->nodeu.twiddle.size;
67
fftw_twiddle_codelet *codelet;
71
fftw_executor_simple(n, in, out, p,
77
for (i = 0; i < r; ++i)
78
spawn executor_simple_cilk(m, in + i * istride,
79
out + i * (m * ostride),
80
p->nodeu.twiddle.recurse,
81
istride * r, ostride);
84
codelet = p->nodeu.twiddle.codelet;
85
W = p->nodeu.twiddle.tw->twarray;
87
spawn parallel_twiddle_codelet(0, m,
91
p->nodeu.twiddle.codelet_desc->ntwiddle);
99
int r = p->nodeu.generic.size;
102
fftw_generic_codelet *codelet;
105
for (i = 0; i < r; ++i)
106
spawn executor_simple_cilk(m, in + i * istride,
107
out + i * (m * ostride),
108
p->nodeu.generic.recurse,
109
istride * r, ostride);
113
codelet = p->nodeu.generic.codelet;
114
W = p->nodeu.generic.tw->twarray;
116
(*codelet)(out, W, m, r, n, ostride);
123
int r = p->nodeu.rader.size;
126
fftw_rader_codelet *codelet;
129
for (i = 0; i < r; ++i) {
130
spawn executor_simple_cilk(m, in + i * istride,
131
out + i * (m * ostride),
132
p->nodeu.rader.recurse,
133
istride * r, ostride);
137
codelet = p->nodeu.rader.codelet;
138
W = p->nodeu.rader.tw->twarray;
139
(*codelet)(out, W, m, r, ostride,
140
p->nodeu.rader.rader_data);
146
fftw_die("BUG in executor: illegal plan\n");
150
151
static cilk void executor_simple_inplace_cilk(int n, fftw_complex *in,
154
155
switch (p->type) {
156
(*p->nodeu.notw.codelet) (in, in, istride, istride);
163
tmp = (fftw_complex *)
164
Cilk_alloca(n * sizeof(fftw_complex));
166
spawn executor_simple_cilk(n, in, tmp, p, istride, 1);
168
fftw_strided_copy(n, tmp, istride, in);
157
(*p->nodeu.notw.codelet) (in, in, istride, istride);
164
tmp = (fftw_complex *)
165
Cilk_alloca(n * sizeof(fftw_complex));
167
spawn executor_simple_cilk(n, in, tmp, p, istride, 1);
169
fftw_strided_copy(n, tmp, istride, in);
173
174
#define FFTW_CILK_HOWMANY_CODELET_THRESHOLD 16
174
175
#define FFTW_CILK_HOWMANY_SIMPLE_THRESHOLD 16
177
int n; fftw_plan_node *p;
178
181
fftw_notw_codelet *codelet;
179
const fftw_complex *in,*out;
180
int istride, ostride, idist,odist;
181
} execute_howmany_data;
182
const fftw_complex *in;
184
int istride, ostride, idist, odist;
186
execute_howmany_data;
183
188
static cilk void execute_howmany_codelets(
184
189
execute_howmany_data *d,
185
190
int min, int max)
187
192
if (max - min > FFTW_CILK_HOWMANY_CODELET_THRESHOLD) {
188
spawn execute_howmany_codelets(d,
190
spawn execute_howmany_codelets(d,
194
fftw_notw_codelet *codelet;
195
fftw_complex *in,*out;
196
int istride, ostride, idist,odist;
198
codelet = d->codelet;
199
in = d->in; out = d->out;
200
istride = d->istride; ostride = d->ostride;
201
idist = d->idist; odist = d->odist;
203
for (; min <= max; ++min)
204
(*codelet)(in + min * idist,
205
out + min * odist,istride,ostride);
193
spawn execute_howmany_codelets(d,
195
spawn execute_howmany_codelets(d,
198
fftw_notw_codelet *codelet;
199
const fftw_complex *in;
201
int istride, ostride, idist, odist;
203
codelet = d->codelet;
206
istride = d->istride;
207
ostride = d->ostride;
211
for (; min <= max; ++min)
212
(*codelet)(in + min * idist,
213
out + min * odist, istride, ostride);
211
219
int min, int max)
213
221
if (max - min > FFTW_CILK_HOWMANY_SIMPLE_THRESHOLD) {
214
spawn execute_howmany_simple(d,
216
spawn execute_howmany_simple(d,
221
fftw_complex *in, *out;
223
int idist, odist, istride, ostride;
229
istride = d->istride;
230
ostride = d->ostride;
234
for (; min <= max; ++min)
235
fftw_executor_simple(n, in + min*idist,
237
p, istride, ostride);
222
spawn execute_howmany_simple(d,
224
spawn execute_howmany_simple(d,
228
const fftw_complex *in;
231
int idist, odist, istride, ostride;
237
istride = d->istride;
238
ostride = d->ostride;
242
for (; min <= max; ++min)
243
fftw_executor_simple(n, in + min*idist,
246
FFTW_NORMAL_RECURSE);
241
250
static cilk void executor_many_cilk(int n, const fftw_complex *in,
246
int howmany, int idist, int odist)
255
int howmany, int idist, int odist)
248
257
switch (p->type) {
251
execute_howmany_data d;
253
d.codelet = p->nodeu.notw.codelet;
254
d.in = in; d.out = out;
255
d.istride = istride; d.ostride = ostride;
256
d.idist = idist; d.odist = odist;
258
spawn execute_howmany_codelets(&d,0,howmany-1);
265
execute_howmany_data d;
268
d.in = in; d.out = out;
269
d.istride = istride; d.ostride = ostride;
270
d.idist = idist; d.odist = odist;
272
spawn execute_howmany_simple(&d,0,howmany-1);
260
execute_howmany_data d;
262
d.codelet = p->nodeu.notw.codelet;
270
spawn execute_howmany_codelets(&d,0,howmany-1);
277
execute_howmany_data d;
288
spawn execute_howmany_simple(&d,0,howmany-1);
305
321
int min, int max)
307
323
if (max - min > FFTW_CILK_HOWMANY_SIMPLE_THRESHOLD) {
308
spawn execute_howmany_simple_in_place(d,
310
spawn execute_howmany_simple_in_place(d,
315
fftw_complex *in,*tmp;
321
in = d->in + min * (idist = d->idist);
324
spawn execute_howmany_simple_in_place(d,
326
spawn execute_howmany_simple_in_place(d,
337
in = (fftw_complex *)d->in + min * (idist = d->idist);
322
338
istride = d->istride;
323
tmp = d->out + n * Self;
339
tmp = d->out + n * Self;
325
for (; min <= max; ++min) {
326
fftw_executor_simple(n, in, tmp, p, istride, 1);
327
fftw_strided_copy(n, tmp, istride, in);
341
for (; min <= max; ++min) {
342
fftw_executor_simple(n, in, tmp, p, istride, 1,
343
FFTW_NORMAL_RECURSE);
344
fftw_strided_copy(n, tmp, istride, in);
333
350
static cilk void executor_many_inplace_cilk(int n, fftw_complex *in,
336
int howmany, int idist)
353
int howmany, int idist)
338
355
switch (p->type) {
341
execute_howmany_data d;
343
d.codelet = p->nodeu.notw.codelet;
348
spawn execute_howmany_codelets_in_place(&d,0,howmany-1);
355
execute_howmany_data d;
362
d.out = (fftw_complex *)
363
Cilk_alloca(Cilk_active_size *
364
n * sizeof(fftw_complex));
366
spawn execute_howmany_simple_in_place(&d,0,howmany-1);
358
execute_howmany_data d;
360
d.codelet = p->nodeu.notw.codelet;
365
spawn execute_howmany_codelets_in_place(&d,0,howmany-1);
372
execute_howmany_data d;
379
d.out = (fftw_complex *)
380
Cilk_alloca(Cilk_active_size *
381
n * sizeof(fftw_complex));
383
spawn execute_howmany_simple_in_place(&d,0,howmany-1);
371
388
/* user interface */
372
389
cilk void fftw_cilk(fftw_plan plan, int howmany, fftw_complex *in, int istride,
373
int idist, fftw_complex *out, int ostride, int odist)
390
int idist, fftw_complex *out, int ostride, int odist)
377
394
if (plan->flags & FFTW_IN_PLACE) {
379
spawn executor_simple_inplace_cilk(n, in,
380
plan->root, istride);
382
spawn executor_many_inplace_cilk(n, in,
383
plan->root, istride, howmany,
396
spawn executor_simple_inplace_cilk(n, in,
397
plan->root, istride);
399
spawn executor_many_inplace_cilk(n, in,
400
plan->root, istride, howmany,
388
spawn executor_simple_cilk(n, in, out,
389
plan->root, istride, ostride);
391
spawn executor_many_cilk(n, in, out,
392
plan->root, istride, ostride,
393
howmany, idist, odist);
405
spawn executor_simple_cilk(n, in, out,
406
plan->root, istride, ostride);
408
spawn executor_many_cilk(n, in, out,
409
plan->root, istride, ostride,
410
howmany, idist, odist);