2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* $Id: ctsq.c,v 1.12 2006-01-27 02:10:50 athena Exp $ */
23
/* special ``square transpose'' cooley-tukey solver for in-place problems */
43
static void apply_dif(const plan *ego_, R *ri, R *ii, R *ro, R *io)
45
const P *ego = (const P *) ego_;
48
UNUSED(ro); /* == ri */
49
UNUSED(io); /* == ii */
50
ego->k(ri, ii, ego->td->W, ego->ios, ego->vs, ego->m, ego->dist);
52
cld = (plan_dft *) ego->cld;
53
cld->apply(ego->cld, ri, ii, ri, ii);
56
static void awake(plan *ego_, enum wakefulness wakefulness)
59
X(plan_awake)(ego->cld, wakefulness);
60
X(twiddle_awake)(wakefulness, &ego->td, ego->slv->desc->tw,
61
ego->r * ego->m, ego->r, ego->m);
64
static void destroy(plan *ego_)
67
X(plan_destroy_internal)(ego->cld);
68
X(stride_destroy)(ego->ios);
69
X(stride_destroy)(ego->vs);
72
static void print(const plan *ego_, printer *p)
74
const P *ego = (const P *) ego_;
75
const S *slv = ego->slv;
76
const ct_desc *e = slv->desc;
78
p->print(p, "(dft-ctsq-%s/%D \"%s\"%(%p%))",
79
slv->dec == DECDIT ? "dit" : "dif",
80
ego->r, e->nam, ego->cld);
83
#define divides(a, b) (((b) % (a)) == 0)
84
static int applicable(const S *ego, const problem *p_, planner *plnr)
86
const problem_dft *p = (const problem_dft *) p_;
87
const ct_desc *e = ego->desc;
88
iodim *d = p->sz->dims, *vd = p->vecsz->dims;
91
&& p->ri == p->ro /* inplace only */
96
&& divides(e->radix, d[0].n)
98
/* conditions for transposition */
99
&& vd[0].n == e->radix
100
&& d[0].os == vd[0].is
101
&& d[0].is == e->radix * vd[0].is
102
&& vd[0].os == d[0].n * vd[0].is
104
/* SIMD strides etc. */
105
&& (e->genus->okp(e, p->ri, p->ii, vd[0].os, vd[0].is,
106
d[0].n / e->radix, 0, d[0].n / e->radix,
112
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
114
const S *ego = (const S *) ego_;
115
const problem_dft *p;
119
const ct_desc *e = ego->desc;
121
static const plan_adt padt = {
122
X(dft_solve), awake, print, destroy
125
if (!applicable(ego, p_, plnr))
128
p = (const problem_dft *) p_;
135
A(0); /* not implemented */
139
cld = X(mkplan_d)(plnr,
141
X(mktensor_1d)(d[0].n / e->radix,
143
X(mktensor_2d)(vd[0].n,
147
p->ro, p->io, p->ro, p->io));
150
pln = MKPLAN_DFT(P, &padt, apply_dif);
151
pln->ios = X(mkstride)(e->radix, vd[0].os);
152
pln->vs = X(mkstride)(e->radix, vd[0].is);
163
pln->m = d[0].n / pln->r;
167
X(ops_madd)(pln->m / e->genus->vl, &e->ops, &cld->ops,
168
&pln->super.super.ops);
169
return &(pln->super.super);
172
X(plan_destroy_internal)(cld);
176
solver *X(mksolver_ctsq)(kdftwsq codelet, const ct_desc *desc, int dec)
178
static const solver_adt sadt = { PROBLEM_DFT, mkplan };
179
S *slv = MKSOLVER(S, &sadt);
183
return &(slv->super);