2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jul 1 13:54:27 EDT 2006 */
24
#include "codelet-dft.h"
28
/* Generated by: ../../../genfft/gen_notw -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include n.h */
31
* This function contains 96 FP additions, 24 FP multiplications,
32
* (or, 72 additions, 0 multiplications, 24 fused multiply/add),
33
* 63 stack variables, and 48 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_notw.ml,v 1.30 2006-02-12 23:34:12 athena Exp $
44
/*
 * n1_12, FMA variant.
 *
 * The generator banner in this file records gen_notw invoked with
 * "-fma ... -n 12 -name n1_12"; the descriptor registered together with this
 * function begins with 12 and the name "n1_12".
 *
 * Parameters, as used by the code visible here:
 *   ri, ii   - const pointers, advanced by ivs at the end of every iteration
 *              (presumably the real/imaginary inputs; the loads themselves are
 *              not visible in this excerpt - confirm)
 *   ro, io   - output pointers, advanced by ovs at the end of every iteration;
 *              results are stored to ro[WS(os, k)] and io[WS(os, k)]
 *   is, os   - strides; os is handed to WS() to index the stores
 *   v        - iteration count (the loop starts at i = v and runs while i > 0)
 *   ivs, ovs - per-iteration pointer increments
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * are declared outside this excerpt. FMA(a, b, c) / FNMS(a, b, c) are
 * presumably a*b + c and c - a*b respectively - confirm against the header.
 * NOTE(review): this excerpt is incomplete (braces, the declaration of i, the
 * input loads and many temporaries are missing), so the comments below only
 * describe what the visible statements show.
 */
static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
46
/* Named constants used below: 0.8660254... (numerically sqrt(3)/2) and 0.5. */
DK(KP866025403, +0.866025403784438646763723170752936183471402627);
47
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
49
/* One pass per value of i, counting down from v; the increment clause moves
 * all four data pointers to the next pass. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
50
/* Temporaries of type E; most of their defining assignments are not visible
 * in this excerpt. */
E TT, TW, TF, T1q, TY, TQ, TX, T1n;
52
E TA, TS, TR, T5, Ts, Tz, TD, TV, TU, Ta, Tx, TC, T1d, Th, TJ;
53
E TG, Tg, T1u, T1c, T1f, TM, TN, Tk, T1i;
57
E T1, To, Tp, Tq, T4, T2, T3, T7, T8, Tr;
69
/* Each FNMS with KP500000000 combines a pair (x, s) as FNMS(0.5, s, x). */
TR = FNMS(KP500000000, T4, T1);
73
Tz = FNMS(KP500000000, Tr, To);
82
E Tc, T1a, TH, TI, Tf, Td, Te, Tw, Ti, Tj, T1b;
86
TU = FNMS(KP500000000, T9, T6);
90
TC = FNMS(KP500000000, Tw, Tt);
100
TG = FNMS(KP500000000, Tf, Tc);
104
T1c = FNMS(KP500000000, T1b, T1a);
114
E T1t, TO, TL, T1h, T1w, Tb, T1g, Tl;
119
TL = FNMS(KP500000000, Tk, Th);
122
E T1x, Ty, T1v, Tn, Tm, T1y;
126
T1h = FNMS(KP500000000, T1g, T1f);
132
/* Stores to output indices 3, 6 and 9: plain sums/differences of
 * temporaries, with no constant multiplier on these lines. */
ro[WS(os, 6)] = Tb - Tm;
133
io[WS(os, 3)] = Tn + Ty;
135
io[WS(os, 6)] = T1x - T1y;
136
io[WS(os, 9)] = Ty - Tn;
139
E TB, TE, T1o, T11, T1p, TK, TP, T15, T1k, T18, T14, T16, T1l, T1m;
141
E T1e, T1j, TZ, T10, T12, T13;
/* The FNMS/FMA pairs below apply KP866025403 to the same two operands, giving
 * the "minus" and "plus" combinations of each pair. */
142
TB = FNMS(KP866025403, TA, Tz);
143
TZ = FMA(KP866025403, TA, Tz);
144
T10 = FMA(KP866025403, TD, TC);
145
TE = FNMS(KP866025403, TD, TC);
146
T1o = FNMS(KP866025403, T1d, T1c);
147
T1e = FMA(KP866025403, T1d, T1c);
148
ro[WS(os, 9)] = T1t + T1w;
149
ro[WS(os, 3)] = T1t - T1w;
152
T1j = FMA(KP866025403, T1i, T1h);
153
T1p = FNMS(KP866025403, T1i, T1h);
154
TK = FNMS(KP866025403, TJ, TG);
155
T12 = FMA(KP866025403, TJ, TG);
156
T13 = FMA(KP866025403, TO, TL);
157
TP = FNMS(KP866025403, TO, TL);
158
TT = FNMS(KP866025403, TS, TR);
159
T15 = FMA(KP866025403, TS, TR);
164
T16 = FMA(KP866025403, TV, TU);
165
TW = FNMS(KP866025403, TV, TU);
167
/* Stores to output indices 1, 4, 7 and 10. */
io[WS(os, 10)] = T1l - T1m;
168
io[WS(os, 4)] = T1l + T1m;
169
io[WS(os, 7)] = T11 + T14;
170
io[WS(os, 1)] = T11 - T14;
172
E T17, T19, T1r, T1s;
175
ro[WS(os, 7)] = T19 - T1k;
176
ro[WS(os, 1)] = T19 + T1k;
177
ro[WS(os, 4)] = T17 + T18;
178
ro[WS(os, 10)] = T17 - T18;
185
/* Stores to output indices 2, 5, 8 and 11. */
io[WS(os, 2)] = T1r - T1s;
186
io[WS(os, 8)] = T1r + T1s;
191
io[WS(os, 11)] = TF + TQ;
192
io[WS(os, 5)] = TF - TQ;
195
ro[WS(os, 11)] = T1n - T1q;
196
ro[WS(os, 5)] = T1n + T1q;
197
ro[WS(os, 8)] = TX + TY;
198
ro[WS(os, 2)] = TX - TY;
202
/* Descriptor handed to X(kdft_register) together with n1_12. The tuple
 * {72, 0, 24, 0} matches the operation summary in this variant's generator
 * banner (72 additions, 0 multiplications, 24 fused multiply/add).
 * NOTE(review): kdft_desc and GENUS are declared elsewhere; the meaning of the
 * leading 12 (presumably the transform size) and of the four trailing zeros
 * should be confirmed against that declaration. */
static const kdft_desc desc = { 12, "n1_12", {72, 0, 24, 0}, &GENUS, 0, 0, 0, 0 };
203
/* Registration entry point: passes the n1_12 kernel and its descriptor to
 * X(kdft_register) for planner p. X() is a macro defined outside this excerpt
 * (presumably a name-mangling prefix - confirm). */
void X(codelet_n1_12) (planner *p) {
204
X(kdft_register) (p, n1_12, &desc);
209
/* Generated by: ../../../genfft/gen_notw -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include n.h */
212
* This function contains 96 FP additions, 16 FP multiplications,
213
* (or, 88 additions, 8 multiplications, 8 fused multiply/add),
214
* 43 stack variables, and 48 memory accesses
218
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
219
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
220
* $Id: gen_notw.ml,v 1.30 2006-02-12 23:34:12 athena Exp $
225
/*
 * n1_12, non-FMA variant.
 *
 * The generator banner for this variant records gen_notw invoked with
 * "-n 12 -name n1_12" and without "-fma"; here the 0.8660254... factors are
 * applied with explicit multiplications (KP866025403 * (a - b)), while the
 * 0.5 factors still go through FNMS.
 *
 * Parameters, as used by the code visible here:
 *   ri, ii   - const pointers, advanced by ivs at the end of every iteration
 *              (presumably the real/imaginary inputs; the loads themselves are
 *              not visible in this excerpt - confirm)
 *   ro, io   - output pointers, advanced by ovs at the end of every iteration;
 *              results are stored to ro[WS(os, k)] and io[WS(os, k)]
 *   is, os   - strides; os is handed to WS() to index the stores
 *   v        - iteration count (the loop starts at i = v and runs while i > 0)
 *   ivs, ovs - per-iteration pointer increments
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FNMS and MAKE_VOLATILE_STRIDE are
 * declared outside this excerpt. FNMS(a, b, c) is presumably c - a*b -
 * confirm against the header.
 * NOTE(review): this excerpt is incomplete (braces, the declaration of i, the
 * input loads and many temporaries are missing), so the comments below only
 * describe what the visible statements show.
 */
static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
227
/* Named constants used below: 0.8660254... (numerically sqrt(3)/2) and 0.5. */
DK(KP866025403, +0.866025403784438646763723170752936183471402627);
228
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
230
/* One pass per value of i, counting down from v; the increment clause moves
 * all four data pointers to the next pass. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
231
/* Temporaries of type E; most of their defining assignments are not visible
 * in this excerpt. */
E T5, TR, TA, Ts, TS, Tz, Ta, TU, TD, Tx, TV, TC, Tg, T1a, TG;
232
E TJ, T1u, T1d, Tl, T1f, TL, TO, T1v, T1i;
240
/* Visible pattern in the lines below: an FNMS with KP500000000, and a
 * KP866025403-scaled difference of two temporaries. */
TR = FNMS(KP500000000, T4, T1);
241
TA = KP866025403 * (T3 - T2);
250
TS = KP866025403 * (Tp - Tq);
251
Tz = FNMS(KP500000000, Tr, To);
260
TU = FNMS(KP500000000, T9, T6);
261
TD = KP866025403 * (T8 - T7);
270
TV = KP866025403 * (Tu - Tv);
271
TC = FNMS(KP500000000, Tw, Tt);
280
T1a = KP866025403 * (Te - Td);
281
TG = FNMS(KP500000000, Tf, Tc);
289
TJ = KP866025403 * (TH - TI);
291
T1d = FNMS(KP500000000, T1c, T1b);
300
T1f = KP866025403 * (Tj - Ti);
301
TL = FNMS(KP500000000, Tk, Th);
309
TO = KP866025403 * (TM - TN);
311
T1i = FNMS(KP500000000, T1h, T1g);
317
/* Stores to output indices 3, 6 and 9: plain sums/differences of
 * temporaries, with no constant multiplier on these lines. */
ro[WS(os, 6)] = Tb - Tm;
323
io[WS(os, 6)] = T1x - T1y;
327
io[WS(os, 3)] = Tn + Ty;
328
io[WS(os, 9)] = Ty - Tn;
332
ro[WS(os, 3)] = T1t - T1w;
333
ro[WS(os, 9)] = T1t + T1w;
335
E T11, T1l, T1k, T1m, T14, T18, T17, T19;
348
E T12, T13, T15, T16;
358
/* Stores to output indices 1, 4, 7 and 10. */
io[WS(os, 1)] = T11 - T14;
359
ro[WS(os, 1)] = T19 + T1k;
360
io[WS(os, 7)] = T11 + T14;
361
ro[WS(os, 7)] = T19 - T1k;
362
ro[WS(os, 10)] = T17 - T18;
363
io[WS(os, 10)] = T1l - T1m;
364
ro[WS(os, 4)] = T17 + T18;
365
io[WS(os, 4)] = T1l + T1m;
368
E TF, T1r, T1q, T1s, TQ, TY, TX, T1n;
391
/* Stores to output indices 2, 5, 8 and 11. */
io[WS(os, 5)] = TF - TQ;
392
ro[WS(os, 5)] = T1n + T1q;
393
io[WS(os, 11)] = TF + TQ;
394
ro[WS(os, 11)] = T1n - T1q;
395
ro[WS(os, 2)] = TX - TY;
396
io[WS(os, 2)] = T1r - T1s;
397
ro[WS(os, 8)] = TX + TY;
398
io[WS(os, 8)] = T1r + T1s;
404
/* Descriptor handed to X(kdft_register) together with n1_12. The tuple
 * {88, 8, 8, 0} matches the operation summary in this variant's generator
 * banner (88 additions, 8 multiplications, 8 fused multiply/add).
 * NOTE(review): kdft_desc and GENUS are declared elsewhere; the meaning of the
 * leading 12 (presumably the transform size) and of the four trailing zeros
 * should be confirmed against that declaration. */
static const kdft_desc desc = { 12, "n1_12", {88, 8, 8, 0}, &GENUS, 0, 0, 0, 0 };
405
/* Registration entry point: passes the n1_12 kernel and its descriptor to
 * X(kdft_register) for planner p. X() is a macro defined outside this excerpt
 * (presumably a name-mangling prefix - confirm). */
void X(codelet_n1_12) (planner *p) {
406
X(kdft_register) (p, n1_12, &desc);
409
#endif /* HAVE_FMA */