2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jul 1 14:25:26 EDT 2006 */
24
#include "codelet-dft.h"
28
/* Generated by: ../../../genfft/gen_notw_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include n1f.h */
31
* This function contains 74 FP additions, 48 FP multiplications,
32
* (or, 32 additions, 6 multiplications, 42 fused multiply/add),
33
* 63 stack variables, and 28 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_notw_c.ml,v 1.17 2006-02-12 23:34:12 athena Exp $
44
/*
 * n1fv_14, variant produced with the -fma generator flag (see the
 * "Generated by" line preceding this function).
 *
 * Each loop iteration issues 14 LD() reads from xi, at offsets
 * WS(is, 0) .. WS(is, 13), and 14 ST() writes to xo, at offsets
 * WS(os, 0) .. WS(os, 13).
 *
 * NOTE(review): this excerpt appears to be missing lines -- braces, the
 * declarations of i, xi and xo (and how they derive from ri/ii/ro/io),
 * and the assignments of many intermediates (e.g. TH, T3, TK, TN, TQ).
 * Confirm against the generated original before editing.
 */
static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
46
/* Numeric constants; the loop body reads them through LDK(). */
DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
47
DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
48
DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
49
DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
50
DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
51
DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
57
/* i starts at v and drops by VL per pass; xi and xo advance by VL * ivs
 * and VL * ovs respectively. */
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
58
V TH, T3, TP, Tn, Ta, Ts, TW, TK, TO, Tk, TM, Tg, TL, Td, T1;
60
/* Input loads: even offsets pass &(xi[0]) as the last LD argument, odd
 * offsets pass &(xi[WS(is, 1)]). */
T1 = LD(&(xi[0]), ivs, &(xi[0]));
61
T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
63
V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc;
65
V T4, T5, T7, T8, Tl, Tm;
66
T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
67
T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
68
T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
69
T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
70
Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
71
Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
72
Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
81
Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
82
Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
83
Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
84
Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
85
Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
99
V T18, TB, T13, TY, TG, Tw, T11, Tr, T16, TT, Tz, TE, TU, TQ;
112
TZ = VFNMS(LDK(KP356895867), TK, TQ);
113
T17 = VFNMS(LDK(KP554958132), TU, TW);
116
Tp = VFNMS(LDK(KP356895867), Ta, To);
117
TA = VFMA(LDK(KP554958132), Tt, Ts);
118
/* Output 0 is the plain sum TH + TK + TN + TQ. */
ST(&(xo[0]), VADD(TH, VADD(TK, VADD(TN, TQ))), ovs, &(xo[0]));
119
T14 = VFNMS(LDK(KP356895867), TN, TK);
120
TR = VFNMS(LDK(KP356895867), TQ, TN);
123
T12 = VFMA(LDK(KP554958132), TV, TU);
124
TX = VFMA(LDK(KP554958132), TW, TV);
125
/* Output 7 is the plain sum T3 + Ta + Th + To. */
ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)]));
126
Tx = VFNMS(LDK(KP356895867), Th, Ta);
127
TC = VFNMS(LDK(KP356895867), To, Th);
130
TF = VFNMS(LDK(KP554958132), Ts, Tu);
131
Tv = VFMA(LDK(KP554958132), Tu, Tt);
132
T10 = VFNMS(LDK(KP692021471), TZ, TN);
133
T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV));
134
Tq = VFNMS(LDK(KP692021471), Tp, Th);
135
TB = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TA, Tu));
138
T15 = VFNMS(LDK(KP692021471), T14, TQ);
139
TS = VFNMS(LDK(KP692021471), TR, TK);
140
T13 = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), T12, TW));
141
TY = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TX, TU));
142
Ty = VFNMS(LDK(KP692021471), Tx, To);
143
TD = VFNMS(LDK(KP692021471), TC, Ta);
144
TG = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TF, Tt));
145
Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts));
146
T11 = VFNMS(LDK(KP900968867), T10, TH);
147
Tr = VFNMS(LDK(KP900968867), Tq, T3);
148
T16 = VFNMS(LDK(KP900968867), T15, TH);
149
TT = VFNMS(LDK(KP900968867), TS, TH);
150
Tz = VFNMS(LDK(KP900968867), Ty, T3);
151
TE = VFNMS(LDK(KP900968867), TD, T3);
158
/* Remaining twelve outputs are written in pairs: VFMAI and VFNMSI of the
 * same two operands go to output offsets k and 14 - k. */
ST(&(xo[WS(os, 12)]), VFNMSI(T13, T11), ovs, &(xo[0]));
159
ST(&(xo[WS(os, 2)]), VFMAI(T13, T11), ovs, &(xo[0]));
160
ST(&(xo[WS(os, 9)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)]));
161
ST(&(xo[WS(os, 5)]), VFNMSI(Tw, Tr), ovs, &(xo[WS(os, 1)]));
162
ST(&(xo[WS(os, 8)]), VFNMSI(T18, T16), ovs, &(xo[0]));
163
ST(&(xo[WS(os, 6)]), VFMAI(T18, T16), ovs, &(xo[0]));
164
ST(&(xo[WS(os, 10)]), VFNMSI(TY, TT), ovs, &(xo[0]));
165
ST(&(xo[WS(os, 4)]), VFMAI(TY, TT), ovs, &(xo[0]));
166
ST(&(xo[WS(os, 1)]), VFMAI(TB, Tz), ovs, &(xo[WS(os, 1)]));
167
ST(&(xo[WS(os, 13)]), VFNMSI(TB, Tz), ovs, &(xo[WS(os, 1)]));
168
ST(&(xo[WS(os, 3)]), VFMAI(TG, TE), ovs, &(xo[WS(os, 1)]));
169
ST(&(xo[WS(os, 11)]), VFNMSI(TG, TE), ovs, &(xo[WS(os, 1)]));
174
/*
 * Descriptor handed to X(kdft_register) together with n1fv_14.
 * The triple 32, 6, 42 repeats the operation counts quoted in the comment
 * preceding the function (32 additions, 6 multiplications, 42 fused
 * multiply/add). The leading 14 presumably is the transform size (it
 * matches the generator's -n 14) -- confirm against kdft_desc's definition.
 * The meaning of &GENUS and the trailing zeros is not visible here.
 */
static const kdft_desc desc = { 14, "n1fv_14", {32, 6, 42, 0}, &GENUS, 0, 0, 0, 0 };
175
/*
 * Registration entry point: passes the n1fv_14 function and its descriptor
 * desc to X(kdft_register) for planner p.
 * NOTE(review): the closing brace is not present in this excerpt.
 */
void X(codelet_n1fv_14) (planner *p) {
176
X(kdft_register) (p, n1fv_14, &desc);
181
/* Generated by: ../../../genfft/gen_notw_c -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include n1f.h */
184
* This function contains 74 FP additions, 36 FP multiplications,
185
* (or, 50 additions, 12 multiplications, 24 fused multiply/add),
186
* 33 stack variables, and 28 memory accesses
190
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
191
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
192
* $Id: gen_notw_c.ml,v 1.17 2006-02-12 23:34:12 athena Exp $
197
/*
 * n1fv_14, variant produced without the -fma generator flag (see the
 * "Generated by" line preceding this function). The trailing
 * "#endif HAVE_FMA" suggests it is selected by that macro -- the matching
 * #if/#else lines are not visible here, so confirm.
 *
 * Each loop iteration issues 14 LD() reads from xi, at offsets
 * WS(is, 0) .. WS(is, 13), and 14 ST() writes to xo, at offsets
 * WS(os, 0) .. WS(os, 13).
 *
 * NOTE(review): this excerpt appears to be missing lines -- braces, the
 * declarations of i, xi, xo and most V temporaries, and the assignments of
 * intermediates such as T3, Ta, Th, To, Ty, TB, TE, TH. Confirm against
 * the generated original before editing.
 */
static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
199
/* Numeric constants; the loop body reads them through LDK(). */
DVK(KP222520933, +0.222520933956314404288902564496794759466355569);
200
DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
201
DVK(KP623489801, +0.623489801858733530525004884004239810632274731);
202
DVK(KP433883739, +0.433883739117558120475768332848358754609990728);
203
DVK(KP781831482, +0.781831482468029808708444526674057750232334519);
204
DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
210
/* i starts at v and drops by VL per pass; xi and xo advance by VL * ivs
 * and VL * ovs respectively. */
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
211
V T3, Ty, To, TK, Tr, TE, Ta, TJ, Tq, TB, Th, TL, Ts, TH, T1;
213
/* Input loads: even offsets pass &(xi[0]) as the last LD argument, odd
 * offsets pass &(xi[WS(is, 1)]). */
T1 = LD(&(xi[0]), ivs, &(xi[0]));
214
T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
221
Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
222
Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
225
Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
226
Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
239
T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
240
T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
243
T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
244
T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
257
Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
258
Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
261
Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
262
Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
271
/* Outputs 7 and 0 are plain four-term sums. */
ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)]));
272
ST(&(xo[0]), VADD(Ty, VADD(TB, VADD(TH, TE))), ovs, &(xo[0]));
275
/* Remaining twelve outputs are written in pairs: each pair combines one
 * VBYI(...) term and one non-VBYI term, and their VSUB and VADD go to
 * output offsets k and 14 - k. */
Tt = VBYI(VFNMS(LDK(KP781831482), Tr, VFNMS(LDK(KP433883739), Ts, VMUL(LDK(KP974927912), Tq))));
276
Tp = VFMA(LDK(KP623489801), To, VFNMS(LDK(KP900968867), Th, VFNMS(LDK(KP222520933), Ta, T3)));
277
ST(&(xo[WS(os, 5)]), VSUB(Tp, Tt), ovs, &(xo[WS(os, 1)]));
278
ST(&(xo[WS(os, 9)]), VADD(Tp, Tt), ovs, &(xo[WS(os, 1)]));
279
TP = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK))));
280
TQ = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TB, Ty)));
281
ST(&(xo[WS(os, 2)]), VADD(TP, TQ), ovs, &(xo[0]));
282
ST(&(xo[WS(os, 12)]), VSUB(TQ, TP), ovs, &(xo[0]));
286
Tv = VBYI(VFMA(LDK(KP781831482), Tq, VFMA(LDK(KP974927912), Ts, VMUL(LDK(KP433883739), Tr))));
287
Tu = VFMA(LDK(KP623489801), Ta, VFNMS(LDK(KP900968867), To, VFNMS(LDK(KP222520933), Th, T3)));
288
ST(&(xo[WS(os, 13)]), VSUB(Tu, Tv), ovs, &(xo[WS(os, 1)]));
289
ST(&(xo[WS(os, 1)]), VADD(Tu, Tv), ovs, &(xo[WS(os, 1)]));
290
TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ))));
291
TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TH, Ty)));
292
ST(&(xo[WS(os, 6)]), VSUB(TI, TM), ovs, &(xo[0]));
293
ST(&(xo[WS(os, 8)]), VADD(TM, TI), ovs, &(xo[0]));
297
TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL))));
298
TN = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP222520933), TE, VFNMS(LDK(KP900968867), TB, Ty)));
299
ST(&(xo[WS(os, 4)]), VSUB(TN, TO), ovs, &(xo[0]));
300
ST(&(xo[WS(os, 10)]), VADD(TO, TN), ovs, &(xo[0]));
301
Tx = VBYI(VFMA(LDK(KP433883739), Tq, VFNMS(LDK(KP781831482), Ts, VMUL(LDK(KP974927912), Tr))));
302
Tw = VFMA(LDK(KP623489801), Th, VFNMS(LDK(KP222520933), To, VFNMS(LDK(KP900968867), Ta, T3)));
303
ST(&(xo[WS(os, 11)]), VSUB(Tw, Tx), ovs, &(xo[WS(os, 1)]));
304
ST(&(xo[WS(os, 3)]), VADD(Tw, Tx), ovs, &(xo[WS(os, 1)]));
309
/*
 * Descriptor handed to X(kdft_register) together with n1fv_14.
 * The triple 50, 12, 24 repeats the operation counts quoted in the comment
 * preceding the function (50 additions, 12 multiplications, 24 fused
 * multiply/add). The leading 14 presumably is the transform size (it
 * matches the generator's -n 14) -- confirm against kdft_desc's definition.
 * The meaning of &GENUS and the trailing zeros is not visible here.
 */
static const kdft_desc desc = { 14, "n1fv_14", {50, 12, 24, 0}, &GENUS, 0, 0, 0, 0 };
310
/*
 * Registration entry point: passes the n1fv_14 function and its descriptor
 * desc to X(kdft_register) for planner p.
 * NOTE(review): the closing brace is not present in this excerpt.
 */
void X(codelet_n1fv_14) (planner *p) {
311
X(kdft_register) (p, n1fv_14, &desc);
314
#endif /* HAVE_FMA */