2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jul 2 16:39:53 EDT 2006 */
24
#include "codelet-rdft.h"
28
/* Generated by: ../../../genfft/gen_hc2r -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name hc2rIII_15 -dft-III -include hc2rIII.h */
31
* This function contains 64 FP additions, 43 FP multiplications,
32
* (or, 21 additions, 0 multiplications, 43 fused multiply/add),
33
* 48 stack variables, and 30 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
44
/*
 * hc2rIII_15, fused multiply-add schedule: size-15 codelet emitted by
 * genfft's gen_hc2r (see the "Generated by" line: -fma, -n 15, -dft-III,
 * -sign 1).
 *
 * Each pass of the loop stores the 15 outputs O[0] and O[WS(os, 1)] ..
 * O[WS(os, 14)], then advances ri and ii by ivs and O by ovs.  The loop
 * counts i down from v to 1.
 *
 * FMA, FNMS, FMS, DK, E, WS and MAKE_VOLATILE_STRIDE are defined outside
 * this file.  Presumably FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c -- TODO confirm against the included header.
 *
 * NOTE(review): this copy appears to be missing lines from the generator
 * output: there are no braces around the function body, no declaration of
 * `i`, no visible reads of ri/ii, and several temporaries that are read
 * below (e.g. Ti, Tj, T1, T4, Te, T11, T12) are never assigned here.
 * Restore the function from the generated file; do not reconstruct it by
 * hand.
 */
static void hc2rIII_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
46
/* Floating-point constants; each name spells out the leading digits of its value. */
DK(KP951056516, +0.951056516295153572116439333379382143405698634);
47
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
48
DK(KP559016994, +0.559016994374947424102293417182819058860154590);
49
DK(KP250000000, +0.250000000000000000000000000000000000000000000);
50
DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
51
DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
52
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
53
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
54
DK(KP618033988, +0.618033988749894848204586834365638117720309180);
56
/* One pass per iteration; the stride macros are re-evaluated in the loop's increment clause. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
57
E TX, Tv, To, TW, Tl, Tx, Ty, Tw;
59
E TA, Tk, T6, T5, Tz, Th, TI, Tp, Tu, TK, TR, Tn, Td, Tq;
67
/* TA/Tk combine the same pair (Ti, Tj) with the roles of the two operands swapped. */
TA = FNMS(KP618033988, Ti, Tj);
68
Tk = FMA(KP618033988, Tj, Ti);
79
Tf = FNMS(KP500000000, T4, T1);
80
T5 = FMA(KP2_000000000, T4, T1);
83
Tz = FNMS(KP1_118033988, Tg, Tf);
84
Th = FMA(KP1_118033988, Tg, Tf);
97
TX = FMA(KP618033988, TP, TQ);
98
TR = FNMS(KP618033988, TQ, TP);
106
E TB, TF, TO, TG, TE;
108
E Tm, T11, TN, TD, TM, T12, TC;
109
TB = FNMS(KP1_902113032, TA, Tz);
110
TF = FMA(KP1_902113032, TA, Tz);
113
Tm = FNMS(KP250000000, Td, T6);
117
O[0] = FMA(KP2_000000000, Te, T5);
121
Tv = FMA(KP618033988, Tu, Tr);
122
TD = FNMS(KP618033988, Tr, Tu);
123
TM = FNMS(KP250000000, TL, TI);
126
TC = FNMS(KP559016994, Tn, Tm);
127
To = FMA(KP559016994, Tn, Tm);
128
/* Outputs 5 and 10 are both formed from the pair (T12, T11). */
O[WS(os, 5)] = FMA(KP1_732050807, T12, T11);
129
O[WS(os, 10)] = FMS(KP1_732050807, T12, T11);
130
TW = FMA(KP559016994, TN, TM);
131
TO = FNMS(KP559016994, TN, TM);
132
TG = FNMS(KP951056516, TD, TC);
133
TE = FMA(KP951056516, TD, TC);
135
Tl = FNMS(KP1_902113032, Tk, Th);
136
Tx = FMA(KP1_902113032, Tk, Th);
139
TS = FMA(KP951056516, TR, TO);
140
TU = FNMS(KP951056516, TR, TO);
142
/* Outputs 3, 13 and 7 are stored negated; their partners 12, 8 and 2 are not. */
O[WS(os, 3)] = -(FMA(KP2_000000000, TG, TF));
144
O[WS(os, 12)] = FMA(KP2_000000000, TE, TB);
145
O[WS(os, 13)] = -(FMA(KP1_732050807, TU, TT));
146
O[WS(os, 8)] = FNMS(KP1_732050807, TU, TT);
147
O[WS(os, 7)] = -(FMA(KP1_732050807, TS, TH));
148
O[WS(os, 2)] = FNMS(KP1_732050807, TS, TH);
152
Ty = FNMS(KP951056516, Tv, To);
153
Tw = FMA(KP951056516, Tv, To);
156
T10 = FMA(KP951056516, TX, TW);
157
TY = FNMS(KP951056516, TX, TW);
159
/* Remaining outputs: 6, 9 (negated), 11, 1, 4 and 14. */
O[WS(os, 6)] = FMA(KP2_000000000, Ty, Tx);
161
O[WS(os, 9)] = -(FMA(KP2_000000000, Tw, Tl));
162
O[WS(os, 11)] = FMA(KP1_732050807, TY, TV);
163
O[WS(os, 1)] = FNMS(KP1_732050807, TY, TV);
164
O[WS(os, 4)] = FMA(KP1_732050807, T10, TZ);
165
O[WS(os, 14)] = FNMS(KP1_732050807, T10, TZ);
170
/*
 * Descriptor handed to the planner for the FMA variant: size 15, codelet
 * name "hc2rIII_15", and the triple 21 / 0 / 43, which matches the
 * additions / multiplications / fused multiply-adds reported in the
 * generator's operation-count comment.  GENUS is defined outside this file
 * (presumably by the generator's -include header -- confirm); the meaning
 * of the remaining zero fields is fixed by khc2r_desc, which is not
 * visible here.
 */
static const khc2r_desc desc = { 15, "hc2rIII_15", {21, 0, 43, 0}, &GENUS, 0, 0, 0, 0, 0 };
172
/*
 * Registration entry point for the FMA variant: passes the hc2rIII_15
 * kernel and its descriptor to X(khc2rIII_register) on planner p.
 * NOTE(review): the closing brace of this function is not present in this
 * copy of the file.
 */
void X(codelet_hc2rIII_15) (planner *p) {
173
X(khc2rIII_register) (p, hc2rIII_15, &desc);
178
/* Generated by: ../../../genfft/gen_hc2r -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name hc2rIII_15 -dft-III -include hc2rIII.h */
181
* This function contains 64 FP additions, 26 FP multiplications,
182
* (or, 49 additions, 11 multiplications, 15 fused multiply/add),
183
* 47 stack variables, and 30 memory accesses
187
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
188
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
189
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
194
/*
 * hc2rIII_15, schedule generated without -fma: size-15 codelet emitted by
 * genfft's gen_hc2r (see the "Generated by" line: -n 15, -dft-III,
 * -sign 1).  It has the same name and signature as the FMA variant earlier
 * in the file; the trailing #endif marked HAVE_FMA suggests the two are
 * selected by a preprocessor conditional whose opening lines are not
 * visible here -- confirm.
 *
 * Each pass of the loop stores the 15 outputs O[0] and O[WS(os, 1)] ..
 * O[WS(os, 14)], then advances ri and ii by ivs and O by ovs.  The loop
 * counts i down from v to 1.
 *
 * FMA, FNMS, FMS, DK, E, WS and MAKE_VOLATILE_STRIDE are defined outside
 * this file.  Presumably FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c -- TODO confirm against the included header.
 *
 * NOTE(review): this copy appears to be missing lines from the generator
 * output: there are no braces around the function body, no declaration of
 * `i`, no visible reads of ri/ii, and several temporaries that are read
 * below (e.g. Tt, Tu, T9, Tc, TI, TJ, Te, T11) are never assigned here.
 * Restore the function from the generated file; do not reconstruct it by
 * hand.
 */
static void hc2rIII_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
196
/* Floating-point constants; each name spells out the leading digits of its value. */
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
197
DK(KP433012701, +0.433012701892219323381861585376468091735701313);
198
DK(KP968245836, +0.968245836551854221294816349945599902708230426);
199
DK(KP587785252, +0.587785252292473129168705954639072768597652438);
200
DK(KP951056516, +0.951056516295153572116439333379382143405698634);
201
DK(KP250000000, +0.250000000000000000000000000000000000000000000);
202
DK(KP1_647278207, +1.647278207092663851754840078556380006059321028);
203
DK(KP1_018073920, +1.018073920910254366901961726787815297021466329);
204
DK(KP559016994, +0.559016994374947424102293417182819058860154590);
205
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
206
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
207
DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
208
DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
209
DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
211
/* One pass per iteration; the stride macros are re-evaluated in the loop's increment clause. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
212
E Tv, TD, T5, Ts, TC, T6, Tf, TW, TK, Td, Tg, TP, To, TN, TA;
213
E TO, TQ, Tt, Tu, T12, Te, T11;
216
/* Tv/TD apply the constant pair (KP1_902113032, KP1_175570504) to (Tt, Tu) with the constants swapped and one sign flipped. */
Tv = FMA(KP1_902113032, Tt, KP1_175570504 * Tu);
217
TD = FNMS(KP1_175570504, Tt, KP1_902113032 * Tu);
219
E T1, T4, Tq, T2, T3, Tr;
224
Tq = KP1_118033988 * (T2 - T3);
225
T5 = FMA(KP2_000000000, T4, T1);
226
Tr = FNMS(KP500000000, T4, T1);
244
Tf = KP559016994 * (T9 - Tc);
245
TW = FNMS(KP1_647278207, TJ, KP1_018073920 * TI);
246
TK = FMA(KP1_647278207, TI, KP1_018073920 * TJ);
248
Tg = FNMS(KP250000000, Td, T6);
264
To = FMA(KP951056516, Tk, KP587785252 * Tn);
265
TN = KP968245836 * (TL - TM);
266
TA = FNMS(KP587785252, Tk, KP951056516 * Tn);
268
TQ = FMA(KP433012701, TO, KP1_732050807 * TP);
270
T12 = KP1_732050807 * (TP - TO);
273
/* Outputs 0, 10 and 5; 10 and 5 are the difference and sum of T12 and T11. */
O[0] = FMA(KP2_000000000, Te, T5);
274
O[WS(os, 10)] = T12 - T11;
275
O[WS(os, 5)] = T11 + T12;
277
E TE, TG, TB, TF, TY, T10, Tz, TX, TV, TZ;
286
/* Outputs 12, 3, 2, 7, 8 and 13 (13 is stored negated). */
O[WS(os, 12)] = FMA(KP2_000000000, TB, TE);
287
O[WS(os, 3)] = FMS(KP2_000000000, TF, TG);
289
O[WS(os, 2)] = TV + TY;
290
O[WS(os, 7)] = TY - TV;
292
O[WS(os, 8)] = TZ - T10;
293
O[WS(os, 13)] = -(TZ + T10);
296
E Tw, Ty, Tp, Tx, TS, TU, Th, TR, TH, TT;
305
/* Outputs 9 (stored negated), 6, 11, 1, 4 and 14. */
O[WS(os, 9)] = -(FMA(KP2_000000000, Tp, Tw));
306
O[WS(os, 6)] = FMA(KP2_000000000, Tx, Ty);
308
O[WS(os, 11)] = TH - TS;
309
O[WS(os, 1)] = TH + TS;
311
O[WS(os, 4)] = TT - TU;
312
O[WS(os, 14)] = TT + TU;
317
/*
 * Descriptor handed to the planner for the non-FMA variant: size 15,
 * codelet name "hc2rIII_15", and the triple 49 / 11 / 15, which matches
 * the additions / multiplications / fused multiply-adds reported in the
 * generator's operation-count comment.  GENUS is defined outside this file
 * (presumably by the generator's -include header -- confirm); the meaning
 * of the remaining zero fields is fixed by khc2r_desc, which is not
 * visible here.
 */
static const khc2r_desc desc = { 15, "hc2rIII_15", {49, 11, 15, 0}, &GENUS, 0, 0, 0, 0, 0 };
319
/*
 * Registration entry point for the non-FMA variant: passes the hc2rIII_15
 * kernel and its descriptor to X(khc2rIII_register) on planner p.
 * NOTE(review): the closing brace of this function is not present in this
 * copy of the file.
 */
void X(codelet_hc2rIII_15) (planner *p) {
320
X(khc2rIII_register) (p, hc2rIII_15, &desc);
323
#endif /* HAVE_FMA */