2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jul 2 16:39:49 EDT 2006 */
24
#include "codelet-rdft.h"
28
/* Generated by: ../../../genfft/gen_hc2r -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name hc2rIII_12 -dft-III -include hc2rIII.h */
31
* This function contains 42 FP additions, 20 FP multiplications,
32
* (or, 30 additions, 8 multiplications, 12 fused multiply/add),
33
* 37 stack variables, and 24 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
44
/*
 * hc2rIII_12 (FMA build): size-12 kernel emitted by gen_hc2r with
 * -fma and -dft-III.
 *
 * ri, ii       read-only input arrays; ris/iis are their strides.
 * O            output array; os is its stride.
 * v            number of loop iterations.
 * ivs, ovs     per-iteration pointer increments: ri and ii advance by ivs,
 *              O advances by ovs.
 *
 * Each iteration stores twelve results, O[0] and O[WS(os, 1)] through
 * O[WS(os, 11)].
 *
 * NOTE(review): this excerpt is missing lines. The declaration of i, the
 * loads from ri/ii, the definitions of most temporaries and the closing
 * braces are not visible here; confirm against the full generated file.
 * NOTE(review): presumably FMA(a, b, c) is a*b + c and FNMS(a, b, c) is
 * c - a*b, as defined by the project's codelet headers -- confirm.
 */
static void hc2rIII_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
46
/* Named numeric constants used below: 1/sqrt(2), sqrt(2), sqrt(3) and 2. */
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
47
DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
48
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
49
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
51
/* One pass per iteration: count i down from v, stepping all three pointers. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
54
/* Temporaries; E is the project's scalar working type. */
E Tx, T6, Te, Tb, T5, Tw, Ts, To, Th, Ti, T9, TA;
56
E T1, Tq, Tc, Td, T4, T2, T3, T7, T8, Tr;
68
/* Factor-of-two combinations of input-derived temporaries. */
Tb = FNMS(KP2_000000000, T1, T4);
71
Tw = FMA(KP2_000000000, Tq, Tr);
86
/* Tl and Tf use the same operands (Te, Tb) and differ only in FNMS vs FMA. */
Tl = FNMS(KP1_732050807, Te, Tb);
87
Tf = FMA(KP1_732050807, Te, Tb);
90
Tg = FNMS(KP2_000000000, T6, T9);
94
/* Ty and TE: FMA/FNMS pair over (Tx, Tw). */
Ty = FMA(KP1_732050807, Tx, Tw);
95
TE = FNMS(KP1_732050807, Tx, Tw);
96
Tz = FMA(KP2_000000000, To, Tn);
98
/* Tm and Tk: FMA/FNMS pair over (Tj, Tg). */
Tm = FMA(KP1_732050807, Tj, Tg);
99
Tk = FNMS(KP1_732050807, Tj, Tg);
101
/* Outputs 0 and 6 are sums scaled by 2. */
O[0] = KP2_000000000 * (T5 + Ta);
103
O[WS(os, 6)] = KP2_000000000 * (Ts + Tp);
105
/* TD and TB: FMA/FNMS pair over (TA, Tz). */
TD = FMA(KP1_732050807, TA, Tz);
106
TB = FNMS(KP1_732050807, TA, Tz);
107
/* Outputs 9 and 3 are the sum and difference of Tt and Tu, scaled by sqrt(2). */
O[WS(os, 9)] = KP1_414213562 * (Tu + Tt);
108
O[WS(os, 3)] = KP1_414213562 * (Tt - Tu);
111
/* Outputs 4 and 10 need no extra scaling in this build. */
O[WS(os, 4)] = Tf + Tk;
113
O[WS(os, 10)] = TB - Ty;
115
/* Outputs 7 and 1 are the sum and difference of Tv and TC, scaled by 1/sqrt(2). */
O[WS(os, 7)] = KP707106781 * (Tv + TC);
116
O[WS(os, 1)] = KP707106781 * (Tv - TC);
118
/* Output 8 is the negated sum. */
O[WS(os, 8)] = -(Tl + Tm);
121
O[WS(os, 2)] = TD - TE;
123
/* Outputs 11 and 5 are the difference and sum of TF and TG, scaled by 1/sqrt(2). */
O[WS(os, 11)] = KP707106781 * (TF - TG);
124
O[WS(os, 5)] = KP707106781 * (TF + TG);
128
/*
 * Descriptor handed to the registration call for the FMA build of
 * hc2rIII_12.
 * NOTE(review): the field meanings come from khc2r_desc, which is declared
 * elsewhere; the per-field notes below are best guesses to be confirmed.
 */
static const khc2r_desc desc = {
     12,              /* presumably the transform size (generator ran with -n 12) */
     "hc2rIII_12",    /* codelet name */
     {30, 8, 12, 0},  /* first three match the generator summary: 30 adds, 8 muls, 12 fused multiply/adds */
     &GENUS,
     0,
     0,
     0,
     0,
     0
};
130
/*
 * Registration entry point for the FMA build: passes the hc2rIII_12 kernel
 * and its descriptor to X(khc2rIII_register) for planner p.
 * NOTE(review): X() is presumably the project's name-mangling macro -- confirm.
 */
void X(codelet_hc2rIII_12) (planner *p) {
131
X(khc2rIII_register) (p, hc2rIII_12, &desc);
136
/* Generated by: ../../../genfft/gen_hc2r -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name hc2rIII_12 -dft-III -include hc2rIII.h */
139
* This function contains 42 FP additions, 20 FP multiplications,
140
* (or, 38 additions, 16 multiplications, 4 fused multiply/add),
141
* 25 stack variables, and 24 memory accesses
145
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
146
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
147
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
152
/*
 * hc2rIII_12 (non-FMA build): the same size-12 kernel, emitted by gen_hc2r
 * without -fma. Products are written out explicitly and the outputs carry
 * explicit factors of 2 and sqrt(2).
 *
 * ri, ii       read-only input arrays; ris/iis are their strides.
 * O            output array; os is its stride.
 * v            number of loop iterations.
 * ivs, ovs     per-iteration pointer increments: ri and ii advance by ivs,
 *              O advances by ovs.
 *
 * Each iteration stores twelve results, O[0] and O[WS(os, 1)] through
 * O[WS(os, 11)].
 *
 * NOTE(review): this excerpt is missing lines. The declaration of i, the
 * loads from ri/ii, the definitions of several temporaries and the braces
 * that delimit the inner scopes are not visible here; confirm against the
 * full generated file.
 * NOTE(review): presumably FMA(a, b, c) is a*b + c and FNMS(a, b, c) is
 * c - a*b, as defined by the project's codelet headers -- confirm.
 */
static void hc2rIII_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
154
/* Named numeric constants used below: sqrt(2), 2, 1/2 and sqrt(3)/2. */
DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
155
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
156
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
157
DK(KP866025403, +0.866025403784438646763723170752936183471402627);
159
/* One pass per iteration: count i down from v, stepping all three pointers. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
160
/* Temporaries shared by all output groups; E is the project's scalar working type. */
E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
168
/* sqrt(3)/2 times a difference, followed by a half-weighted combination. */
Tw = KP866025403 * (T2 - T3);
169
Tb = FNMS(KP500000000, T4, T1);
177
Te = KP866025403 * (Tc + Td);
178
Tx = FMA(KP500000000, Tr, Tq);
188
/* Same pattern for the second set of inputs. */
TA = KP866025403 * (T7 - T8);
189
Tg = FNMS(KP500000000, T9, T6);
197
Tj = KP866025403 * (Th + Ti);
198
Tz = FMA(KP500000000, Tn, To);
201
/* First output group: 0 and 6 scaled by 2; 3 and 9 scaled by sqrt(2). */
O[0] = KP2_000000000 * (T5 + Ta);
202
O[WS(os, 6)] = KP2_000000000 * (Ts + Tp);
205
O[WS(os, 3)] = KP1_414213562 * (Tt - Tu);
206
O[WS(os, 9)] = KP1_414213562 * (Tu + Tt);
208
/* Second output group (4, 10, 1, 7) with its own temporaries. */
E Tf, Tk, Tv, Ty, TB, TC;
215
/* Output 4 is the negated, doubled sum. */
O[WS(os, 4)] = -(KP2_000000000 * (Tf + Tk));
216
O[WS(os, 10)] = KP2_000000000 * (TB - Ty);
217
O[WS(os, 1)] = KP1_414213562 * (Tv - TC);
218
O[WS(os, 7)] = KP1_414213562 * (Tv + TC);
221
/* Third output group (8, 5, 2, 11) with its own temporaries. */
E Tl, Tm, TF, TD, TE, TG;
228
O[WS(os, 8)] = KP2_000000000 * (Tl + Tm);
229
O[WS(os, 5)] = KP1_414213562 * (TF + TG);
230
O[WS(os, 2)] = KP2_000000000 * (TD - TE);
231
O[WS(os, 11)] = KP1_414213562 * (TF - TG);
236
/*
 * Descriptor handed to the registration call for the non-FMA build of
 * hc2rIII_12.
 * NOTE(review): the field meanings come from khc2r_desc, which is declared
 * elsewhere; the per-field notes below are best guesses to be confirmed.
 */
static const khc2r_desc desc = {
     12,              /* presumably the transform size (generator ran with -n 12) */
     "hc2rIII_12",    /* codelet name */
     {38, 16, 4, 0},  /* first three match the generator summary: 38 adds, 16 muls, 4 fused multiply/adds */
     &GENUS,
     0,
     0,
     0,
     0,
     0
};
238
/*
 * Registration entry point for the non-FMA build: passes the hc2rIII_12
 * kernel and its descriptor to X(khc2rIII_register) for planner p.
 * NOTE(review): X() is presumably the project's name-mangling macro -- confirm.
 */
void X(codelet_hc2rIII_12) (planner *p) {
239
X(khc2rIII_register) (p, hc2rIII_12, &desc);
242
#endif /* HAVE_FMA */