2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jul 2 16:15:53 EDT 2006 */
24
#include "codelet-rdft.h"
28
/* Generated by: ../../../genfft/gen_hc2r -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name hc2r_12 -include hc2r.h */
31
* This function contains 38 FP additions, 16 FP multiplications,
32
* (or, 22 additions, 0 multiplications, 16 fused multiply/add),
33
* 31 stack variables, and 24 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
44
/*
 * hc2r_12 -- machine-generated size-12 kernel (see the gen_hc2r banner
 * earlier in the file: -fma ... -n 12 -name hc2r_12).
 *
 * Parameters, as used by the visible code:
 *   ri, ii   input pointers; each is advanced by ivs after every iteration.
 *   O        output pointer; advanced by ovs after every iteration and
 *            written at O[0] and O[WS(os, k)] for k = 1..11.
 *   ris, iis input strides; in the visible lines they appear only inside
 *            MAKE_VOLATILE_STRIDE().
 *   os       output stride, applied through WS(os, k).
 *   v        number of loop iterations.
 *   ivs, ovs per-iteration increments for the input and output pointers.
 *
 * NOTE(review): this view of the function is incomplete.  The bare integer
 * lines look like line-number residue from extraction, the braces are
 * missing, `i`, `Tr` and `Ts` have no visible declaration, and no loads from
 * ri/ii are visible.  Presumably this is the half-complex-to-real transform
 * implied by the generator name -- confirm against the pristine generated
 * file before relying on any of the arithmetic below.
 */
static void hc2r_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
46
/* Named constants; 1.7320508075688772... is numerically sqrt(3). */
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
47
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
49
/* One pass per iteration: v counts down while ri/ii step by ivs and O by ovs. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
52
E Tz, Te, Tn, Tk, Tc, Tw, Ty, Th, T4, T3, Td, T5;
54
E T8, Tu, Tl, Tm, Tb, T9, Ta, T1, T2, Tv;
66
/*
 * FMA and FNMS are macros defined outside this view; presumably fused
 * multiply-add forms -- TODO confirm their exact operand order.  The 16
 * FMA/FNMS uses visible in this function match the "16 fused multiply/add"
 * count in the generator banner.
 */
Tk = FNMS(KP2_000000000, T8, Tb);
71
Ty = FMA(KP2_000000000, Tu, Tv);
74
T3 = FMA(KP2_000000000, T2, T1);
79
E To, Tp, Tf, Tg, T6, TA, TC;
80
/* The same operand pairs are combined with both FMA and FNMS below. */
To = FMA(KP1_732050807, Tn, Tk);
81
Ts = FNMS(KP1_732050807, Tn, Tk);
82
Tp = FNMS(KP1_732050807, Te, Td);
83
Tf = FMA(KP1_732050807, Te, Td);
85
T6 = FMA(KP2_000000000, T5, T4);
86
TA = FMA(KP1_732050807, Tz, Ty);
87
TC = FNMS(KP1_732050807, Tz, Ty);
89
E Tt, T7, Ti, Tq, Tj, TB, Tx;
92
Ti = FNMS(KP1_732050807, Th, Tg);
93
Tq = FMA(KP1_732050807, Th, Tg);
94
/* Output stores: all twelve output slots (0 and WS(os, 1..11)) are written below. */
O[0] = FMA(KP2_000000000, Tc, T7);
95
O[WS(os, 6)] = FNMS(KP2_000000000, Tc, T7);
100
O[WS(os, 11)] = TB + TC;
101
O[WS(os, 5)] = TB - TC;
102
O[WS(os, 8)] = Tj - To;
103
O[WS(os, 2)] = Tj + To;
104
O[WS(os, 7)] = Tx + TA;
105
O[WS(os, 1)] = Tx - TA;
106
O[WS(os, 9)] = FNMS(KP2_000000000, Tw, Tt);
107
O[WS(os, 3)] = FMA(KP2_000000000, Tw, Tt);
111
O[WS(os, 4)] = Tr - Ts;
112
O[WS(os, 10)] = Tr + Ts;
116
/*
 * Descriptor handed to the registration call for this variant: size 12,
 * name "hc2r_12".  The {22, 0, 16, 0} tuple matches the operation counts in
 * the generator banner (22 additions, 0 multiplications, 16 fused
 * multiply/add).  The meaning of its fourth entry, of GENUS and of the
 * trailing zeros is defined outside this view.
 */
static const khc2r_desc desc = { 12, "hc2r_12", {22, 0, 16, 0}, &GENUS, 0, 0, 0, 0, 0 };
118
/*
 * Registration entry point: passes the planner p, the hc2r_12 kernel and its
 * descriptor to X(khc2r_register).  X() is a macro defined outside this view
 * (presumably a symbol-name wrapper -- TODO confirm).
 * NOTE(review): the closing brace of this function is not visible here.
 */
void X(codelet_hc2r_12) (planner *p) {
119
X(khc2r_register) (p, hc2r_12, &desc);
124
/* Generated by: ../../../genfft/gen_hc2r -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name hc2r_12 -include hc2r.h */
127
* This function contains 38 FP additions, 10 FP multiplications,
128
* (or, 34 additions, 6 multiplications, 4 fused multiply/add),
129
* 25 stack variables, and 24 memory accesses
133
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
134
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
135
* $Id: gen_hc2r.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
140
/*
 * hc2r_12 -- second machine-generated size-12 kernel with the same signature
 * (see the gen_hc2r banner just before it, which lacks the -fma flag).  The
 * trailing "#endif (HAVE_FMA)" line in the file suggests the two variants are
 * selected by a preprocessor conditional whose opening lines are not visible
 * -- TODO confirm.
 *
 * Parameters, as used by the visible code:
 *   ri, ii   input pointers; each is advanced by ivs after every iteration.
 *   O        output pointer; advanced by ovs after every iteration and
 *            written through O[WS(os, k)].
 *   ris, iis input strides; in the visible lines they appear only inside
 *            MAKE_VOLATILE_STRIDE().
 *   os       output stride, applied through WS(os, k).
 *   v        number of loop iterations.
 *   ivs, ovs per-iteration increments for the input and output pointers.
 *
 * NOTE(review): this view of the function is incomplete.  The bare integer
 * lines look like line-number residue from extraction, the braces are
 * missing, many temporaries (e.g. `i`, T7, T9, Tc, Tl, Tq, Tv) have no
 * visible declaration or assignment, and the store to O[0] is not visible.
 * Confirm against the pristine generated file before relying on the
 * arithmetic below.
 */
static void hc2r_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
142
/* Named constants; 1.7320508075688772... is numerically sqrt(3). */
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
143
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
145
/* One pass per iteration: v counts down while ri/ii step by ivs and O by ovs. */
for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
146
E T8, Tb, Tm, TA, Tw, Tx, Tp, TB, T3, Tr, Tg, T6, Ts, Tk;
153
/*
 * FMS and FMA are macros defined outside this view -- TODO confirm their
 * operand order.  The visible lines hold four FMA/FMS uses and six plain
 * multiplications, matching the "6 multiplications, 4 fused multiply/add"
 * counts in the generator banner.
 */
Tm = FMS(KP2_000000000, T8, Tb);
154
TA = KP1_732050807 * (T9 - Ta);
159
Tp = KP1_732050807 * (Tn - To);
160
TB = FMA(KP2_000000000, Tw, Tx);
163
E Tf, T1, T2, Td, Te;
165
Tf = KP1_732050807 * Te;
169
T3 = FMA(KP2_000000000, T2, T1);
174
E Tj, T4, T5, Th, Ti;
176
Tj = KP1_732050807 * Ti;
180
T6 = FMA(KP2_000000000, T5, T4);
187
/* Output stores: each pair below writes the difference and the sum of the same two temporaries. */
Tc = KP2_000000000 * (T8 + Tb);
188
O[WS(os, 6)] = T7 - Tc;
194
O[WS(os, 2)] = Tl - Tq;
195
O[WS(os, 8)] = Tl + Tq;
198
O[WS(os, 5)] = TD - TE;
199
O[WS(os, 11)] = TD + TE;
203
O[WS(os, 1)] = Tz - TC;
204
O[WS(os, 7)] = Tz + TC;
208
Ty = KP2_000000000 * (Tw - Tx);
209
O[WS(os, 9)] = Tv - Ty;
210
O[WS(os, 3)] = Tv + Ty;
213
O[WS(os, 10)] = Tt - Tu;
214
O[WS(os, 4)] = Tt + Tu;
220
/*
 * Descriptor handed to the registration call for this variant: size 12,
 * name "hc2r_12".  The {34, 6, 4, 0} tuple matches the operation counts in
 * the generator banner (34 additions, 6 multiplications, 4 fused
 * multiply/add).  The meaning of its fourth entry, of GENUS and of the
 * trailing zeros is defined outside this view.
 */
static const khc2r_desc desc = { 12, "hc2r_12", {34, 6, 4, 0}, &GENUS, 0, 0, 0, 0, 0 };
222
/*
 * Registration entry point: passes the planner p, the hc2r_12 kernel and its
 * descriptor to X(khc2r_register).  X() is a macro defined outside this view
 * (presumably a symbol-name wrapper -- TODO confirm).
 * NOTE(review): the closing brace of this function is not visible here.
 */
void X(codelet_hc2r_12) (planner *p) {
223
X(khc2r_register) (p, hc2r_12, &desc);
226
#endif /* HAVE_FMA */