2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jul 2 16:31:02 EDT 2006 */
24
#include "codelet-rdft.h"
28
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -dif -name hb_7 -include hb.h */
31
* This function contains 72 FP additions, 66 FP multiplications,
32
* (or, 18 additions, 12 multiplications, 54 fused multiply/add),
33
* 66 stack variables, and 28 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
44
/*
 * hb_7, fused-multiply-add variant.
 *
 * Per the generator command line recorded above, this codelet was emitted by
 * gen_hc2hc with "-n 7 -dif -sign 1 -fma".
 *
 * Parameters (as used by the visible code):
 *   rio, iio - arrays accessed at strided offsets: rio[WS(ios, k)] and
 *              iio[-WS(ios, k)] for k = 0..6.  The visible loads read iio,
 *              and results are stored back into both arrays, so at least iio
 *              is updated in place.
 *   W        - advanced by 12 elements per loop iteration; presumably the
 *              twiddle-factor table (its loads are not visible in this
 *              excerpt -- TODO confirm).
 *   ios      - stride passed to WS() to form the element offsets.
 *   m        - sets the loop trip count (i runs from m - 2 down in steps of 2).
 *   dist     - per-iteration step: rio moves forward by dist, iio backward.
 *
 * NOTE(review): this excerpt is incomplete -- braces, several declarations
 * and loads, and the return statement are not visible here.  FMA/FNMS, DK,
 * WS, E, R, INT and MAKE_VOLATILE_STRIDE are defined outside this excerpt.
 */
static const R *hb_7(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
46
/* Named numeric constants consumed by the FMA/FNMS expressions in the loop body. */
DK(KP974927912, +0.974927912181823607018131682993931217232785801);
47
DK(KP900968867, +0.900968867902419126236102319507445051165919162);
48
DK(KP801937735, +0.801937735804838252472204639014890102331838324);
49
DK(KP692021471, +0.692021471630095869627814897002069140197260599);
50
DK(KP356895867, +0.356895867892209443894399510021300583399127187);
51
DK(KP554958132, +0.554958132087371191422194871006410481067288862);
53
/* One pass per iteration: i counts down by 2 from m - 2 while positive; each
 * step moves rio forward by dist, iio backward by dist, and W forward by 12. */
for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 12, MAKE_VOLATILE_STRIDE(ios)) {
54
E T14, T13, T17, T15, T16, T18;
56
E T1, Tt, TB, T4, TC, Ta, TD, TA, T7, Tl, T1l, TZ, T1b, Th, Tw;
57
E Td, TP, Tm, Ti, Tj, T5, T6;
63
/* Input loads from iio at negative strided offsets (other loads are not
 * visible in this excerpt). */
T3 = iio[-WS(ios, 6)];
65
T9 = iio[-WS(ios, 4)];
71
T6 = iio[-WS(ios, 5)];
74
E Tf, T1a, Tg, Tc, TO;
75
Tf = iio[-WS(ios, 2)];
76
TD = FMA(KP554958132, TC, TB);
77
T1a = FNMS(KP356895867, T4, Ta);
81
Tl = iio[-WS(ios, 3)];
82
T1l = FMA(KP554958132, TA, TC);
83
TZ = FNMS(KP554958132, TB, TA);
84
Tc = FNMS(KP356895867, T7, T4);
85
TO = FNMS(KP356895867, Ta, T7);
86
T1b = FNMS(KP692021471, T1a, T7);
89
Td = FNMS(KP692021471, Tc, Ta);
90
TP = FNMS(KP692021471, TO, T4);
92
Ti = iio[-WS(ios, 1)];
96
E Tu, Tv, TX, T1e, T1j, TS, TL, TH, TK, TJ, TM, Tn;
97
/* rio[0] receives the plain sum of T1, T4, T7 and Ta. */
rio[0] = T1 + T4 + T7 + Ta;
104
TW = FNMS(KP356895867, Tu, Tw);
105
T1d = FMA(KP554958132, Th, Tn);
106
T1i = FNMS(KP356895867, Tv, Tu);
109
Tx = FNMS(KP356895867, Tw, Tv);
110
TR = FNMS(KP554958132, Tk, Th);
111
To = FMA(KP554958132, Tn, Tk);
112
TX = FNMS(KP692021471, TW, Tv);
113
T1e = FNMS(KP801937735, T1d, Tk);
114
T1j = FNMS(KP692021471, T1i, Tw);
115
Ty = FNMS(KP692021471, Tx, Tu);
116
TS = FNMS(KP801937735, TR, Tn);
119
Tp = FMA(KP801937735, To, Th);
120
Te = FNMS(KP900968867, Td, T1);
121
Tz = FNMS(KP900968867, Ty, Tt);
122
TE = FMA(KP801937735, TD, TA);
124
E Tb, Tq, TI, TF, Ts, Tr, TG;
126
Tq = FNMS(KP974927912, Tp, Te);
127
TI = FMA(KP974927912, Tp, Te);
128
TF = FMA(KP974927912, TE, Tz);
129
TL = FNMS(KP974927912, TE, Tz);
135
/* The remaining stores come in pairs rio[WS(ios, k)] / iio[-WS(ios, 6 - k)];
 * this pair is k = 1.  The operands Ts, Tr, Tb, TG are assigned on lines not
 * visible in this excerpt. */
rio[WS(ios, 1)] = FNMS(Ts, TF, Tr);
138
iio[-WS(ios, 5)] = FMA(Tb, TF, TG);
143
/* iio[-WS(ios, 6)] receives the plain sum of Tt, Tu, Tv and Tw. */
iio[-WS(ios, 6)] = Tt + Tu + Tv + Tw;
144
/* Store pair k = 6. */
rio[WS(ios, 6)] = FNMS(TK, TL, TJ);
145
iio[0] = FMA(TH, TL, TM);
147
E T1t, T1p, T1s, T1r, T1u, T1c, T1k, T1m;
148
T1c = FNMS(KP900968867, T1b, T1);
149
T1k = FNMS(KP900968867, T1j, Tt);
150
T1m = FNMS(KP801937735, T1l, TB);
152
E T19, T1f, T1q, T1n, T1h, T1g, T1o;
154
T1f = FNMS(KP974927912, T1e, T1c);
155
T1q = FMA(KP974927912, T1e, T1c);
156
T1n = FMA(KP974927912, T1m, T1k);
157
T1t = FNMS(KP974927912, T1m, T1k);
163
/* Store pair k = 2. */
rio[WS(ios, 2)] = FNMS(T1h, T1n, T1g);
166
iio[-WS(ios, 4)] = FMA(T19, T1n, T1o);
170
TQ = FNMS(KP900968867, TP, T1);
171
/* Store pair k = 5. */
rio[WS(ios, 5)] = FNMS(T1s, T1t, T1r);
172
iio[-WS(ios, 1)] = FMA(T1p, T1t, T1u);
173
TY = FNMS(KP900968867, TX, Tt);
174
T14 = FMA(KP974927912, TS, TQ);
175
TT = FNMS(KP974927912, TS, TQ);
176
T10 = FNMS(KP801937735, TZ, TC);
178
E TN, TV, T11, TU, T12;
182
T17 = FNMS(KP974927912, T10, TY);
183
T11 = FMA(KP974927912, T10, TY);
188
/* Store pair k = 3. */
rio[WS(ios, 3)] = FNMS(TV, T11, TU);
189
iio[-WS(ios, 3)] = FMA(TN, T11, T12);
195
/* Store pair k = 4. */
rio[WS(ios, 4)] = FNMS(T16, T17, T15);
197
iio[-WS(ios, 2)] = FMA(T13, T17, T18);
202
/* Twiddle-instruction table referenced by the codelet descriptor `desc`;
 * its entries are not visible in this excerpt. */
static const tw_instr twinstr[] = {
207
/* Descriptor handed to the planner for this codelet: the value 7 (presumably
 * the transform size, matching "-n 7" -- confirm against hc2hc_desc), the
 * codelet name, the twiddle table, the genus, and the counts {18, 12, 54, 0},
 * which match the "18 additions, 12 multiplications, 54 fused multiply/add"
 * summary in the generator comment for this variant. */
static const hc2hc_desc desc = { 7, "hb_7", twinstr, &GENUS, {18, 12, 54, 0}, 0, 0, 0 };
209
/* Registration entry point: hands hb_7 and its descriptor to planner `p`
 * through X(khc2hc_register). */
void X(codelet_hb_7) (planner *p) {
210
X(khc2hc_register) (p, hb_7, &desc);
214
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -dif -name hb_7 -include hb.h */
217
* This function contains 72 FP additions, 60 FP multiplications,
218
* (or, 36 additions, 24 multiplications, 36 fused multiply/add),
219
* 36 stack variables, and 28 memory accesses
223
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
224
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
225
* $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
230
/*
 * hb_7, variant generated without the -fma option.
 *
 * Per the generator command line recorded above, this codelet was emitted by
 * gen_hc2hc with "-n 7 -dif -sign 1".
 *
 * Parameters (as used by the visible code):
 *   rio, iio - arrays read and written at strided offsets: rio[WS(ios, k)]
 *              and iio[-WS(ios, k)] for k = 0..6.  The same elements are
 *              loaded and later stored, so the update is in place.
 *   W        - advanced by 12 elements per loop iteration; presumably the
 *              twiddle-factor table (its loads are not visible in this
 *              excerpt -- TODO confirm).
 *   ios      - stride passed to WS() to form the element offsets.
 *   m        - sets the loop trip count (i runs from m - 2 down in steps of 2).
 *   dist     - per-iteration step: rio moves forward by dist, iio backward.
 *
 * NOTE(review): this excerpt is incomplete -- braces, several declarations
 * and assignments, and the return statement are not visible here.
 * FMA/FNMS/FNMA, DK, WS, E, R, INT and MAKE_VOLATILE_STRIDE are defined
 * outside this excerpt.
 */
static const R *hb_7(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
232
/* Named numeric constants.  Numerically they equal the magnitudes of
 * cos(2*pi*k/7) (KP623489801, KP222520933, KP900968867 for k = 1, 2, 3) and
 * sin(2*pi*k/7) (KP781831482, KP974927912, KP433883739 for k = 1, 2, 3). */
DK(KP222520933, +0.222520933956314404288902564496794759466355569);
233
DK(KP900968867, +0.900968867902419126236102319507445051165919162);
234
DK(KP623489801, +0.623489801858733530525004884004239810632274731);
235
DK(KP781831482, +0.781831482468029808708444526674057750232334519);
236
DK(KP974927912, +0.974927912181823607018131682993931217232785801);
237
DK(KP433883739, +0.433883739117558120475768332848358754609990728);
239
/* One pass per iteration: i counts down by 2 from m - 2 while positive; each
 * step moves rio forward by dist, iio backward by dist, and W forward by 12. */
for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 12, MAKE_VOLATILE_STRIDE(ios)) {
240
E T1, Ta, T4, T7, Tq, TI, TR, TU, TE, Tt, Tb, Tk, Te, Th, Tx;
241
E TF, TV, TQ, TJ, Tm;
243
E To, Tn, Tp, T8, T9;
245
/* First group of loads: rio[WS(ios, k)] with iio[-WS(ios, 7 - k)], k = 3, 1, 2. */
T8 = rio[WS(ios, 3)];
246
T9 = iio[-WS(ios, 4)];
251
T2 = rio[WS(ios, 1)];
252
T3 = iio[-WS(ios, 6)];
255
T5 = rio[WS(ios, 2)];
256
T6 = iio[-WS(ios, 5)];
260
/* Weighted combinations of (Tn, To, Tp) and of (T1, T4, T7, Ta) with the
 * constants above; the assignments of Tn/To/Tp and T1/T4/T7/Ta are not
 * visible in this excerpt. */
Tq = FMA(KP433883739, Tn, KP974927912 * To) - (KP781831482 * Tp);
261
TI = FMA(KP781831482, Tn, KP974927912 * Tp) + (KP433883739 * To);
262
TR = FNMS(KP781831482, To, KP974927912 * Tn) - (KP433883739 * Tp);
263
TU = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4);
264
TE = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7);
265
Tt = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4);
268
E Tv, Tw, Tu, Ti, Tj;
270
/* Second group of loads, covering the remaining rio/iio elements. */
Ti = rio[WS(ios, 4)];
271
Tj = iio[-WS(ios, 3)];
276
Tc = iio[-WS(ios, 1)];
277
Td = rio[WS(ios, 6)];
280
Tf = rio[WS(ios, 5)];
281
Tg = iio[-WS(ios, 2)];
285
/* Weighted combinations of (Tu, Tv, Tw) and of (Tb, Te, Th, Tk). */
Tx = FNMS(KP974927912, Tv, KP781831482 * Tu) - (KP433883739 * Tw);
286
TF = FMA(KP781831482, Tw, KP974927912 * Tu) + (KP433883739 * Tv);
287
TV = FMA(KP433883739, Tu, KP781831482 * Tv) - (KP974927912 * Tw);
288
TQ = FMA(KP900968867, Th, Tb) + FNMA(KP623489801, Tk, KP222520933 * Te);
289
TJ = FMA(KP623489801, Te, Tb) + FMA(KP222520933, Th, KP900968867 * Tk);
290
Tm = FMA(KP222520933, Tk, Tb) + FNMA(KP623489801, Th, KP900968867 * Te);
292
/* Two outputs are written without any further multiplication. */
rio[0] = T1 + T4 + T7 + Ta;
293
iio[-WS(ios, 6)] = Tb + Te - (Th + Tk);
300
/* The remaining stores come in pairs rio[WS(ios, k)] / iio[-WS(ios, 6 - k)].
 * Each pair combines the same four values with FMA and FNMS, which has the
 * shape of a complex multiplication -- assuming FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; confirm against the macro definitions.  The
 * operands are assigned on lines not visible in this excerpt. */
iio[0] = FMA(TL, TM, TN * TO);
301
rio[WS(ios, 6)] = FNMS(TN, TM, TL * TO);
309
/* Store pair k = 2. */
rio[WS(ios, 2)] = FNMS(TZ, T10, TX * TY);
310
iio[-WS(ios, 4)] = FMA(TX, T10, TZ * TY);
318
/* Store pair k = 4. */
iio[-WS(ios, 2)] = FMA(Tl, Tr, Ts * Ty);
319
rio[WS(ios, 4)] = FNMS(Ts, Tr, Tl * Ty);
327
/* Store pair k = 3. */
rio[WS(ios, 3)] = FNMS(TB, TC, Tz * TA);
328
iio[-WS(ios, 3)] = FMA(Tz, TC, TB * TA);
336
/* Store pair k = 5. */
iio[-WS(ios, 1)] = FMA(TP, TS, TT * TW);
337
rio[WS(ios, 5)] = FNMS(TT, TS, TP * TW);
345
/* Store pair k = 1. */
rio[WS(ios, 1)] = FNMS(TH, TK, TD * TG);
346
iio[-WS(ios, 5)] = FMA(TD, TK, TH * TG);
352
/* Twiddle-instruction table referenced by the codelet descriptor `desc`;
 * its entries are not visible in this excerpt. */
static const tw_instr twinstr[] = {
357
/* Descriptor handed to the planner for this codelet: the value 7 (presumably
 * the transform size, matching "-n 7" -- confirm against hc2hc_desc), the
 * codelet name, the twiddle table, the genus, and the counts {36, 24, 36, 0},
 * which match the "36 additions, 24 multiplications, 36 fused multiply/add"
 * summary in the generator comment for this variant. */
static const hc2hc_desc desc = { 7, "hb_7", twinstr, &GENUS, {36, 24, 36, 0}, 0, 0, 0 };
359
/* Registration entry point: hands hb_7 and its descriptor to planner `p`
 * through X(khc2hc_register). */
void X(codelet_hb_7) (planner *p) {
360
X(khc2hc_register) (p, hb_7, &desc);
362
#endif /* HAVE_FMA */