2
* Copyright (c) 2003, 2006 Matteo Frigo
3
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jul 1 14:16:19 EDT 2006 */
24
#include "codelet-dft.h"
28
/* Generated by: ../../../genfft/gen_twidsq -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include q.h */
31
* This function contains 12 FP additions, 8 FP multiplications,
32
* (or, 8 additions, 4 multiplications, 4 fused multiply/add),
33
* 21 stack variables, and 16 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_twidsq.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
44
/*
 * q1_2 -- first of the two definitions of this kernel that appear in this
 * file (the "Generated by" line preceding it lists the -fma option).
 *
 * NOTE(review): several lines of this function are not visible here (the
 * opening brace, the declaration of i, most of the loads and intermediate
 * computations, the closing braces and the return statement).  The comments
 * below describe only what the visible lines do.
 *
 * Visible behaviour: a loop that runs while i counts down from m, advancing
 * rio and iio by dist and W by 2 on every pass.  Each pass reads and writes
 * elements of rio/iio at offsets built from WS(is, 1) and WS(vs, 1).
 *
 * FMA, FNMS, WS, MAKE_VOLATILE_STRIDE, R, E, INT and stride are defined
 * outside this file section.  NOTE(review): FMA(a, b, c) / FNMS(a, b, c) are
 * presumably a*b + c and c - a*b respectively -- confirm in the included
 * headers.
 */
static const R *q1_2(R *rio, R *iio, const R *W, stride is, stride vs, INT m, INT dist)
47
/* One pass per value of i from m down to 1; pointers step by dist, W by 2. */
for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 2, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(vs)) {
50
/*
 * NOTE(review): Tk, Tf, T6, Ta, T9 and T5 are used further down but are not
 * declared on this line -- presumably declared on lines not shown here.
 */
E T1, T2, T7, T8, Tb, T4, Tc, Th, Ti, Te, Tj, Td, Tg;
57
/* Load the real part at offset WS(vs, 1) + WS(is, 1). */
Tc = rio[WS(vs, 1) + WS(is, 1)];
60
/* Load the imaginary part at the same offset. */
Ti = iio[WS(vs, 1) + WS(is, 1)];
65
/* Sum outputs: stored at offset WS(is, 1) of each array. */
rio[WS(is, 1)] = Tb + Tc;
66
iio[WS(is, 1)] = Th + Ti;
77
/*
 * Remaining outputs are produced with FNMS/FMA; their operands (Tg, Te, Tk,
 * Tj, Tf, T6, T4, Ta, T9, T5) are computed on lines not visible here.
 */
iio[WS(vs, 1) + WS(is, 1)] = FNMS(Tg, Te, Tk);
78
rio[WS(vs, 1) + WS(is, 1)] = FMA(Tg, Tj, Tf);
79
iio[WS(vs, 1)] = FNMS(T6, T4, Ta);
82
rio[WS(vs, 1)] = FMA(T6, T9, T5);
87
/*
 * Twiddle-instruction table referenced by desc below.
 * NOTE(review): the initializer's entries and closing brace are on lines not
 * visible here.
 */
static const tw_instr twinstr[] = {
92
/*
 * Descriptor handed to the registration call together with q1_2.  The
 * fields are positional and ct_desc is declared outside this file section.
 * NOTE(review): the leading 2 presumably matches the generator's "-n 2", and
 * {8, 4, 4, 0} presumably matches the "8 additions, 4 multiplications,
 * 4 fused multiply/add" figures in the statistics comment -- confirm against
 * the ct_desc declaration.
 */
static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, {8, 4, 4, 0}, 0, 0, 0 };
94
/*
 * Registration entry point: passes the planner p, the q1_2 kernel and the
 * address of desc to X(kdft_difsq_register).
 * X(...) is a macro defined outside this file section.
 * NOTE(review): the closing brace is on a line not visible here.
 */
void X(codelet_q1_2) (planner *p) {
95
X(kdft_difsq_register) (p, q1_2, &desc);
99
/* Generated by: ../../../genfft/gen_twidsq -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include q.h */
102
* This function contains 12 FP additions, 8 FP multiplications,
103
* (or, 8 additions, 4 multiplications, 4 fused multiply/add),
104
* 17 stack variables, and 16 memory accesses
108
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
109
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
110
* $Id: gen_twidsq.ml,v 1.19 2006-02-12 23:34:12 athena Exp $
115
/*
 * q1_2 -- second of the two definitions of this kernel that appear in this
 * file (the "Generated by" line preceding it does not list -fma).
 *
 * NOTE(review): the file ends with "#endif / * HAVE_FMA * /"-style
 * conditional text, so the two definitions are presumably alternatives
 * selected by the preprocessor; the matching #if/#else is not visible here.
 * Several lines of this function are also not visible (opening brace, the
 * declaration of i, most loads and intermediates, closing braces, return).
 *
 * Visible behaviour: a loop that runs while i counts down from m, advancing
 * rio and iio by dist and W by 2 on every pass.  Each pass reads and writes
 * elements of rio/iio at offsets built from WS(is, 1) and WS(vs, 1).
 *
 * FMA, FNMS, WS, MAKE_VOLATILE_STRIDE, R, E, INT and stride are defined
 * outside this file section.  NOTE(review): FMA(a, b, c) / FNMS(a, b, c) are
 * presumably a*b + c and c - a*b respectively -- confirm in the included
 * headers.
 */
static const R *q1_2(R *rio, R *iio, const R *W, stride is, stride vs, INT m, INT dist)
118
/* One pass per value of i from m down to 1; pointers step by dist, W by 2. */
for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 2, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(vs)) {
119
/*
 * NOTE(review): T3, T5, Tb and Td are used further down but are not declared
 * on this line -- presumably declared on lines not shown here.
 */
E T1, T2, T4, T6, T7, T8, T9, Ta, Tc, Te, Tf, Tg;
127
/* Load the real part at offset WS(vs, 1) + WS(is, 1). */
Ta = rio[WS(vs, 1) + WS(is, 1)];
130
/* Load the imaginary part at the same offset. */
Tf = iio[WS(vs, 1) + WS(is, 1)];
134
/* Sum outputs: stored at offset WS(is, 1) of each array. */
rio[WS(is, 1)] = T9 + Ta;
135
iio[WS(is, 1)] = Te + Tf;
140
/*
 * Remaining outputs: each FMA/FNMS takes an explicit product as its third
 * argument.  The operands (Tb, Tc, Td, Tg, T3, T4, T5, T8) are computed on
 * lines not visible here.
 */
rio[WS(vs, 1) + WS(is, 1)] = FMA(Tb, Tc, Td * Tg);
141
iio[WS(vs, 1) + WS(is, 1)] = FNMS(Td, Tc, Tb * Tg);
144
rio[WS(vs, 1)] = FMA(T3, T4, T5 * T8);
145
iio[WS(vs, 1)] = FNMS(T5, T4, T3 * T8);
151
/*
 * Twiddle-instruction table referenced by desc below.
 * NOTE(review): the initializer's entries and closing brace are on lines not
 * visible here.
 */
static const tw_instr twinstr[] = {
156
/*
 * Descriptor handed to the registration call together with q1_2.  The
 * fields are positional and ct_desc is declared outside this file section.
 * NOTE(review): the leading 2 presumably matches the generator's "-n 2", and
 * {8, 4, 4, 0} presumably matches the "8 additions, 4 multiplications,
 * 4 fused multiply/add" figures in the statistics comment -- confirm against
 * the ct_desc declaration.
 */
static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, {8, 4, 4, 0}, 0, 0, 0 };
158
/*
 * Registration entry point: passes the planner p, the q1_2 kernel and the
 * address of desc to X(kdft_difsq_register).
 * X(...) is a macro defined outside this file section.
 * NOTE(review): the closing brace is on a line not visible here.
 */
void X(codelet_q1_2) (planner *p) {
159
X(kdft_difsq_register) (p, q1_2, &desc);
161
#endif /* HAVE_FMA */