~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to dft/simd/codelets/t2fv_16.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
4
 
 *
5
 
 * This program is free software; you can redistribute it and/or modify
6
 
 * it under the terms of the GNU General Public License as published by
7
 
 * the Free Software Foundation; either version 2 of the License, or
8
 
 * (at your option) any later version.
9
 
 *
10
 
 * This program is distributed in the hope that it will be useful,
11
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 
 * GNU General Public License for more details.
14
 
 *
15
 
 * You should have received a copy of the GNU General Public License
16
 
 * along with this program; if not, write to the Free Software
17
 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 
 *
19
 
 */
20
 
 
21
 
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:41:55 EDT 2009 */
23
 
 
24
 
#include "codelet-dft.h"
25
 
 
26
 
#ifdef HAVE_FMA
27
 
 
28
 
/* Generated by: ../../../genfft/gen_twiddle_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */
29
 
 
30
 
/*
31
 
 * This function contains 87 FP additions, 64 FP multiplications,
32
 
 * (or, 53 additions, 30 multiplications, 34 fused multiply/add),
33
 
 * 61 stack variables, 3 constants, and 32 memory accesses
34
 
 */
35
 
#include "t2f.h"
36
 
 
37
 
/*
 * t2fv_16, HAVE_FMA variant.  Generated by gen_twiddle_c with -n 16
 * (see the "Generated by" comment above), i.e. a size-16 twiddle codelet
 * operating on SIMD vectors of type V.
 *
 * ri     - base of the data; every load and store goes through x = ri.
 * ii     - not referenced anywhere in this body.
 * W      - twiddle array; offset by mb * ((TWVL / VL) * 30) before the
 *          first pass and advanced by TWVL * 30 after each pass.
 * rs     - stride between the 16 points, always applied through WS(rs, k).
 * mb, me - loop bounds: m runs from mb up to (not including) me in steps
 *          of VL.
 * ms     - x advances by VL * ms after each pass; ms is also handed to
 *          every LD/ST call.
 *
 * Each pass loads x[WS(rs, k)] for k = 0..15, feeds points 1..15 through
 * BYTWJ together with W[TWVL * 2 * (k - 1)] (point 0 is used as loaded),
 * combines the results with VADD/VSUB/VFMA/VFNMS/VFMAI/VFNMSI, and stores
 * 16 results back into the same x[WS(rs, k)] slots.  All loads precede all
 * stores within a pass.
 *
 * NOTE(review): the precise meaning of BYTWJ, of the last argument to
 * LD/ST, and of MAKE_VOLATILE_STRIDE is defined in t2f.h / the SIMD
 * headers, not here -- confirm there before relying on it.
 */
static void t2fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
 
{
39
 
     /* Numerically: cos(pi/8), tan(pi/8) = sqrt(2) - 1, and 1/sqrt(2). */
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
40
 
     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
 
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
42
 
     INT m;
43
 
     R *x;
44
 
     x = ri;
45
 
     /* m, x and W advance together; MAKE_VOLATILE_STRIDE(rs) is evaluated after every pass. */
     for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(rs)) {
46
 
          V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15;
47
 
          V Tv;
48
 
          {
49
 
               V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu;
50
 
               V Tp;
51
 
               {
52
 
                    V T1, T2, T5, T7;
53
 
                    /* Inputs 0, 8, 4, 12. */
                    T1 = LD(&(x[0]), ms, &(x[0]));
54
 
                    T2 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
55
 
                    T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
56
 
                    T7 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
57
 
                    {
58
 
                         V Tz, TG, TB, TE;
59
 
                         /* Inputs 14, 10, 6, 2. */
                         Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
60
 
                         TG = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
61
 
                         TB = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
62
 
                         TE = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
63
 
                         {
64
 
                              V Ti, TY, TX, Td, Tg, Tm, Tt, To;
65
 
                              {
66
 
                                   V T3, T6, T8, TA, TH, TC, TF, Tb;
67
 
                                   Tb = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
68
 
                                   /* BYTWJ pairs input k with W[TWVL * 2 * (k - 1)]; input 0 (T1) is used untouched. */
                                   T3 = BYTWJ(&(W[TWVL * 14]), T2);
69
 
                                   T6 = BYTWJ(&(W[TWVL * 6]), T5);
70
 
                                   T8 = BYTWJ(&(W[TWVL * 22]), T7);
71
 
                                   TA = BYTWJ(&(W[TWVL * 26]), Tz);
72
 
                                   TH = BYTWJ(&(W[TWVL * 18]), TG);
73
 
                                   TC = BYTWJ(&(W[TWVL * 10]), TB);
74
 
                                   TF = BYTWJ(&(W[TWVL * 2]), TE);
75
 
                                   Tc = BYTWJ(&(W[0]), Tb);
76
 
                                   TW = VSUB(T1, T3);
77
 
                                   T4 = VADD(T1, T3);
78
 
                                   T19 = VSUB(T6, T8);
79
 
                                   T9 = VADD(T6, T8);
80
 
                                   Ti = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
81
 
                                   TD = VADD(TA, TC);
82
 
                                   TY = VSUB(TA, TC);
83
 
                                   TI = VADD(TF, TH);
84
 
                                   TX = VSUB(TF, TH);
85
 
                              }
86
 
                              /* Remaining odd-indexed inputs (9, 5, 15, 11, 7, 3) are loaded interleaved with arithmetic. */
                              Td = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
87
 
                              Tg = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
88
 
                              Tm = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
89
 
                              Tj = BYTWJ(&(W[TWVL * 24]), Ti);
90
 
                              Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
91
 
                              To = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
92
 
                              TZ = VADD(TX, TY);
93
 
                              T1a = VSUB(TY, TX);
94
 
                              Te = BYTWJ(&(W[TWVL * 16]), Td);
95
 
                              Th = BYTWJ(&(W[TWVL * 8]), Tg);
96
 
                              Tn = BYTWJ(&(W[TWVL * 28]), Tm);
97
 
                              Tr = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
98
 
                              Tu = BYTWJ(&(W[TWVL * 20]), Tt);
99
 
                              Tp = BYTWJ(&(W[TWVL * 12]), To);
100
 
                         }
101
 
                    }
102
 
               }
103
 
               {
104
 
                    V Tf, T11, Tk, T12, Ts;
105
 
                    TO = VADD(T4, T9);
106
 
                    Ta = VSUB(T4, T9);
107
 
                    TJ = VSUB(TD, TI);
108
 
                    TP = VADD(TI, TD);
109
 
                    Tf = VADD(Tc, Te);
110
 
                    T11 = VSUB(Tc, Te);
111
 
                    Tk = VADD(Th, Tj);
112
 
                    T12 = VSUB(Th, Tj);
113
 
                    Ts = BYTWJ(&(W[TWVL * 4]), Tr);
114
 
                    T14 = VSUB(Tn, Tp);
115
 
                    Tq = VADD(Tn, Tp);
116
 
                    T1i = VFNMS(LDK(KP707106781), TZ, TW);
117
 
                    T10 = VFMA(LDK(KP707106781), TZ, TW);
118
 
                    T1b = VFNMS(LDK(KP707106781), T1a, T19);
119
 
                    T1l = VFMA(LDK(KP707106781), T1a, T19);
120
 
                    T13 = VFNMS(LDK(KP414213562), T12, T11);
121
 
                    T1c = VFMA(LDK(KP414213562), T11, T12);
122
 
                    TR = VADD(Tf, Tk);
123
 
                    Tl = VSUB(Tf, Tk);
124
 
                    T15 = VSUB(Tu, Ts);
125
 
                    Tv = VADD(Ts, Tu);
126
 
               }
127
 
          }
128
 
          {
129
 
               V T1d, T16, TS, Tw, TU, TQ;
130
 
               T1d = VFMA(LDK(KP414213562), T14, T15);
131
 
               T16 = VFNMS(LDK(KP414213562), T15, T14);
132
 
               TS = VADD(Tq, Tv);
133
 
               Tw = VSUB(Tq, Tv);
134
 
               TU = VSUB(TO, TP);
135
 
               TQ = VADD(TO, TP);
136
 
               {
137
 
                    V T1e, T1j, T17, T1m;
138
 
                    T1e = VSUB(T1c, T1d);
139
 
                    T1j = VADD(T1c, T1d);
140
 
                    T17 = VADD(T13, T16);
141
 
                    T1m = VSUB(T16, T13);
142
 
                    {
143
 
                         V TV, TT, TK, Tx;
144
 
                         TV = VSUB(TS, TR);
145
 
                         TT = VADD(TR, TS);
146
 
                         TK = VSUB(Tw, Tl);
147
 
                         Tx = VADD(Tl, Tw);
148
 
                         {
149
 
                              V T1h, T1f, T1o, T1k;
150
 
                              T1h = VFMA(LDK(KP923879532), T1e, T1b);
151
 
                              T1f = VFNMS(LDK(KP923879532), T1e, T1b);
152
 
                              T1o = VFMA(LDK(KP923879532), T1j, T1i);
153
 
                              T1k = VFNMS(LDK(KP923879532), T1j, T1i);
154
 
                              {
155
 
                                   V T1g, T18, T1p, T1n;
156
 
                                   T1g = VFMA(LDK(KP923879532), T17, T10);
157
 
                                   T18 = VFNMS(LDK(KP923879532), T17, T10);
158
 
                                   T1p = VFMA(LDK(KP923879532), T1m, T1l);
159
 
                                   T1n = VFNMS(LDK(KP923879532), T1m, T1l);
160
 
                                   /* First four stores: outputs 12, 4, 0, 8 (all inputs have been loaded by now). */
                                   ST(&(x[WS(rs, 12)]), VFNMSI(TV, TU), ms, &(x[0]));
161
 
                                   ST(&(x[WS(rs, 4)]), VFMAI(TV, TU), ms, &(x[0]));
162
 
                                   ST(&(x[0]), VADD(TQ, TT), ms, &(x[0]));
163
 
                                   ST(&(x[WS(rs, 8)]), VSUB(TQ, TT), ms, &(x[0]));
164
 
                                   {
165
 
                                        V TN, TL, TM, Ty;
166
 
                                        TN = VFMA(LDK(KP707106781), TK, TJ);
167
 
                                        TL = VFNMS(LDK(KP707106781), TK, TJ);
168
 
                                        TM = VFMA(LDK(KP707106781), Tx, Ta);
169
 
                                        Ty = VFNMS(LDK(KP707106781), Tx, Ta);
170
 
                                        /* Remaining twelve stores: the odd outputs, then 14, 2, 10, 6. */
                                        ST(&(x[WS(rs, 1)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)]));
171
 
                                        ST(&(x[WS(rs, 15)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)]));
172
 
                                        ST(&(x[WS(rs, 7)]), VFMAI(T1f, T18), ms, &(x[WS(rs, 1)]));
173
 
                                        ST(&(x[WS(rs, 9)]), VFNMSI(T1f, T18), ms, &(x[WS(rs, 1)]));
174
 
                                        ST(&(x[WS(rs, 3)]), VFMAI(T1p, T1o), ms, &(x[WS(rs, 1)]));
175
 
                                        ST(&(x[WS(rs, 13)]), VFNMSI(T1p, T1o), ms, &(x[WS(rs, 1)]));
176
 
                                        ST(&(x[WS(rs, 11)]), VFMAI(T1n, T1k), ms, &(x[WS(rs, 1)]));
177
 
                                        ST(&(x[WS(rs, 5)]), VFNMSI(T1n, T1k), ms, &(x[WS(rs, 1)]));
178
 
                                        ST(&(x[WS(rs, 14)]), VFNMSI(TN, TM), ms, &(x[0]));
179
 
                                        ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0]));
180
 
                                        ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0]));
181
 
                                        ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0]));
182
 
                                   }
183
 
                              }
184
 
                         }
185
 
                    }
186
 
               }
187
 
          }
188
 
     }
189
 
}
190
 
 
191
 
static const tw_instr twinstr[] = {
192
 
     VTW(0, 1),
193
 
     VTW(0, 2),
194
 
     VTW(0, 3),
195
 
     VTW(0, 4),
196
 
     VTW(0, 5),
197
 
     VTW(0, 6),
198
 
     VTW(0, 7),
199
 
     VTW(0, 8),
200
 
     VTW(0, 9),
201
 
     VTW(0, 10),
202
 
     VTW(0, 11),
203
 
     VTW(0, 12),
204
 
     VTW(0, 13),
205
 
     VTW(0, 14),
206
 
     VTW(0, 15),
207
 
     {TW_NEXT, VL, 0}
208
 
};
209
 
 
210
 
/*
 * Descriptor handed to the planner together with t2fv_16 (FMA build).
 * NOTE(review): field meanings beyond what is annotated below come from
 * the ct_desc declaration, which is not visible in this file.
 */
static const ct_desc desc = {
     16,                 /* matches "-n 16" on the generator command line */
     "t2fv_16",          /* codelet name */
     twinstr,            /* twiddle table for this codelet */
     &GENUS,
     {53, 30, 34, 0},    /* 53 additions, 30 multiplications, 34 fused multiply/adds */
     0,
     0,
     0
};
211
 
 
212
 
/*
 * Registration entry point (FMA build): passes the t2fv_16 kernel and its
 * descriptor to X(kdft_dit_register) for planner `p`.
 */
void X(codelet_t2fv_16)(planner *p)
{
     X(kdft_dit_register)(p, t2fv_16, &desc);
}
215
 
#else                           /* HAVE_FMA */
216
 
 
217
 
/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */
218
 
 
219
 
/*
220
 
 * This function contains 87 FP additions, 42 FP multiplications,
221
 
 * (or, 83 additions, 38 multiplications, 4 fused multiply/add),
222
 
 * 36 stack variables, 3 constants, and 32 memory accesses
223
 
 */
224
 
#include "t2f.h"
225
 
 
226
 
/*
 * t2fv_16, non-FMA variant.  Generated by gen_twiddle_c with -n 16
 * (see the "Generated by" comment above), i.e. a size-16 twiddle codelet
 * operating on SIMD vectors of type V.
 *
 * ri     - base of the data; every load and store goes through x = ri.
 * ii     - not referenced anywhere in this body.
 * W      - twiddle array; offset by mb * ((TWVL / VL) * 30) before the
 *          first pass and advanced by TWVL * 30 after each pass.
 * rs     - stride between the 16 points, always applied through WS(rs, k).
 * mb, me - loop bounds: m runs from mb up to (not including) me in steps
 *          of VL.
 * ms     - x advances by VL * ms after each pass; ms is also handed to
 *          every LD/ST call.
 *
 * Each pass loads x[WS(rs, k)] for k = 0..15, feeds points 1..15 through
 * BYTWJ together with W[TWVL * 2 * (k - 1)] (point 0 is used as loaded),
 * combines the results with VADD/VSUB/VMUL/VFMA/VFNMS/VBYI, and stores
 * 16 results back into the same x[WS(rs, k)] slots.  All loads precede all
 * stores within a pass.
 *
 * NOTE(review): the precise meaning of BYTWJ, VBYI, the last argument to
 * LD/ST, and MAKE_VOLATILE_STRIDE is defined in t2f.h / the SIMD headers,
 * not here -- confirm there before relying on it.
 */
static void t2fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
227
 
{
228
 
     /* Numerically: cos(pi/8), sin(pi/8), and 1/sqrt(2). */
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
229
 
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
230
 
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
231
 
     INT m;
232
 
     R *x;
233
 
     x = ri;
234
 
     /* m, x and W advance together; MAKE_VOLATILE_STRIDE(rs) is evaluated after every pass. */
     for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(rs)) {
235
 
          V TJ, T10, TD, T11, T1b, T1c, Ty, TK, T16, T17, T18, Tb, TN, T13, T14;
236
 
          V T15, Tm, TM, TG, TI, TH;
237
 
          /* Inputs 0 and 8: difference in TJ, sum in T10. */
          TG = LD(&(x[0]), ms, &(x[0]));
238
 
          TH = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
239
 
          TI = BYTWJ(&(W[TWVL * 14]), TH);
240
 
          TJ = VSUB(TG, TI);
241
 
          T10 = VADD(TG, TI);
242
 
          {
243
 
               V TA, TC, Tz, TB;
244
 
               /* Inputs 4 and 12: difference in TD, sum in T11. */
               Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
245
 
               TA = BYTWJ(&(W[TWVL * 6]), Tz);
246
 
               TB = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
247
 
               TC = BYTWJ(&(W[TWVL * 22]), TB);
248
 
               TD = VSUB(TA, TC);
249
 
               T11 = VADD(TA, TC);
250
 
          }
251
 
          {
252
 
               V Tp, Tw, Tr, Tu, Ts, Tx;
253
 
               {
254
 
                    V To, Tv, Tq, Tt;
255
 
                    /* Inputs 14, 10, 6, 2. */
                    To = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
256
 
                    Tp = BYTWJ(&(W[TWVL * 26]), To);
257
 
                    Tv = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
258
 
                    Tw = BYTWJ(&(W[TWVL * 18]), Tv);
259
 
                    Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
260
 
                    Tr = BYTWJ(&(W[TWVL * 10]), Tq);
261
 
                    Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
262
 
                    Tu = BYTWJ(&(W[TWVL * 2]), Tt);
263
 
               }
264
 
               T1b = VADD(Tp, Tr);
265
 
               T1c = VADD(Tu, Tw);
266
 
               Ts = VSUB(Tp, Tr);
267
 
               Tx = VSUB(Tu, Tw);
268
 
               Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx));
269
 
               TK = VMUL(LDK(KP707106781), VADD(Tx, Ts));
270
 
          }
271
 
          {
272
 
               V T2, T9, T4, T7, T5, Ta;
273
 
               {
274
 
                    V T1, T8, T3, T6;
275
 
                    /* Inputs 15, 11, 7, 3. */
                    T1 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
276
 
                    T2 = BYTWJ(&(W[TWVL * 28]), T1);
277
 
                    T8 = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
278
 
                    T9 = BYTWJ(&(W[TWVL * 20]), T8);
279
 
                    T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
280
 
                    T4 = BYTWJ(&(W[TWVL * 12]), T3);
281
 
                    T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
282
 
                    T7 = BYTWJ(&(W[TWVL * 4]), T6);
283
 
               }
284
 
               T16 = VADD(T2, T4);
285
 
               T17 = VADD(T7, T9);
286
 
               T18 = VSUB(T16, T17);
287
 
               T5 = VSUB(T2, T4);
288
 
               Ta = VSUB(T7, T9);
289
 
               Tb = VFNMS(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), T5));
290
 
               TN = VFMA(LDK(KP923879532), T5, VMUL(LDK(KP382683432), Ta));
291
 
          }
292
 
          {
293
 
               V Td, Tk, Tf, Ti, Tg, Tl;
294
 
               {
295
 
                    V Tc, Tj, Te, Th;
296
 
                    /* Inputs 1, 13, 9, 5. */
                    Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
297
 
                    Td = BYTWJ(&(W[0]), Tc);
298
 
                    Tj = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
299
 
                    Tk = BYTWJ(&(W[TWVL * 24]), Tj);
300
 
                    Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
301
 
                    Tf = BYTWJ(&(W[TWVL * 16]), Te);
302
 
                    Th = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
303
 
                    Ti = BYTWJ(&(W[TWVL * 8]), Th);
304
 
               }
305
 
               T13 = VADD(Td, Tf);
306
 
               T14 = VADD(Ti, Tk);
307
 
               T15 = VSUB(T13, T14);
308
 
               Tg = VSUB(Td, Tf);
309
 
               Tl = VSUB(Ti, Tk);
310
 
               Tm = VFMA(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl));
311
 
               TM = VFNMS(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tg));
312
 
          }
313
 
          {
314
 
               V T1a, T1g, T1f, T1h;
315
 
               {
316
 
                    V T12, T19, T1d, T1e;
317
 
                    T12 = VSUB(T10, T11);
318
 
                    T19 = VMUL(LDK(KP707106781), VADD(T15, T18));
319
 
                    T1a = VADD(T12, T19);
320
 
                    T1g = VSUB(T12, T19);
321
 
                    T1d = VSUB(T1b, T1c);
322
 
                    T1e = VMUL(LDK(KP707106781), VSUB(T18, T15));
323
 
                    T1f = VBYI(VADD(T1d, T1e));
324
 
                    T1h = VBYI(VSUB(T1e, T1d));
325
 
               }
326
 
               /* Outputs 14, 6, 2, 10. */
               ST(&(x[WS(rs, 14)]), VSUB(T1a, T1f), ms, &(x[0]));
327
 
               ST(&(x[WS(rs, 6)]), VADD(T1g, T1h), ms, &(x[0]));
328
 
               ST(&(x[WS(rs, 2)]), VADD(T1a, T1f), ms, &(x[0]));
329
 
               ST(&(x[WS(rs, 10)]), VSUB(T1g, T1h), ms, &(x[0]));
330
 
          }
331
 
          {
332
 
               V T1k, T1o, T1n, T1p;
333
 
               {
334
 
                    V T1i, T1j, T1l, T1m;
335
 
                    T1i = VADD(T10, T11);
336
 
                    T1j = VADD(T1c, T1b);
337
 
                    T1k = VADD(T1i, T1j);
338
 
                    T1o = VSUB(T1i, T1j);
339
 
                    T1l = VADD(T13, T14);
340
 
                    T1m = VADD(T16, T17);
341
 
                    T1n = VADD(T1l, T1m);
342
 
                    T1p = VBYI(VSUB(T1m, T1l));
343
 
               }
344
 
               /* Outputs 8, 4, 0, 12. */
               ST(&(x[WS(rs, 8)]), VSUB(T1k, T1n), ms, &(x[0]));
345
 
               ST(&(x[WS(rs, 4)]), VADD(T1o, T1p), ms, &(x[0]));
346
 
               ST(&(x[0]), VADD(T1k, T1n), ms, &(x[0]));
347
 
               ST(&(x[WS(rs, 12)]), VSUB(T1o, T1p), ms, &(x[0]));
348
 
          }
349
 
          {
350
 
               V TF, TQ, TP, TR;
351
 
               {
352
 
                    V Tn, TE, TL, TO;
353
 
                    Tn = VSUB(Tb, Tm);
354
 
                    TE = VSUB(Ty, TD);
355
 
                    TF = VBYI(VSUB(Tn, TE));
356
 
                    TQ = VBYI(VADD(TE, Tn));
357
 
                    TL = VADD(TJ, TK);
358
 
                    TO = VADD(TM, TN);
359
 
                    TP = VSUB(TL, TO);
360
 
                    TR = VADD(TL, TO);
361
 
               }
362
 
               /* Outputs 7, 15, 9, 1. */
               ST(&(x[WS(rs, 7)]), VADD(TF, TP), ms, &(x[WS(rs, 1)]));
363
 
               ST(&(x[WS(rs, 15)]), VSUB(TR, TQ), ms, &(x[WS(rs, 1)]));
364
 
               ST(&(x[WS(rs, 9)]), VSUB(TP, TF), ms, &(x[WS(rs, 1)]));
365
 
               ST(&(x[WS(rs, 1)]), VADD(TQ, TR), ms, &(x[WS(rs, 1)]));
366
 
          }
367
 
          {
368
 
               V TU, TY, TX, TZ;
369
 
               {
370
 
                    V TS, TT, TV, TW;
371
 
                    TS = VSUB(TJ, TK);
372
 
                    TT = VADD(Tm, Tb);
373
 
                    TU = VADD(TS, TT);
374
 
                    TY = VSUB(TS, TT);
375
 
                    TV = VADD(TD, Ty);
376
 
                    TW = VSUB(TN, TM);
377
 
                    TX = VBYI(VADD(TV, TW));
378
 
                    TZ = VBYI(VSUB(TW, TV));
379
 
               }
380
 
               /* Outputs 13, 5, 3, 11. */
               ST(&(x[WS(rs, 13)]), VSUB(TU, TX), ms, &(x[WS(rs, 1)]));
381
 
               ST(&(x[WS(rs, 5)]), VADD(TY, TZ), ms, &(x[WS(rs, 1)]));
382
 
               ST(&(x[WS(rs, 3)]), VADD(TU, TX), ms, &(x[WS(rs, 1)]));
383
 
               ST(&(x[WS(rs, 11)]), VSUB(TY, TZ), ms, &(x[WS(rs, 1)]));
384
 
          }
385
 
     }
386
 
}
387
 
 
388
 
static const tw_instr twinstr[] = {
389
 
     VTW(0, 1),
390
 
     VTW(0, 2),
391
 
     VTW(0, 3),
392
 
     VTW(0, 4),
393
 
     VTW(0, 5),
394
 
     VTW(0, 6),
395
 
     VTW(0, 7),
396
 
     VTW(0, 8),
397
 
     VTW(0, 9),
398
 
     VTW(0, 10),
399
 
     VTW(0, 11),
400
 
     VTW(0, 12),
401
 
     VTW(0, 13),
402
 
     VTW(0, 14),
403
 
     VTW(0, 15),
404
 
     {TW_NEXT, VL, 0}
405
 
};
406
 
 
407
 
/*
 * Descriptor handed to the planner together with t2fv_16 (non-FMA build).
 * NOTE(review): field meanings beyond what is annotated below come from
 * the ct_desc declaration, which is not visible in this file.
 */
static const ct_desc desc = {
     16,                 /* matches "-n 16" on the generator command line */
     "t2fv_16",          /* codelet name */
     twinstr,            /* twiddle table for this codelet */
     &GENUS,
     {83, 38, 4, 0},     /* 83 additions, 38 multiplications, 4 fused multiply/adds */
     0,
     0,
     0
};
408
 
 
409
 
/*
 * Registration entry point (non-FMA build): passes the t2fv_16 kernel and
 * its descriptor to X(kdft_dit_register) for planner `p`.
 */
void X(codelet_t2fv_16)(planner *p)
{
     X(kdft_dit_register)(p, t2fv_16, &desc);
}
412
 
#endif                          /* HAVE_FMA */