~ubuntu-branches/ubuntu/maverick/blender/maverick

« back to all changes in this revision

Viewing changes to extern/fftw/dft/simd/codelets/t2fv_16.c

  • Committer: Bazaar Package Importer
  • Author(s): Khashayar Naderehvandi, Alessio Treglia
  • Date: 2009-01-22 16:53:59 UTC
  • mfrom: (14.1.1 experimental)
  • Revision ID: james.westby@ubuntu.com-20090122165359-v0996tn7fbit64ni
Tags: 2.48a+dfsg-1ubuntu1
[ Khashayar Naderehvandi ]
* Merge from debian experimental (LP: #320045), Ubuntu remaining changes:
  - Add patch correcting header file locations.
  - Add libvorbis-dev and libgsm1-dev to Build-Depends.
  - Use avcodec_decode_audio2() in source/blender/src/hddaudio.c

[ Alessio Treglia ]
* Add missing previous changelog entries.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2006 Matteo Frigo
 
3
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Sat Jul  1 14:55:08 EDT 2006 */
 
23
 
 
24
#include "codelet-dft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_twiddle_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */
 
29
 
 
30
/*
 
31
 * This function contains 87 FP additions, 64 FP multiplications,
 
32
 * (or, 53 additions, 30 multiplications, 34 fused multiply/add),
 
33
 * 61 stack variables, and 32 memory accesses
 
34
 */
 
35
/*
 
36
 * Generator Id's : 
 
37
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
38
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
39
 * $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
 
40
 */
 
41
 
 
42
#include "t2f.h"
 
43
 
 
44
/*
 * t2fv_16 (fused multiply-add variant): machine-generated size-16 twiddle
 * codelet operating on SIMD vectors of type V.
 *
 * Each loop iteration:
 *   - loads the 16 inputs x[WS(ios, k)], k = 0..15;
 *   - passes inputs 1..15 through BYTWJ with the twiddle found at
 *     W[TWVL * 2 * (k - 1)] (input 0 is used as loaded);
 *   - combines them with VADD/VSUB/VFMA/VFNMS and the three constants
 *     declared below;
 *   - stores 16 results back to the same x[WS(ios, k)] slots, so the
 *     transform is performed in place.
 *
 * Parameters:
 *   ri    base pointer of the data; the only data pointer dereferenced.
 *   ii    not referenced anywhere in this body.
 *   W     twiddle table; advanced by TWVL * 30 per iteration.
 *   ios   stride between the 16 points, always used through WS(ios, k).
 *   m     iteration count; the loop counts it down in steps of VL.
 *   dist  x advances by VL * dist per iteration; it is also forwarded to
 *         every LD/ST call.
 *
 * Returns W as advanced by the loop.
 *
 * NOTE(review): the exact semantics of LD/ST's third argument (&(x[0]) for
 * even k, &(x[WS(ios, 1)]) for odd k) and of BYTWJ are defined by the SIMD
 * headers, not here -- consult those before changing any call.
 * The statement order was chosen by the generator's scheduler; do not
 * reorder by hand.
 */
static const R *t2fv_16(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     /* 0.92387... == cos(pi/8), 0.41421... == tan(pi/8), 0.70710... == sqrt(1/2) */
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT i;
     R *x;
     x = ri;
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(ios)) {
          V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15;
          V Tv;
          {
               V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu;
               V Tp;
               {
                    V T1, T2, T5, T7;
                    /* Even-indexed inputs 0, 8, 4, 12, 14, 10, 6, 2. */
                    T1 = LD(&(x[0]), dist, &(x[0]));
                    T2 = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
                    T5 = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
                    T7 = LD(&(x[WS(ios, 12)]), dist, &(x[0]));
                    {
                         V Tz, TG, TB, TE;
                         Tz = LD(&(x[WS(ios, 14)]), dist, &(x[0]));
                         TG = LD(&(x[WS(ios, 10)]), dist, &(x[0]));
                         TB = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
                         TE = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
                         {
                              V Ti, TY, TX, Td, Tg, Tm, Tt, To;
                              {
                                   V T3, T6, T8, TA, TH, TC, TF, Tb;
                                   /* Odd-indexed loads are interleaved with the
                                      twiddle multiplications from here on. */
                                   Tb = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
                                   T3 = BYTWJ(&(W[TWVL * 14]), T2);
                                   T6 = BYTWJ(&(W[TWVL * 6]), T5);
                                   T8 = BYTWJ(&(W[TWVL * 22]), T7);
                                   TA = BYTWJ(&(W[TWVL * 26]), Tz);
                                   TH = BYTWJ(&(W[TWVL * 18]), TG);
                                   TC = BYTWJ(&(W[TWVL * 10]), TB);
                                   TF = BYTWJ(&(W[TWVL * 2]), TE);
                                   Tc = BYTWJ(&(W[0]), Tb);
                                   TW = VSUB(T1, T3);
                                   T4 = VADD(T1, T3);
                                   T19 = VSUB(T6, T8);
                                   T9 = VADD(T6, T8);
                                   Ti = LD(&(x[WS(ios, 13)]), dist, &(x[WS(ios, 1)]));
                                   TD = VADD(TA, TC);
                                   TY = VSUB(TA, TC);
                                   TI = VADD(TF, TH);
                                   TX = VSUB(TF, TH);
                              }
                              Td = LD(&(x[WS(ios, 9)]), dist, &(x[WS(ios, 1)]));
                              Tg = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
                              Tm = LD(&(x[WS(ios, 15)]), dist, &(x[WS(ios, 1)]));
                              Tj = BYTWJ(&(W[TWVL * 24]), Ti);
                              Tt = LD(&(x[WS(ios, 11)]), dist, &(x[WS(ios, 1)]));
                              To = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
                              TZ = VADD(TX, TY);
                              T1a = VSUB(TY, TX);
                              Te = BYTWJ(&(W[TWVL * 16]), Td);
                              Th = BYTWJ(&(W[TWVL * 8]), Tg);
                              Tn = BYTWJ(&(W[TWVL * 28]), Tm);
                              Tr = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
                              Tu = BYTWJ(&(W[TWVL * 20]), Tt);
                              Tp = BYTWJ(&(W[TWVL * 12]), To);
                         }
                    }
               }
               {
                    V Tf, T11, Tk, T12, Ts;
                    /* Sums and differences of the twiddled inputs. */
                    TO = VADD(T4, T9);
                    Ta = VSUB(T4, T9);
                    TJ = VSUB(TD, TI);
                    TP = VADD(TI, TD);
                    Tf = VADD(Tc, Te);
                    T11 = VSUB(Tc, Te);
                    Tk = VADD(Th, Tj);
                    T12 = VSUB(Th, Tj);
                    Ts = BYTWJ(&(W[TWVL * 4]), Tr);
                    T14 = VSUB(Tn, Tp);
                    Tq = VADD(Tn, Tp);
                    T1i = VFNMS(LDK(KP707106781), TZ, TW);
                    T10 = VFMA(LDK(KP707106781), TZ, TW);
                    T1b = VFNMS(LDK(KP707106781), T1a, T19);
                    T1l = VFMA(LDK(KP707106781), T1a, T19);
                    T13 = VFNMS(LDK(KP414213562), T12, T11);
                    T1c = VFMA(LDK(KP414213562), T11, T12);
                    TR = VADD(Tf, Tk);
                    Tl = VSUB(Tf, Tk);
                    T15 = VSUB(Tu, Ts);
                    Tv = VADD(Ts, Tu);
               }
          }
          {
               V T1d, T16, TS, Tw, TU, TQ;
               T1d = VFMA(LDK(KP414213562), T14, T15);
               T16 = VFNMS(LDK(KP414213562), T15, T14);
               TS = VADD(Tq, Tv);
               Tw = VSUB(Tq, Tv);
               TU = VSUB(TO, TP);
               TQ = VADD(TO, TP);
               {
                    V T1e, T1j, T17, T1m;
                    T1e = VSUB(T1c, T1d);
                    T1j = VADD(T1c, T1d);
                    T17 = VADD(T13, T16);
                    T1m = VSUB(T16, T13);
                    {
                         V TV, TT, TK, Tx;
                         TV = VSUB(TS, TR);
                         TT = VADD(TR, TS);
                         TK = VSUB(Tw, Tl);
                         Tx = VADD(Tl, Tw);
                         {
                              V T1h, T1f, T1o, T1k;
                              T1h = VFMA(LDK(KP923879532), T1e, T1b);
                              T1f = VFNMS(LDK(KP923879532), T1e, T1b);
                              T1o = VFMA(LDK(KP923879532), T1j, T1i);
                              T1k = VFNMS(LDK(KP923879532), T1j, T1i);
                              {
                                   V T1g, T18, T1p, T1n;
                                   T1g = VFMA(LDK(KP923879532), T17, T10);
                                   T18 = VFNMS(LDK(KP923879532), T17, T10);
                                   T1p = VFMA(LDK(KP923879532), T1m, T1l);
                                   T1n = VFNMS(LDK(KP923879532), T1m, T1l);
                                   /* Outputs 12, 4, 0, 8. */
                                   ST(&(x[WS(ios, 12)]), VFNMSI(TV, TU), dist, &(x[0]));
                                   ST(&(x[WS(ios, 4)]), VFMAI(TV, TU), dist, &(x[0]));
                                   ST(&(x[0]), VADD(TQ, TT), dist, &(x[0]));
                                   ST(&(x[WS(ios, 8)]), VSUB(TQ, TT), dist, &(x[0]));
                                   {
                                        V TN, TL, TM, Ty;
                                        TN = VFMA(LDK(KP707106781), TK, TJ);
                                        TL = VFNMS(LDK(KP707106781), TK, TJ);
                                        TM = VFMA(LDK(KP707106781), Tx, Ta);
                                        Ty = VFNMS(LDK(KP707106781), Tx, Ta);
                                        /* Odd outputs, then outputs 14, 2, 10, 6. */
                                        ST(&(x[WS(ios, 1)]), VFNMSI(T1h, T1g), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 15)]), VFMAI(T1h, T1g), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 7)]), VFMAI(T1f, T18), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 9)]), VFNMSI(T1f, T18), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 3)]), VFMAI(T1p, T1o), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 13)]), VFNMSI(T1p, T1o), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 11)]), VFMAI(T1n, T1k), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 5)]), VFNMSI(T1n, T1k), dist, &(x[WS(ios, 1)]));
                                        ST(&(x[WS(ios, 14)]), VFNMSI(TN, TM), dist, &(x[0]));
                                        ST(&(x[WS(ios, 2)]), VFMAI(TN, TM), dist, &(x[0]));
                                        ST(&(x[WS(ios, 10)]), VFMAI(TL, Ty), dist, &(x[0]));
                                        ST(&(x[WS(ios, 6)]), VFNMSI(TL, Ty), dist, &(x[0]));
                                   }
                              }
                         }
                    }
               }
          }
     }
     return W;
}
 
198
 
 
199
static const tw_instr twinstr[] = {
 
200
     VTW(1),
 
201
     VTW(2),
 
202
     VTW(3),
 
203
     VTW(4),
 
204
     VTW(5),
 
205
     VTW(6),
 
206
     VTW(7),
 
207
     VTW(8),
 
208
     VTW(9),
 
209
     VTW(10),
 
210
     VTW(11),
 
211
     VTW(12),
 
212
     VTW(13),
 
213
     VTW(14),
 
214
     VTW(15),
 
215
     {TW_NEXT, VL, 0}
 
216
};
 
217
 
 
218
/*
 * Codelet descriptor passed to the planner by the registration function
 * that follows.
 * NOTE(review): ct_desc's field names are declared elsewhere; the per-field
 * notes below are read off the values only and should be confirmed against
 * the ct_desc declaration.
 */
static const ct_desc desc = {
     16,                 /* matches the "-n 16" in the generator command line */
     "t2fv_16",          /* codelet name */
     twinstr,            /* twiddle instruction table defined above */
     &GENUS,
     {53, 30, 34, 0},    /* same 53 / 30 / 34 figures as this branch's statistics comment */
     0,
     0,
     0
};
 
219
 
 
220
/*
 * Registration entry point: passes the t2fv_16 codelet and its descriptor
 * to the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t2fv_16) (planner *p) {
     X(kdft_dit_register) (p, t2fv_16, &desc);
}
 
223
#else                           /* HAVE_FMA */
 
224
 
 
225
/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */
 
226
 
 
227
/*
 
228
 * This function contains 87 FP additions, 42 FP multiplications,
 
229
 * (or, 83 additions, 38 multiplications, 4 fused multiply/add),
 
230
 * 36 stack variables, and 32 memory accesses
 
231
 */
 
232
/*
 
233
 * Generator Id's : 
 
234
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
235
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
236
 * $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
 
237
 */
 
238
 
 
239
#include "t2f.h"
 
240
 
 
241
/*
 * t2fv_16 (variant used when HAVE_FMA is not defined): machine-generated
 * size-16 twiddle codelet operating on SIMD vectors of type V.
 *
 * Each loop iteration:
 *   - loads the 16 inputs x[WS(ios, k)], k = 0..15;
 *   - passes inputs 1..15 through BYTWJ with the twiddle found at
 *     W[TWVL * 2 * (k - 1)] (input 0 is used as loaded);
 *   - combines them with VADD/VSUB/VMUL/VFMA/VFNMS/VBYI and the three
 *     constants declared below;
 *   - stores 16 results back to the same x[WS(ios, k)] slots, so the
 *     transform is performed in place.
 *
 * Parameters:
 *   ri    base pointer of the data; the only data pointer dereferenced.
 *   ii    not referenced anywhere in this body.
 *   W     twiddle table; advanced by TWVL * 30 per iteration.
 *   ios   stride between the 16 points, always used through WS(ios, k).
 *   m     iteration count; the loop counts it down in steps of VL.
 *   dist  x advances by VL * dist per iteration; it is also forwarded to
 *         every LD/ST call.
 *
 * Returns W as advanced by the loop.
 *
 * NOTE(review): the exact semantics of LD/ST's third argument (&(x[0]) for
 * even k, &(x[WS(ios, 1)]) for odd k) and of BYTWJ/VBYI are defined by the
 * SIMD headers, not here -- consult those before changing any call.
 */
static const R *t2fv_16(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     /* 0.92387... == cos(pi/8), 0.38268... == sin(pi/8), 0.70710... == sqrt(1/2) */
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT i;
     R *x;
     x = ri;
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(ios)) {
          V TJ, T10, TD, T11, T1b, T1c, Ty, TK, T16, T17, T18, Tb, TN, T13, T14;
          V T15, Tm, TM, TG, TI, TH;
          /* Inputs 0 and 8. */
          TG = LD(&(x[0]), dist, &(x[0]));
          TH = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
          TI = BYTWJ(&(W[TWVL * 14]), TH);
          TJ = VSUB(TG, TI);
          T10 = VADD(TG, TI);
          {
               /* Inputs 4 and 12. */
               V TA, TC, Tz, TB;
               Tz = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
               TA = BYTWJ(&(W[TWVL * 6]), Tz);
               TB = LD(&(x[WS(ios, 12)]), dist, &(x[0]));
               TC = BYTWJ(&(W[TWVL * 22]), TB);
               TD = VSUB(TA, TC);
               T11 = VADD(TA, TC);
          }
          {
               /* Inputs 14, 10, 6, 2. */
               V Tp, Tw, Tr, Tu, Ts, Tx;
               {
                    V To, Tv, Tq, Tt;
                    To = LD(&(x[WS(ios, 14)]), dist, &(x[0]));
                    Tp = BYTWJ(&(W[TWVL * 26]), To);
                    Tv = LD(&(x[WS(ios, 10)]), dist, &(x[0]));
                    Tw = BYTWJ(&(W[TWVL * 18]), Tv);
                    Tq = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
                    Tr = BYTWJ(&(W[TWVL * 10]), Tq);
                    Tt = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
                    Tu = BYTWJ(&(W[TWVL * 2]), Tt);
               }
               T1b = VADD(Tp, Tr);
               T1c = VADD(Tu, Tw);
               Ts = VSUB(Tp, Tr);
               Tx = VSUB(Tu, Tw);
               Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx));
               TK = VMUL(LDK(KP707106781), VADD(Tx, Ts));
          }
          {
               /* Inputs 15, 11, 7, 3. */
               V T2, T9, T4, T7, T5, Ta;
               {
                    V T1, T8, T3, T6;
                    T1 = LD(&(x[WS(ios, 15)]), dist, &(x[WS(ios, 1)]));
                    T2 = BYTWJ(&(W[TWVL * 28]), T1);
                    T8 = LD(&(x[WS(ios, 11)]), dist, &(x[WS(ios, 1)]));
                    T9 = BYTWJ(&(W[TWVL * 20]), T8);
                    T3 = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
                    T4 = BYTWJ(&(W[TWVL * 12]), T3);
                    T6 = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
                    T7 = BYTWJ(&(W[TWVL * 4]), T6);
               }
               T16 = VADD(T2, T4);
               T17 = VADD(T7, T9);
               T18 = VSUB(T16, T17);
               T5 = VSUB(T2, T4);
               Ta = VSUB(T7, T9);
               Tb = VFNMS(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), T5));
               TN = VFMA(LDK(KP923879532), T5, VMUL(LDK(KP382683432), Ta));
          }
          {
               /* Inputs 1, 13, 9, 5. */
               V Td, Tk, Tf, Ti, Tg, Tl;
               {
                    V Tc, Tj, Te, Th;
                    Tc = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
                    Td = BYTWJ(&(W[0]), Tc);
                    Tj = LD(&(x[WS(ios, 13)]), dist, &(x[WS(ios, 1)]));
                    Tk = BYTWJ(&(W[TWVL * 24]), Tj);
                    Te = LD(&(x[WS(ios, 9)]), dist, &(x[WS(ios, 1)]));
                    Tf = BYTWJ(&(W[TWVL * 16]), Te);
                    Th = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
                    Ti = BYTWJ(&(W[TWVL * 8]), Th);
               }
               T13 = VADD(Td, Tf);
               T14 = VADD(Ti, Tk);
               T15 = VSUB(T13, T14);
               Tg = VSUB(Td, Tf);
               Tl = VSUB(Ti, Tk);
               Tm = VFMA(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl));
               TM = VFNMS(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tg));
          }
          {
               /* Outputs 14, 6, 2, 10. */
               V T1a, T1g, T1f, T1h;
               {
                    V T12, T19, T1d, T1e;
                    T12 = VSUB(T10, T11);
                    T19 = VMUL(LDK(KP707106781), VADD(T15, T18));
                    T1a = VADD(T12, T19);
                    T1g = VSUB(T12, T19);
                    T1d = VSUB(T1b, T1c);
                    T1e = VMUL(LDK(KP707106781), VSUB(T18, T15));
                    T1f = VBYI(VADD(T1d, T1e));
                    T1h = VBYI(VSUB(T1e, T1d));
               }
               ST(&(x[WS(ios, 14)]), VSUB(T1a, T1f), dist, &(x[0]));
               ST(&(x[WS(ios, 6)]), VADD(T1g, T1h), dist, &(x[0]));
               ST(&(x[WS(ios, 2)]), VADD(T1a, T1f), dist, &(x[0]));
               ST(&(x[WS(ios, 10)]), VSUB(T1g, T1h), dist, &(x[0]));
          }
          {
               /* Outputs 8, 4, 0, 12. */
               V T1k, T1o, T1n, T1p;
               {
                    V T1i, T1j, T1l, T1m;
                    T1i = VADD(T10, T11);
                    T1j = VADD(T1c, T1b);
                    T1k = VADD(T1i, T1j);
                    T1o = VSUB(T1i, T1j);
                    T1l = VADD(T13, T14);
                    T1m = VADD(T16, T17);
                    T1n = VADD(T1l, T1m);
                    T1p = VBYI(VSUB(T1m, T1l));
               }
               ST(&(x[WS(ios, 8)]), VSUB(T1k, T1n), dist, &(x[0]));
               ST(&(x[WS(ios, 4)]), VADD(T1o, T1p), dist, &(x[0]));
               ST(&(x[0]), VADD(T1k, T1n), dist, &(x[0]));
               ST(&(x[WS(ios, 12)]), VSUB(T1o, T1p), dist, &(x[0]));
          }
          {
               /* Outputs 7, 15, 9, 1. */
               V TF, TQ, TP, TR;
               {
                    V Tn, TE, TL, TO;
                    Tn = VSUB(Tb, Tm);
                    TE = VSUB(Ty, TD);
                    TF = VBYI(VSUB(Tn, TE));
                    TQ = VBYI(VADD(TE, Tn));
                    TL = VADD(TJ, TK);
                    TO = VADD(TM, TN);
                    TP = VSUB(TL, TO);
                    TR = VADD(TL, TO);
               }
               ST(&(x[WS(ios, 7)]), VADD(TF, TP), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 15)]), VSUB(TR, TQ), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 9)]), VSUB(TP, TF), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 1)]), VADD(TQ, TR), dist, &(x[WS(ios, 1)]));
          }
          {
               /* Outputs 13, 5, 3, 11. */
               V TU, TY, TX, TZ;
               {
                    V TS, TT, TV, TW;
                    TS = VSUB(TJ, TK);
                    TT = VADD(Tm, Tb);
                    TU = VADD(TS, TT);
                    TY = VSUB(TS, TT);
                    TV = VADD(TD, Ty);
                    TW = VSUB(TN, TM);
                    TX = VBYI(VADD(TV, TW));
                    TZ = VBYI(VSUB(TW, TV));
               }
               ST(&(x[WS(ios, 13)]), VSUB(TU, TX), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 5)]), VADD(TY, TZ), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 3)]), VADD(TU, TX), dist, &(x[WS(ios, 1)]));
               ST(&(x[WS(ios, 11)]), VSUB(TY, TZ), dist, &(x[WS(ios, 1)]));
          }
     }
     return W;
}
 
403
 
 
404
static const tw_instr twinstr[] = {
 
405
     VTW(1),
 
406
     VTW(2),
 
407
     VTW(3),
 
408
     VTW(4),
 
409
     VTW(5),
 
410
     VTW(6),
 
411
     VTW(7),
 
412
     VTW(8),
 
413
     VTW(9),
 
414
     VTW(10),
 
415
     VTW(11),
 
416
     VTW(12),
 
417
     VTW(13),
 
418
     VTW(14),
 
419
     VTW(15),
 
420
     {TW_NEXT, VL, 0}
 
421
};
 
422
 
 
423
/*
 * Codelet descriptor passed to the planner by the registration function
 * that follows.
 * NOTE(review): ct_desc's field names are declared elsewhere; the per-field
 * notes below are read off the values only and should be confirmed against
 * the ct_desc declaration.
 */
static const ct_desc desc = {
     16,                 /* matches the "-n 16" in the generator command line */
     "t2fv_16",          /* codelet name */
     twinstr,            /* twiddle instruction table defined above */
     &GENUS,
     {83, 38, 4, 0},     /* same 83 / 38 / 4 figures as this branch's statistics comment */
     0,
     0,
     0
};
 
424
 
 
425
/*
 * Registration entry point: passes the t2fv_16 codelet and its descriptor
 * to the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t2fv_16) (planner *p) {
     X(kdft_dit_register) (p, t2fv_16, &desc);
}
 
428
#endif                          /* HAVE_FMA */