~ubuntu-branches/ubuntu/maverick/blender/maverick

« back to all changes in this revision

Viewing changes to extern/fftw/dft/codelets/standard/n1_16.c

  • Committer: Bazaar Package Importer
  • Author(s): Khashayar Naderehvandi, Khashayar Naderehvandi, Alessio Treglia
  • Date: 2009-01-22 16:53:59 UTC
  • mfrom: (14.1.1 experimental)
  • Revision ID: james.westby@ubuntu.com-20090122165359-v0996tn7fbit64ni
Tags: 2.48a+dfsg-1ubuntu1
[ Khashayar Naderehvandi ]
* Merge from debian experimental (LP: #320045), Ubuntu remaining changes:
  - Add patch correcting header file locations.
  - Add libvorbis-dev and libgsm1-dev to Build-Depends.
  - Use avcodec_decode_audio2() in source/blender/src/hddaudio.c

[ Alessio Treglia ]
* Add missing previous changelog entries.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2006 Matteo Frigo
 
3
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Sat Jul  1 13:55:41 EDT 2006 */
 
23
 
 
24
#include "codelet-dft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_notw -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include n.h */
 
29
 
 
30
/*
 
31
 * This function contains 144 FP additions, 40 FP multiplications,
 
32
 * (or, 104 additions, 0 multiplications, 40 fused multiply/add),
 
33
 * 82 stack variables, and 64 memory accesses
 
34
 */
 
35
/*
 
36
 * Generator IDs:
 
37
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
38
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
39
 * $Id: gen_notw.ml,v 1.30 2006-02-12 23:34:12 athena Exp $
 
40
 */
 
41
 
 
42
#include "n.h"
 
43
 
 
44
/*
 * n1_16 (HAVE_FMA variant): size-16 codelet, per the generator command line
 * recorded above (gen_notw ... -n 16 -name n1_16).
 *
 * ri, ii   input arrays; each pass reads elements 0 and WS(is, k) for
 *          k = 1..15 from both.
 * ro, io   output arrays; each pass writes elements 0 and WS(os, k) for
 *          k = 1..15 to both.
 * is, os   strides passed to WS() to form the input / output indices.
 * v        number of passes of the loop.
 * ivs, ovs amounts added to ri/ii and ro/io respectively after each pass.
 *
 * NOTE(review): ri/ro presumably carry real parts and ii/io imaginary
 * parts -- confirm against the callers. R, E, INT, stride, WS, DK, FMA, FNMS
 * and MAKE_VOLATILE_STRIDE come from headers not shown here; FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm.
 */
static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
 
45
{
 
46
     /* The literals are numerically cos(pi/8), sqrt(2) - 1 and sqrt(1/2). */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
47
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
 
48
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
49
     INT i;
 
50
     /* i counts down from v; after each pass ri/ii advance by ivs and ro/io by ovs. */
     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
 
51
          /* Declared at loop scope: assigned inside the nested blocks, consumed by the final stores. */
          E T1z, T1L, T1M, T1N, T1P, T1J, T1K, T1G, T1O, T1Q;
 
52
          {
 
53
               E T1l, T1H, T1R, T7, T1x, TN, TC, T25, T1E, T1b, T1Z, Tt, T2h, T22, T1D;
 
54
               E T1g, T1n, TQ, Te, T26, TT, T1m, TJ, T1S, Tj, T11, Ti, T1V, TZ, Tk;
 
55
               E T12, T13;
 
56
               {
 
57
                    E Tq, T1c, Tp, T20, T1a, Tr, T1d, T1e;
 
58
                    {
 
59
                         E T4, TL, T3, T1k, Ty, T5, Tz, TA;
 
60
                         /* Inputs 0, 8, 4 and 12. */
                         {
 
61
                              E T1, T2, Tw, Tx;
 
62
                              T1 = ri[0];
 
63
                              T2 = ri[WS(is, 8)];
 
64
                              Tw = ii[0];
 
65
                              Tx = ii[WS(is, 8)];
 
66
                              T4 = ri[WS(is, 4)];
 
67
                              TL = T1 - T2;
 
68
                              T3 = T1 + T2;
 
69
                              T1k = Tw - Tx;
 
70
                              Ty = Tw + Tx;
 
71
                              T5 = ri[WS(is, 12)];
 
72
                              Tz = ii[WS(is, 4)];
 
73
                              TA = ii[WS(is, 12)];
 
74
                         }
 
75
                         /* Inputs 15, 7, 3 and 11; the nested block combines the 0/8/4/12 values loaded above. */
                         {
 
76
                              E Tn, To, T18, T19;
 
77
                              Tn = ri[WS(is, 15)];
 
78
                              {
 
79
                                   E T1j, T6, TM, TB;
 
80
                                   T1j = T4 - T5;
 
81
                                   T6 = T4 + T5;
 
82
                                   TM = Tz - TA;
 
83
                                   TB = Tz + TA;
 
84
                                   T1l = T1j + T1k;
 
85
                                   T1H = T1k - T1j;
 
86
                                   T1R = T3 - T6;
 
87
                                   T7 = T3 + T6;
 
88
                                   T1x = TL + TM;
 
89
                                   TN = TL - TM;
 
90
                                   TC = Ty + TB;
 
91
                                   T25 = Ty - TB;
 
92
                                   To = ri[WS(is, 7)];
 
93
                              }
 
94
                              T18 = ii[WS(is, 15)];
 
95
                              T19 = ii[WS(is, 7)];
 
96
                              Tq = ri[WS(is, 3)];
 
97
                              T1c = Tn - To;
 
98
                              Tp = Tn + To;
 
99
                              T20 = T18 + T19;
 
100
                              T1a = T18 - T19;
 
101
                              Tr = ri[WS(is, 11)];
 
102
                              T1d = ii[WS(is, 3)];
 
103
                              T1e = ii[WS(is, 11)];
 
104
                         }
 
105
                    }
 
106
                    {
 
107
                         E Tb, TP, Ta, TO, TF, Tc, TG, TH;
 
108
                         /* Inputs 2, 10, 14 and 6; the nested block combines the 15/7/3/11 values loaded above. */
                         {
 
109
                              E T8, T9, TD, TE;
 
110
                              T8 = ri[WS(is, 2)];
 
111
                              {
 
112
                                   E T17, Ts, T21, T1f;
 
113
                                   T17 = Tq - Tr;
 
114
                                   Ts = Tq + Tr;
 
115
                                   T21 = T1d + T1e;
 
116
                                   T1f = T1d - T1e;
 
117
                                   T1E = T1a - T17;
 
118
                                   T1b = T17 + T1a;
 
119
                                   T1Z = Tp - Ts;
 
120
                                   Tt = Tp + Ts;
 
121
                                   T2h = T20 + T21;
 
122
                                   T22 = T20 - T21;
 
123
                                   T1D = T1c + T1f;
 
124
                                   T1g = T1c - T1f;
 
125
                                   T9 = ri[WS(is, 10)];
 
126
                              }
 
127
                              TD = ii[WS(is, 2)];
 
128
                              TE = ii[WS(is, 10)];
 
129
                              Tb = ri[WS(is, 14)];
 
130
                              TP = T8 - T9;
 
131
                              Ta = T8 + T9;
 
132
                              TO = TD - TE;
 
133
                              TF = TD + TE;
 
134
                              Tc = ri[WS(is, 6)];
 
135
                              TG = ii[WS(is, 14)];
 
136
                              TH = ii[WS(is, 6)];
 
137
                         }
 
138
                         /* Combine the 2/10/14/6 values. */
                         {
 
139
                              E TR, Td, TS, TI;
 
140
                              T1n = TP + TO;
 
141
                              TQ = TO - TP;
 
142
                              TR = Tb - Tc;
 
143
                              Td = Tb + Tc;
 
144
                              TS = TG - TH;
 
145
                              TI = TG + TH;
 
146
                              Te = Ta + Td;
 
147
                              T26 = Td - Ta;
 
148
                              TT = TR + TS;
 
149
                              T1m = TR - TS;
 
150
                              TJ = TF + TI;
 
151
                              T1S = TF - TI;
 
152
                         }
 
153
                    }
 
154
                    /* Inputs 1, 9, 5 and 13. */
                    {
 
155
                         E Tg, Th, TX, TY;
 
156
                         Tg = ri[WS(is, 1)];
 
157
                         Th = ri[WS(is, 9)];
 
158
                         TX = ii[WS(is, 1)];
 
159
                         TY = ii[WS(is, 9)];
 
160
                         Tj = ri[WS(is, 5)];
 
161
                         T11 = Tg - Th;
 
162
                         Ti = Tg + Th;
 
163
                         T1V = TX + TY;
 
164
                         TZ = TX - TY;
 
165
                         Tk = ri[WS(is, 13)];
 
166
                         T12 = ii[WS(is, 5)];
 
167
                         T13 = ii[WS(is, 13)];
 
168
                    }
 
169
               }
 
170
               /* Remaining arithmetic plus the stores for every output except 1, 5, 9 and 13. */
               {
 
171
                    E T2f, T1B, T10, T1U, T1X, T1A, T15, Tv, TK, T2i;
 
172
                    {
 
173
                         E Tf, Tu, T2j, T2k, T2g;
 
174
                         T2f = T7 - Te;
 
175
                         Tf = T7 + Te;
 
176
                         /* Combine the 1/9/5/13 values loaded earlier. */
                         {
 
177
                              E TW, Tl, T1W, T14, Tm;
 
178
                              TW = Tj - Tk;
 
179
                              Tl = Tj + Tk;
 
180
                              T1W = T12 + T13;
 
181
                              T14 = T12 - T13;
 
182
                              T1B = TZ - TW;
 
183
                              T10 = TW + TZ;
 
184
                              T1U = Ti - Tl;
 
185
                              Tm = Ti + Tl;
 
186
                              T2g = T1V + T1W;
 
187
                              T1X = T1V - T1W;
 
188
                              T1A = T11 + T14;
 
189
                              T15 = T11 - T14;
 
190
                              Tu = Tm + Tt;
 
191
                              Tv = Tt - Tm;
 
192
                         }
 
193
                         TK = TC - TJ;
 
194
                         T2j = TC + TJ;
 
195
                         T2k = T2g + T2h;
 
196
                         T2i = T2g - T2h;
 
197
                         /* Outputs 0 and 8. */
                         ro[0] = Tf + Tu;
 
198
                         ro[WS(os, 8)] = Tf - Tu;
 
199
                         io[0] = T2j + T2k;
 
200
                         io[WS(os, 8)] = T2j - T2k;
 
201
                    }
 
202
                    {
 
203
                         E T29, T1T, T27, T2d, T2a, T2b, T28, T24, T1Y, T23;
 
204
                         T29 = T1R - T1S;
 
205
                         T1T = T1R + T1S;
 
206
                         /* Outputs 4 and 12. */
                         io[WS(os, 12)] = TK - Tv;
 
207
                         io[WS(os, 4)] = Tv + TK;
 
208
                         ro[WS(os, 4)] = T2f + T2i;
 
209
                         ro[WS(os, 12)] = T2f - T2i;
 
210
                         T27 = T25 - T26;
 
211
                         T2d = T26 + T25;
 
212
                         T2a = T1X - T1U;
 
213
                         T1Y = T1U + T1X;
 
214
                         T23 = T1Z - T22;
 
215
                         T2b = T1Z + T22;
 
216
                         T28 = T23 - T1Y;
 
217
                         T24 = T1Y + T23;
 
218
                         {
 
219
                              E T1I, TV, T1v, T1y, T1t, T1s, T1r, T1p, T1q, T1i;
 
220
                              {
 
221
                                   E T1o, T2e, T2c, TU, T16, T1h;
 
222
                                   T1I = TQ + TT;
 
223
                                   TU = TQ - TT;
 
224
                                   /* Stores for outputs 2, 6, 10 and 14 start here and finish after T2e/T2c are formed. */
                                   io[WS(os, 14)] = FNMS(KP707106781, T28, T27);
 
225
                                   io[WS(os, 6)] = FMA(KP707106781, T28, T27);
 
226
                                   ro[WS(os, 2)] = FMA(KP707106781, T24, T1T);
 
227
                                   ro[WS(os, 10)] = FNMS(KP707106781, T24, T1T);
 
228
                                   T2e = T2a + T2b;
 
229
                                   T2c = T2a - T2b;
 
230
                                   TV = FMA(KP707106781, TU, TN);
 
231
                                   T1v = FNMS(KP707106781, TU, TN);
 
232
                                   io[WS(os, 10)] = FNMS(KP707106781, T2e, T2d);
 
233
                                   io[WS(os, 2)] = FMA(KP707106781, T2e, T2d);
 
234
                                   ro[WS(os, 6)] = FMA(KP707106781, T2c, T29);
 
235
                                   ro[WS(os, 14)] = FNMS(KP707106781, T2c, T29);
 
236
                                   T1o = T1m - T1n;
 
237
                                   T1y = T1n + T1m;
 
238
                                   T1t = FNMS(KP414213562, T10, T15);
 
239
                                   T16 = FMA(KP414213562, T15, T10);
 
240
                                   T1h = FNMS(KP414213562, T1g, T1b);
 
241
                                   T1s = FMA(KP414213562, T1b, T1g);
 
242
                                   T1r = FMA(KP707106781, T1o, T1l);
 
243
                                   T1p = FNMS(KP707106781, T1o, T1l);
 
244
                                   T1q = T16 + T1h;
 
245
                                   T1i = T16 - T1h;
 
246
                              }
 
247
                              {
 
248
                                   E T1w, T1u, T1C, T1F;
 
249
                                   /* Outputs 3, 7, 11 and 15, interleaved with the setup for outputs 1, 5, 9 and 13. */
                                   io[WS(os, 15)] = FMA(KP923879532, T1q, T1p);
 
250
                                   io[WS(os, 7)] = FNMS(KP923879532, T1q, T1p);
 
251
                                   ro[WS(os, 3)] = FMA(KP923879532, T1i, TV);
 
252
                                   ro[WS(os, 11)] = FNMS(KP923879532, T1i, TV);
 
253
                                   T1w = T1t + T1s;
 
254
                                   T1u = T1s - T1t;
 
255
                                   T1z = FMA(KP707106781, T1y, T1x);
 
256
                                   T1L = FNMS(KP707106781, T1y, T1x);
 
257
                                   ro[WS(os, 15)] = FMA(KP923879532, T1w, T1v);
 
258
                                   ro[WS(os, 7)] = FNMS(KP923879532, T1w, T1v);
 
259
                                   io[WS(os, 3)] = FMA(KP923879532, T1u, T1r);
 
260
                                   io[WS(os, 11)] = FNMS(KP923879532, T1u, T1r);
 
261
                                   T1M = FNMS(KP414213562, T1A, T1B);
 
262
                                   T1C = FMA(KP414213562, T1B, T1A);
 
263
                                   T1F = FNMS(KP414213562, T1E, T1D);
 
264
                                   T1N = FMA(KP414213562, T1D, T1E);
 
265
                                   T1P = FMA(KP707106781, T1I, T1H);
 
266
                                   T1J = FNMS(KP707106781, T1I, T1H);
 
267
                                   T1K = T1F - T1C;
 
268
                                   T1G = T1C + T1F;
 
269
                              }
 
270
                         }
 
271
                    }
 
272
               }
 
273
          }
 
274
          /* Outputs 1, 5, 9 and 13, built from the loop-scope temporaries. */
          io[WS(os, 5)] = FMA(KP923879532, T1K, T1J);
 
275
          io[WS(os, 13)] = FNMS(KP923879532, T1K, T1J);
 
276
          ro[WS(os, 1)] = FMA(KP923879532, T1G, T1z);
 
277
          ro[WS(os, 9)] = FNMS(KP923879532, T1G, T1z);
 
278
          T1O = T1M - T1N;
 
279
          T1Q = T1M + T1N;
 
280
          io[WS(os, 1)] = FMA(KP923879532, T1Q, T1P);
 
281
          io[WS(os, 9)] = FNMS(KP923879532, T1Q, T1P);
 
282
          ro[WS(os, 5)] = FMA(KP923879532, T1O, T1L);
 
283
          ro[WS(os, 13)] = FNMS(KP923879532, T1O, T1L);
 
284
     }
 
285
}
 
286
 
 
287
/*
 * Descriptor handed to X(kdft_register) together with n1_16: 16, the name
 * "n1_16", and the counts {104, 0, 40, 0}, whose first three entries match the
 * "104 additions, 0 multiplications, 40 fused multiply/add" summary in the
 * comment above. The meaning of the fourth count, &GENUS and the trailing
 * zeros is fixed by kdft_desc, which is declared elsewhere.
 */
static const kdft_desc desc = { 16, "n1_16", {104, 0, 40, 0}, &GENUS, 0, 0, 0, 0 };
 
288
/*
 * Passes planner p, the n1_16 function and its descriptor to
 * X(kdft_register). X() is a macro defined elsewhere (presumably a
 * name-prefixing macro -- confirm).
 */
void X(codelet_n1_16) (planner *p) {
 
289
     X(kdft_register) (p, n1_16, &desc);
 
290
}
 
291
 
 
292
#else                           /* HAVE_FMA */
 
293
 
 
294
/* Generated by: ../../../genfft/gen_notw -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include n.h */
 
295
 
 
296
/*
 
297
 * This function contains 144 FP additions, 24 FP multiplications,
 
298
 * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
 
299
 * 50 stack variables, and 64 memory accesses
 
300
 */
 
301
/*
 
302
 * Generator IDs:
 
303
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
304
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
305
 * $Id: gen_notw.ml,v 1.30 2006-02-12 23:34:12 athena Exp $
 
306
 */
 
307
 
 
308
#include "n.h"
 
309
 
 
310
/*
 * n1_16 (variant built when HAVE_FMA is not defined): size-16 codelet, per
 * the generator command line recorded above (gen_notw ... -n 16 -name n1_16).
 *
 * ri, ii   input arrays; each pass reads elements 0 and WS(is, k) for
 *          k = 1..15 from both.
 * ro, io   output arrays; each pass writes elements 0 and WS(os, k) for
 *          k = 1..15 to both.
 * is, os   strides passed to WS() to form the input / output indices.
 * v        number of passes of the loop.
 * ivs, ovs amounts added to ri/ii and ro/io respectively after each pass.
 *
 * NOTE(review): ri/ro presumably carry real parts and ii/io imaginary
 * parts -- confirm against the callers. R, E, INT, stride, WS, DK, FMA, FNMS
 * and MAKE_VOLATILE_STRIDE come from headers not shown here; FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm.
 */
static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
 
311
{
 
312
     /* The literals are numerically sin(pi/8), cos(pi/8) and sqrt(1/2). */
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
 
313
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
314
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
315
     INT i;
 
316
     /* i counts down from v; after each pass ri/ii advance by ivs and ro/io by ovs. */
     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
 
317
          /* Values produced by the four load blocks and consumed by the store blocks further down. */
          E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
 
318
          E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
 
319
          E T1U, T1A;
 
320
          /* Inputs 0, 8, 4 and 12. */
          {
 
321
               E T3, TL, Ty, T1k, T6, T1j, TB, TM;
 
322
               {
 
323
                    E T1, T2, Tw, Tx;
 
324
                    T1 = ri[0];
 
325
                    T2 = ri[WS(is, 8)];
 
326
                    T3 = T1 + T2;
 
327
                    TL = T1 - T2;
 
328
                    Tw = ii[0];
 
329
                    Tx = ii[WS(is, 8)];
 
330
                    Ty = Tw + Tx;
 
331
                    T1k = Tw - Tx;
 
332
               }
 
333
               {
 
334
                    E T4, T5, Tz, TA;
 
335
                    T4 = ri[WS(is, 4)];
 
336
                    T5 = ri[WS(is, 12)];
 
337
                    T6 = T4 + T5;
 
338
                    T1j = T4 - T5;
 
339
                    Tz = ii[WS(is, 4)];
 
340
                    TA = ii[WS(is, 12)];
 
341
                    TB = Tz + TA;
 
342
                    TM = Tz - TA;
 
343
               }
 
344
               T7 = T3 + T6;
 
345
               T1R = T3 - T6;
 
346
               T25 = Ty - TB;
 
347
               TC = Ty + TB;
 
348
               TN = TL - TM;
 
349
               T1x = TL + TM;
 
350
               T1H = T1k - T1j;
 
351
               T1l = T1j + T1k;
 
352
          }
 
353
          /* Inputs 15, 7, 3 and 11. */
          {
 
354
               E Tp, T17, T1f, T20, Ts, T1c, T1a, T21;
 
355
               {
 
356
                    E Tn, To, T1d, T1e;
 
357
                    Tn = ri[WS(is, 15)];
 
358
                    To = ri[WS(is, 7)];
 
359
                    Tp = Tn + To;
 
360
                    T17 = Tn - To;
 
361
                    T1d = ii[WS(is, 15)];
 
362
                    T1e = ii[WS(is, 7)];
 
363
                    T1f = T1d - T1e;
 
364
                    T20 = T1d + T1e;
 
365
               }
 
366
               {
 
367
                    E Tq, Tr, T18, T19;
 
368
                    Tq = ri[WS(is, 3)];
 
369
                    Tr = ri[WS(is, 11)];
 
370
                    Ts = Tq + Tr;
 
371
                    T1c = Tq - Tr;
 
372
                    T18 = ii[WS(is, 3)];
 
373
                    T19 = ii[WS(is, 11)];
 
374
                    T1a = T18 - T19;
 
375
                    T21 = T18 + T19;
 
376
               }
 
377
               Tt = Tp + Ts;
 
378
               T22 = T20 - T21;
 
379
               T2h = T20 + T21;
 
380
               T1b = T17 - T1a;
 
381
               T1g = T1c + T1f;
 
382
               T1E = T1f - T1c;
 
383
               T1Z = Tp - Ts;
 
384
               T1D = T17 + T1a;
 
385
          }
 
386
          /* Inputs 2, 10, 14 and 6. */
          {
 
387
               E Ta, TP, TF, TO, Td, TR, TI, TS;
 
388
               {
 
389
                    E T8, T9, TD, TE;
 
390
                    T8 = ri[WS(is, 2)];
 
391
                    T9 = ri[WS(is, 10)];
 
392
                    Ta = T8 + T9;
 
393
                    TP = T8 - T9;
 
394
                    TD = ii[WS(is, 2)];
 
395
                    TE = ii[WS(is, 10)];
 
396
                    TF = TD + TE;
 
397
                    TO = TD - TE;
 
398
               }
 
399
               {
 
400
                    E Tb, Tc, TG, TH;
 
401
                    Tb = ri[WS(is, 14)];
 
402
                    Tc = ri[WS(is, 6)];
 
403
                    Td = Tb + Tc;
 
404
                    TR = Tb - Tc;
 
405
                    TG = ii[WS(is, 14)];
 
406
                    TH = ii[WS(is, 6)];
 
407
                    TI = TG + TH;
 
408
                    TS = TG - TH;
 
409
               }
 
410
               Te = Ta + Td;
 
411
               T1S = TF - TI;
 
412
               T26 = Td - Ta;
 
413
               TJ = TF + TI;
 
414
               TQ = TO - TP;
 
415
               T1m = TR - TS;
 
416
               T1n = TP + TO;
 
417
               TT = TR + TS;
 
418
          }
 
419
          /* Inputs 1, 9, 5 and 13. */
          {
 
420
               E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
 
421
               {
 
422
                    E Tg, Th, TX, TY;
 
423
                    Tg = ri[WS(is, 1)];
 
424
                    Th = ri[WS(is, 9)];
 
425
                    Ti = Tg + Th;
 
426
                    T11 = Tg - Th;
 
427
                    TX = ii[WS(is, 1)];
 
428
                    TY = ii[WS(is, 9)];
 
429
                    TZ = TX - TY;
 
430
                    T1V = TX + TY;
 
431
               }
 
432
               {
 
433
                    E Tj, Tk, T12, T13;
 
434
                    Tj = ri[WS(is, 5)];
 
435
                    Tk = ri[WS(is, 13)];
 
436
                    Tl = Tj + Tk;
 
437
                    TW = Tj - Tk;
 
438
                    T12 = ii[WS(is, 5)];
 
439
                    T13 = ii[WS(is, 13)];
 
440
                    T14 = T12 - T13;
 
441
                    T1W = T12 + T13;
 
442
               }
 
443
               Tm = Ti + Tl;
 
444
               T1X = T1V - T1W;
 
445
               T2g = T1V + T1W;
 
446
               T10 = TW + TZ;
 
447
               T15 = T11 - T14;
 
448
               T1B = T11 + T14;
 
449
               T1U = Ti - Tl;
 
450
               T1A = TZ - TW;
 
451
          }
 
452
          /* Outputs 0 and 8. */
          {
 
453
               E Tf, Tu, T2j, T2k;
 
454
               Tf = T7 + Te;
 
455
               Tu = Tm + Tt;
 
456
               ro[WS(os, 8)] = Tf - Tu;
 
457
               ro[0] = Tf + Tu;
 
458
               T2j = TC + TJ;
 
459
               T2k = T2g + T2h;
 
460
               io[WS(os, 8)] = T2j - T2k;
 
461
               io[0] = T2j + T2k;
 
462
          }
 
463
          /* Outputs 4 and 12. */
          {
 
464
               E Tv, TK, T2f, T2i;
 
465
               Tv = Tt - Tm;
 
466
               TK = TC - TJ;
 
467
               io[WS(os, 4)] = Tv + TK;
 
468
               io[WS(os, 12)] = TK - Tv;
 
469
               T2f = T7 - Te;
 
470
               T2i = T2g - T2h;
 
471
               ro[WS(os, 12)] = T2f - T2i;
 
472
               ro[WS(os, 4)] = T2f + T2i;
 
473
          }
 
474
          /* ro[2], ro[10], io[6] and io[14]. */
          {
 
475
               E T1T, T27, T24, T28, T1Y, T23;
 
476
               T1T = T1R + T1S;
 
477
               T27 = T25 - T26;
 
478
               T1Y = T1U + T1X;
 
479
               T23 = T1Z - T22;
 
480
               T24 = KP707106781 * (T1Y + T23);
 
481
               T28 = KP707106781 * (T23 - T1Y);
 
482
               ro[WS(os, 10)] = T1T - T24;
 
483
               io[WS(os, 6)] = T27 + T28;
 
484
               ro[WS(os, 2)] = T1T + T24;
 
485
               io[WS(os, 14)] = T27 - T28;
 
486
          }
 
487
          /* ro[6], ro[14], io[2] and io[10]. */
          {
 
488
               E T29, T2d, T2c, T2e, T2a, T2b;
 
489
               T29 = T1R - T1S;
 
490
               T2d = T26 + T25;
 
491
               T2a = T1X - T1U;
 
492
               T2b = T1Z + T22;
 
493
               T2c = KP707106781 * (T2a - T2b);
 
494
               T2e = KP707106781 * (T2a + T2b);
 
495
               ro[WS(os, 14)] = T29 - T2c;
 
496
               io[WS(os, 2)] = T2d + T2e;
 
497
               ro[WS(os, 6)] = T29 + T2c;
 
498
               io[WS(os, 10)] = T2d - T2e;
 
499
          }
 
500
          /* Outputs 3, 7, 11 and 15. */
          {
 
501
               E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o;
 
502
               TU = KP707106781 * (TQ - TT);
 
503
               TV = TN + TU;
 
504
               T1r = TN - TU;
 
505
               T1o = KP707106781 * (T1m - T1n);
 
506
               T1p = T1l - T1o;
 
507
               T1v = T1l + T1o;
 
508
               {
 
509
                    E T16, T1h, T1s, T1t;
 
510
                    T16 = FMA(KP923879532, T10, KP382683432 * T15);
 
511
                    T1h = FNMS(KP923879532, T1g, KP382683432 * T1b);
 
512
                    T1i = T16 + T1h;
 
513
                    T1q = T1h - T16;
 
514
                    T1s = FNMS(KP923879532, T15, KP382683432 * T10);
 
515
                    T1t = FMA(KP382683432, T1g, KP923879532 * T1b);
 
516
                    T1u = T1s - T1t;
 
517
                    T1w = T1s + T1t;
 
518
               }
 
519
               ro[WS(os, 11)] = TV - T1i;
 
520
               io[WS(os, 11)] = T1v - T1w;
 
521
               ro[WS(os, 3)] = TV + T1i;
 
522
               io[WS(os, 3)] = T1v + T1w;
 
523
               io[WS(os, 15)] = T1p - T1q;
 
524
               ro[WS(os, 15)] = T1r - T1u;
 
525
               io[WS(os, 7)] = T1p + T1q;
 
526
               ro[WS(os, 7)] = T1r + T1u;
 
527
          }
 
528
          /* Outputs 1, 5, 9 and 13. */
          {
 
529
               E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
 
530
               T1y = KP707106781 * (T1n + T1m);
 
531
               T1z = T1x + T1y;
 
532
               T1L = T1x - T1y;
 
533
               T1I = KP707106781 * (TQ + TT);
 
534
               T1J = T1H - T1I;
 
535
               T1P = T1H + T1I;
 
536
               {
 
537
                    E T1C, T1F, T1M, T1N;
 
538
                    T1C = FMA(KP382683432, T1A, KP923879532 * T1B);
 
539
                    T1F = FNMS(KP382683432, T1E, KP923879532 * T1D);
 
540
                    T1G = T1C + T1F;
 
541
                    T1K = T1F - T1C;
 
542
                    T1M = FNMS(KP382683432, T1B, KP923879532 * T1A);
 
543
                    T1N = FMA(KP923879532, T1E, KP382683432 * T1D);
 
544
                    T1O = T1M - T1N;
 
545
                    T1Q = T1M + T1N;
 
546
               }
 
547
               ro[WS(os, 9)] = T1z - T1G;
 
548
               io[WS(os, 9)] = T1P - T1Q;
 
549
               ro[WS(os, 1)] = T1z + T1G;
 
550
               io[WS(os, 1)] = T1P + T1Q;
 
551
               io[WS(os, 13)] = T1J - T1K;
 
552
               ro[WS(os, 13)] = T1L - T1O;
 
553
               io[WS(os, 5)] = T1J + T1K;
 
554
               ro[WS(os, 5)] = T1L + T1O;
 
555
          }
 
556
     }
 
557
}
 
558
 
 
559
/*
 * Descriptor handed to X(kdft_register) together with n1_16: 16, the name
 * "n1_16", and the counts {136, 16, 8, 0}, whose first three entries match the
 * "136 additions, 16 multiplications, 8 fused multiply/add" summary in the
 * comment above. The meaning of the fourth count, &GENUS and the trailing
 * zeros is fixed by kdft_desc, which is declared elsewhere.
 */
static const kdft_desc desc = { 16, "n1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 };
 
560
/*
 * Passes planner p, the n1_16 function and its descriptor to
 * X(kdft_register). X() is a macro defined elsewhere (presumably a
 * name-prefixing macro -- confirm).
 */
void X(codelet_n1_16) (planner *p) {
 
560
     X(kdft_register) (p, n1_16, &desc);
 
561
}
 
563
 
 
564
#endif                          /* HAVE_FMA */