~ubuntu-branches/ubuntu/maverick/blender/maverick

« back to all changes in this revision

Viewing changes to extern/fftw/rdft/codelets/r2hc/r2hc_32.c

  • Committer: Bazaar Package Importer
  • Author(s): Khashayar Naderehvandi, Khashayar Naderehvandi, Alessio Treglia
  • Date: 2009-01-22 16:53:59 UTC
  • mfrom: (14.1.1 experimental)
  • Revision ID: james.westby@ubuntu.com-20090122165359-v0996tn7fbit64ni
Tags: 2.48a+dfsg-1ubuntu1
[ Khashayar Naderehvandi ]
* Merge from debian experimental (LP: #320045), Ubuntu remaining changes:
  - Add patch correcting header file locations.
  - Add libvorbis-dev and libgsm1-dev to Build-Depends.
  - Use avcodec_decode_audio2() in source/blender/src/hddaudio.c

[ Alessio Treglia ]
* Add missing previous changelog entries.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2006 Matteo Frigo
 
3
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Sun Jul  2 14:19:41 EDT 2006 */
 
23
 
 
24
#include "codelet-rdft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_r2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2hc_32 -include r2hc.h */
 
29
 
 
30
/*
 
31
 * This function contains 156 FP additions, 68 FP multiplications,
 
32
 * (or, 88 additions, 0 multiplications, 68 fused multiply/add),
 
33
 * 89 stack variables, and 64 memory accesses
 
34
 */
 
35
/*
 
36
 * Generator Id's : 
 
37
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
38
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
39
 * $Id: gen_r2hc.ml,v 1.18 2006-02-12 23:34:12 athena Exp $
 
40
 */
 
41
 
 
42
#include "r2hc.h"
 
43
 
 
44
/*
 * r2hc_32 -- variant compiled when HAVE_FMA is defined.
 *
 * Each loop pass reads the 32 values I[0] and I[WS(is, k)] for k = 1..31,
 * and stores 17 values to ro[0], ro[WS(ros, k)] for k = 1..16, plus 15
 * values to io[WS(ios, k)] for k = 1..15 (64 memory accesses in total).
 * The loop runs v times; after each pass I advances by ivs and both ro
 * and io advance by ovs.
 *
 * NOTE(review): judging by the name and the gen_r2hc command line in the
 * comment above, this is presumably a size-32 real-to-halfcomplex DFT
 * kernel -- confirm against r2hc.h.
 * NOTE(review): FMA, FNMS and FMS are macros defined outside this file.
 * Comparing with the non-FMA variant in the #else branch, they appear to
 * compute a*b + c, c - a*b and a*b - c respectively -- confirm.
 */
static void r2hc_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
 
45
{
 
46
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);  /* numerically cos(3*pi/16) */
 
47
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);  /* numerically tan(3*pi/16) */
 
48
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);  /* numerically cos(pi/16) */
 
49
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);  /* numerically tan(pi/16) */
 
50
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);  /* numerically cos(pi/8) */
 
51
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);  /* numerically sqrt(2)/2 */
 
52
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);  /* numerically sqrt(2) - 1 = tan(pi/8) */
 
53
     INT i;
 
54
     /* One pass per transform: v passes, stepping the input by ivs and both outputs by ovs. */
     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
 
55
          /* Declared at loop scope: assigned inside the nested blocks, consumed by the stores after they close. */
          E T1x, T1M, T1I, T1E, T1J, T1H;
 
56
          {
 
57
               E Tv, T1h, T7, T2b, Te, T2n, Ty, T1i, T1l, TF, T2d, Tt, T1k, TC, T2c;
 
58
               E Tm, T2j, T1Z, T2k, T22, TK, T1B, T19, T1C, T1e, TO, TV, T1T, TN, TP;
 
59
               E T2g, T1S;
 
60
               {
 
61
                    E TD, Tp, Tq, Tr;
 
62
                    {
 
63
                         E T1, T2, T4, T5;
 
64
                         T1 = I[0];
 
65
                         T2 = I[WS(is, 16)];
 
66
                         T4 = I[WS(is, 8)];
 
67
                         T5 = I[WS(is, 24)];
 
68
                         {
 
69
                              E Ta, Tw, Tx, Td, Tn, To;
 
70
                              {
 
71
                                   E T8, T3, T6, T9, Tb, Tc;
 
72
                                   T8 = I[WS(is, 4)];
 
73
                                   Tv = T1 - T2;
 
74
                                   T3 = T1 + T2;
 
75
                                   T1h = T4 - T5;
 
76
                                   T6 = T4 + T5;
 
77
                                   T9 = I[WS(is, 20)];
 
78
                                   Tb = I[WS(is, 28)];
 
79
                                   Tc = I[WS(is, 12)];
 
80
                                   T7 = T3 + T6;
 
81
                                   T2b = T3 - T6;
 
82
                                   Ta = T8 + T9;
 
83
                                   Tw = T8 - T9;
 
84
                                   Tx = Tb - Tc;
 
85
                                   Td = Tb + Tc;
 
86
                              }
 
87
                              Tn = I[WS(is, 30)];
 
88
                              To = I[WS(is, 14)];
 
89
                              Te = Ta + Td;
 
90
                              T2n = Td - Ta;
 
91
                              Ty = Tw + Tx;
 
92
                              T1i = Tx - Tw;
 
93
                              TD = Tn - To;
 
94
                              Tp = Tn + To;
 
95
                              Tq = I[WS(is, 6)];
 
96
                              Tr = I[WS(is, 22)];
 
97
                         }
 
98
                    }
 
99
                    {
 
100
                         E Tj, TA, Ti, Tk;
 
101
                         {
 
102
                              E Tg, Th, TE, Ts;
 
103
                              Tg = I[WS(is, 2)];
 
104
                              Th = I[WS(is, 18)];
 
105
                              Tj = I[WS(is, 10)];
 
106
                              TE = Tq - Tr;
 
107
                              Ts = Tq + Tr;
 
108
                              TA = Tg - Th;
 
109
                              Ti = Tg + Th;
 
110
                              T1l = FNMS(KP414213562, TD, TE);
 
111
                              TF = FMA(KP414213562, TE, TD);
 
112
                              T2d = Tp - Ts;
 
113
                              Tt = Tp + Ts;
 
114
                              Tk = I[WS(is, 26)];
 
115
                         }
 
116
                         {
 
117
                              E T11, T15, T1c, T20, T14, T16, T1X, T1Y, T1Q, T1R;
 
118
                              {
 
119
                                   E T1a, T1b, T12, T13;
 
120
                                   {
 
121
                                        E TZ, T10, TB, Tl;
 
122
                                        TZ = I[WS(is, 31)];
 
123
                                        T10 = I[WS(is, 15)];
 
124
                                        T1a = I[WS(is, 23)];
 
125
                                        TB = Tj - Tk;
 
126
                                        Tl = Tj + Tk;
 
127
                                        T1X = TZ + T10;
 
128
                                        T11 = TZ - T10;
 
129
                                        T1k = FMA(KP414213562, TA, TB);
 
130
                                        TC = FNMS(KP414213562, TB, TA);
 
131
                                        T2c = Ti - Tl;
 
132
                                        Tm = Ti + Tl;
 
133
                                        T1b = I[WS(is, 7)];
 
134
                                   }
 
135
                                   T12 = I[WS(is, 3)];
 
136
                                   T13 = I[WS(is, 19)];
 
137
                                   T15 = I[WS(is, 27)];
 
138
                                   T1Y = T1b + T1a;
 
139
                                   T1c = T1a - T1b;
 
140
                                   T20 = T12 + T13;
 
141
                                   T14 = T12 - T13;
 
142
                                   T16 = I[WS(is, 11)];
 
143
                              }
 
144
                              T2j = T1X - T1Y;
 
145
                              T1Z = T1X + T1Y;
 
146
                              {
 
147
                                   E TT, TU, TL, TM;
 
148
                                   {
 
149
                                        E TI, T21, T17, TJ, T18, T1d;
 
150
                                        TI = I[WS(is, 1)];
 
151
                                        T21 = T15 + T16;
 
152
                                        T17 = T15 - T16;
 
153
                                        TJ = I[WS(is, 17)];
 
154
                                        TT = I[WS(is, 9)];
 
155
                                        T2k = T21 - T20;
 
156
                                        T22 = T20 + T21;
 
157
                                        T18 = T14 + T17;
 
158
                                        T1d = T17 - T14;
 
159
                                        T1Q = TI + TJ;
 
160
                                        TK = TI - TJ;
 
161
                                        T1B = FNMS(KP707106781, T18, T11);
 
162
                                        T19 = FMA(KP707106781, T18, T11);
 
163
                                        T1C = FNMS(KP707106781, T1d, T1c);
 
164
                                        T1e = FMA(KP707106781, T1d, T1c);
 
165
                                        TU = I[WS(is, 25)];
 
166
                                   }
 
167
                                   TL = I[WS(is, 5)];
 
168
                                   TM = I[WS(is, 21)];
 
169
                                   TO = I[WS(is, 29)];
 
170
                                   T1R = TT + TU;
 
171
                                   TV = TT - TU;
 
172
                                   T1T = TL + TM;
 
173
                                   TN = TL - TM;
 
174
                                   TP = I[WS(is, 13)];
 
175
                              }
 
176
                              T2g = T1Q - T1R;
 
177
                              T1S = T1Q + T1R;
 
178
                         }
 
179
                    }
 
180
               }
 
181
               {
 
182
                    E T1P, T25, T23, T2h, T1W, T1y, TS, T1z, TX, T27, T2a;
 
183
                    {
 
184
                         E Tf, Tu, T29, T28;
 
185
                         {
 
186
                              E T1U, TQ, T1V, TR, TW;
 
187
                              T1P = T7 - Te;
 
188
                              Tf = T7 + Te;
 
189
                              T1U = TO + TP;
 
190
                              TQ = TO - TP;
 
191
                              Tu = Tm + Tt;
 
192
                              T25 = Tt - Tm;
 
193
                              T23 = T1Z - T22;
 
194
                              T29 = T1Z + T22;
 
195
                              T2h = T1U - T1T;
 
196
                              T1V = T1T + T1U;
 
197
                              TR = TN + TQ;
 
198
                              TW = TN - TQ;
 
199
                              T27 = Tf + Tu;
 
200
                              T1W = T1S - T1V;
 
201
                              T28 = T1S + T1V;
 
202
                              T1y = FNMS(KP707106781, TR, TK);
 
203
                              TS = FMA(KP707106781, TR, TK);
 
204
                              T1z = FNMS(KP707106781, TW, TV);
 
205
                              TX = FMA(KP707106781, TW, TV);
 
206
                              T2a = T28 + T29;
 
207
                         }
 
208
                         ro[WS(ros, 8)] = Tf - Tu;
 
209
                         io[WS(ios, 8)] = T29 - T28;
 
210
                    }
 
211
                    ro[0] = T27 + T2a;
 
212
                    ro[WS(ros, 16)] = T27 - T2a;
 
213
                    {
 
214
                         E T2s, T2i, T2v, T2f, T2r, T2p, T2l, T2t;
 
215
                         {
 
216
                              E T2o, T2e, T26, T24;
 
217
                              T2o = T2d - T2c;
 
218
                              T2e = T2c + T2d;
 
219
                              T2s = FNMS(KP414213562, T2g, T2h);
 
220
                              T2i = FMA(KP414213562, T2h, T2g);
 
221
                              T26 = T23 - T1W;
 
222
                              T24 = T1W + T23;
 
223
                              T2v = FNMS(KP707106781, T2e, T2b);
 
224
                              T2f = FMA(KP707106781, T2e, T2b);
 
225
                              T2r = FMA(KP707106781, T2o, T2n);
 
226
                              T2p = FNMS(KP707106781, T2o, T2n);
 
227
                              io[WS(ios, 4)] = FMA(KP707106781, T26, T25);
 
228
                              io[WS(ios, 12)] = FMS(KP707106781, T26, T25);
 
229
                              ro[WS(ros, 4)] = FMA(KP707106781, T24, T1P);
 
230
                              ro[WS(ros, 12)] = FNMS(KP707106781, T24, T1P);
 
231
                              T2l = FNMS(KP414213562, T2k, T2j);
 
232
                              T2t = FMA(KP414213562, T2j, T2k);
 
233
                         }
 
234
                         {
 
235
                              E T1v, T1G, TH, T1s, T1F, T1w, T1o, T1g, T1p, T1n;
 
236
                              {
 
237
                                   E T1f, TY, T1t, T1u, T1j, T1m;
 
238
                                   {
 
239
                                        E Tz, TG, T1q, T1r;
 
240
                                        T1v = FNMS(KP707106781, Ty, Tv);
 
241
                                        Tz = FMA(KP707106781, Ty, Tv);
 
242
                                        {
 
243
                                             E T2q, T2m, T2w, T2u;
 
244
                                             T2q = T2l - T2i;
 
245
                                             T2m = T2i + T2l;
 
246
                                             T2w = T2t - T2s;
 
247
                                             T2u = T2s + T2t;
 
248
                                             io[WS(ios, 10)] = FMA(KP923879532, T2q, T2p);
 
249
                                             io[WS(ios, 6)] = FMS(KP923879532, T2q, T2p);
 
250
                                             ro[WS(ros, 2)] = FMA(KP923879532, T2m, T2f);
 
251
                                             ro[WS(ros, 14)] = FNMS(KP923879532, T2m, T2f);
 
252
                                             ro[WS(ros, 10)] = FNMS(KP923879532, T2w, T2v);
 
253
                                             ro[WS(ros, 6)] = FMA(KP923879532, T2w, T2v);
 
254
                                             io[WS(ios, 2)] = FMA(KP923879532, T2u, T2r);
 
255
                                             io[WS(ios, 14)] = FMS(KP923879532, T2u, T2r);
 
256
                                             TG = TC + TF;
 
257
                                             T1G = TF - TC;
 
258
                                        }
 
259
                                        T1f = FNMS(KP198912367, T1e, T19);
 
260
                                        T1q = FMA(KP198912367, T19, T1e);
 
261
                                        T1r = FMA(KP198912367, TS, TX);
 
262
                                        TY = FNMS(KP198912367, TX, TS);
 
263
                                        T1t = FNMS(KP923879532, TG, Tz);
 
264
                                        TH = FMA(KP923879532, TG, Tz);
 
265
                                        T1u = T1r + T1q;
 
266
                                        T1s = T1q - T1r;
 
267
                                        T1F = FMA(KP707106781, T1i, T1h);
 
268
                                        T1j = FNMS(KP707106781, T1i, T1h);
 
269
                                        T1m = T1k + T1l;
 
270
                                        T1w = T1k - T1l;
 
271
                                   }
 
272
                                   ro[WS(ros, 7)] = FMA(KP980785280, T1u, T1t);
 
273
                                   T1o = T1f - TY;
 
274
                                   T1g = TY + T1f;
 
275
                                   T1p = FMA(KP923879532, T1m, T1j);
 
276
                                   T1n = FNMS(KP923879532, T1m, T1j);
 
277
                                   ro[WS(ros, 9)] = FNMS(KP980785280, T1u, T1t);
 
278
                              }
 
279
                              ro[WS(ros, 1)] = FMA(KP980785280, T1g, TH);
 
280
                              ro[WS(ros, 15)] = FNMS(KP980785280, T1g, TH);
 
281
                              io[WS(ios, 1)] = FMS(KP980785280, T1s, T1p);
 
282
                              io[WS(ios, 15)] = FMA(KP980785280, T1s, T1p);
 
283
                              io[WS(ios, 9)] = FMS(KP980785280, T1o, T1n);
 
284
                              io[WS(ios, 7)] = FMA(KP980785280, T1o, T1n);
 
285
                              {
 
286
                                   E T1A, T1D, T1N, T1O, T1K, T1L;
 
287
                                   T1A = FMA(KP668178637, T1z, T1y);
 
288
                                   T1K = FNMS(KP668178637, T1y, T1z);
 
289
                                   T1L = FNMS(KP668178637, T1B, T1C);
 
290
                                   T1D = FMA(KP668178637, T1C, T1B);
 
291
                                   T1N = FNMS(KP923879532, T1w, T1v);
 
292
                                   T1x = FMA(KP923879532, T1w, T1v);
 
293
                                   T1O = T1K + T1L;
 
294
                                   T1M = T1K - T1L;
 
295
                                   ro[WS(ros, 5)] = FNMS(KP831469612, T1O, T1N);
 
296
                                   T1I = T1D - T1A;
 
297
                                   T1E = T1A + T1D;
 
298
                                   T1J = FMA(KP923879532, T1G, T1F);
 
299
                                   T1H = FNMS(KP923879532, T1G, T1F);
 
300
                                   ro[WS(ros, 11)] = FMA(KP831469612, T1O, T1N);
 
301
                              }
 
302
                         }
 
303
                    }
 
304
               }
 
305
          }
 
306
          /* Remaining stores (indices 3, 5, 11, 13), built from the loop-scope temporaries. */
          io[WS(ios, 3)] = FMA(KP831469612, T1M, T1J);
 
307
          ro[WS(ros, 3)] = FMA(KP831469612, T1E, T1x);
 
308
          io[WS(ios, 13)] = FMS(KP831469612, T1M, T1J);
 
309
          ro[WS(ros, 13)] = FNMS(KP831469612, T1E, T1x);
 
310
          io[WS(ios, 11)] = FMA(KP831469612, T1I, T1H);
 
311
          io[WS(ios, 5)] = FMS(KP831469612, T1I, T1H);
 
312
     }
 
313
}
 
314
 
 
315
/*
 * Descriptor handed to the registration call below for the HAVE_FMA
 * variant.  The leading 32 and "r2hc_32" match the -n and -name arguments
 * of the generator command line, and {88, 0, 68, 0} matches the "88
 * additions, 0 multiplications, 68 fused multiply/add" count stated in
 * the comment above the function.
 * NOTE(review): the meaning of GENUS and of the trailing zero fields is
 * set by kr2hc_desc, which is declared outside this file -- confirm there.
 */
static const kr2hc_desc desc = { 32, "r2hc_32", {88, 0, 68, 0}, &GENUS, 0, 0, 0, 0, 0 };
 
316
 
 
317
/*
 * Registration entry point for the HAVE_FMA variant: passes the planner p,
 * the r2hc_32 kernel and its descriptor to X(kr2hc_register).
 * NOTE(review): X() is a macro defined outside this file, presumably a
 * symbol-name prefix -- confirm.
 */
void X(codelet_r2hc_32) (planner *p) {
 
318
     X(kr2hc_register) (p, r2hc_32, &desc);
 
319
}
 
320
 
 
321
#else                           /* HAVE_FMA */
 
322
 
 
323
/* Generated by: ../../../genfft/gen_r2hc -compact -variables 4 -pipeline-latency 4 -n 32 -name r2hc_32 -include r2hc.h */
 
324
 
 
325
/*
 
326
 * This function contains 156 FP additions, 42 FP multiplications,
 
327
 * (or, 140 additions, 26 multiplications, 16 fused multiply/add),
 
328
 * 54 stack variables, and 64 memory accesses
 
329
 */
 
330
/*
 
331
 * Generator Id's : 
 
332
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
333
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
334
 * $Id: gen_r2hc.ml,v 1.18 2006-02-12 23:34:12 athena Exp $
 
335
 */
 
336
 
 
337
#include "r2hc.h"
 
338
 
 
339
/*
 * r2hc_32 -- variant compiled when HAVE_FMA is not defined.
 *
 * Each loop pass reads the 32 values I[0] and I[WS(is, k)] for k = 1..31,
 * and stores 17 values to ro[0], ro[WS(ros, k)] for k = 1..16, plus 15
 * values to io[WS(ios, k)] for k = 1..15 (64 memory accesses in total).
 * The loop runs v times; after each pass I advances by ivs and both ro
 * and io advance by ovs.
 *
 * The body is a sequence of brace blocks: six that load inputs and form
 * sums and differences, then five that combine those and store outputs.
 *
 * NOTE(review): judging by the name and the gen_r2hc command line in the
 * comment above, this is presumably a size-32 real-to-halfcomplex DFT
 * kernel -- confirm against r2hc.h.
 * NOTE(review): FMA and FNMS are macros defined outside this file,
 * presumably a*b + c and c - a*b -- confirm.
 */
static void r2hc_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
 
340
{
 
341
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);  /* numerically sin(3*pi/16) */
 
342
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);  /* numerically cos(3*pi/16) */
 
343
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);  /* numerically sin(pi/16) */
 
344
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);  /* numerically cos(pi/16) */
 
345
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);  /* numerically sin(pi/8) */
 
346
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);  /* numerically cos(pi/8) */
 
347
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);  /* numerically sqrt(2)/2 */
 
348
     INT i;
 
349
     /* One pass per transform: v passes, stepping the input by ivs and both outputs by ovs. */
     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
 
350
          /* Intermediates shared between the load blocks and the store blocks below. */
          E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC;
 
351
          E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
 
352
          E TS, T1y;
 
353
          /* Inputs 0, 16, 8, 24. */
          {
 
354
               E T1, T2, T3, T4, T5, T6;
 
355
               T1 = I[0];
 
356
               T2 = I[WS(is, 16)];
 
357
               T3 = T1 + T2;
 
358
               T4 = I[WS(is, 8)];
 
359
               T5 = I[WS(is, 24)];
 
360
               T6 = T4 + T5;
 
361
               T7 = T3 + T6;
 
362
               T2b = T3 - T6;
 
363
               Tv = T1 - T2;
 
364
               T1l = T4 - T5;
 
365
          }
 
366
          /* Inputs 4, 20, 28, 12. */
          {
 
367
               E Ta, Tw, Td, Tx;
 
368
               {
 
369
                    E T8, T9, Tb, Tc;
 
370
                    T8 = I[WS(is, 4)];
 
371
                    T9 = I[WS(is, 20)];
 
372
                    Ta = T8 + T9;
 
373
                    Tw = T8 - T9;
 
374
                    Tb = I[WS(is, 28)];
 
375
                    Tc = I[WS(is, 12)];
 
376
                    Td = Tb + Tc;
 
377
                    Tx = Tb - Tc;
 
378
               }
 
379
               Te = Ta + Td;
 
380
               T2o = Td - Ta;
 
381
               Ty = KP707106781 * (Tw + Tx);
 
382
               T1k = KP707106781 * (Tx - Tw);
 
383
          }
 
384
          /* Inputs 30, 14, 6, 22. */
          {
 
385
               E Tp, TD, Ts, TE;
 
386
               {
 
387
                    E Tn, To, Tq, Tr;
 
388
                    Tn = I[WS(is, 30)];
 
389
                    To = I[WS(is, 14)];
 
390
                    Tp = Tn + To;
 
391
                    TD = Tn - To;
 
392
                    Tq = I[WS(is, 6)];
 
393
                    Tr = I[WS(is, 22)];
 
394
                    Ts = Tq + Tr;
 
395
                    TE = Tq - Tr;
 
396
               }
 
397
               Tt = Tp + Ts;
 
398
               T2d = Tp - Ts;
 
399
               TF = FMA(KP923879532, TD, KP382683432 * TE);
 
400
               T1h = FNMS(KP923879532, TE, KP382683432 * TD);
 
401
          }
 
402
          /* Inputs 2, 18, 10, 26. */
          {
 
403
               E Ti, TA, Tl, TB;
 
404
               {
 
405
                    E Tg, Th, Tj, Tk;
 
406
                    Tg = I[WS(is, 2)];
 
407
                    Th = I[WS(is, 18)];
 
408
                    Ti = Tg + Th;
 
409
                    TA = Tg - Th;
 
410
                    Tj = I[WS(is, 10)];
 
411
                    Tk = I[WS(is, 26)];
 
412
                    Tl = Tj + Tk;
 
413
                    TB = Tj - Tk;
 
414
               }
 
415
               Tm = Ti + Tl;
 
416
               T2c = Ti - Tl;
 
417
               TC = FNMS(KP382683432, TB, KP923879532 * TA);
 
418
               T1i = FMA(KP382683432, TA, KP923879532 * TB);
 
419
          }
 
420
          /* Inputs 31, 15, 7, 23, 3, 19, 27, 11. */
          {
 
421
               E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18;
 
422
               {
 
423
                    E TZ, T10, T1b, T1c;
 
424
                    TZ = I[WS(is, 31)];
 
425
                    T10 = I[WS(is, 15)];
 
426
                    T11 = TZ - T10;
 
427
                    T1X = TZ + T10;
 
428
                    T1b = I[WS(is, 7)];
 
429
                    T1c = I[WS(is, 23)];
 
430
                    T1d = T1b - T1c;
 
431
                    T1Y = T1b + T1c;
 
432
               }
 
433
               {
 
434
                    E T12, T13, T15, T16;
 
435
                    T12 = I[WS(is, 3)];
 
436
                    T13 = I[WS(is, 19)];
 
437
                    T14 = T12 - T13;
 
438
                    T20 = T12 + T13;
 
439
                    T15 = I[WS(is, 27)];
 
440
                    T16 = I[WS(is, 11)];
 
441
                    T17 = T15 - T16;
 
442
                    T21 = T15 + T16;
 
443
               }
 
444
               T1Z = T1X + T1Y;
 
445
               T22 = T20 + T21;
 
446
               T2k = T21 - T20;
 
447
               T2j = T1X - T1Y;
 
448
               T1a = KP707106781 * (T17 - T14);
 
449
               T1e = T1a - T1d;
 
450
               T1C = T1d + T1a;
 
451
               T18 = KP707106781 * (T14 + T17);
 
452
               T19 = T11 + T18;
 
453
               T1B = T11 - T18;
 
454
          }
 
455
          /* Inputs 1, 17, 9, 25, 5, 21, 29, 13. */
          {
 
456
               E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR;
 
457
               {
 
458
                    E TI, TJ, TU, TV;
 
459
                    TI = I[WS(is, 1)];
 
460
                    TJ = I[WS(is, 17)];
 
461
                    TK = TI - TJ;
 
462
                    T1Q = TI + TJ;
 
463
                    TU = I[WS(is, 9)];
 
464
                    TV = I[WS(is, 25)];
 
465
                    TW = TU - TV;
 
466
                    T1R = TU + TV;
 
467
               }
 
468
               {
 
469
                    E TL, TM, TO, TP;
 
470
                    TL = I[WS(is, 5)];
 
471
                    TM = I[WS(is, 21)];
 
472
                    TN = TL - TM;
 
473
                    T1T = TL + TM;
 
474
                    TO = I[WS(is, 29)];
 
475
                    TP = I[WS(is, 13)];
 
476
                    TQ = TO - TP;
 
477
                    T1U = TO + TP;
 
478
               }
 
479
               T1S = T1Q + T1R;
 
480
               T1V = T1T + T1U;
 
481
               T2h = T1U - T1T;
 
482
               T2g = T1Q - T1R;
 
483
               TT = KP707106781 * (TQ - TN);
 
484
               TX = TT - TW;
 
485
               T1z = TW + TT;
 
486
               TR = KP707106781 * (TN + TQ);
 
487
               TS = TK + TR;
 
488
               T1y = TK - TR;
 
489
          }
 
490
          /* Stores ro[0], ro[8], ro[16] and io[8]. */
          {
 
491
               E Tf, Tu, T27, T28, T29, T2a;
 
492
               Tf = T7 + Te;
 
493
               Tu = Tm + Tt;
 
494
               T27 = Tf + Tu;
 
495
               T28 = T1S + T1V;
 
496
               T29 = T1Z + T22;
 
497
               T2a = T28 + T29;
 
498
               ro[WS(ros, 8)] = Tf - Tu;
 
499
               io[WS(ios, 8)] = T29 - T28;
 
500
               ro[WS(ros, 16)] = T27 - T2a;
 
501
               ro[0] = T27 + T2a;
 
502
          }
 
503
          /* Stores output indices 4 and 12 (ro and io). */
          {
 
504
               E T1P, T25, T24, T26, T1W, T23;
 
505
               T1P = T7 - Te;
 
506
               T25 = Tt - Tm;
 
507
               T1W = T1S - T1V;
 
508
               T23 = T1Z - T22;
 
509
               T24 = KP707106781 * (T1W + T23);
 
510
               T26 = KP707106781 * (T23 - T1W);
 
511
               ro[WS(ros, 12)] = T1P - T24;
 
512
               io[WS(ios, 12)] = T26 - T25;
 
513
               ro[WS(ros, 4)] = T1P + T24;
 
514
               io[WS(ios, 4)] = T25 + T26;
 
515
          }
 
516
          /* Stores output indices 2, 6, 10 and 14 (ro and io). */
          {
 
517
               E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n;
 
518
               T2e = KP707106781 * (T2c + T2d);
 
519
               T2f = T2b + T2e;
 
520
               T2v = T2b - T2e;
 
521
               T2n = KP707106781 * (T2d - T2c);
 
522
               T2p = T2n - T2o;
 
523
               T2r = T2o + T2n;
 
524
               {
 
525
                    E T2i, T2l, T2s, T2t;
 
526
                    T2i = FMA(KP923879532, T2g, KP382683432 * T2h);
 
527
                    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
 
528
                    T2m = T2i + T2l;
 
529
                    T2q = T2l - T2i;
 
530
                    T2s = FNMS(KP382683432, T2g, KP923879532 * T2h);
 
531
                    T2t = FMA(KP382683432, T2j, KP923879532 * T2k);
 
532
                    T2u = T2s + T2t;
 
533
                    T2w = T2t - T2s;
 
534
               }
 
535
               ro[WS(ros, 14)] = T2f - T2m;
 
536
               io[WS(ios, 14)] = T2u - T2r;
 
537
               ro[WS(ros, 2)] = T2f + T2m;
 
538
               io[WS(ios, 2)] = T2r + T2u;
 
539
               io[WS(ios, 6)] = T2p + T2q;
 
540
               ro[WS(ros, 6)] = T2v + T2w;
 
541
               io[WS(ios, 10)] = T2q - T2p;
 
542
               ro[WS(ros, 10)] = T2v - T2w;
 
543
          }
 
544
          /* Stores output indices 1, 7, 9 and 15 (ro and io). */
          {
 
545
               E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
 
546
               {
 
547
                    E Tz, TG, T1q, T1r;
 
548
                    Tz = Tv + Ty;
 
549
                    TG = TC + TF;
 
550
                    TH = Tz + TG;
 
551
                    T1t = Tz - TG;
 
552
                    T1q = FNMS(KP195090322, TS, KP980785280 * TX);
 
553
                    T1r = FMA(KP195090322, T19, KP980785280 * T1e);
 
554
                    T1s = T1q + T1r;
 
555
                    T1u = T1r - T1q;
 
556
               }
 
557
               {
 
558
                    E TY, T1f, T1j, T1m;
 
559
                    TY = FMA(KP980785280, TS, KP195090322 * TX);
 
560
                    T1f = FNMS(KP195090322, T1e, KP980785280 * T19);
 
561
                    T1g = TY + T1f;
 
562
                    T1o = T1f - TY;
 
563
                    T1j = T1h - T1i;
 
564
                    T1m = T1k - T1l;
 
565
                    T1n = T1j - T1m;
 
566
                    T1p = T1m + T1j;
 
567
               }
 
568
               ro[WS(ros, 15)] = TH - T1g;
 
569
               io[WS(ios, 15)] = T1s - T1p;
 
570
               ro[WS(ros, 1)] = TH + T1g;
 
571
               io[WS(ios, 1)] = T1p + T1s;
 
572
               io[WS(ios, 7)] = T1n + T1o;
 
573
               ro[WS(ros, 7)] = T1t + T1u;
 
574
               io[WS(ios, 9)] = T1o - T1n;
 
575
               ro[WS(ros, 9)] = T1t - T1u;
 
576
          }
 
577
          /* Stores output indices 3, 5, 11 and 13 (ro and io). */
          {
 
578
               E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
 
579
               {
 
580
                    E T1v, T1w, T1K, T1L;
 
581
                    T1v = Tv - Ty;
 
582
                    T1w = T1i + T1h;
 
583
                    T1x = T1v + T1w;
 
584
                    T1N = T1v - T1w;
 
585
                    T1K = FNMS(KP555570233, T1y, KP831469612 * T1z);
 
586
                    T1L = FMA(KP555570233, T1B, KP831469612 * T1C);
 
587
                    T1M = T1K + T1L;
 
588
                    T1O = T1L - T1K;
 
589
               }
 
590
               {
 
591
                    E T1A, T1D, T1F, T1G;
 
592
                    T1A = FMA(KP831469612, T1y, KP555570233 * T1z);
 
593
                    T1D = FNMS(KP555570233, T1C, KP831469612 * T1B);
 
594
                    T1E = T1A + T1D;
 
595
                    T1I = T1D - T1A;
 
596
                    T1F = TF - TC;
 
597
                    T1G = T1l + T1k;
 
598
                    T1H = T1F - T1G;
 
599
                    T1J = T1G + T1F;
 
600
               }
 
601
               ro[WS(ros, 13)] = T1x - T1E;
 
602
               io[WS(ios, 13)] = T1M - T1J;
 
603
               ro[WS(ros, 3)] = T1x + T1E;
 
604
               io[WS(ios, 3)] = T1J + T1M;
 
605
               io[WS(ios, 5)] = T1H + T1I;
 
606
               ro[WS(ros, 5)] = T1N + T1O;
 
607
               io[WS(ios, 11)] = T1I - T1H;
 
608
               ro[WS(ros, 11)] = T1N - T1O;
 
609
          }
 
610
     }
 
611
}
 
612
 
 
613
/*
 * Descriptor handed to the registration call below for the non-FMA
 * variant.  The leading 32 and "r2hc_32" match the -n and -name arguments
 * of the generator command line, and {140, 26, 16, 0} matches the "140
 * additions, 26 multiplications, 16 fused multiply/add" count stated in
 * the comment above the function.
 * NOTE(review): the meaning of GENUS and of the trailing zero fields is
 * set by kr2hc_desc, which is declared outside this file -- confirm there.
 */
static const kr2hc_desc desc = { 32, "r2hc_32", {140, 26, 16, 0}, &GENUS, 0, 0, 0, 0, 0 };
 
614
 
 
615
/*
 * Registration entry point for the non-FMA variant: passes the planner p,
 * the r2hc_32 kernel and its descriptor to X(kr2hc_register).
 * NOTE(review): X() is a macro defined outside this file, presumably a
 * symbol-name prefix -- confirm.
 */
void X(codelet_r2hc_32) (planner *p) {
 
616
     X(kr2hc_register) (p, r2hc_32, &desc);
 
617
}
 
618
 
 
619
#endif                          /* HAVE_FMA */