~diresu/blender/blender-command-port

« back to all changes in this revision

Viewing changes to extern/fftw/rdft/codelets/r2hc/hf2_32.c

  • Committer: theeth
  • Date: 2008-10-14 16:52:04 UTC
  • Revision ID: vcs-imports@canonical.com-20081014165204-r32w2gm6s0osvdhn
copy back trunk

Show diffs side-by-side

added

removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2006 Matteo Frigo
 
3
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Sun Jul  2 15:59:33 EDT 2006 */
 
23
 
 
24
#include "codelet-rdft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h */
 
29
 
 
30
/*
 
31
 * This function contains 488 FP additions, 350 FP multiplications,
 
32
 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
 
33
 * 181 stack variables, and 128 memory accesses
 
34
 */
 
35
/*
 
36
 * Generator Id's : 
 
37
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
38
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
39
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
 
40
 */
 
41
 
 
42
#include "hf.h"
 
43
 
 
44
static const R *hf2_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
 
45
{
 
46
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
 
47
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
 
48
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
 
49
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
 
50
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
51
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
 
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
53
     INT i;
 
54
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8, MAKE_VOLATILE_STRIDE(ios)) {
 
55
          E T9A, T9z;
 
56
          {
 
57
               E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc;
 
58
               T2 = W[0];
 
59
               T8 = W[4];
 
60
               T3 = W[2];
 
61
               T6 = W[3];
 
62
               Te = W[6];
 
63
               Tr = T2 * T8;
 
64
               T18 = T3 * T8;
 
65
               T4 = T2 * T3;
 
66
               Ta = T2 * T6;
 
67
               Tz = T3 * Te;
 
68
               T1n = T8 * Te;
 
69
               T10 = T2 * Te;
 
70
               Ti = W[7];
 
71
               T5 = W[1];
 
72
               Tc = W[5];
 
73
               {
 
74
                    E TR, TP, T3r, T3n, Tq, T46, T8H, T97, T4b, T8D, TH, T98, TZ, T7f, T4j;
 
75
                    E T6t, T1g, T7g, T4q, T6u, T1J, T7m, T6y, T4z, T7l, T8d, T6x, T4G, T2k, T7o;
 
76
                    E T7r, T8e, T6B, T4O, T6A, T4V, T7A, T2N, T6I, T5s, T6F, T55, T8i, T7x, T5c;
 
77
                    E T5t, T3c, T7y, T5j, T5u, T7D, T8j, T5L, T62, T43, T7G, T5S, T63, T7O, T8o;
 
78
                    E T3o, T3l, T3p, T5W, T3E, T5C, T3s, T3v, T3x;
 
79
                    {
 
80
                         E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, Th, T34, T2J;
 
81
                         E T31, T2F, Td, T2X, T2T, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c;
 
82
                         {
 
83
                              E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW;
 
84
                              E TS, Ty, T4a, T48, TG;
 
85
                              {
 
86
                                   E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14;
 
87
                                   T1 = rio[0];
 
88
                                   TA = FMA(T6, Ti, Tz);
 
89
                                   T1K = FNMS(T6, Ti, Tz);
 
90
                                   T14 = T2 * Ti;
 
91
                                   {
 
92
                                        E T1r, TD, T1c, Tv;
 
93
                                        T1r = T8 * Ti;
 
94
                                        TD = T3 * Ti;
 
95
                                        T11 = FNMS(T5, Ti, T10);
 
96
                                        T1C = FMA(T5, Ti, T10);
 
97
                                        TM = FMA(T5, T3, Ta);
 
98
                                        Tb = FNMS(T5, T3, Ta);
 
99
                                        TJ = FNMS(T5, T6, T4);
 
100
                                        T7 = FMA(T5, T6, T4);
 
101
                                        T1o = FMA(Tc, Ti, T1n);
 
102
                                        T23 = FMA(T6, Tc, T18);
 
103
                                        T19 = FNMS(T6, Tc, T18);
 
104
                                        T1w = FNMS(T5, Tc, Tr);
 
105
                                        Ts = FMA(T5, Tc, Tr);
 
106
                                        T1c = T3 * Tc;
 
107
                                        Tv = T2 * Tc;
 
108
                                        T1F = FNMS(T5, Te, T14);
 
109
                                        T15 = FMA(T5, Te, T14);
 
110
                                        T1s = FNMS(Tc, Te, T1r);
 
111
                                        T1N = FMA(T6, Te, TD);
 
112
                                        TE = FNMS(T6, Te, TD);
 
113
                                        {
 
114
                                             E T1T, T3i, T3e, T1Q;
 
115
                                             T1T = TJ * Tc;
 
116
                                             T3i = TJ * Ti;
 
117
                                             T3e = TJ * Te;
 
118
                                             T1Q = TJ * T8;
 
119
                                             {
 
120
                                                  E Tg, T2I, T2E, T9;
 
121
                                                  Tg = T7 * Tc;
 
122
                                                  T2I = T7 * Ti;
 
123
                                                  T2E = T7 * Te;
 
124
                                                  T9 = T7 * T8;
 
125
                                                  {
 
126
                                                       E T3q, T3m, T2v, T2r;
 
127
                                                       T3q = T19 * Ti;
 
128
                                                       T3m = T19 * Te;
 
129
                                                       T2v = T1w * Ti;
 
130
                                                       T2r = T1w * Te;
 
131
                                                       {
 
132
                                                            E T2W, T2S, T3P, T3L;
 
133
                                                            T2W = T23 * Ti;
 
134
                                                            T2S = T23 * Te;
 
135
                                                            T3P = Ts * Ti;
 
136
                                                            T3L = Ts * Te;
 
137
                                                            T26 = FNMS(T6, T8, T1c);
 
138
                                                            T1d = FMA(T6, T8, T1c);
 
139
                                                            T1z = FMA(T5, T8, Tv);
 
140
                                                            Tw = FNMS(T5, T8, Tv);
 
141
                                                            T2b = FNMS(TM, T8, T1T);
 
142
                                                            T1U = FMA(TM, T8, T1T);
 
143
                                                            T3C = FNMS(TM, Te, T3i);
 
144
                                                            T3j = FMA(TM, Te, T3i);
 
145
                                                            T3z = FMA(TM, Ti, T3e);
 
146
                                                            T3f = FNMS(TM, Ti, T3e);
 
147
                                                            T1R = FNMS(TM, Tc, T1Q);
 
148
                                                            T29 = FMA(TM, Tc, T1Q);
 
149
                                                            TR = FNMS(Tb, T8, Tg);
 
150
                                                            Th = FMA(Tb, T8, Tg);
 
151
                                                            T34 = FMA(Tb, Te, T2I);
 
152
                                                            T2J = FNMS(Tb, Te, T2I);
 
153
                                                            T31 = FNMS(Tb, Ti, T2E);
 
154
                                                            T2F = FMA(Tb, Ti, T2E);
 
155
                                                            Td = FNMS(Tb, Tc, T9);
 
156
                                                            TP = FMA(Tb, Tc, T9);
 
157
                                                            T2X = FNMS(T26, Te, T2W);
 
158
                                                            T2T = FMA(T26, Ti, T2S);
 
159
                                                            T3r = FNMS(T1d, Te, T3q);
 
160
                                                            T3n = FMA(T1d, Ti, T3m);
 
161
                                                            T2w = FNMS(T1z, Te, T2v);
 
162
                                                            T2s = FMA(T1z, Ti, T2r);
 
163
                                                            T3Q = FNMS(Tw, Te, T3P);
 
164
                                                            T3M = FMA(Tw, Ti, T3L);
 
165
                                                            {
 
166
                                                                 E T1Y, T1S, T2f, T2a;
 
167
                                                                 T1Y = T1R * Ti;
 
168
                                                                 T1S = T1R * Te;
 
169
                                                                 T2f = T29 * Ti;
 
170
                                                                 T2a = T29 * Te;
 
171
                                                                 {
 
172
                                                                      E Tm, Tf, TV, TQ;
 
173
                                                                      Tm = Td * Ti;
 
174
                                                                      Tf = Td * Te;
 
175
                                                                      TV = TP * Ti;
 
176
                                                                      TQ = TP * Te;
 
177
                                                                      T1Z = FNMS(T1U, Te, T1Y);
 
178
                                                                      T1V = FMA(T1U, Ti, T1S);
 
179
                                                                      T2g = FNMS(T2b, Te, T2f);
 
180
                                                                      T2c = FMA(T2b, Ti, T2a);
 
181
                                                                      Tn = FNMS(Th, Te, Tm);
 
182
                                                                      Tj = FMA(Th, Ti, Tf);
 
183
                                                                      TW = FNMS(TR, Te, TV);
 
184
                                                                      TS = FMA(TR, Ti, TQ);
 
185
                                                                      T8G = iio[-WS(ios, 31)];
 
186
                                                                 }
 
187
                                                            }
 
188
                                                       }
 
189
                                                  }
 
190
                                             }
 
191
                                        }
 
192
                                   }
 
193
                                   Tk = rio[WS(ios, 16)];
 
194
                                   To = iio[-WS(ios, 15)];
 
195
                                   {
 
196
                                        E Tt, TF, Tu, T49, Tx, TB, T47, TC;
 
197
                                        {
 
198
                                             E Tl, T8E, Tp, T8F;
 
199
                                             Tt = rio[WS(ios, 8)];
 
200
                                             TF = iio[-WS(ios, 7)];
 
201
                                             Tl = Tj * Tk;
 
202
                                             T8E = Tj * To;
 
203
                                             Tu = Ts * Tt;
 
204
                                             T49 = TA * TF;
 
205
                                             Tp = FMA(Tn, To, Tl);
 
206
                                             T8F = FNMS(Tn, Tk, T8E);
 
207
                                             Tx = iio[-WS(ios, 23)];
 
208
                                             TB = rio[WS(ios, 24)];
 
209
                                             Tq = T1 + Tp;
 
210
                                             T46 = T1 - Tp;
 
211
                                             T8H = T8F + T8G;
 
212
                                             T97 = T8G - T8F;
 
213
                                             T47 = Ts * Tx;
 
214
                                             TC = TA * TB;
 
215
                                        }
 
216
                                        Ty = FMA(Tw, Tx, Tu);
 
217
                                        T4a = FNMS(TE, TB, T49);
 
218
                                        T48 = FNMS(Tw, Tt, T47);
 
219
                                        TG = FMA(TE, TF, TC);
 
220
                                   }
 
221
                              }
 
222
                              {
 
223
                                   E TT, TX, TO, T4f, TU, T4g;
 
224
                                   {
 
225
                                        E TK, TN, TL, T4e;
 
226
                                        TK = rio[WS(ios, 4)];
 
227
                                        TN = iio[-WS(ios, 27)];
 
228
                                        T4b = T48 - T4a;
 
229
                                        T8D = T48 + T4a;
 
230
                                        TH = Ty + TG;
 
231
                                        T98 = Ty - TG;
 
232
                                        TL = TJ * TK;
 
233
                                        T4e = TJ * TN;
 
234
                                        TT = rio[WS(ios, 20)];
 
235
                                        TX = iio[-WS(ios, 11)];
 
236
                                        TO = FMA(TM, TN, TL);
 
237
                                        T4f = FNMS(TM, TK, T4e);
 
238
                                        TU = TS * TT;
 
239
                                        T4g = TS * TX;
 
240
                                   }
 
241
                                   {
 
242
                                        E T17, T4m, T1a, T1e, T4d, T4i;
 
243
                                        {
 
244
                                             E T12, T16, TY, T4h, T13, T4l;
 
245
                                             T12 = rio[WS(ios, 28)];
 
246
                                             T16 = iio[-WS(ios, 3)];
 
247
                                             TY = FMA(TW, TX, TU);
 
248
                                             T4h = FNMS(TW, TT, T4g);
 
249
                                             T13 = T11 * T12;
 
250
                                             T4l = T11 * T16;
 
251
                                             TZ = TO + TY;
 
252
                                             T4d = TO - TY;
 
253
                                             T7f = T4f + T4h;
 
254
                                             T4i = T4f - T4h;
 
255
                                             T17 = FMA(T15, T16, T13);
 
256
                                             T4m = FNMS(T15, T12, T4l);
 
257
                                        }
 
258
                                        T4j = T4d + T4i;
 
259
                                        T6t = T4i - T4d;
 
260
                                        T1a = rio[WS(ios, 12)];
 
261
                                        T1e = iio[-WS(ios, 19)];
 
262
                                        {
 
263
                                             E T1m, T4B, T1H, T4x, T1x, T1A, T1u, T4D, T1y, T4u;
 
264
                                             {
 
265
                                                  E T1D, T1G, T1E, T4w;
 
266
                                                  {
 
267
                                                       E T1f, T4o, T4k, T4p;
 
268
                                                       {
 
269
                                                            E T1j, T1l, T1b, T4n, T1k, T4A;
 
270
                                                            T1j = rio[WS(ios, 2)];
 
271
                                                            T1l = iio[-WS(ios, 29)];
 
272
                                                            T1b = T19 * T1a;
 
273
                                                            T4n = T19 * T1e;
 
274
                                                            T1k = T7 * T1j;
 
275
                                                            T4A = T7 * T1l;
 
276
                                                            T1f = FMA(T1d, T1e, T1b);
 
277
                                                            T4o = FNMS(T1d, T1a, T4n);
 
278
                                                            T1m = FMA(Tb, T1l, T1k);
 
279
                                                            T4B = FNMS(Tb, T1j, T4A);
 
280
                                                       }
 
281
                                                       T1g = T17 + T1f;
 
282
                                                       T4k = T17 - T1f;
 
283
                                                       T7g = T4m + T4o;
 
284
                                                       T4p = T4m - T4o;
 
285
                                                       T1D = rio[WS(ios, 26)];
 
286
                                                       T1G = iio[-WS(ios, 5)];
 
287
                                                       T4q = T4k - T4p;
 
288
                                                       T6u = T4k + T4p;
 
289
                                                       T1E = T1C * T1D;
 
290
                                                       T4w = T1C * T1G;
 
291
                                                  }
 
292
                                                  {
 
293
                                                       E T1p, T1t, T1q, T4C;
 
294
                                                       T1p = rio[WS(ios, 18)];
 
295
                                                       T1t = iio[-WS(ios, 13)];
 
296
                                                       T1H = FMA(T1F, T1G, T1E);
 
297
                                                       T4x = FNMS(T1F, T1D, T4w);
 
298
                                                       T1q = T1o * T1p;
 
299
                                                       T4C = T1o * T1t;
 
300
                                                       T1x = rio[WS(ios, 10)];
 
301
                                                       T1A = iio[-WS(ios, 21)];
 
302
                                                       T1u = FMA(T1s, T1t, T1q);
 
303
                                                       T4D = FNMS(T1s, T1p, T4C);
 
304
                                                       T1y = T1w * T1x;
 
305
                                                       T4u = T1w * T1A;
 
306
                                                  }
 
307
                                             }
 
308
                                             {
 
309
                                                  E T4t, T1v, T7j, T4E, T1B, T4v;
 
310
                                                  T4t = T1m - T1u;
 
311
                                                  T1v = T1m + T1u;
 
312
                                                  T7j = T4B + T4D;
 
313
                                                  T4E = T4B - T4D;
 
314
                                                  T1B = FMA(T1z, T1A, T1y);
 
315
                                                  T4v = FNMS(T1z, T1x, T4u);
 
316
                                                  {
 
317
                                                       E T4F, T1I, T4y, T7k;
 
318
                                                       T4F = T1B - T1H;
 
319
                                                       T1I = T1B + T1H;
 
320
                                                       T4y = T4v - T4x;
 
321
                                                       T7k = T4v + T4x;
 
322
                                                       T1J = T1v + T1I;
 
323
                                                       T7m = T1v - T1I;
 
324
                                                       T6y = T4t - T4y;
 
325
                                                       T4z = T4t + T4y;
 
326
                                                       T7l = T7j - T7k;
 
327
                                                       T8d = T7j + T7k;
 
328
                                                       T6x = T4E + T4F;
 
329
                                                       T4G = T4E - T4F;
 
330
                                                  }
 
331
                                             }
 
332
                                        }
 
333
                                   }
 
334
                              }
 
335
                         }
 
336
                         {
 
337
                              E T53, T2z, T4Z, T7v, T5q, T2M, T5r, T51, T4T, T4U;
 
338
                              {
 
339
                                   E T1P, T4Q, T2i, T4M, T21, T4S, T28, T4K;
 
340
                                   {
 
341
                                        E T1L, T1O, T1W, T20;
 
342
                                        T1L = rio[WS(ios, 30)];
 
343
                                        T1O = iio[-WS(ios, 1)];
 
344
                                        {
 
345
                                             E T2d, T2h, T1M, T4P, T2e, T4L;
 
346
                                             T2d = rio[WS(ios, 22)];
 
347
                                             T2h = iio[-WS(ios, 9)];
 
348
                                             T1M = T1K * T1L;
 
349
                                             T4P = T1K * T1O;
 
350
                                             T2e = T2c * T2d;
 
351
                                             T4L = T2c * T2h;
 
352
                                             T1P = FMA(T1N, T1O, T1M);
 
353
                                             T4Q = FNMS(T1N, T1L, T4P);
 
354
                                             T2i = FMA(T2g, T2h, T2e);
 
355
                                             T4M = FNMS(T2g, T2d, T4L);
 
356
                                        }
 
357
                                        T1W = rio[WS(ios, 14)];
 
358
                                        T20 = iio[-WS(ios, 17)];
 
359
                                        {
 
360
                                             E T24, T27, T1X, T4R, T25, T4J;
 
361
                                             T24 = rio[WS(ios, 6)];
 
362
                                             T27 = iio[-WS(ios, 25)];
 
363
                                             T1X = T1V * T1W;
 
364
                                             T4R = T1V * T20;
 
365
                                             T25 = T23 * T24;
 
366
                                             T4J = T23 * T27;
 
367
                                             T21 = FMA(T1Z, T20, T1X);
 
368
                                             T4S = FNMS(T1Z, T1W, T4R);
 
369
                                             T28 = FMA(T26, T27, T25);
 
370
                                             T4K = FNMS(T26, T24, T4J);
 
371
                                        }
 
372
                                   }
 
373
                                   {
 
374
                                        E T4I, T22, T7p, T2j, T7q, T4N;
 
375
                                        T4I = T1P - T21;
 
376
                                        T22 = T1P + T21;
 
377
                                        T7p = T4Q + T4S;
 
378
                                        T4T = T4Q - T4S;
 
379
                                        T4U = T28 - T2i;
 
380
                                        T2j = T28 + T2i;
 
381
                                        T7q = T4K + T4M;
 
382
                                        T4N = T4K - T4M;
 
383
                                        T2k = T22 + T2j;
 
384
                                        T7o = T22 - T2j;
 
385
                                        T7r = T7p - T7q;
 
386
                                        T8e = T7p + T7q;
 
387
                                        T6B = T4I - T4N;
 
388
                                        T4O = T4I + T4N;
 
389
                                   }
 
390
                              }
 
391
                              {
 
392
                                   E T2q, T5n, T2L, T2A, T2y, T2B, T2C, T5p, T2G, T2H, T2K, T2D, T50;
 
393
                                   {
 
394
                                        E T2n, T2p, T2o, T5m;
 
395
                                        T2n = rio[WS(ios, 1)];
 
396
                                        T2p = iio[-WS(ios, 30)];
 
397
                                        T2G = rio[WS(ios, 25)];
 
398
                                        T6A = T4T + T4U;
 
399
                                        T4V = T4T - T4U;
 
400
                                        T2o = T2 * T2n;
 
401
                                        T5m = T2 * T2p;
 
402
                                        T2H = T2F * T2G;
 
403
                                        T2K = iio[-WS(ios, 6)];
 
404
                                        T2q = FMA(T5, T2p, T2o);
 
405
                                        T5n = FNMS(T5, T2n, T5m);
 
406
                                   }
 
407
                                   {
 
408
                                        E T2t, T52, T2x, T2u, T5o;
 
409
                                        T2t = rio[WS(ios, 17)];
 
410
                                        T2L = FMA(T2J, T2K, T2H);
 
411
                                        T52 = T2F * T2K;
 
412
                                        T2x = iio[-WS(ios, 14)];
 
413
                                        T2u = T2s * T2t;
 
414
                                        T2A = rio[WS(ios, 9)];
 
415
                                        T53 = FNMS(T2J, T2G, T52);
 
416
                                        T5o = T2s * T2x;
 
417
                                        T2y = FMA(T2w, T2x, T2u);
 
418
                                        T2B = T8 * T2A;
 
419
                                        T2C = iio[-WS(ios, 22)];
 
420
                                        T5p = FNMS(T2w, T2t, T5o);
 
421
                                   }
 
422
                                   T2z = T2q + T2y;
 
423
                                   T4Z = T2q - T2y;
 
424
                                   T2D = FMA(Tc, T2C, T2B);
 
425
                                   T50 = T8 * T2C;
 
426
                                   T7v = T5n + T5p;
 
427
                                   T5q = T5n - T5p;
 
428
                                   T2M = T2D + T2L;
 
429
                                   T5r = T2D - T2L;
 
430
                                   T51 = FNMS(Tc, T2A, T50);
 
431
                              }
 
432
                              {
 
433
                                   E T2U, T2R, T2V, T58, T3a, T5h, T2Y, T32, T35;
 
434
                                   {
 
435
                                        E T2O, T2P, T2Q, T37, T39, T54, T7w, T57, T38, T5g;
 
436
                                        T2O = rio[WS(ios, 5)];
 
437
                                        T7A = T2z - T2M;
 
438
                                        T2N = T2z + T2M;
 
439
                                        T54 = T51 - T53;
 
440
                                        T7w = T51 + T53;
 
441
                                        T6I = T5q + T5r;
 
442
                                        T5s = T5q - T5r;
 
443
                                        T6F = T4Z - T54;
 
444
                                        T55 = T4Z + T54;
 
445
                                        T8i = T7v + T7w;
 
446
                                        T7x = T7v - T7w;
 
447
                                        T2P = T29 * T2O;
 
448
                                        T2Q = iio[-WS(ios, 26)];
 
449
                                        T37 = rio[WS(ios, 13)];
 
450
                                        T39 = iio[-WS(ios, 18)];
 
451
                                        T2U = rio[WS(ios, 21)];
 
452
                                        T2R = FMA(T2b, T2Q, T2P);
 
453
                                        T57 = T29 * T2Q;
 
454
                                        T38 = T1R * T37;
 
455
                                        T5g = T1R * T39;
 
456
                                        T2V = T2T * T2U;
 
457
                                        T58 = FNMS(T2b, T2O, T57);
 
458
                                        T3a = FMA(T1U, T39, T38);
 
459
                                        T5h = FNMS(T1U, T37, T5g);
 
460
                                        T2Y = iio[-WS(ios, 10)];
 
461
                                        T32 = rio[WS(ios, 29)];
 
462
                                        T35 = iio[-WS(ios, 2)];
 
463
                                   }
 
464
                                   {
 
465
                                        E T3N, T3K, T3O, T5H, T41, T5Q, T3R, T3U, T3W;
 
466
                                        {
 
467
                                             E T3H, T3I, T3J, T3Y, T40, T5G, T3Z, T5P;
 
468
                                             {
 
469
                                                  E T30, T56, T5a, T36, T5f, T7B, T5b;
 
470
                                                  T3H = rio[WS(ios, 3)];
 
471
                                                  {
 
472
                                                       E T2Z, T59, T33, T5e;
 
473
                                                       T2Z = FMA(T2X, T2Y, T2V);
 
474
                                                       T59 = T2T * T2Y;
 
475
                                                       T33 = T31 * T32;
 
476
                                                       T5e = T31 * T35;
 
477
                                                       T30 = T2R + T2Z;
 
478
                                                       T56 = T2R - T2Z;
 
479
                                                       T5a = FNMS(T2X, T2U, T59);
 
480
                                                       T36 = FMA(T34, T35, T33);
 
481
                                                       T5f = FNMS(T34, T32, T5e);
 
482
                                                       T3I = T3 * T3H;
 
483
                                                  }
 
484
                                                  T7B = T58 + T5a;
 
485
                                                  T5b = T58 - T5a;
 
486
                                                  {
 
487
                                                       E T3b, T5d, T7C, T5i;
 
488
                                                       T3b = T36 + T3a;
 
489
                                                       T5d = T36 - T3a;
 
490
                                                       T7C = T5f + T5h;
 
491
                                                       T5i = T5f - T5h;
 
492
                                                       T5c = T56 + T5b;
 
493
                                                       T5t = T5b - T56;
 
494
                                                       T3c = T30 + T3b;
 
495
                                                       T7y = T3b - T30;
 
496
                                                       T5j = T5d - T5i;
 
497
                                                       T5u = T5d + T5i;
 
498
                                                       T7D = T7B - T7C;
 
499
                                                       T8j = T7B + T7C;
 
500
                                                       T3J = iio[-WS(ios, 28)];
 
501
                                                  }
 
502
                                             }
 
503
                                             T3Y = rio[WS(ios, 11)];
 
504
                                             T40 = iio[-WS(ios, 20)];
 
505
                                             T3N = rio[WS(ios, 19)];
 
506
                                             T3K = FMA(T6, T3J, T3I);
 
507
                                             T5G = T3 * T3J;
 
508
                                             T3Z = Td * T3Y;
 
509
                                             T5P = Td * T40;
 
510
                                             T3O = T3M * T3N;
 
511
                                             T5H = FNMS(T6, T3H, T5G);
 
512
                                             T41 = FMA(Th, T40, T3Z);
 
513
                                             T5Q = FNMS(Th, T3Y, T5P);
 
514
                                             T3R = iio[-WS(ios, 12)];
 
515
                                             T3U = rio[WS(ios, 27)];
 
516
                                             T3W = iio[-WS(ios, 4)];
 
517
                                        }
 
518
                                        {
 
519
                                             E T3g, T3h, T3k, T3A, T3D, T5V, T3B, T5B;
 
520
                                             {
 
521
                                                  E T3T, T5F, T5J, T3X, T5O, T7M, T5K;
 
522
                                                  T3g = rio[WS(ios, 31)];
 
523
                                                  {
 
524
                                                       E T3S, T5I, T3V, T5N;
 
525
                                                       T3S = FMA(T3Q, T3R, T3O);
 
526
                                                       T5I = T3M * T3R;
 
527
                                                       T3V = Te * T3U;
 
528
                                                       T5N = Te * T3W;
 
529
                                                       T3T = T3K + T3S;
 
530
                                                       T5F = T3K - T3S;
 
531
                                                       T5J = FNMS(T3Q, T3N, T5I);
 
532
                                                       T3X = FMA(Ti, T3W, T3V);
 
533
                                                       T5O = FNMS(Ti, T3U, T5N);
 
534
                                                       T3h = T3f * T3g;
 
535
                                                  }
 
536
                                                  T7M = T5H + T5J;
 
537
                                                  T5K = T5H - T5J;
 
538
                                                  {
 
539
                                                       E T42, T5M, T7N, T5R;
 
540
                                                       T42 = T3X + T41;
 
541
                                                       T5M = T3X - T41;
 
542
                                                       T7N = T5O + T5Q;
 
543
                                                       T5R = T5O - T5Q;
 
544
                                                       T5L = T5F + T5K;
 
545
                                                       T62 = T5K - T5F;
 
546
                                                       T43 = T3T + T42;
 
547
                                                       T7G = T42 - T3T;
 
548
                                                       T5S = T5M - T5R;
 
549
                                                       T63 = T5M + T5R;
 
550
                                                       T7O = T7M - T7N;
 
551
                                                       T8o = T7M + T7N;
 
552
                                                       T3k = iio[0];
 
553
                                                  }
 
554
                                             }
 
555
                                             T3A = rio[WS(ios, 23)];
 
556
                                             T3D = iio[-WS(ios, 8)];
 
557
                                             T3o = rio[WS(ios, 15)];
 
558
                                             T3l = FMA(T3j, T3k, T3h);
 
559
                                             T5V = T3f * T3k;
 
560
                                             T3B = T3z * T3A;
 
561
                                             T5B = T3z * T3D;
 
562
                                             T3p = T3n * T3o;
 
563
                                             T5W = FNMS(T3j, T3g, T5V);
 
564
                                             T3E = FMA(T3C, T3D, T3B);
 
565
                                             T5C = FNMS(T3C, T3A, T5B);
 
566
                                             T3s = iio[-WS(ios, 16)];
 
567
                                             T3v = rio[WS(ios, 7)];
 
568
                                             T3x = iio[-WS(ios, 24)];
 
569
                                        }
 
570
                                   }
 
571
                              }
 
572
                         }
 
573
                    }
 
574
                    {
 
575
                         E T61, T6P, T5E, T6M, T88, T90, T8Z, T8b;
 
576
                         {
 
577
                              E T7e, T8T, T7L, T7J, T7h, T8U, T8S, T8R;
 
578
                              {
 
579
                                   E T8c, T1i, T8A, T8z, T8O, T8J, T8N, T2l, T8L, T45, T8t, T8l, T8u, T8q, T3G;
 
580
                                   E T8k, T8p, T8w, T2m;
 
581
                                   {
 
582
                                        E T8x, T8y, T8n, T8C, T8I;
 
583
                                        {
 
584
                                             E TI, T3u, T5y, T5Y, T3y, T5A, T1h, T7H, T5Z;
 
585
                                             TI = Tq + TH;
 
586
                                             T7e = Tq - TH;
 
587
                                             {
 
588
                                                  E T3t, T5X, T3w, T5z;
 
589
                                                  T3t = FMA(T3r, T3s, T3p);
 
590
                                                  T5X = T3n * T3s;
 
591
                                                  T3w = TP * T3v;
 
592
                                                  T5z = TP * T3x;
 
593
                                                  T3u = T3l + T3t;
 
594
                                                  T5y = T3l - T3t;
 
595
                                                  T5Y = FNMS(T3r, T3o, T5X);
 
596
                                                  T3y = FMA(TR, T3x, T3w);
 
597
                                                  T5A = FNMS(TR, T3v, T5z);
 
598
                                                  T1h = TZ + T1g;
 
599
                                                  T8T = T1g - TZ;
 
600
                                             }
 
601
                                             T7H = T5W + T5Y;
 
602
                                             T5Z = T5W - T5Y;
 
603
                                             {
 
604
                                                  E T3F, T60, T7I, T5D;
 
605
                                                  T3F = T3y + T3E;
 
606
                                                  T60 = T3E - T3y;
 
607
                                                  T7I = T5A + T5C;
 
608
                                                  T5D = T5A - T5C;
 
609
                                                  T61 = T5Z + T60;
 
610
                                                  T6P = T60 - T5Z;
 
611
                                                  T3G = T3u + T3F;
 
612
                                                  T7L = T3u - T3F;
 
613
                                                  T5E = T5y + T5D;
 
614
                                                  T6M = T5y - T5D;
 
615
                                                  T7J = T7H - T7I;
 
616
                                                  T8n = T7H + T7I;
 
617
                                                  T8c = TI - T1h;
 
618
                                                  T1i = TI + T1h;
 
619
                                             }
 
620
                                        }
 
621
                                        T8k = T8i - T8j;
 
622
                                        T8x = T8i + T8j;
 
623
                                        T8y = T8n + T8o;
 
624
                                        T8p = T8n - T8o;
 
625
                                        T7h = T7f - T7g;
 
626
                                        T8C = T7f + T7g;
 
627
                                        T8I = T8D + T8H;
 
628
                                        T8U = T8H - T8D;
 
629
                                        T8A = T8x + T8y;
 
630
                                        T8z = T8x - T8y;
 
631
                                        T8O = T8I - T8C;
 
632
                                        T8J = T8C + T8I;
 
633
                                   }
 
634
                                   {
 
635
                                        E T8h, T8m, T3d, T44;
 
636
                                        T8h = T2N - T3c;
 
637
                                        T3d = T2N + T3c;
 
638
                                        T44 = T3G + T43;
 
639
                                        T8m = T3G - T43;
 
640
                                        T8N = T2k - T1J;
 
641
                                        T2l = T1J + T2k;
 
642
                                        T8L = T44 - T3d;
 
643
                                        T45 = T3d + T44;
 
644
                                        T8t = T8k - T8h;
 
645
                                        T8l = T8h + T8k;
 
646
                                        T8u = T8m + T8p;
 
647
                                        T8q = T8m - T8p;
 
648
                                   }
 
649
                                   T8w = T1i - T2l;
 
650
                                   T2m = T1i + T2l;
 
651
                                   {
 
652
                                        E T8s, T8P, T8Q, T8v;
 
653
                                        {
 
654
                                             E T8r, T8M, T8K, T8g, T8B, T8f;
 
655
                                             T8S = T8q - T8l;
 
656
                                             T8r = T8l + T8q;
 
657
                                             T8B = T8d + T8e;
 
658
                                             T8f = T8d - T8e;
 
659
                                             rio[0] = T2m + T45;
 
660
                                             iio[-WS(ios, 16)] = T2m - T45;
 
661
                                             rio[WS(ios, 8)] = T8w + T8z;
 
662
                                             iio[-WS(ios, 24)] = T8w - T8z;
 
663
                                             T8M = T8J - T8B;
 
664
                                             T8K = T8B + T8J;
 
665
                                             T8g = T8c + T8f;
 
666
                                             T8s = T8c - T8f;
 
667
                                             T8R = T8O - T8N;
 
668
                                             T8P = T8N + T8O;
 
669
                                             iio[-WS(ios, 8)] = T8L + T8M;
 
670
                                             rio[WS(ios, 24)] = T8L - T8M;
 
671
                                             iio[0] = T8A + T8K;
 
672
                                             rio[WS(ios, 16)] = T8A - T8K;
 
673
                                             rio[WS(ios, 4)] = FMA(KP707106781, T8r, T8g);
 
674
                                             iio[-WS(ios, 20)] = FNMS(KP707106781, T8r, T8g);
 
675
                                             T8Q = T8t + T8u;
 
676
                                             T8v = T8t - T8u;
 
677
                                        }
 
678
                                        iio[-WS(ios, 4)] = FMA(KP707106781, T8Q, T8P);
 
679
                                        rio[WS(ios, 20)] = FMS(KP707106781, T8Q, T8P);
 
680
                                        rio[WS(ios, 12)] = FMA(KP707106781, T8v, T8s);
 
681
                                        iio[-WS(ios, 28)] = FNMS(KP707106781, T8v, T8s);
 
682
                                   }
 
683
                              }
 
684
                              {
 
685
                                   E T7P, T7W, T7i, T7K, T8a, T86, T91, T8V, T8W, T7t, T7T, T7F, T92, T7Z, T89;
 
686
                                   E T83;
 
687
                                   {
 
688
                                        E T7X, T7n, T7s, T7Y, T84, T85;
 
689
                                        T7P = T7L - T7O;
 
690
                                        T84 = T7L + T7O;
 
691
                                        iio[-WS(ios, 12)] = FMA(KP707106781, T8S, T8R);
 
692
                                        rio[WS(ios, 28)] = FMS(KP707106781, T8S, T8R);
 
693
                                        T7W = T7e + T7h;
 
694
                                        T7i = T7e - T7h;
 
695
                                        T85 = T7J + T7G;
 
696
                                        T7K = T7G - T7J;
 
697
                                        T7X = T7m + T7l;
 
698
                                        T7n = T7l - T7m;
 
699
                                        T8a = FMA(KP414213562, T84, T85);
 
700
                                        T86 = FNMS(KP414213562, T85, T84);
 
701
                                        T91 = T8U - T8T;
 
702
                                        T8V = T8T + T8U;
 
703
                                        T7s = T7o + T7r;
 
704
                                        T7Y = T7o - T7r;
 
705
                                        {
 
706
                                             E T82, T81, T7z, T7E;
 
707
                                             T82 = T7x + T7y;
 
708
                                             T7z = T7x - T7y;
 
709
                                             T7E = T7A - T7D;
 
710
                                             T81 = T7A + T7D;
 
711
                                             T8W = T7n + T7s;
 
712
                                             T7t = T7n - T7s;
 
713
                                             T7T = FNMS(KP414213562, T7z, T7E);
 
714
                                             T7F = FMA(KP414213562, T7E, T7z);
 
715
                                             T92 = T7Y - T7X;
 
716
                                             T7Z = T7X + T7Y;
 
717
                                             T89 = FNMS(KP414213562, T81, T82);
 
718
                                             T83 = FMA(KP414213562, T82, T81);
 
719
                                        }
 
720
                                   }
 
721
                                   {
 
722
                                        E T7S, T7u, T93, T95, T7U, T7Q;
 
723
                                        T7S = FNMS(KP707106781, T7t, T7i);
 
724
                                        T7u = FMA(KP707106781, T7t, T7i);
 
725
                                        T93 = FMA(KP707106781, T92, T91);
 
726
                                        T95 = FNMS(KP707106781, T92, T91);
 
727
                                        T7U = FNMS(KP414213562, T7K, T7P);
 
728
                                        T7Q = FMA(KP414213562, T7P, T7K);
 
729
                                        {
 
730
                                             E T80, T87, T8X, T8Y;
 
731
                                             T88 = FNMS(KP707106781, T7Z, T7W);
 
732
                                             T80 = FMA(KP707106781, T7Z, T7W);
 
733
                                             {
 
734
                                                  E T7V, T94, T96, T7R;
 
735
                                                  T7V = T7T + T7U;
 
736
                                                  T94 = T7U - T7T;
 
737
                                                  T96 = T7Q - T7F;
 
738
                                                  T7R = T7F + T7Q;
 
739
                                                  iio[-WS(ios, 30)] = FMA(KP923879532, T7V, T7S);
 
740
                                                  rio[WS(ios, 14)] = FNMS(KP923879532, T7V, T7S);
 
741
                                                  iio[-WS(ios, 6)] = FMA(KP923879532, T94, T93);
 
742
                                                  rio[WS(ios, 22)] = FMS(KP923879532, T94, T93);
 
743
                                                  iio[-WS(ios, 14)] = FMA(KP923879532, T96, T95);
 
744
                                                  rio[WS(ios, 30)] = FMS(KP923879532, T96, T95);
 
745
                                                  rio[WS(ios, 6)] = FMA(KP923879532, T7R, T7u);
 
746
                                                  iio[-WS(ios, 22)] = FNMS(KP923879532, T7R, T7u);
 
747
                                                  T87 = T83 + T86;
 
748
                                                  T90 = T86 - T83;
 
749
                                             }
 
750
                                             T8Z = FNMS(KP707106781, T8W, T8V);
 
751
                                             T8X = FMA(KP707106781, T8W, T8V);
 
752
                                             T8Y = T89 + T8a;
 
753
                                             T8b = T89 - T8a;
 
754
                                             rio[WS(ios, 2)] = FMA(KP923879532, T87, T80);
 
755
                                             iio[-WS(ios, 18)] = FNMS(KP923879532, T87, T80);
 
756
                                             iio[-WS(ios, 2)] = FMA(KP923879532, T8Y, T8X);
 
757
                                             rio[WS(ios, 18)] = FMS(KP923879532, T8Y, T8X);
 
758
                                        }
 
759
                                   }
 
760
                              }
 
761
                         }
 
762
                         {
 
763
                              E T6s, T9o, T9n, T6v, T6Q, T6N, T6J, T6G, T9k, T9j;
 
764
                              {
 
765
                                   E T6c, T4s, T9i, T4X, T9h, T9b, T9c, T6f, T5U, T6k, T64, T5k, T5v;
 
766
                                   {
 
767
                                        E T6d, T6e, T99, T9a, T5T;
 
768
                                        {
 
769
                                             E T4c, T4r, T4H, T4W;
 
770
                                             T6s = T46 - T4b;
 
771
                                             T4c = T46 + T4b;
 
772
                                             rio[WS(ios, 10)] = FMA(KP923879532, T8b, T88);
 
773
                                             iio[-WS(ios, 26)] = FNMS(KP923879532, T8b, T88);
 
774
                                             iio[-WS(ios, 10)] = FMA(KP923879532, T90, T8Z);
 
775
                                             rio[WS(ios, 26)] = FMS(KP923879532, T90, T8Z);
 
776
                                             T4r = T4j + T4q;
 
777
                                             T9o = T4q - T4j;
 
778
                                             T6d = FNMS(KP414213562, T4z, T4G);
 
779
                                             T4H = FMA(KP414213562, T4G, T4z);
 
780
                                             T4W = FNMS(KP414213562, T4V, T4O);
 
781
                                             T6e = FMA(KP414213562, T4O, T4V);
 
782
                                             T9n = T98 + T97;
 
783
                                             T99 = T97 - T98;
 
784
                                             T6c = FNMS(KP707106781, T4r, T4c);
 
785
                                             T4s = FMA(KP707106781, T4r, T4c);
 
786
                                             T9i = T4W - T4H;
 
787
                                             T4X = T4H + T4W;
 
788
                                             T9a = T6t + T6u;
 
789
                                             T6v = T6t - T6u;
 
790
                                        }
 
791
                                        T6Q = T5S - T5L;
 
792
                                        T5T = T5L + T5S;
 
793
                                        T9h = FNMS(KP707106781, T9a, T99);
 
794
                                        T9b = FMA(KP707106781, T9a, T99);
 
795
                                        T9c = T6d + T6e;
 
796
                                        T6f = T6d - T6e;
 
797
                                        T5U = FMA(KP707106781, T5T, T5E);
 
798
                                        T6k = FNMS(KP707106781, T5T, T5E);
 
799
                                        T64 = T62 + T63;
 
800
                                        T6N = T63 - T62;
 
801
                                        T6J = T5c - T5j;
 
802
                                        T5k = T5c + T5j;
 
803
                                        T5v = T5t + T5u;
 
804
                                        T6G = T5u - T5t;
 
805
                                   }
 
806
                                   {
 
807
                                        E T6m, T6q, T6j, T6p, T9f, T9g;
 
808
                                        {
 
809
                                             E T68, T4Y, T6a, T66, T69, T5x, T9d, T6l, T65, T9e, T6b, T67;
 
810
                                             T68 = FNMS(KP923879532, T4X, T4s);
 
811
                                             T4Y = FMA(KP923879532, T4X, T4s);
 
812
                                             T6l = FNMS(KP707106781, T64, T61);
 
813
                                             T65 = FMA(KP707106781, T64, T61);
 
814
                                             {
 
815
                                                  E T6h, T5l, T6i, T5w;
 
816
                                                  T6h = FNMS(KP707106781, T5k, T55);
 
817
                                                  T5l = FMA(KP707106781, T5k, T55);
 
818
                                                  T6i = FNMS(KP707106781, T5v, T5s);
 
819
                                                  T5w = FMA(KP707106781, T5v, T5s);
 
820
                                                  T6m = FMA(KP668178637, T6l, T6k);
 
821
                                                  T6q = FNMS(KP668178637, T6k, T6l);
 
822
                                                  T6a = FMA(KP198912367, T5U, T65);
 
823
                                                  T66 = FNMS(KP198912367, T65, T5U);
 
824
                                                  T6j = FNMS(KP668178637, T6i, T6h);
 
825
                                                  T6p = FMA(KP668178637, T6h, T6i);
 
826
                                                  T69 = FNMS(KP198912367, T5l, T5w);
 
827
                                                  T5x = FMA(KP198912367, T5w, T5l);
 
828
                                             }
 
829
                                             T9d = FMA(KP923879532, T9c, T9b);
 
830
                                             T9f = FNMS(KP923879532, T9c, T9b);
 
831
                                             T9e = T69 + T6a;
 
832
                                             T6b = T69 - T6a;
 
833
                                             T9g = T66 - T5x;
 
834
                                             T67 = T5x + T66;
 
835
                                             iio[-WS(ios, 1)] = FMA(KP980785280, T9e, T9d);
 
836
                                             rio[WS(ios, 17)] = FMS(KP980785280, T9e, T9d);
 
837
                                             rio[WS(ios, 1)] = FMA(KP980785280, T67, T4Y);
 
838
                                             iio[-WS(ios, 17)] = FNMS(KP980785280, T67, T4Y);
 
839
                                             rio[WS(ios, 9)] = FMA(KP980785280, T6b, T68);
 
840
                                             iio[-WS(ios, 25)] = FNMS(KP980785280, T6b, T68);
 
841
                                        }
 
842
                                        {
 
843
                                             E T6o, T9l, T9m, T6r, T6g, T6n;
 
844
                                             T6o = FMA(KP923879532, T6f, T6c);
 
845
                                             T6g = FNMS(KP923879532, T6f, T6c);
 
846
                                             T6n = T6j + T6m;
 
847
                                             T9k = T6m - T6j;
 
848
                                             T9j = FMA(KP923879532, T9i, T9h);
 
849
                                             T9l = FNMS(KP923879532, T9i, T9h);
 
850
                                             iio[-WS(ios, 9)] = FMA(KP980785280, T9g, T9f);
 
851
                                             rio[WS(ios, 25)] = FMS(KP980785280, T9g, T9f);
 
852
                                             iio[-WS(ios, 29)] = FMA(KP831469612, T6n, T6g);
 
853
                                             rio[WS(ios, 13)] = FNMS(KP831469612, T6n, T6g);
 
854
                                             T9m = T6p + T6q;
 
855
                                             T6r = T6p - T6q;
 
856
                                             iio[-WS(ios, 13)] = FNMS(KP831469612, T9m, T9l);
 
857
                                             rio[WS(ios, 29)] = -(FMA(KP831469612, T9m, T9l));
 
858
                                             rio[WS(ios, 5)] = FMA(KP831469612, T6r, T6o);
 
859
                                             iio[-WS(ios, 21)] = FNMS(KP831469612, T6r, T6o);
 
860
                                        }
 
861
                                   }
 
862
                              }
 
863
                              {
 
864
                                   E T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T6H, T74, T78, T7c, T6W, T6S;
 
865
                                   {
 
866
                                        E T6Z, T6z, T6C, T70;
 
867
                                        T6Z = FNMS(KP414213562, T6x, T6y);
 
868
                                        T6z = FMA(KP414213562, T6y, T6x);
 
869
                                        iio[-WS(ios, 5)] = FMA(KP831469612, T9k, T9j);
 
870
                                        rio[WS(ios, 21)] = FMS(KP831469612, T9k, T9j);
 
871
                                        T6Y = FNMS(KP707106781, T6v, T6s);
 
872
                                        T6w = FMA(KP707106781, T6v, T6s);
 
873
                                        T6C = FNMS(KP414213562, T6B, T6A);
 
874
                                        T70 = FMA(KP414213562, T6A, T6B);
 
875
                                        T9w = T6z + T6C;
 
876
                                        T6D = T6z - T6C;
 
877
                                        T9v = FNMS(KP707106781, T9o, T9n);
 
878
                                        T9p = FMA(KP707106781, T9o, T9n);
 
879
                                        {
 
880
                                             E T77, T6O, T76, T6R;
 
881
                                             T9q = T70 - T6Z;
 
882
                                             T71 = T6Z + T70;
 
883
                                             T77 = FMA(KP707106781, T6N, T6M);
 
884
                                             T6O = FNMS(KP707106781, T6N, T6M);
 
885
                                             T76 = FMA(KP707106781, T6Q, T6P);
 
886
                                             T6R = FNMS(KP707106781, T6Q, T6P);
 
887
                                             T6H = FNMS(KP707106781, T6G, T6F);
 
888
                                             T74 = FMA(KP707106781, T6G, T6F);
 
889
                                             T78 = FMA(KP198912367, T77, T76);
 
890
                                             T7c = FNMS(KP198912367, T76, T77);
 
891
                                             T6W = FNMS(KP668178637, T6O, T6R);
 
892
                                             T6S = FMA(KP668178637, T6R, T6O);
 
893
                                        }
 
894
                                   }
 
895
                                   {
 
896
                                        E T6U, T6E, T9r, T9t, T73, T6K;
 
897
                                        T6U = FNMS(KP923879532, T6D, T6w);
 
898
                                        T6E = FMA(KP923879532, T6D, T6w);
 
899
                                        T9r = FMA(KP923879532, T9q, T9p);
 
900
                                        T9t = FNMS(KP923879532, T9q, T9p);
 
901
                                        T73 = FMA(KP707106781, T6J, T6I);
 
902
                                        T6K = FNMS(KP707106781, T6J, T6I);
 
903
                                        {
 
904
                                             E T7a, T9x, T9y, T7d;
 
905
                                             {
 
906
                                                  E T72, T7b, T6V, T6L, T79, T75;
 
907
                                                  T7a = FMA(KP923879532, T71, T6Y);
 
908
                                                  T72 = FNMS(KP923879532, T71, T6Y);
 
909
                                                  T75 = FMA(KP198912367, T74, T73);
 
910
                                                  T7b = FNMS(KP198912367, T73, T74);
 
911
                                                  T6V = FNMS(KP668178637, T6H, T6K);
 
912
                                                  T6L = FMA(KP668178637, T6K, T6H);
 
913
                                                  T79 = T75 + T78;
 
914
                                                  T9A = T78 - T75;
 
915
                                                  T9z = FMA(KP923879532, T9w, T9v);
 
916
                                                  T9x = FNMS(KP923879532, T9w, T9v);
 
917
                                                  {
 
918
                                                       E T6X, T9s, T9u, T6T;
 
919
                                                       T6X = T6V + T6W;
 
920
                                                       T9s = T6V - T6W;
 
921
                                                       T9u = T6S - T6L;
 
922
                                                       T6T = T6L + T6S;
 
923
                                                       rio[WS(ios, 7)] = FMA(KP980785280, T79, T72);
 
924
                                                       iio[-WS(ios, 23)] = FNMS(KP980785280, T79, T72);
 
925
                                                       rio[WS(ios, 11)] = FMA(KP831469612, T6X, T6U);
 
926
                                                       iio[-WS(ios, 27)] = FNMS(KP831469612, T6X, T6U);
 
927
                                                       iio[-WS(ios, 3)] = FMA(KP831469612, T9s, T9r);
 
928
                                                       rio[WS(ios, 19)] = FMS(KP831469612, T9s, T9r);
 
929
                                                       iio[-WS(ios, 11)] = FMA(KP831469612, T9u, T9t);
 
930
                                                       rio[WS(ios, 27)] = FMS(KP831469612, T9u, T9t);
 
931
                                                       rio[WS(ios, 3)] = FMA(KP831469612, T6T, T6E);
 
932
                                                       iio[-WS(ios, 19)] = FNMS(KP831469612, T6T, T6E);
 
933
                                                       T9y = T7c - T7b;
 
934
                                                       T7d = T7b + T7c;
 
935
                                                  }
 
936
                                             }
 
937
                                             iio[-WS(ios, 7)] = FMA(KP980785280, T9y, T9x);
 
938
                                             rio[WS(ios, 23)] = FMS(KP980785280, T9y, T9x);
 
939
                                             iio[-WS(ios, 31)] = FMA(KP980785280, T7d, T7a);
 
940
                                             rio[WS(ios, 15)] = FNMS(KP980785280, T7d, T7a);
 
941
                                        }
 
942
                                   }
 
943
                              }
 
944
                         }
 
945
                    }
 
946
               }
 
947
          }
 
948
          iio[-WS(ios, 15)] = FMA(KP980785280, T9A, T9z);
 
949
          rio[WS(ios, 31)] = FMS(KP980785280, T9A, T9z);
 
950
     }
 
951
     return W;
 
952
}
 
953
 
 
954
/*
 * Twiddle instruction table for the hf2_32 codelet; it is referenced by
 * the `desc` descriptor that follows.
 *
 * The table holds four TW_CEXP entries whose last field is 1, 3, 9 and 27,
 * followed by a single TW_NEXT entry.
 *
 * NOTE(review): the generator was invoked with -twiddle-log3
 * -precompute-twiddles (see the "Generated by" comment), so these four
 * entries are presumably the only twiddle values read from W, with the
 * remaining ones derived inside the codelet.  The non-FMA variant's loop
 * steps W by 8 per iteration, which would be consistent with four complex
 * values -- confirm against the tw_instr definition in the FFTW kernel
 * headers.
 */
static const tw_instr twinstr[] = {
 
955
     {TW_CEXP, 0, 1},
 
956
     {TW_CEXP, 0, 3},
 
957
     {TW_CEXP, 0, 9},
 
958
     {TW_CEXP, 0, 27},
 
959
     {TW_NEXT, 1, 0}
 
960
};
 
961
 
 
962
/*
 * Descriptor handed to X(khc2hc_register) for this codelet.  In order it
 * carries: 32 (the generator was run with -n 32), the codelet name string,
 * the twiddle instruction table, &GENUS, and the group {236, 98, 252, 0},
 * whose first three values match the additions / multiplications / fused
 * multiply-add counts quoted in the header comment of this HAVE_FMA
 * variant.  The meaning of the fourth value and of the three trailing
 * zeros is not visible in this file -- see the hc2hc_desc declaration.
 */
static const hc2hc_desc desc = { 32, "hf2_32", twinstr, &GENUS, {236, 98, 252, 0}, 0, 0, 0 };
 
963
 
 
964
/*
 * Registration entry point for the hf2_32 codelet: passes the planner `p`,
 * the hf2_32 function and the address of its descriptor `desc` to
 * X(khc2hc_register).  Returns nothing.
 *
 * NOTE(review): X(...) is a project macro, presumably a name-prefixing
 * wrapper -- confirm in the FFTW kernel headers.
 */
void X(codelet_hf2_32) (planner *p) {
 
965
     X(khc2hc_register) (p, hf2_32, &desc);
 
966
}
 
967
#else                           /* HAVE_FMA */
 
968
 
 
969
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h */
 
970
 
 
971
/*
 
972
 * This function contains 488 FP additions, 280 FP multiplications,
 
973
 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
 
974
 * 158 stack variables, and 128 memory accesses
 
975
 */
 
976
/*
 
977
 * Generator Id's : 
 
978
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
979
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
980
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
 
981
 */
 
982
 
 
983
#include "hf.h"
 
984
 
 
985
static const R *hf2_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
 
986
{
 
987
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
 
988
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
 
989
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
 
990
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
 
991
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
 
992
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
993
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
994
     INT i;
 
995
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8, MAKE_VOLATILE_STRIDE(ios)) {
 
996
          E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;
 
997
          E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;
 
998
          E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;
 
999
          E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;
 
1000
          E T1S, T23;
 
1001
          {
 
1002
               E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;
 
1003
               E T10;
 
1004
               {
 
1005
                    E T4, Tc, T7, Tb;
 
1006
                    T2 = W[0];
 
1007
                    T5 = W[1];
 
1008
                    T3 = W[2];
 
1009
                    T6 = W[3];
 
1010
                    T4 = T2 * T3;
 
1011
                    Tc = T5 * T3;
 
1012
                    T7 = T5 * T6;
 
1013
                    Tb = T2 * T6;
 
1014
                    T8 = T4 + T7;
 
1015
                    TM = T4 - T7;
 
1016
                    TO = Tb + Tc;
 
1017
                    Td = Tb - Tc;
 
1018
                    T9 = W[4];
 
1019
                    Ts = T2 * T9;
 
1020
                    T1d = T6 * T9;
 
1021
                    Tx = T5 * T9;
 
1022
                    T18 = T3 * T9;
 
1023
                    Te = W[5];
 
1024
                    Tt = T5 * Te;
 
1025
                    T1c = T3 * Te;
 
1026
                    Tw = T2 * Te;
 
1027
                    T19 = T6 * Te;
 
1028
                    Th = W[6];
 
1029
                    TB = T3 * Th;
 
1030
                    T14 = T5 * Th;
 
1031
                    TG = T6 * Th;
 
1032
                    TZ = T2 * Th;
 
1033
                    Tl = W[7];
 
1034
                    TC = T6 * Tl;
 
1035
                    T13 = T2 * Tl;
 
1036
                    TF = T3 * Tl;
 
1037
                    T10 = T5 * Tl;
 
1038
               }
 
1039
               TD = TB + TC;
 
1040
               TH = TF - TG;
 
1041
               T1y = TZ + T10;
 
1042
               T1H = TF + TG;
 
1043
               T15 = T13 + T14;
 
1044
               T1A = T13 - T14;
 
1045
               T11 = TZ - T10;
 
1046
               T1F = TB - TC;
 
1047
               T1n = FMA(T9, Th, Te * Tl);
 
1048
               T1p = FNMS(Te, Th, T9 * Tl);
 
1049
               {
 
1050
                    E T2o, T2p, T2s, T2t;
 
1051
                    T2o = T8 * Th;
 
1052
                    T2p = Td * Tl;
 
1053
                    T2q = T2o + T2p;
 
1054
                    T2I = T2o - T2p;
 
1055
                    T2s = T8 * Tl;
 
1056
                    T2t = Td * Th;
 
1057
                    T2u = T2s - T2t;
 
1058
                    T2K = T2s + T2t;
 
1059
               }
 
1060
               {
 
1061
                    E T2T, T2U, T2X, T2Y;
 
1062
                    T2T = TM * Th;
 
1063
                    T2U = TO * Tl;
 
1064
                    T2V = T2T - T2U;
 
1065
                    T3b = T2T + T2U;
 
1066
                    T2X = TM * Tl;
 
1067
                    T2Y = TO * Th;
 
1068
                    T2Z = T2X + T2Y;
 
1069
                    T3d = T2X - T2Y;
 
1070
                    Tu = Ts + Tt;
 
1071
                    Ty = Tw - Tx;
 
1072
                    T3l = FMA(Tu, Th, Ty * Tl);
 
1073
                    T3n = FNMS(Ty, Th, Tu * Tl);
 
1074
               }
 
1075
               T1t = Ts - Tt;
 
1076
               T1v = Tw + Tx;
 
1077
               T2f = FMA(T1t, Th, T1v * Tl);
 
1078
               T2h = FNMS(T1v, Th, T1t * Tl);
 
1079
               T1a = T18 - T19;
 
1080
               T1e = T1c + T1d;
 
1081
               T32 = FMA(T1a, Th, T1e * Tl);
 
1082
               T34 = FNMS(T1e, Th, T1a * Tl);
 
1083
               T1W = T18 + T19;
 
1084
               T1Y = T1c - T1d;
 
1085
               T2C = FMA(T1W, Th, T1Y * Tl);
 
1086
               T2E = FNMS(T1Y, Th, T1W * Tl);
 
1087
               {
 
1088
                    E Ta, Tf, Ti, Tj;
 
1089
                    Ta = T8 * T9;
 
1090
                    Tf = Td * Te;
 
1091
                    Tg = Ta - Tf;
 
1092
                    TR = Ta + Tf;
 
1093
                    Ti = T8 * Te;
 
1094
                    Tj = Td * T9;
 
1095
                    Tk = Ti + Tj;
 
1096
                    TS = Ti - Tj;
 
1097
               }
 
1098
               Tm = FMA(Tg, Th, Tk * Tl);
 
1099
               TV = FNMS(TS, Th, TR * Tl);
 
1100
               To = FNMS(Tk, Th, Tg * Tl);
 
1101
               TT = FMA(TR, Th, TS * Tl);
 
1102
               {
 
1103
                    E T1K, T1L, T1N, T1O;
 
1104
                    T1K = TM * T9;
 
1105
                    T1L = TO * Te;
 
1106
                    T1M = T1K - T1L;
 
1107
                    T21 = T1K + T1L;
 
1108
                    T1N = TM * Te;
 
1109
                    T1O = TO * T9;
 
1110
                    T1P = T1N + T1O;
 
1111
                    T22 = T1N - T1O;
 
1112
               }
 
1113
               T1Q = FMA(T1M, Th, T1P * Tl);
 
1114
               T25 = FNMS(T22, Th, T21 * Tl);
 
1115
               T1S = FNMS(T1P, Th, T1M * Tl);
 
1116
               T23 = FMA(T21, Th, T22 * Tl);
 
1117
          }
 
1118
          {
 
1119
               E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T3h, T6H, T6O, T7o, T4L, T5N, T52;
 
1120
               E T5Q, T1i, T7V, T6i, T7D, T3K, T5u, T3P, T5v, T2y, T6B, T6y, T7j, T4k, T5J;
 
1121
               E T4B, T5G, T29, T6p, T6s, T7f, T47, T5B, T4c, T5C, T1E, T6n, T6m, T7e, T3W;
 
1122
               E T5y, T41, T5z, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p;
 
1123
               E T4W, T5R, T55, T5O;
 
1124
               {
 
1125
                    E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;
 
1126
                    T1 = rio[0];
 
1127
                    T7G = iio[-WS(ios, 31)];
 
1128
                    Tn = rio[WS(ios, 16)];
 
1129
                    Tp = iio[-WS(ios, 15)];
 
1130
                    Tq = FMA(Tm, Tn, To * Tp);
 
1131
                    T7F = FNMS(To, Tn, Tm * Tp);
 
1132
                    {
 
1133
                         E Tv, Tz, TE, TI;
 
1134
                         Tv = rio[WS(ios, 8)];
 
1135
                         Tz = iio[-WS(ios, 23)];
 
1136
                         TA = FMA(Tu, Tv, Ty * Tz);
 
1137
                         T3C = FNMS(Ty, Tv, Tu * Tz);
 
1138
                         TE = rio[WS(ios, 24)];
 
1139
                         TI = iio[-WS(ios, 7)];
 
1140
                         TJ = FMA(TD, TE, TH * TI);
 
1141
                         T3D = FNMS(TH, TE, TD * TI);
 
1142
                    }
 
1143
                    {
 
1144
                         E Tr, TK, T8a, T8b;
 
1145
                         Tr = T1 + Tq;
 
1146
                         TK = TA + TJ;
 
1147
                         TL = Tr + TK;
 
1148
                         T6f = Tr - TK;
 
1149
                         T8a = T7G - T7F;
 
1150
                         T8b = TA - TJ;
 
1151
                         T8c = T8a - T8b;
 
1152
                         T8q = T8b + T8a;
 
1153
                    }
 
1154
                    {
 
1155
                         E T3B, T3E, T7E, T7H;
 
1156
                         T3B = T1 - Tq;
 
1157
                         T3E = T3C - T3D;
 
1158
                         T3F = T3B - T3E;
 
1159
                         T5t = T3B + T3E;
 
1160
                         T7E = T3C + T3D;
 
1161
                         T7H = T7F + T7G;
 
1162
                         T7I = T7E + T7H;
 
1163
                         T7W = T7H - T7E;
 
1164
                    }
 
1165
               }
 
1166
               {
 
1167
                    E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I;
 
1168
                    {
 
1169
                         E T2W, T30, T3c, T3e;
 
1170
                         T2W = rio[WS(ios, 31)];
 
1171
                         T30 = iio[0];
 
1172
                         T31 = FMA(T2V, T2W, T2Z * T30);
 
1173
                         T4Y = FNMS(T2Z, T2W, T2V * T30);
 
1174
                         T3c = rio[WS(ios, 23)];
 
1175
                         T3e = iio[-WS(ios, 8)];
 
1176
                         T3f = FMA(T3b, T3c, T3d * T3e);
 
1177
                         T4J = FNMS(T3d, T3c, T3b * T3e);
 
1178
                    }
 
1179
                    {
 
1180
                         E T33, T35, T38, T39;
 
1181
                         T33 = rio[WS(ios, 15)];
 
1182
                         T35 = iio[-WS(ios, 16)];
 
1183
                         T36 = FMA(T32, T33, T34 * T35);
 
1184
                         T4Z = FNMS(T34, T33, T32 * T35);
 
1185
                         T38 = rio[WS(ios, 7)];
 
1186
                         T39 = iio[-WS(ios, 24)];
 
1187
                         T3a = FMA(TR, T38, TS * T39);
 
1188
                         T4I = FNMS(TS, T38, TR * T39);
 
1189
                    }
 
1190
                    {
 
1191
                         E T37, T3g, T6M, T6N;
 
1192
                         T37 = T31 + T36;
 
1193
                         T3g = T3a + T3f;
 
1194
                         T3h = T37 + T3g;
 
1195
                         T6H = T37 - T3g;
 
1196
                         T6M = T4Y + T4Z;
 
1197
                         T6N = T4I + T4J;
 
1198
                         T6O = T6M - T6N;
 
1199
                         T7o = T6M + T6N;
 
1200
                    }
 
1201
                    {
 
1202
                         E T4H, T4K, T50, T51;
 
1203
                         T4H = T31 - T36;
 
1204
                         T4K = T4I - T4J;
 
1205
                         T4L = T4H - T4K;
 
1206
                         T5N = T4H + T4K;
 
1207
                         T50 = T4Y - T4Z;
 
1208
                         T51 = T3a - T3f;
 
1209
                         T52 = T50 + T51;
 
1210
                         T5Q = T50 - T51;
 
1211
                    }
 
1212
               }
 
1213
               {
 
1214
                    E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M;
 
1215
                    {
 
1216
                         E TN, TP, T1b, T1f;
 
1217
                         TN = rio[WS(ios, 4)];
 
1218
                         TP = iio[-WS(ios, 27)];
 
1219
                         TQ = FMA(TM, TN, TO * TP);
 
1220
                         T3G = FNMS(TO, TN, TM * TP);
 
1221
                         T1b = rio[WS(ios, 12)];
 
1222
                         T1f = iio[-WS(ios, 19)];
 
1223
                         T1g = FMA(T1a, T1b, T1e * T1f);
 
1224
                         T3N = FNMS(T1e, T1b, T1a * T1f);
 
1225
                    }
 
1226
                    {
 
1227
                         E TU, TW, T12, T16;
 
1228
                         TU = rio[WS(ios, 20)];
 
1229
                         TW = iio[-WS(ios, 11)];
 
1230
                         TX = FMA(TT, TU, TV * TW);
 
1231
                         T3H = FNMS(TV, TU, TT * TW);
 
1232
                         T12 = rio[WS(ios, 28)];
 
1233
                         T16 = iio[-WS(ios, 3)];
 
1234
                         T17 = FMA(T11, T12, T15 * T16);
 
1235
                         T3M = FNMS(T15, T12, T11 * T16);
 
1236
                    }
 
1237
                    {
 
1238
                         E TY, T1h, T6g, T6h;
 
1239
                         TY = TQ + TX;
 
1240
                         T1h = T17 + T1g;
 
1241
                         T1i = TY + T1h;
 
1242
                         T7V = T1h - TY;
 
1243
                         T6g = T3G + T3H;
 
1244
                         T6h = T3M + T3N;
 
1245
                         T6i = T6g - T6h;
 
1246
                         T7D = T6g + T6h;
 
1247
                    }
 
1248
                    {
 
1249
                         E T3I, T3J, T3L, T3O;
 
1250
                         T3I = T3G - T3H;
 
1251
                         T3J = TQ - TX;
 
1252
                         T3K = T3I - T3J;
 
1253
                         T5u = T3J + T3I;
 
1254
                         T3L = T17 - T1g;
 
1255
                         T3O = T3M - T3N;
 
1256
                         T3P = T3L + T3O;
 
1257
                         T5v = T3L - T3O;
 
1258
                    }
 
1259
               }
 
1260
               {
 
1261
                    E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y;
 
1262
                    {
 
1263
                         E T2c, T2d, T2r, T2v;
 
1264
                         T2c = rio[WS(ios, 1)];
 
1265
                         T2d = iio[-WS(ios, 30)];
 
1266
                         T2e = FMA(T2, T2c, T5 * T2d);
 
1267
                         T4g = FNMS(T5, T2c, T2 * T2d);
 
1268
                         T2r = rio[WS(ios, 25)];
 
1269
                         T2v = iio[-WS(ios, 6)];
 
1270
                         T2w = FMA(T2q, T2r, T2u * T2v);
 
1271
                         T4z = FNMS(T2u, T2r, T2q * T2v);
 
1272
                    }
 
1273
                    {
 
1274
                         E T2g, T2i, T2l, T2m;
 
1275
                         T2g = rio[WS(ios, 17)];
 
1276
                         T2i = iio[-WS(ios, 14)];
 
1277
                         T2j = FMA(T2f, T2g, T2h * T2i);
 
1278
                         T4h = FNMS(T2h, T2g, T2f * T2i);
 
1279
                         T2l = rio[WS(ios, 9)];
 
1280
                         T2m = iio[-WS(ios, 22)];
 
1281
                         T2n = FMA(T9, T2l, Te * T2m);
 
1282
                         T4y = FNMS(Te, T2l, T9 * T2m);
 
1283
                    }
 
1284
                    {
 
1285
                         E T2k, T2x, T6w, T6x;
 
1286
                         T2k = T2e + T2j;
 
1287
                         T2x = T2n + T2w;
 
1288
                         T2y = T2k + T2x;
 
1289
                         T6B = T2k - T2x;
 
1290
                         T6w = T4g + T4h;
 
1291
                         T6x = T4y + T4z;
 
1292
                         T6y = T6w - T6x;
 
1293
                         T7j = T6w + T6x;
 
1294
                    }
 
1295
                    {
 
1296
                         E T4i, T4j, T4x, T4A;
 
1297
                         T4i = T4g - T4h;
 
1298
                         T4j = T2n - T2w;
 
1299
                         T4k = T4i + T4j;
 
1300
                         T5J = T4i - T4j;
 
1301
                         T4x = T2e - T2j;
 
1302
                         T4A = T4y - T4z;
 
1303
                         T4B = T4x - T4A;
 
1304
                         T5G = T4x + T4A;
 
1305
                    }
 
1306
               }
 
1307
               {
 
1308
                    E T1J, T43, T27, T4a, T1U, T44, T20, T49;
 
1309
                    {
 
1310
                         E T1G, T1I, T24, T26;
 
1311
                         T1G = rio[WS(ios, 30)];
 
1312
                         T1I = iio[-WS(ios, 1)];
 
1313
                         T1J = FMA(T1F, T1G, T1H * T1I);
 
1314
                         T43 = FNMS(T1H, T1G, T1F * T1I);
 
1315
                         T24 = rio[WS(ios, 22)];
 
1316
                         T26 = iio[-WS(ios, 9)];
 
1317
                         T27 = FMA(T23, T24, T25 * T26);
 
1318
                         T4a = FNMS(T25, T24, T23 * T26);
 
1319
                    }
 
1320
                    {
 
1321
                         E T1R, T1T, T1X, T1Z;
 
1322
                         T1R = rio[WS(ios, 14)];
 
1323
                         T1T = iio[-WS(ios, 17)];
 
1324
                         T1U = FMA(T1Q, T1R, T1S * T1T);
 
1325
                         T44 = FNMS(T1S, T1R, T1Q * T1T);
 
1326
                         T1X = rio[WS(ios, 6)];
 
1327
                         T1Z = iio[-WS(ios, 25)];
 
1328
                         T20 = FMA(T1W, T1X, T1Y * T1Z);
 
1329
                         T49 = FNMS(T1Y, T1X, T1W * T1Z);
 
1330
                    }
 
1331
                    {
 
1332
                         E T1V, T28, T6q, T6r;
 
1333
                         T1V = T1J + T1U;
 
1334
                         T28 = T20 + T27;
 
1335
                         T29 = T1V + T28;
 
1336
                         T6p = T1V - T28;
 
1337
                         T6q = T43 + T44;
 
1338
                         T6r = T49 + T4a;
 
1339
                         T6s = T6q - T6r;
 
1340
                         T7f = T6q + T6r;
 
1341
                    }
 
1342
                    {
 
1343
                         E T45, T46, T48, T4b;
 
1344
                         T45 = T43 - T44;
 
1345
                         T46 = T20 - T27;
 
1346
                         T47 = T45 + T46;
 
1347
                         T5B = T45 - T46;
 
1348
                         T48 = T1J - T1U;
 
1349
                         T4b = T49 - T4a;
 
1350
                         T4c = T48 - T4b;
 
1351
                         T5C = T48 + T4b;
 
1352
                    }
 
1353
               }
 
1354
               {
 
1355
                    E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y;
 
1356
                    {
 
1357
                         E T1k, T1l, T1z, T1B;
 
1358
                         T1k = rio[WS(ios, 2)];
 
1359
                         T1l = iio[-WS(ios, 29)];
 
1360
                         T1m = FMA(T8, T1k, Td * T1l);
 
1361
                         T3S = FNMS(Td, T1k, T8 * T1l);
 
1362
                         T1z = rio[WS(ios, 26)];
 
1363
                         T1B = iio[-WS(ios, 5)];
 
1364
                         T1C = FMA(T1y, T1z, T1A * T1B);
 
1365
                         T3Z = FNMS(T1A, T1z, T1y * T1B);
 
1366
                    }
 
1367
                    {
 
1368
                         E T1o, T1q, T1u, T1w;
 
1369
                         T1o = rio[WS(ios, 18)];
 
1370
                         T1q = iio[-WS(ios, 13)];
 
1371
                         T1r = FMA(T1n, T1o, T1p * T1q);
 
1372
                         T3T = FNMS(T1p, T1o, T1n * T1q);
 
1373
                         T1u = rio[WS(ios, 10)];
 
1374
                         T1w = iio[-WS(ios, 21)];
 
1375
                         T1x = FMA(T1t, T1u, T1v * T1w);
 
1376
                         T3Y = FNMS(T1v, T1u, T1t * T1w);
 
1377
                    }
 
1378
                    {
 
1379
                         E T1s, T1D, T6k, T6l;
 
1380
                         T1s = T1m + T1r;
 
1381
                         T1D = T1x + T1C;
 
1382
                         T1E = T1s + T1D;
 
1383
                         T6n = T1s - T1D;
 
1384
                         T6k = T3S + T3T;
 
1385
                         T6l = T3Y + T3Z;
 
1386
                         T6m = T6k - T6l;
 
1387
                         T7e = T6k + T6l;
 
1388
                    }
 
1389
                    {
 
1390
                         E T3U, T3V, T3X, T40;
 
1391
                         T3U = T3S - T3T;
 
1392
                         T3V = T1x - T1C;
 
1393
                         T3W = T3U + T3V;
 
1394
                         T5y = T3U - T3V;
 
1395
                         T3X = T1m - T1r;
 
1396
                         T40 = T3Y - T3Z;
 
1397
                         T41 = T3X - T40;
 
1398
                         T5z = T3X + T40;
 
1399
                    }
 
1400
               }
 
1401
               {
 
1402
                    E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o;
 
1403
                    {
 
1404
                         E T2z, T2A, T2D, T2F;
 
1405
                         T2z = rio[WS(ios, 5)];
 
1406
                         T2A = iio[-WS(ios, 26)];
 
1407
                         T2B = FMA(T21, T2z, T22 * T2A);
 
1408
                         T4r = FNMS(T22, T2z, T21 * T2A);
 
1409
                         T2D = rio[WS(ios, 21)];
 
1410
                         T2F = iio[-WS(ios, 10)];
 
1411
                         T2G = FMA(T2C, T2D, T2E * T2F);
 
1412
                         T4s = FNMS(T2E, T2D, T2C * T2F);
 
1413
                    }
 
1414
                    T4q = T2B - T2G;
 
1415
                    T4t = T4r - T4s;
 
1416
                    {
 
1417
                         E T2J, T2L, T2N, T2O;
 
1418
                         T2J = rio[WS(ios, 29)];
 
1419
                         T2L = iio[-WS(ios, 2)];
 
1420
                         T2M = FMA(T2I, T2J, T2K * T2L);
 
1421
                         T4m = FNMS(T2K, T2J, T2I * T2L);
 
1422
                         T2N = rio[WS(ios, 13)];
 
1423
                         T2O = iio[-WS(ios, 18)];
 
1424
                         T2P = FMA(T1M, T2N, T1P * T2O);
 
1425
                         T4n = FNMS(T1P, T2N, T1M * T2O);
 
1426
                    }
 
1427
                    T4l = T2M - T2P;
 
1428
                    T4o = T4m - T4n;
 
1429
                    {
 
1430
                         E T2H, T2Q, T6C, T6D;
 
1431
                         T2H = T2B + T2G;
 
1432
                         T2Q = T2M + T2P;
 
1433
                         T2R = T2H + T2Q;
 
1434
                         T6z = T2Q - T2H;
 
1435
                         T6C = T4r + T4s;
 
1436
                         T6D = T4m + T4n;
 
1437
                         T6E = T6C - T6D;
 
1438
                         T7k = T6C + T6D;
 
1439
                    }
 
1440
                    {
 
1441
                         E T4p, T4u, T4C, T4D;
 
1442
                         T4p = T4l - T4o;
 
1443
                         T4u = T4q + T4t;
 
1444
                         T4v = KP707106781 * (T4p - T4u);
 
1445
                         T5H = KP707106781 * (T4u + T4p);
 
1446
                         T4C = T4t - T4q;
 
1447
                         T4D = T4l + T4o;
 
1448
                         T4E = KP707106781 * (T4C - T4D);
 
1449
                         T5K = KP707106781 * (T4C + T4D);
 
1450
                    }
 
1451
               }
 
1452
               {
 
1453
                    E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U;
 
1454
                    {
 
1455
                         E T3i, T3j, T3m, T3o;
 
1456
                         T3i = rio[WS(ios, 3)];
 
1457
                         T3j = iio[-WS(ios, 28)];
 
1458
                         T3k = FMA(T3, T3i, T6 * T3j);
 
1459
                         T4M = FNMS(T6, T3i, T3 * T3j);
 
1460
                         T3m = rio[WS(ios, 19)];
 
1461
                         T3o = iio[-WS(ios, 12)];
 
1462
                         T3p = FMA(T3l, T3m, T3n * T3o);
 
1463
                         T4N = FNMS(T3n, T3m, T3l * T3o);
 
1464
                    }
 
1465
                    T4O = T4M - T4N;
 
1466
                    T4P = T3k - T3p;
 
1467
                    {
 
1468
                         E T3r, T3s, T3u, T3v;
 
1469
                         T3r = rio[WS(ios, 27)];
 
1470
                         T3s = iio[-WS(ios, 4)];
 
1471
                         T3t = FMA(Th, T3r, Tl * T3s);
 
1472
                         T4S = FNMS(Tl, T3r, Th * T3s);
 
1473
                         T3u = rio[WS(ios, 11)];
 
1474
                         T3v = iio[-WS(ios, 20)];
 
1475
                         T3w = FMA(Tg, T3u, Tk * T3v);
 
1476
                         T4T = FNMS(Tk, T3u, Tg * T3v);
 
1477
                    }
 
1478
                    T4R = T3t - T3w;
 
1479
                    T4U = T4S - T4T;
 
1480
                    {
 
1481
                         E T3q, T3x, T6I, T6J;
 
1482
                         T3q = T3k + T3p;
 
1483
                         T3x = T3t + T3w;
 
1484
                         T3y = T3q + T3x;
 
1485
                         T6P = T3x - T3q;
 
1486
                         T6I = T4M + T4N;
 
1487
                         T6J = T4S + T4T;
 
1488
                         T6K = T6I - T6J;
 
1489
                         T7p = T6I + T6J;
 
1490
                    }
 
1491
                    {
 
1492
                         E T4Q, T4V, T53, T54;
 
1493
                         T4Q = T4O - T4P;
 
1494
                         T4V = T4R + T4U;
 
1495
                         T4W = KP707106781 * (T4Q - T4V);
 
1496
                         T5R = KP707106781 * (T4Q + T4V);
 
1497
                         T53 = T4R - T4U;
 
1498
                         T54 = T4P + T4O;
 
1499
                         T55 = KP707106781 * (T53 - T54);
 
1500
                         T5O = KP707106781 * (T54 + T53);
 
1501
                    }
 
1502
               }
 
1503
               {
 
1504
                    E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B;
 
1505
                    {
 
1506
                         E T1j, T2a, T7C, T7J;
 
1507
                         T1j = TL + T1i;
 
1508
                         T2a = T1E + T29;
 
1509
                         T2b = T1j + T2a;
 
1510
                         T7x = T1j - T2a;
 
1511
                         T7C = T7e + T7f;
 
1512
                         T7J = T7D + T7I;
 
1513
                         T7K = T7C + T7J;
 
1514
                         T7M = T7J - T7C;
 
1515
                    }
 
1516
                    {
 
1517
                         E T2S, T3z, T7y, T7z;
 
1518
                         T2S = T2y + T2R;
 
1519
                         T3z = T3h + T3y;
 
1520
                         T3A = T2S + T3z;
 
1521
                         T7L = T3z - T2S;
 
1522
                         T7y = T7j + T7k;
 
1523
                         T7z = T7o + T7p;
 
1524
                         T7A = T7y - T7z;
 
1525
                         T7B = T7y + T7z;
 
1526
                    }
 
1527
                    iio[-WS(ios, 16)] = T2b - T3A;
 
1528
                    rio[WS(ios, 16)] = T7B - T7K;
 
1529
                    rio[0] = T2b + T3A;
 
1530
                    iio[0] = T7B + T7K;
 
1531
                    iio[-WS(ios, 24)] = T7x - T7A;
 
1532
                    rio[WS(ios, 24)] = T7L - T7M;
 
1533
                    rio[WS(ios, 8)] = T7x + T7A;
 
1534
                    iio[-WS(ios, 8)] = T7L + T7M;
 
1535
               }
 
1536
               {
 
1537
                    E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v;
 
1538
                    {
 
1539
                         E T7d, T7g, T7O, T7P;
 
1540
                         T7d = TL - T1i;
 
1541
                         T7g = T7e - T7f;
 
1542
                         T7h = T7d + T7g;
 
1543
                         T7t = T7d - T7g;
 
1544
                         T7O = T29 - T1E;
 
1545
                         T7P = T7I - T7D;
 
1546
                         T7Q = T7O + T7P;
 
1547
                         T7S = T7P - T7O;
 
1548
                    }
 
1549
                    {
 
1550
                         E T7i, T7l, T7n, T7q;
 
1551
                         T7i = T2y - T2R;
 
1552
                         T7l = T7j - T7k;
 
1553
                         T7m = T7i + T7l;
 
1554
                         T7u = T7l - T7i;
 
1555
                         T7n = T3h - T3y;
 
1556
                         T7q = T7o - T7p;
 
1557
                         T7r = T7n - T7q;
 
1558
                         T7v = T7n + T7q;
 
1559
                    }
 
1560
                    {
 
1561
                         E T7s, T7N, T7w, T7R;
 
1562
                         T7s = KP707106781 * (T7m + T7r);
 
1563
                         iio[-WS(ios, 20)] = T7h - T7s;
 
1564
                         rio[WS(ios, 4)] = T7h + T7s;
 
1565
                         T7N = KP707106781 * (T7u + T7v);
 
1566
                         rio[WS(ios, 20)] = T7N - T7Q;
 
1567
                         iio[-WS(ios, 4)] = T7N + T7Q;
 
1568
                         T7w = KP707106781 * (T7u - T7v);
 
1569
                         iio[-WS(ios, 28)] = T7t - T7w;
 
1570
                         rio[WS(ios, 12)] = T7t + T7w;
 
1571
                         T7R = KP707106781 * (T7r - T7m);
 
1572
                         rio[WS(ios, 28)] = T7R - T7S;
 
1573
                         iio[-WS(ios, 12)] = T7R + T7S;
 
1574
                    }
 
1575
               }
 
1576
               {
 
1577
                    E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R;
 
1578
                    E T6V;
 
1579
                    {
 
1580
                         E T6o, T6t, T6A, T6F;
 
1581
                         T6j = T6f - T6i;
 
1582
                         T7X = T7V + T7W;
 
1583
                         T83 = T7W - T7V;
 
1584
                         T6X = T6f + T6i;
 
1585
                         T6o = T6m - T6n;
 
1586
                         T6t = T6p + T6s;
 
1587
                         T6u = KP707106781 * (T6o - T6t);
 
1588
                         T7U = KP707106781 * (T6o + T6t);
 
1589
                         {
 
1590
                              E T75, T76, T6Y, T6Z;
 
1591
                              T75 = T6H + T6K;
 
1592
                              T76 = T6O + T6P;
 
1593
                              T77 = FNMS(KP382683432, T76, KP923879532 * T75);
 
1594
                              T7b = FMA(KP923879532, T76, KP382683432 * T75);
 
1595
                              T6Y = T6n + T6m;
 
1596
                              T6Z = T6p - T6s;
 
1597
                              T70 = KP707106781 * (T6Y + T6Z);
 
1598
                              T82 = KP707106781 * (T6Z - T6Y);
 
1599
                         }
 
1600
                         T6A = T6y - T6z;
 
1601
                         T6F = T6B - T6E;
 
1602
                         T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
 
1603
                         T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
 
1604
                         {
 
1605
                              E T72, T73, T6L, T6Q;
 
1606
                              T72 = T6y + T6z;
 
1607
                              T73 = T6B + T6E;
 
1608
                              T74 = FMA(KP382683432, T72, KP923879532 * T73);
 
1609
                              T7a = FNMS(KP382683432, T73, KP923879532 * T72);
 
1610
                              T6L = T6H - T6K;
 
1611
                              T6Q = T6O - T6P;
 
1612
                              T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
 
1613
                              T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
 
1614
                         }
 
1615
                    }
 
1616
                    {
 
1617
                         E T6v, T6S, T81, T84;
 
1618
                         T6v = T6j + T6u;
 
1619
                         T6S = T6G + T6R;
 
1620
                         iio[-WS(ios, 22)] = T6v - T6S;
 
1621
                         rio[WS(ios, 6)] = T6v + T6S;
 
1622
                         T81 = T6U + T6V;
 
1623
                         T84 = T82 + T83;
 
1624
                         rio[WS(ios, 22)] = T81 - T84;
 
1625
                         iio[-WS(ios, 6)] = T81 + T84;
 
1626
                    }
 
1627
                    {
 
1628
                         E T6T, T6W, T85, T86;
 
1629
                         T6T = T6j - T6u;
 
1630
                         T6W = T6U - T6V;
 
1631
                         iio[-WS(ios, 30)] = T6T - T6W;
 
1632
                         rio[WS(ios, 14)] = T6T + T6W;
 
1633
                         T85 = T6R - T6G;
 
1634
                         T86 = T83 - T82;
 
1635
                         rio[WS(ios, 30)] = T85 - T86;
 
1636
                         iio[-WS(ios, 14)] = T85 + T86;
 
1637
                    }
 
1638
                    {
 
1639
                         E T71, T78, T7T, T7Y;
 
1640
                         T71 = T6X + T70;
 
1641
                         T78 = T74 + T77;
 
1642
                         iio[-WS(ios, 18)] = T71 - T78;
 
1643
                         rio[WS(ios, 2)] = T71 + T78;
 
1644
                         T7T = T7a + T7b;
 
1645
                         T7Y = T7U + T7X;
 
1646
                         rio[WS(ios, 18)] = T7T - T7Y;
 
1647
                         iio[-WS(ios, 2)] = T7T + T7Y;
 
1648
                    }
 
1649
                    {
 
1650
                         E T79, T7c, T7Z, T80;
 
1651
                         T79 = T6X - T70;
 
1652
                         T7c = T7a - T7b;
 
1653
                         iio[-WS(ios, 26)] = T79 - T7c;
 
1654
                         rio[WS(ios, 10)] = T79 + T7c;
 
1655
                         T7Z = T77 - T74;
 
1656
                         T80 = T7X - T7U;
 
1657
                         rio[WS(ios, 26)] = T7Z - T80;
 
1658
                         iio[-WS(ios, 10)] = T7Z + T80;
 
1659
                    }
 
1660
               }
 
1661
               {
 
1662
                    E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57;
 
1663
                    E T5b, T3Q, T8p;
 
1664
                    T3Q = KP707106781 * (T3K - T3P);
 
1665
                    T3R = T3F - T3Q;
 
1666
                    T5d = T3F + T3Q;
 
1667
                    T8p = KP707106781 * (T5v - T5u);
 
1668
                    T8r = T8p + T8q;
 
1669
                    T8x = T8q - T8p;
 
1670
                    {
 
1671
                         E T42, T4d, T5l, T5m;
 
1672
                         T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
 
1673
                         T4d = FMA(KP382683432, T47, KP923879532 * T4c);
 
1674
                         T4e = T42 - T4d;
 
1675
                         T8o = T42 + T4d;
 
1676
                         T5l = T4L + T4W;
 
1677
                         T5m = T52 + T55;
 
1678
                         T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
 
1679
                         T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
 
1680
                    }
 
1681
                    {
 
1682
                         E T4w, T4F, T5e, T5f;
 
1683
                         T4w = T4k - T4v;
 
1684
                         T4F = T4B - T4E;
 
1685
                         T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
 
1686
                         T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
 
1687
                         T5e = FMA(KP923879532, T3W, KP382683432 * T41);
 
1688
                         T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
 
1689
                         T5g = T5e + T5f;
 
1690
                         T8w = T5f - T5e;
 
1691
                    }
 
1692
                    {
 
1693
                         E T5i, T5j, T4X, T56;
 
1694
                         T5i = T4k + T4v;
 
1695
                         T5j = T4B + T4E;
 
1696
                         T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
 
1697
                         T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
 
1698
                         T4X = T4L - T4W;
 
1699
                         T56 = T52 - T55;
 
1700
                         T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
 
1701
                         T5b = FMA(KP195090322, T56, KP980785280 * T4X);
 
1702
                    }
 
1703
                    {
 
1704
                         E T4f, T58, T8v, T8y;
 
1705
                         T4f = T3R + T4e;
 
1706
                         T58 = T4G + T57;
 
1707
                         iio[-WS(ios, 23)] = T4f - T58;
 
1708
                         rio[WS(ios, 7)] = T4f + T58;
 
1709
                         T8v = T5a + T5b;
 
1710
                         T8y = T8w + T8x;
 
1711
                         rio[WS(ios, 23)] = T8v - T8y;
 
1712
                         iio[-WS(ios, 7)] = T8v + T8y;
 
1713
                    }
 
1714
                    {
 
1715
                         E T59, T5c, T8z, T8A;
 
1716
                         T59 = T3R - T4e;
 
1717
                         T5c = T5a - T5b;
 
1718
                         iio[-WS(ios, 31)] = T59 - T5c;
 
1719
                         rio[WS(ios, 15)] = T59 + T5c;
 
1720
                         T8z = T57 - T4G;
 
1721
                         T8A = T8x - T8w;
 
1722
                         rio[WS(ios, 31)] = T8z - T8A;
 
1723
                         iio[-WS(ios, 15)] = T8z + T8A;
 
1724
                    }
 
1725
                    {
 
1726
                         E T5h, T5o, T8n, T8s;
 
1727
                         T5h = T5d + T5g;
 
1728
                         T5o = T5k + T5n;
 
1729
                         iio[-WS(ios, 19)] = T5h - T5o;
 
1730
                         rio[WS(ios, 3)] = T5h + T5o;
 
1731
                         T8n = T5q + T5r;
 
1732
                         T8s = T8o + T8r;
 
1733
                         rio[WS(ios, 19)] = T8n - T8s;
 
1734
                         iio[-WS(ios, 3)] = T8n + T8s;
 
1735
                    }
 
1736
                    {
 
1737
                         E T5p, T5s, T8t, T8u;
 
1738
                         T5p = T5d - T5g;
 
1739
                         T5s = T5q - T5r;
 
1740
                         iio[-WS(ios, 27)] = T5p - T5s;
 
1741
                         rio[WS(ios, 11)] = T5p + T5s;
 
1742
                         T8t = T5n - T5k;
 
1743
                         T8u = T8r - T8o;
 
1744
                         rio[WS(ios, 27)] = T8t - T8u;
 
1745
                         iio[-WS(ios, 11)] = T8t + T8u;
 
1746
                    }
 
1747
               }
 
1748
               {
 
1749
                    E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T;
 
1750
                    E T5X, T5w, T89;
 
1751
                    T5w = KP707106781 * (T5u + T5v);
 
1752
                    T5x = T5t - T5w;
 
1753
                    T5Z = T5t + T5w;
 
1754
                    T89 = KP707106781 * (T3K + T3P);
 
1755
                    T8d = T89 + T8c;
 
1756
                    T8j = T8c - T89;
 
1757
                    {
 
1758
                         E T5A, T5D, T67, T68;
 
1759
                         T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
 
1760
                         T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
 
1761
                         T5E = T5A - T5D;
 
1762
                         T88 = T5A + T5D;
 
1763
                         T67 = T5N + T5O;
 
1764
                         T68 = T5Q + T5R;
 
1765
                         T69 = FNMS(KP195090322, T68, KP980785280 * T67);
 
1766
                         T6d = FMA(KP195090322, T67, KP980785280 * T68);
 
1767
                    }
 
1768
                    {
 
1769
                         E T5I, T5L, T60, T61;
 
1770
                         T5I = T5G - T5H;
 
1771
                         T5L = T5J - T5K;
 
1772
                         T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
 
1773
                         T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
 
1774
                         T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
 
1775
                         T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
 
1776
                         T62 = T60 + T61;
 
1777
                         T8i = T61 - T60;
 
1778
                    }
 
1779
                    {
 
1780
                         E T64, T65, T5P, T5S;
 
1781
                         T64 = T5G + T5H;
 
1782
                         T65 = T5J + T5K;
 
1783
                         T66 = FMA(KP980785280, T64, KP195090322 * T65);
 
1784
                         T6c = FNMS(KP195090322, T64, KP980785280 * T65);
 
1785
                         T5P = T5N - T5O;
 
1786
                         T5S = T5Q - T5R;
 
1787
                         T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
 
1788
                         T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
 
1789
                    }
 
1790
                    {
 
1791
                         E T5F, T5U, T8h, T8k;
 
1792
                         T5F = T5x + T5E;
 
1793
                         T5U = T5M + T5T;
 
1794
                         iio[-WS(ios, 21)] = T5F - T5U;
 
1795
                         rio[WS(ios, 5)] = T5F + T5U;
 
1796
                         T8h = T5W + T5X;
 
1797
                         T8k = T8i + T8j;
 
1798
                         rio[WS(ios, 21)] = T8h - T8k;
 
1799
                         iio[-WS(ios, 5)] = T8h + T8k;
 
1800
                    }
 
1801
                    {
 
1802
                         E T5V, T5Y, T8l, T8m;
 
1803
                         T5V = T5x - T5E;
 
1804
                         T5Y = T5W - T5X;
 
1805
                         iio[-WS(ios, 29)] = T5V - T5Y;
 
1806
                         rio[WS(ios, 13)] = T5V + T5Y;
 
1807
                         T8l = T5T - T5M;
 
1808
                         T8m = T8j - T8i;
 
1809
                         rio[WS(ios, 29)] = T8l - T8m;
 
1810
                         iio[-WS(ios, 13)] = T8l + T8m;
 
1811
                    }
 
1812
                    {
 
1813
                         E T63, T6a, T87, T8e;
 
1814
                         T63 = T5Z + T62;
 
1815
                         T6a = T66 + T69;
 
1816
                         iio[-WS(ios, 17)] = T63 - T6a;
 
1817
                         rio[WS(ios, 1)] = T63 + T6a;
 
1818
                         T87 = T6c + T6d;
 
1819
                         T8e = T88 + T8d;
 
1820
                         rio[WS(ios, 17)] = T87 - T8e;
 
1821
                         iio[-WS(ios, 1)] = T87 + T8e;
 
1822
                    }
 
1823
                    {
 
1824
                         E T6b, T6e, T8f, T8g;
 
1825
                         T6b = T5Z - T62;
 
1826
                         T6e = T6c - T6d;
 
1827
                         iio[-WS(ios, 25)] = T6b - T6e;
 
1828
                         rio[WS(ios, 9)] = T6b + T6e;
 
1829
                         T8f = T69 - T66;
 
1830
                         T8g = T8d - T88;
 
1831
                         rio[WS(ios, 25)] = T8f - T8g;
 
1832
                         iio[-WS(ios, 9)] = T8f + T8g;
 
1833
                    }
 
1834
               }
 
1835
          }
 
1836
     }
 
1837
     return W;
 
1838
}
 
1839
 
 
1840
static const tw_instr twinstr[] = {
 
1841
     {TW_CEXP, 0, 1},
 
1842
     {TW_CEXP, 0, 3},
 
1843
     {TW_CEXP, 0, 9},
 
1844
     {TW_CEXP, 0, 27},
 
1845
     {TW_NEXT, 1, 0}
 
1846
};
 
1847
 
 
1848
/*
 * Descriptor passed to khc2hc_register for the hf2_32 codelet.
 *
 * NOTE(review): the per-field remarks below are inferred from the values
 * only; confirm them against the hc2hc_desc declaration.
 */
static const hc2hc_desc desc = {
     32,                        /* presumably the radix (the codelet is generated with -n 32) */
     "hf2_32",                  /* codelet name string */
     twinstr,                   /* twiddle instruction table defined above */
     &GENUS,                    /* genus object supplied from outside this block */
     {376, 168, 112, 0},        /* presumably operation counts -- TODO confirm order */
     0,
     0,
     0
};
 
1849
 
 
1850
/*
 * Registration entry point for the hf2_32 codelet.
 *
 * Passes the hf2_32 kernel function together with its descriptor to
 * khc2hc_register on the supplied planner.
 *
 * p: planner the codelet is registered with; it is forwarded unchanged.
 */
void X(codelet_hf2_32) (planner *p)
{
     X(khc2hc_register) (p, hf2_32, &desc);
}
 
1853
#endif                          /* HAVE_FMA */