~ubuntu-branches/ubuntu/maverick/blender/maverick

« back to all changes in this revision

Viewing changes to extern/fftw/rdft/codelets/hc2r/hb_32.c

  • Committer: Bazaar Package Importer
  • Author(s): Khashayar Naderehvandi, Alessio Treglia
  • Date: 2009-01-22 16:53:59 UTC
  • mfrom: (14.1.1 experimental)
  • Revision ID: james.westby@ubuntu.com-20090122165359-v0996tn7fbit64ni
Tags: 2.48a+dfsg-1ubuntu1
[ Khashayar Naderehvandi ]
* Merge from debian experimental (LP: #320045), Ubuntu remaining changes:
  - Add patch correcting header file locations.
  - Add libvorbis-dev and libgsm1-dev to Build-Depends.
  - Use avcodec_decode_audio2() in source/blender/src/hddaudio.c

[ Alessio Treglia ]
* Add missing previous changelog entries.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2006 Matteo Frigo
 
3
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Sun Jul  2 16:31:42 EDT 2006 */
 
23
 
 
24
#include "codelet-rdft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */
 
29
 
 
30
/*
 
31
 * This function contains 434 FP additions, 260 FP multiplications,
 
32
 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
 
33
 * 141 stack variables, and 128 memory accesses
 
34
 */
 
35
/*
 
36
 * Generator Id's : 
 
37
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
38
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
39
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
 
40
 */
 
41
 
 
42
#include "hb.h"
 
43
 
 
44
static const R *hb_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
 
45
{
 
46
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
 
47
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
 
48
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
 
49
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
 
50
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
51
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
 
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
53
     INT i;
 
54
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62, MAKE_VOLATILE_STRIDE(ios)) {
 
55
          E T86, T81, T8b, T7Y, T7Z, T82, T80, T83, T88, T8c, T87;
 
56
          {
 
57
               E T3y, Tf, T8x, T7k, T8k, T7N, T2Q, T1a, T33, T2j, T6N, T50, T6A, T63, T4w;
 
58
               E T3U, TZ, T4q, T26, T2p, T8q, T7F, T37, T2Y, T6H, T5K, T8r, T7C, T3X, T3M;
 
59
               E T6G, T5R, Tu, T3R, T2m, T2R, T1t, T34, T8y, T7Q, T4x, T3B, T6O, T66, T8l;
 
60
               E T7r, T6B, T5f, TK, T4p, T5x, T1N, T2o, T8n, T7y, T5u, T36, T2V, T6E, T5r;
 
61
               E T8o, T7v, T3W, T3H, T5Q, T5N;
 
62
               {
 
63
                    E T5Z, T62, T3T, T3S;
 
64
                    {
 
65
                         E T4Q, T3, T5Y, T2e, T5X, T6, T4R, T2h, Td, T18, T61, T4V, Ta, T4X, T4W;
 
66
                         E T15;
 
67
                         {
 
68
                              E T4, T5, T2f, T2g;
 
69
                              {
 
70
                                   E T1, T2, T2c, T2d;
 
71
                                   T1 = rio[0];
 
72
                                   T2 = iio[-WS(ios, 16)];
 
73
                                   T2c = iio[0];
 
74
                                   T2d = rio[WS(ios, 16)];
 
75
                                   T4 = rio[WS(ios, 8)];
 
76
                                   T4Q = T1 - T2;
 
77
                                   T3 = T1 + T2;
 
78
                                   T5Y = T2c + T2d;
 
79
                                   T2e = T2c - T2d;
 
80
                                   T5 = iio[-WS(ios, 24)];
 
81
                              }
 
82
                              T2f = iio[-WS(ios, 8)];
 
83
                              T2g = rio[WS(ios, 24)];
 
84
                              {
 
85
                                   E Tb, Tc, T16, T17;
 
86
                                   Tb = iio[-WS(ios, 28)];
 
87
                                   T5X = T4 - T5;
 
88
                                   T6 = T4 + T5;
 
89
                                   T4R = T2f + T2g;
 
90
                                   T2h = T2f - T2g;
 
91
                                   Tc = rio[WS(ios, 12)];
 
92
                                   T16 = iio[-WS(ios, 12)];
 
93
                                   T17 = rio[WS(ios, 28)];
 
94
                                   {
 
95
                                        E T8, T4T, T4U, T9, T13, T14;
 
96
                                        T8 = rio[WS(ios, 4)];
 
97
                                        Td = Tb + Tc;
 
98
                                        T4T = Tb - Tc;
 
99
                                        T18 = T16 - T17;
 
100
                                        T4U = T17 + T16;
 
101
                                        T9 = iio[-WS(ios, 20)];
 
102
                                        T13 = iio[-WS(ios, 4)];
 
103
                                        T14 = rio[WS(ios, 20)];
 
104
                                        T61 = T4T + T4U;
 
105
                                        T4V = T4T - T4U;
 
106
                                        Ta = T8 + T9;
 
107
                                        T4X = T8 - T9;
 
108
                                        T4W = T13 + T14;
 
109
                                        T15 = T13 - T14;
 
110
                                   }
 
111
                              }
 
112
                         }
 
113
                         {
 
114
                              E T12, T2b, T4Z, T4S, T7L, T7M, T19, T2i;
 
115
                              {
 
116
                                   E T7, T60, Te, T7i, T7j, T4Y;
 
117
                                   T12 = T3 - T6;
 
118
                                   T7 = T3 + T6;
 
119
                                   T4Y = T4W - T4X;
 
120
                                   T60 = T4X + T4W;
 
121
                                   Te = Ta + Td;
 
122
                                   T2b = Td - Ta;
 
123
                                   T5Z = T5X + T5Y;
 
124
                                   T7i = T5Y - T5X;
 
125
                                   T7j = T4Y + T4V;
 
126
                                   T4Z = T4V - T4Y;
 
127
                                   T4S = T4Q - T4R;
 
128
                                   T7L = T4Q + T4R;
 
129
                                   T3y = T7 - Te;
 
130
                                   Tf = T7 + Te;
 
131
                                   T8x = FNMS(KP707106781, T7j, T7i);
 
132
                                   T7k = FMA(KP707106781, T7j, T7i);
 
133
                                   T7M = T60 + T61;
 
134
                                   T62 = T60 - T61;
 
135
                                   T19 = T15 - T18;
 
136
                                   T3T = T15 + T18;
 
137
                              }
 
138
                              T8k = FNMS(KP707106781, T7M, T7L);
 
139
                              T7N = FMA(KP707106781, T7M, T7L);
 
140
                              T2Q = T12 - T19;
 
141
                              T1a = T12 + T19;
 
142
                              T2i = T2e - T2h;
 
143
                              T3S = T2e + T2h;
 
144
                              T33 = T2i - T2b;
 
145
                              T2j = T2b + T2i;
 
146
                              T6N = FNMS(KP707106781, T4Z, T4S);
 
147
                              T50 = FMA(KP707106781, T4Z, T4S);
 
148
                         }
 
149
                    }
 
150
                    {
 
151
                         E T5A, T5L, TR, T1O, T5M, T5B, T3J, T24, T5F, T5O, TY, T1X, T5I, T5P, T1V;
 
152
                         E T3K;
 
153
                         {
 
154
                              E TL, TM, TO, TP, T20, T23;
 
155
                              TL = iio[-WS(ios, 31)];
 
156
                              T6A = FNMS(KP707106781, T62, T5Z);
 
157
                              T63 = FMA(KP707106781, T62, T5Z);
 
158
                              T4w = T3T + T3S;
 
159
                              T3U = T3S - T3T;
 
160
                              TM = rio[WS(ios, 15)];
 
161
                              TO = rio[WS(ios, 7)];
 
162
                              TP = iio[-WS(ios, 23)];
 
163
                              {
 
164
                                   E T1Y, TN, TQ, T1Z, T21, T22;
 
165
                                   T1Y = iio[-WS(ios, 15)];
 
166
                                   T5A = TL - TM;
 
167
                                   TN = TL + TM;
 
168
                                   T5L = TO - TP;
 
169
                                   TQ = TO + TP;
 
170
                                   T1Z = rio[WS(ios, 31)];
 
171
                                   T21 = iio[-WS(ios, 7)];
 
172
                                   T22 = rio[WS(ios, 23)];
 
173
                                   TR = TN + TQ;
 
174
                                   T1O = TN - TQ;
 
175
                                   T5M = T1Z + T1Y;
 
176
                                   T20 = T1Y - T1Z;
 
177
                                   T23 = T21 - T22;
 
178
                                   T5B = T21 + T22;
 
179
                              }
 
180
                              {
 
181
                                   E TV, T5D, TU, T5E, T1R, TW, T1S, T1T;
 
182
                                   {
 
183
                                        E TS, TT, T1P, T1Q;
 
184
                                        TS = rio[WS(ios, 3)];
 
185
                                        T3J = T20 + T23;
 
186
                                        T24 = T20 - T23;
 
187
                                        TT = iio[-WS(ios, 19)];
 
188
                                        T1P = iio[-WS(ios, 3)];
 
189
                                        T1Q = rio[WS(ios, 19)];
 
190
                                        TV = iio[-WS(ios, 27)];
 
191
                                        T5D = TS - TT;
 
192
                                        TU = TS + TT;
 
193
                                        T5E = T1P + T1Q;
 
194
                                        T1R = T1P - T1Q;
 
195
                                        TW = rio[WS(ios, 11)];
 
196
                                        T1S = iio[-WS(ios, 11)];
 
197
                                        T1T = rio[WS(ios, 27)];
 
198
                                   }
 
199
                                   {
 
200
                                        E T5G, TX, T5H, T1U;
 
201
                                        T5F = T5D - T5E;
 
202
                                        T5O = T5D + T5E;
 
203
                                        T5G = TV - TW;
 
204
                                        TX = TV + TW;
 
205
                                        T5H = T1T + T1S;
 
206
                                        T1U = T1S - T1T;
 
207
                                        TY = TU + TX;
 
208
                                        T1X = TX - TU;
 
209
                                        T5I = T5G - T5H;
 
210
                                        T5P = T5G + T5H;
 
211
                                        T1V = T1R - T1U;
 
212
                                        T3K = T1R + T1U;
 
213
                                   }
 
214
                              }
 
215
                         }
 
216
                         {
 
217
                              E T3I, T2W, T2X, T3L, T5C, T7D, T7E, T1W, T25, T5J, T7B, T7A;
 
218
                              T3I = TR - TY;
 
219
                              TZ = TR + TY;
 
220
                              T1W = T1O + T1V;
 
221
                              T2W = T1O - T1V;
 
222
                              T2X = T24 - T1X;
 
223
                              T25 = T1X + T24;
 
224
                              T4q = T3K + T3J;
 
225
                              T3L = T3J - T3K;
 
226
                              T5C = T5A - T5B;
 
227
                              T7D = T5A + T5B;
 
228
                              T7E = T5O + T5P;
 
229
                              T5Q = T5O - T5P;
 
230
                              T26 = FNMS(KP414213562, T25, T1W);
 
231
                              T2p = FMA(KP414213562, T1W, T25);
 
232
                              T8q = FNMS(KP707106781, T7E, T7D);
 
233
                              T7F = FMA(KP707106781, T7E, T7D);
 
234
                              T5J = T5F + T5I;
 
235
                              T7B = T5F - T5I;
 
236
                              T5N = T5L - T5M;
 
237
                              T7A = T5L + T5M;
 
238
                              T37 = FNMS(KP414213562, T2W, T2X);
 
239
                              T2Y = FMA(KP414213562, T2X, T2W);
 
240
                              T6H = FNMS(KP707106781, T5J, T5C);
 
241
                              T5K = FMA(KP707106781, T5J, T5C);
 
242
                              T8r = FNMS(KP707106781, T7B, T7A);
 
243
                              T7C = FMA(KP707106781, T7B, T7A);
 
244
                              T3X = T3I + T3L;
 
245
                              T3M = T3I - T3L;
 
246
                         }
 
247
                    }
 
248
               }
 
249
               {
 
250
                    E T7n, T7q, T57, T5e;
 
251
                    {
 
252
                         E T56, T7l, T1b, Tm, T7m, T53, T3z, T1i, T58, Tp, T1o, T5c, T1n, T5b, Ts;
 
253
                         E T1p;
 
254
                         {
 
255
                              E T51, Ti, T1f, T55, T1e, T54, Tl, T1g;
 
256
                              {
 
257
                                   E T1c, T1d, Tg, Th, Tj, Tk;
 
258
                                   Tg = rio[WS(ios, 2)];
 
259
                                   Th = iio[-WS(ios, 18)];
 
260
                                   T1c = iio[-WS(ios, 2)];
 
261
                                   T6G = FNMS(KP707106781, T5Q, T5N);
 
262
                                   T5R = FMA(KP707106781, T5Q, T5N);
 
263
                                   T51 = Tg - Th;
 
264
                                   Ti = Tg + Th;
 
265
                                   T1d = rio[WS(ios, 18)];
 
266
                                   Tj = rio[WS(ios, 10)];
 
267
                                   Tk = iio[-WS(ios, 26)];
 
268
                                   T1f = iio[-WS(ios, 10)];
 
269
                                   T55 = T1c + T1d;
 
270
                                   T1e = T1c - T1d;
 
271
                                   T54 = Tj - Tk;
 
272
                                   Tl = Tj + Tk;
 
273
                                   T1g = rio[WS(ios, 26)];
 
274
                              }
 
275
                              {
 
276
                                   E T1l, T1m, Tq, Tr;
 
277
                                   {
 
278
                                        E Tn, T1h, T52, To;
 
279
                                        Tn = iio[-WS(ios, 30)];
 
280
                                        T56 = T54 + T55;
 
281
                                        T7l = T55 - T54;
 
282
                                        T1b = Ti - Tl;
 
283
                                        Tm = Ti + Tl;
 
284
                                        T1h = T1f - T1g;
 
285
                                        T52 = T1f + T1g;
 
286
                                        To = rio[WS(ios, 14)];
 
287
                                        T1l = iio[-WS(ios, 14)];
 
288
                                        T7m = T51 + T52;
 
289
                                        T53 = T51 - T52;
 
290
                                        T3z = T1e + T1h;
 
291
                                        T1i = T1e - T1h;
 
292
                                        T58 = Tn - To;
 
293
                                        Tp = Tn + To;
 
294
                                        T1m = rio[WS(ios, 30)];
 
295
                                   }
 
296
                                   Tq = rio[WS(ios, 6)];
 
297
                                   Tr = iio[-WS(ios, 22)];
 
298
                                   T1o = iio[-WS(ios, 6)];
 
299
                                   T5c = T1m + T1l;
 
300
                                   T1n = T1l - T1m;
 
301
                                   T5b = Tq - Tr;
 
302
                                   Ts = Tq + Tr;
 
303
                                   T1p = rio[WS(ios, 22)];
 
304
                              }
 
305
                         }
 
306
                         {
 
307
                              E T5d, T5a, T3A, T64, T65;
 
308
                              {
 
309
                                   E T2k, T1j, T7o, T1k, Tt, T1q, T59, T7p, T1r;
 
310
                                   T2k = T1i - T1b;
 
311
                                   T1j = T1b + T1i;
 
312
                                   T5d = T5b - T5c;
 
313
                                   T7o = T5b + T5c;
 
314
                                   T1k = Tp - Ts;
 
315
                                   Tt = Tp + Ts;
 
316
                                   T1q = T1o - T1p;
 
317
                                   T59 = T1o + T1p;
 
318
                                   T7p = T58 + T59;
 
319
                                   T5a = T58 - T59;
 
320
                                   T3A = T1n + T1q;
 
321
                                   T1r = T1n - T1q;
 
322
                                   {
 
323
                                        E T7O, T7P, T1s, T2l;
 
324
                                        T7n = FNMS(KP414213562, T7m, T7l);
 
325
                                        T7O = FMA(KP414213562, T7l, T7m);
 
326
                                        T7P = FMA(KP414213562, T7o, T7p);
 
327
                                        T7q = FNMS(KP414213562, T7p, T7o);
 
328
                                        T1s = T1k - T1r;
 
329
                                        T2l = T1k + T1r;
 
330
                                        Tu = Tm + Tt;
 
331
                                        T3R = Tt - Tm;
 
332
                                        T2m = T2k + T2l;
 
333
                                        T2R = T2l - T2k;
 
334
                                        T1t = T1j + T1s;
 
335
                                        T34 = T1j - T1s;
 
336
                                        T8y = T7O - T7P;
 
337
                                        T7Q = T7O + T7P;
 
338
                                   }
 
339
                              }
 
340
                              T57 = FNMS(KP414213562, T56, T53);
 
341
                              T64 = FMA(KP414213562, T53, T56);
 
342
                              T65 = FNMS(KP414213562, T5a, T5d);
 
343
                              T5e = FMA(KP414213562, T5d, T5a);
 
344
                              T4x = T3z + T3A;
 
345
                              T3B = T3z - T3A;
 
346
                              T6O = T64 - T65;
 
347
                              T66 = T64 + T65;
 
348
                         }
 
349
                    }
 
350
                    {
 
351
                         E T5h, T5s, TC, T1v, T5t, T5i, T3E, T1L, T5p, T5v, TJ, T1E, T5w, T5m, T3F;
 
352
                         E T1C;
 
353
                         {
 
354
                              E Tw, Tx, Tz, TA, T1H, T1K;
 
355
                              Tw = rio[WS(ios, 1)];
 
356
                              T8l = T7n + T7q;
 
357
                              T7r = T7n - T7q;
 
358
                              T6B = T5e - T57;
 
359
                              T5f = T57 + T5e;
 
360
                              Tx = iio[-WS(ios, 17)];
 
361
                              Tz = rio[WS(ios, 9)];
 
362
                              TA = iio[-WS(ios, 25)];
 
363
                              {
 
364
                                   E T1F, Ty, TB, T1G, T1I, T1J;
 
365
                                   T1F = iio[-WS(ios, 1)];
 
366
                                   T5h = Tw - Tx;
 
367
                                   Ty = Tw + Tx;
 
368
                                   T5s = Tz - TA;
 
369
                                   TB = Tz + TA;
 
370
                                   T1G = rio[WS(ios, 17)];
 
371
                                   T1I = iio[-WS(ios, 9)];
 
372
                                   T1J = rio[WS(ios, 25)];
 
373
                                   TC = Ty + TB;
 
374
                                   T1v = Ty - TB;
 
375
                                   T5t = T1F + T1G;
 
376
                                   T1H = T1F - T1G;
 
377
                                   T1K = T1I - T1J;
 
378
                                   T5i = T1I + T1J;
 
379
                              }
 
380
                              {
 
381
                                   E TG, T5o, TF, T5n, T1y, TH, T1z, T1A;
 
382
                                   {
 
383
                                        E TD, TE, T1w, T1x;
 
384
                                        TD = rio[WS(ios, 5)];
 
385
                                        T3E = T1H + T1K;
 
386
                                        T1L = T1H - T1K;
 
387
                                        TE = iio[-WS(ios, 21)];
 
388
                                        T1w = iio[-WS(ios, 5)];
 
389
                                        T1x = rio[WS(ios, 21)];
 
390
                                        TG = iio[-WS(ios, 29)];
 
391
                                        T5o = TD - TE;
 
392
                                        TF = TD + TE;
 
393
                                        T5n = T1w + T1x;
 
394
                                        T1y = T1w - T1x;
 
395
                                        TH = rio[WS(ios, 13)];
 
396
                                        T1z = iio[-WS(ios, 13)];
 
397
                                        T1A = rio[WS(ios, 29)];
 
398
                                   }
 
399
                                   {
 
400
                                        E T5k, TI, T5l, T1B;
 
401
                                        T5p = T5n - T5o;
 
402
                                        T5v = T5o + T5n;
 
403
                                        T5k = TG - TH;
 
404
                                        TI = TG + TH;
 
405
                                        T5l = T1A + T1z;
 
406
                                        T1B = T1z - T1A;
 
407
                                        TJ = TF + TI;
 
408
                                        T1E = TI - TF;
 
409
                                        T5w = T5k + T5l;
 
410
                                        T5m = T5k - T5l;
 
411
                                        T3F = T1y + T1B;
 
412
                                        T1C = T1y - T1B;
 
413
                                   }
 
414
                              }
 
415
                         }
 
416
                         {
 
417
                              E T3D, T2U, T2T, T3G, T5j, T7w, T7x, T1M, T1D, T5q, T7u, T7t;
 
418
                              T3D = TC - TJ;
 
419
                              TK = TC + TJ;
 
420
                              T1M = T1E + T1L;
 
421
                              T2U = T1L - T1E;
 
422
                              T2T = T1v - T1C;
 
423
                              T1D = T1v + T1C;
 
424
                              T4p = T3F + T3E;
 
425
                              T3G = T3E - T3F;
 
426
                              T5j = T5h - T5i;
 
427
                              T7w = T5h + T5i;
 
428
                              T7x = T5v + T5w;
 
429
                              T5x = T5v - T5w;
 
430
                              T1N = FMA(KP414213562, T1M, T1D);
 
431
                              T2o = FNMS(KP414213562, T1D, T1M);
 
432
                              T8n = FNMS(KP707106781, T7x, T7w);
 
433
                              T7y = FMA(KP707106781, T7x, T7w);
 
434
                              T5q = T5m - T5p;
 
435
                              T7u = T5p + T5m;
 
436
                              T5u = T5s + T5t;
 
437
                              T7t = T5t - T5s;
 
438
                              T36 = FMA(KP414213562, T2T, T2U);
 
439
                              T2V = FNMS(KP414213562, T2U, T2T);
 
440
                              T6E = FNMS(KP707106781, T5q, T5j);
 
441
                              T5r = FMA(KP707106781, T5q, T5j);
 
442
                              T8o = FNMS(KP707106781, T7u, T7t);
 
443
                              T7v = FMA(KP707106781, T7u, T7t);
 
444
                              T3W = T3G - T3D;
 
445
                              T3H = T3D + T3G;
 
446
                         }
 
447
                    }
 
448
               }
 
449
               {
 
450
                    E T6D, T5y, T2C, T2H, T2E, T2z;
 
451
                    {
 
452
                         E T4o, T4v, T4y, T4r, T3C, T3Y, T3V, T4i, T3N, T4h, T4l, T4j, T4k;
 
453
                         {
 
454
                              E T4H, T4K, T4N, T4J, T4O;
 
455
                              {
 
456
                                   E Tv, T10, T4L, T4I, T4M;
 
457
                                   T4o = Tf - Tu;
 
458
                                   Tv = Tf + Tu;
 
459
                                   T10 = TK + TZ;
 
460
                                   T4v = TZ - TK;
 
461
                                   T4y = T4w - T4x;
 
462
                                   T4L = T4x + T4w;
 
463
                                   T6D = FNMS(KP707106781, T5x, T5u);
 
464
                                   T5y = FMA(KP707106781, T5x, T5u);
 
465
                                   T4I = Tv - T10;
 
466
                                   T4r = T4p - T4q;
 
467
                                   T4M = T4p + T4q;
 
468
                                   rio[0] = Tv + T10;
 
469
                                   T4H = W[30];
 
470
                                   T4K = W[31];
 
471
                                   iio[-WS(ios, 31)] = T4M + T4L;
 
472
                                   T4N = T4L - T4M;
 
473
                                   T4J = T4H * T4I;
 
474
                                   T4O = T4K * T4I;
 
475
                              }
 
476
                              {
 
477
                                   E T4d, T4a, T4e, T48, T49;
 
478
                                   T3C = T3y + T3B;
 
479
                                   T48 = T3y - T3B;
 
480
                                   T49 = T3X - T3W;
 
481
                                   T3Y = T3W + T3X;
 
482
                                   rio[WS(ios, 16)] = FNMS(T4K, T4N, T4J);
 
483
                                   iio[-WS(ios, 15)] = FMA(T4H, T4N, T4O);
 
484
                                   T3V = T3R + T3U;
 
485
                                   T4d = T3U - T3R;
 
486
                                   T4i = FNMS(KP707106781, T49, T48);
 
487
                                   T4a = FMA(KP707106781, T49, T48);
 
488
                                   T4e = T3H - T3M;
 
489
                                   T3N = T3H + T3M;
 
490
                                   {
 
491
                                        E T47, T4c, T4f, T4b, T4g;
 
492
                                        T47 = W[6];
 
493
                                        T4c = W[7];
 
494
                                        T4h = W[38];
 
495
                                        T4l = FNMS(KP707106781, T4e, T4d);
 
496
                                        T4f = FMA(KP707106781, T4e, T4d);
 
497
                                        T4b = T47 * T4a;
 
498
                                        T4g = T4c * T4a;
 
499
                                        T4j = T4h * T4i;
 
500
                                        T4k = W[39];
 
501
                                        rio[WS(ios, 4)] = FNMS(T4c, T4f, T4b);
 
502
                                        iio[-WS(ios, 27)] = FMA(T47, T4f, T4g);
 
503
                                   }
 
504
                              }
 
505
                         }
 
506
                         {
 
507
                              E T4s, T4C, T4F, T4z, T4m, T4n, T4u;
 
508
                              rio[WS(ios, 20)] = FNMS(T4k, T4l, T4j);
 
509
                              T4m = T4k * T4i;
 
510
                              iio[-WS(ios, 11)] = FMA(T4h, T4l, T4m);
 
511
                              T4s = T4o + T4r;
 
512
                              T4C = T4o - T4r;
 
513
                              T4F = T4y - T4v;
 
514
                              T4z = T4v + T4y;
 
515
                              T4n = W[46];
 
516
                              T4u = W[47];
 
517
                              {
 
518
                                   E T4B, T4E, T4t, T4A, T4D, T4G;
 
519
                                   T4B = W[14];
 
520
                                   T4E = W[15];
 
521
                                   T4t = T4n * T4s;
 
522
                                   T4A = T4u * T4s;
 
523
                                   T4D = T4B * T4C;
 
524
                                   T4G = T4E * T4C;
 
525
                                   rio[WS(ios, 24)] = FNMS(T4u, T4z, T4t);
 
526
                                   iio[-WS(ios, 7)] = FMA(T4n, T4z, T4A);
 
527
                                   rio[WS(ios, 8)] = FNMS(T4E, T4F, T4D);
 
528
                                   iio[-WS(ios, 23)] = FMA(T4B, T4F, T4G);
 
529
                              }
 
530
                         }
 
531
                         {
 
532
                              E T45, T41, T44, T43, T46;
 
533
                              {
 
534
                                   E T3x, T42, T3O, T3Z, T3Q, T3P, T40;
 
535
                                   T3x = W[54];
 
536
                                   T42 = FNMS(KP707106781, T3N, T3C);
 
537
                                   T3O = FMA(KP707106781, T3N, T3C);
 
538
                                   T45 = FNMS(KP707106781, T3Y, T3V);
 
539
                                   T3Z = FMA(KP707106781, T3Y, T3V);
 
540
                                   T3Q = W[55];
 
541
                                   T3P = T3x * T3O;
 
542
                                   T41 = W[22];
 
543
                                   T44 = W[23];
 
544
                                   T40 = T3Q * T3O;
 
545
                                   rio[WS(ios, 28)] = FNMS(T3Q, T3Z, T3P);
 
546
                                   T43 = T41 * T42;
 
547
                                   T46 = T44 * T42;
 
548
                                   iio[-WS(ios, 3)] = FMA(T3x, T3Z, T40);
 
549
                              }
 
550
                              {
 
551
                                   E T2B, T2q, T2G, T2A, T2n, T2F, T28, T2u, T27, T1u;
 
552
                                   T2B = T2p - T2o;
 
553
                                   T2q = T2o + T2p;
 
554
                                   rio[WS(ios, 12)] = FNMS(T44, T45, T43);
 
555
                                   iio[-WS(ios, 19)] = FMA(T41, T45, T46);
 
556
                                   T2G = T1N - T26;
 
557
                                   T27 = T1N + T26;
 
558
                                   T1u = FMA(KP707106781, T1t, T1a);
 
559
                                   T2A = FNMS(KP707106781, T1t, T1a);
 
560
                                   T2n = FMA(KP707106781, T2m, T2j);
 
561
                                   T2F = FNMS(KP707106781, T2m, T2j);
 
562
                                   T28 = FMA(KP923879532, T27, T1u);
 
563
                                   T2u = FNMS(KP923879532, T27, T1u);
 
564
                                   {
 
565
                                        E T2x, T2K, T2N, T2M, T2J, T2O;
 
566
                                        {
 
567
                                             E T2r, T2a, T11, T2s, T29;
 
568
                                             T2r = FMA(KP923879532, T2q, T2n);
 
569
                                             T2x = FNMS(KP923879532, T2q, T2n);
 
570
                                             T2a = W[59];
 
571
                                             T11 = W[58];
 
572
                                             T2C = FMA(KP923879532, T2B, T2A);
 
573
                                             T2K = FNMS(KP923879532, T2B, T2A);
 
574
                                             T2s = T2a * T28;
 
575
                                             T29 = T11 * T28;
 
576
                                             T2H = FMA(KP923879532, T2G, T2F);
 
577
                                             T2N = FNMS(KP923879532, T2G, T2F);
 
578
                                             T2M = W[43];
 
579
                                             iio[-WS(ios, 1)] = FMA(T11, T2r, T2s);
 
580
                                             rio[WS(ios, 30)] = FNMS(T2a, T2r, T29);
 
581
                                             T2J = W[42];
 
582
                                             T2O = T2M * T2K;
 
583
                                        }
 
584
                                        {
 
585
                                             E T2w, T2t, T2y, T2L, T2v;
 
586
                                             T2w = W[27];
 
587
                                             T2L = T2J * T2K;
 
588
                                             iio[-WS(ios, 9)] = FMA(T2J, T2N, T2O);
 
589
                                             T2t = W[26];
 
590
                                             T2y = T2w * T2u;
 
591
                                             rio[WS(ios, 22)] = FNMS(T2M, T2N, T2L);
 
592
                                             T2v = T2t * T2u;
 
593
                                             iio[-WS(ios, 17)] = FMA(T2t, T2x, T2y);
 
594
                                             T2E = W[11];
 
595
                                             T2z = W[10];
 
596
                                             rio[WS(ios, 14)] = FNMS(T2w, T2x, T2v);
 
597
                                        }
 
598
                                   }
 
599
                              }
 
600
                         }
 
601
                    }
 
602
                    {
 
603
                         E T74, T6Z, T79, T6W, T6X, T70, T6Y, T71, T76;
 
604
                         {
 
605
                              E T3p, T3m, T3h, T3q, T3l;
 
606
                              {
 
607
                                   E T38, T3j, T2Z, T3o, T3i, T2S, T3n, T35, T2I, T2D;
 
608
                                   T38 = T36 + T37;
 
609
                                   T3j = T36 - T37;
 
610
                                   T2I = T2E * T2C;
 
611
                                   T2D = T2z * T2C;
 
612
                                   T2Z = T2V + T2Y;
 
613
                                   T3o = T2Y - T2V;
 
614
                                   iio[-WS(ios, 25)] = FMA(T2z, T2H, T2I);
 
615
                                   rio[WS(ios, 6)] = FNMS(T2E, T2H, T2D);
 
616
                                   T3i = FNMS(KP707106781, T2R, T2Q);
 
617
                                   T2S = FMA(KP707106781, T2R, T2Q);
 
618
                                   T3n = FNMS(KP707106781, T34, T33);
 
619
                                   T35 = FMA(KP707106781, T34, T33);
 
620
                                   {
 
621
                                        E T3f, T3c, T3k, T3v, T3u, T3t;
 
622
                                        {
 
623
                                             E T39, T30, T3s, T32, T2P;
 
624
                                             T3f = FNMS(KP923879532, T38, T35);
 
625
                                             T39 = FMA(KP923879532, T38, T35);
 
626
                                             T3c = FNMS(KP923879532, T2Z, T2S);
 
627
                                             T30 = FMA(KP923879532, T2Z, T2S);
 
628
                                             T3s = FNMS(KP923879532, T3j, T3i);
 
629
                                             T3k = FMA(KP923879532, T3j, T3i);
 
630
                                             T32 = W[3];
 
631
                                             T2P = W[2];
 
632
                                             {
 
633
                                                  E T3r, T3w, T3a, T31;
 
634
                                                  T3v = FNMS(KP923879532, T3o, T3n);
 
635
                                                  T3p = FMA(KP923879532, T3o, T3n);
 
636
                                                  T3u = W[19];
 
637
                                                  T3a = T32 * T30;
 
638
                                                  T31 = T2P * T30;
 
639
                                                  T3r = W[18];
 
640
                                                  T3w = T3u * T3s;
 
641
                                                  iio[-WS(ios, 29)] = FMA(T2P, T39, T3a);
 
642
                                                  rio[WS(ios, 2)] = FNMS(T32, T39, T31);
 
643
                                                  T3t = T3r * T3s;
 
644
                                                  iio[-WS(ios, 21)] = FMA(T3r, T3v, T3w);
 
645
                                             }
 
646
                                        }
 
647
                                        {
 
648
                                             E T3e, T3b, T3g, T3d;
 
649
                                             T3e = W[35];
 
650
                                             rio[WS(ios, 10)] = FNMS(T3u, T3v, T3t);
 
651
                                             T3b = W[34];
 
652
                                             T3g = T3e * T3c;
 
653
                                             T3m = W[51];
 
654
                                             T3d = T3b * T3c;
 
655
                                             T3h = W[50];
 
656
                                             iio[-WS(ios, 13)] = FMA(T3b, T3f, T3g);
 
657
                                             T3q = T3m * T3k;
 
658
                                             rio[WS(ios, 18)] = FNMS(T3e, T3f, T3d);
 
659
                                             T3l = T3h * T3k;
 
660
                                        }
 
661
                                   }
 
662
                              }
 
663
                              {
 
664
                                   E T77, T6C, T6P, T72, T6Q, T6R, T73, T6J, T6F, T6I;
 
665
                                   T77 = FNMS(KP923879532, T6B, T6A);
 
666
                                   T6C = FMA(KP923879532, T6B, T6A);
 
667
                                   iio[-WS(ios, 5)] = FMA(T3h, T3p, T3q);
 
668
                                   T6P = FMA(KP923879532, T6O, T6N);
 
669
                                   T72 = FNMS(KP923879532, T6O, T6N);
 
670
                                   rio[WS(ios, 26)] = FNMS(T3m, T3p, T3l);
 
671
                                   T6Q = FMA(KP668178637, T6D, T6E);
 
672
                                   T6F = FNMS(KP668178637, T6E, T6D);
 
673
                                   T6I = FMA(KP668178637, T6H, T6G);
 
674
                                   T6R = FNMS(KP668178637, T6G, T6H);
 
675
                                   T73 = T6I - T6F;
 
676
                                   T6J = T6F + T6I;
 
677
                                   {
 
678
                                        E T6z, T7f, T6K, T6M, T6L, T7c, T6T, T7b, T7e, T78, T6S;
 
679
                                        T6z = W[56];
 
680
                                        T74 = FMA(KP831469612, T73, T72);
 
681
                                        T7f = FNMS(KP831469612, T73, T72);
 
682
                                        T6Z = FNMS(KP831469612, T6J, T6C);
 
683
                                        T6K = FMA(KP831469612, T6J, T6C);
 
684
                                        T78 = T6Q - T6R;
 
685
                                        T6S = T6Q + T6R;
 
686
                                        T6M = W[57];
 
687
                                        T6L = T6z * T6K;
 
688
                                        T79 = FMA(KP831469612, T78, T77);
 
689
                                        T7c = FNMS(KP831469612, T78, T77);
 
690
                                        T6W = FNMS(KP831469612, T6S, T6P);
 
691
                                        T6T = FMA(KP831469612, T6S, T6P);
 
692
                                        T7b = W[40];
 
693
                                        T7e = W[41];
 
694
                                        {
 
695
                                             E T6V, T6U, T7g, T7d;
 
696
                                             T6V = W[24];
 
697
                                             iio[-WS(ios, 2)] = FMA(T6M, T6T, T6L);
 
698
                                             T6U = T6z * T6T;
 
699
                                             T7g = T7b * T7f;
 
700
                                             T7d = T7b * T7c;
 
701
                                             T6X = T6V * T6W;
 
702
                                             rio[WS(ios, 29)] = FNMS(T6M, T6K, T6U);
 
703
                                             rio[WS(ios, 21)] = FNMS(T7e, T7c, T7g);
 
704
                                             iio[-WS(ios, 10)] = FMA(T7e, T7f, T7d);
 
705
                                             T70 = T6V * T6Z;
 
706
                                        }
 
707
                                        T6Y = W[25];
 
708
                                        T71 = W[8];
 
709
                                        T76 = W[9];
 
710
                                   }
 
711
                              }
 
712
                         }
 
713
                         {
 
714
                              E T6m, T6h, T6r, T6e, T6f, T6i, T6g, T6j, T6o;
 
715
                              {
 
716
                                   E T6p, T5g, T6k, T67, T68, T69, T6l, T5T;
 
717
                                   {
 
718
                                        E T5z, T5S, T7a, T75;
 
719
                                        T6p = FNMS(KP923879532, T5f, T50);
 
720
                                        T5g = FMA(KP923879532, T5f, T50);
 
721
                                        iio[-WS(ios, 18)] = FMA(T6Y, T6W, T70);
 
722
                                        rio[WS(ios, 13)] = FNMS(T6Y, T6Z, T6X);
 
723
                                        T7a = T71 * T79;
 
724
                                        T75 = T71 * T74;
 
725
                                        T6k = FNMS(KP923879532, T66, T63);
 
726
                                        T67 = FMA(KP923879532, T66, T63);
 
727
                                        T68 = FMA(KP198912367, T5r, T5y);
 
728
                                        T5z = FNMS(KP198912367, T5y, T5r);
 
729
                                        iio[-WS(ios, 26)] = FMA(T76, T74, T7a);
 
730
                                        rio[WS(ios, 5)] = FNMS(T76, T79, T75);
 
731
                                        T5S = FMA(KP198912367, T5R, T5K);
 
732
                                        T69 = FNMS(KP198912367, T5K, T5R);
 
733
                                        T6l = T5S - T5z;
 
734
                                        T5T = T5z + T5S;
 
735
                                   }
 
736
                                   {
 
737
                                        E T4P, T6x, T5U, T5W, T5V, T6u, T6b, T6t, T6w, T6q, T6a;
 
738
                                        T4P = W[0];
 
739
                                        T6m = FMA(KP980785280, T6l, T6k);
 
740
                                        T6x = FNMS(KP980785280, T6l, T6k);
 
741
                                        T6h = FNMS(KP980785280, T5T, T5g);
 
742
                                        T5U = FMA(KP980785280, T5T, T5g);
 
743
                                        T6q = T68 - T69;
 
744
                                        T6a = T68 + T69;
 
745
                                        T5W = W[1];
 
746
                                        T5V = T4P * T5U;
 
747
                                        T6r = FMA(KP980785280, T6q, T6p);
 
748
                                        T6u = FNMS(KP980785280, T6q, T6p);
 
749
                                        T6e = FNMS(KP980785280, T6a, T67);
 
750
                                        T6b = FMA(KP980785280, T6a, T67);
 
751
                                        T6t = W[16];
 
752
                                        T6w = W[17];
 
753
                                        {
 
754
                                             E T6d, T6c, T6y, T6v;
 
755
                                             T6d = W[32];
 
756
                                             rio[WS(ios, 1)] = FNMS(T5W, T6b, T5V);
 
757
                                             T6c = T4P * T6b;
 
758
                                             T6y = T6t * T6x;
 
759
                                             T6v = T6t * T6u;
 
760
                                             T6f = T6d * T6e;
 
761
                                             iio[-WS(ios, 30)] = FMA(T5W, T5U, T6c);
 
762
                                             iio[-WS(ios, 22)] = FMA(T6w, T6u, T6y);
 
763
                                             rio[WS(ios, 9)] = FNMS(T6w, T6x, T6v);
 
764
                                             T6i = T6d * T6h;
 
765
                                        }
 
766
                                        T6g = W[33];
 
767
                                        T6j = W[48];
 
768
                                        T6o = W[49];
 
769
                                   }
 
770
                              }
 
771
                              {
 
772
                                   E T8O, T8J, T8T, T8G, T8H, T8K, T8I, T8L, T8Q;
 
773
                                   {
 
774
                                        E T8R, T8m, T8M, T8z, T8A, T8B, T8N, T8t;
 
775
                                        {
 
776
                                             E T8p, T8s, T6s, T6n;
 
777
                                             T8R = FMA(KP923879532, T8l, T8k);
 
778
                                             T8m = FNMS(KP923879532, T8l, T8k);
 
779
                                             rio[WS(ios, 17)] = FNMS(T6g, T6e, T6i);
 
780
                                             iio[-WS(ios, 14)] = FMA(T6g, T6h, T6f);
 
781
                                             T6s = T6j * T6r;
 
782
                                             T6n = T6j * T6m;
 
783
                                             T8M = FNMS(KP923879532, T8y, T8x);
 
784
                                             T8z = FMA(KP923879532, T8y, T8x);
 
785
                                             T8A = FMA(KP668178637, T8n, T8o);
 
786
                                             T8p = FNMS(KP668178637, T8o, T8n);
 
787
                                             rio[WS(ios, 25)] = FNMS(T6o, T6m, T6s);
 
788
                                             iio[-WS(ios, 6)] = FMA(T6o, T6r, T6n);
 
789
                                             T8s = FNMS(KP668178637, T8r, T8q);
 
790
                                             T8B = FMA(KP668178637, T8q, T8r);
 
791
                                             T8N = T8s - T8p;
 
792
                                             T8t = T8p + T8s;
 
793
                                        }
 
794
                                        {
 
795
                                             E T8j, T8Z, T8u, T8w, T8v, T8W, T8D, T8V, T8Y, T8S, T8C;
 
796
                                             T8j = W[4];
 
797
                                             T8O = FMA(KP831469612, T8N, T8M);
 
798
                                             T8Z = FNMS(KP831469612, T8N, T8M);
 
799
                                             T8J = FNMS(KP831469612, T8t, T8m);
 
800
                                             T8u = FMA(KP831469612, T8t, T8m);
 
801
                                             T8S = T8A + T8B;
 
802
                                             T8C = T8A - T8B;
 
803
                                             T8w = W[5];
 
804
                                             T8v = T8j * T8u;
 
805
                                             T8T = FMA(KP831469612, T8S, T8R);
 
806
                                             T8W = FNMS(KP831469612, T8S, T8R);
 
807
                                             T8G = FNMS(KP831469612, T8C, T8z);
 
808
                                             T8D = FMA(KP831469612, T8C, T8z);
 
809
                                             T8V = W[20];
 
810
                                             T8Y = W[21];
 
811
                                             {
 
812
                                                  E T8F, T8E, T90, T8X;
 
813
                                                  T8F = W[36];
 
814
                                                  rio[WS(ios, 3)] = FNMS(T8w, T8D, T8v);
 
815
                                                  T8E = T8j * T8D;
 
816
                                                  T90 = T8V * T8Z;
 
817
                                                  T8X = T8V * T8W;
 
818
                                                  T8H = T8F * T8G;
 
819
                                                  iio[-WS(ios, 28)] = FMA(T8w, T8u, T8E);
 
820
                                                  iio[-WS(ios, 20)] = FMA(T8Y, T8W, T90);
 
821
                                                  rio[WS(ios, 11)] = FNMS(T8Y, T8Z, T8X);
 
822
                                                  T8K = T8F * T8J;
 
823
                                             }
 
824
                                             T8I = W[37];
 
825
                                             T8L = W[52];
 
826
                                             T8Q = W[53];
 
827
                                        }
 
828
                                   }
 
829
                                   {
 
830
                                        E T89, T7s, T84, T7R, T7S, T7T, T85, T7H;
 
831
                                        {
 
832
                                             E T7z, T7G, T8U, T8P;
 
833
                                             T89 = FNMS(KP923879532, T7r, T7k);
 
834
                                             T7s = FMA(KP923879532, T7r, T7k);
 
835
                                             rio[WS(ios, 19)] = FNMS(T8I, T8G, T8K);
 
836
                                             iio[-WS(ios, 12)] = FMA(T8I, T8J, T8H);
 
837
                                             T8U = T8L * T8T;
 
838
                                             T8P = T8L * T8O;
 
839
                                             T84 = FNMS(KP923879532, T7Q, T7N);
 
840
                                             T7R = FMA(KP923879532, T7Q, T7N);
 
841
                                             T7S = FMA(KP198912367, T7v, T7y);
 
842
                                             T7z = FNMS(KP198912367, T7y, T7v);
 
843
                                             rio[WS(ios, 27)] = FNMS(T8Q, T8O, T8U);
 
844
                                             iio[-WS(ios, 4)] = FMA(T8Q, T8T, T8P);
 
845
                                             T7G = FNMS(KP198912367, T7F, T7C);
 
846
                                             T7T = FMA(KP198912367, T7C, T7F);
 
847
                                             T85 = T7z + T7G;
 
848
                                             T7H = T7z - T7G;
 
849
                                        }
 
850
                                        {
 
851
                                             E T7h, T8h, T7I, T7K, T7J, T8e, T7V, T8d, T8g, T8a, T7U;
 
852
                                             T7h = W[60];
 
853
                                             T86 = FNMS(KP980785280, T85, T84);
 
854
                                             T8h = FMA(KP980785280, T85, T84);
 
855
                                             T81 = FNMS(KP980785280, T7H, T7s);
 
856
                                             T7I = FMA(KP980785280, T7H, T7s);
 
857
                                             T8a = T7S - T7T;
 
858
                                             T7U = T7S + T7T;
 
859
                                             T7K = W[61];
 
860
                                             T7J = T7h * T7I;
 
861
                                             T8b = FMA(KP980785280, T8a, T89);
 
862
                                             T8e = FNMS(KP980785280, T8a, T89);
 
863
                                             T7Y = FNMS(KP980785280, T7U, T7R);
 
864
                                             T7V = FMA(KP980785280, T7U, T7R);
 
865
                                             T8d = W[44];
 
866
                                             T8g = W[45];
 
867
                                             {
 
868
                                                  E T7X, T7W, T8i, T8f;
 
869
                                                  T7X = W[28];
 
870
                                                  iio[0] = FMA(T7K, T7V, T7J);
 
871
                                                  T7W = T7h * T7V;
 
872
                                                  T8i = T8d * T8h;
 
873
                                                  T8f = T8d * T8e;
 
874
                                                  T7Z = T7X * T7Y;
 
875
                                                  rio[WS(ios, 31)] = FNMS(T7K, T7I, T7W);
 
876
                                                  rio[WS(ios, 23)] = FNMS(T8g, T8e, T8i);
 
877
                                                  iio[-WS(ios, 8)] = FMA(T8g, T8h, T8f);
 
878
                                                  T82 = T7X * T81;
 
879
                                             }
 
880
                                             T80 = W[29];
 
881
                                             T83 = W[12];
 
882
                                             T88 = W[13];
 
883
                                        }
 
884
                                   }
 
885
                              }
 
886
                         }
 
887
                    }
 
888
               }
 
889
          }
 
890
          iio[-WS(ios, 16)] = FMA(T80, T7Y, T82);
 
891
          rio[WS(ios, 15)] = FNMS(T80, T81, T7Z);
 
892
          T8c = T83 * T8b;
 
893
          T87 = T83 * T86;
 
894
          iio[-WS(ios, 24)] = FMA(T88, T86, T8c);
 
895
          rio[WS(ios, 7)] = FNMS(T88, T8b, T87);
 
896
     }
 
897
     return W;
 
898
}
 
899
 
 
900
/*
 * Twiddle instruction table for the hb_32 codelet; referenced from the
 * codelet descriptor `desc`.  It holds two entries: a TW_FULL record with
 * the values 0 and 32, and a TW_NEXT record with the values 1 and 0.
 * NOTE(review): the field meanings come from the tw_instr declaration,
 * which is not in this file.  Presumably TW_FULL/32 requests the complete
 * twiddle set for a size-32 transform (the codelet body indexes W[] up to
 * W[61]) and TW_NEXT ends the list -- confirm against the declaration.
 */
static const tw_instr twinstr[] = {
 
901
     {TW_FULL, 0, 32},
 
902
     {TW_NEXT, 1, 0}
 
903
};
 
904
 
 
905
/*
 * Descriptor for the hb_32 codelet (HAVE_FMA build); its address is handed
 * to X(khc2hc_register) by X(codelet_hb_32).  Fields in order: 32, the name
 * string "hb_32", the twinstr table, &GENUS, the group {236, 62, 198, 0},
 * then three zeros.
 * NOTE(review): the hc2hc_desc layout is declared elsewhere.  Presumably 32
 * is the transform size (generator flag "-n 32") and {236, 62, 198, 0} are
 * operation counts (additions, multiplications, fused multiply-adds,
 * other) -- confirm against the hc2hc_desc definition.
 */
static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 };
 
906
 
 
907
/*
 * Registration entry point for the hb_32 codelet (HAVE_FMA build).
 * Passes the planner `p`, the hb_32 routine and the address of its
 * descriptor `desc` to X(khc2hc_register).  Returns nothing.
 */
void X(codelet_hb_32) (planner *p) {
 
908
     X(khc2hc_register) (p, hb_32, &desc);
 
909
}
 
910
#else                           /* HAVE_FMA */
 
911
 
 
912
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */
 
913
 
 
914
/*
 
915
 * This function contains 434 FP additions, 208 FP multiplications,
 
916
 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
 
917
 * 98 stack variables, and 128 memory accesses
 
918
 */
 
919
/*
 
920
 * Generator Id's : 
 
921
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 
922
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 
923
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
 
924
 */
 
925
 
 
926
#include "hb.h"
 
927
 
 
928
static const R *hb_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
 
929
{
 
930
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
 
931
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
 
932
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
 
933
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
 
934
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
 
935
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
 
936
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
 
937
     INT i;
 
938
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62, MAKE_VOLATILE_STRIDE(ios)) {
 
939
          E T5n, T6y, T77, T5u, Tf, T3i, T5x, T76, T3G, T47, T1a, T2I, T5k, T6z, T2o;
 
940
          E T2Y, Tu, T3D, T6D, T73, T6G, T74, T1j, T2d, T1s, T2e, T55, T5z, T5c, T5A;
 
941
          E T3l, T48, TK, T3n, T6L, T7t, T6O, T7s, T1D, T2L, T1M, T2M, T4w, T62, T4D;
 
942
          E T61, T3q, T41, TZ, T3s, T6S, T7w, T6V, T7v, T1W, T2O, T25, T2P, T4P, T64;
 
943
          E T4W, T65, T3v, T42;
 
944
          {
 
945
               E T3, T5l, T2j, T5t, T6, T5s, T2m, T5m, Ta, T5i, T15, T5h, Td, T5e, T18;
 
946
               E T5f;
 
947
               {
 
948
                    E T1, T2, T2h, T2i;
 
949
                    T1 = rio[0];
 
950
                    T2 = iio[-WS(ios, 16)];
 
951
                    T3 = T1 + T2;
 
952
                    T5l = T1 - T2;
 
953
                    T2h = iio[0];
 
954
                    T2i = rio[WS(ios, 16)];
 
955
                    T2j = T2h - T2i;
 
956
                    T5t = T2h + T2i;
 
957
               }
 
958
               {
 
959
                    E T4, T5, T2k, T2l;
 
960
                    T4 = rio[WS(ios, 8)];
 
961
                    T5 = iio[-WS(ios, 24)];
 
962
                    T6 = T4 + T5;
 
963
                    T5s = T4 - T5;
 
964
                    T2k = iio[-WS(ios, 8)];
 
965
                    T2l = rio[WS(ios, 24)];
 
966
                    T2m = T2k - T2l;
 
967
                    T5m = T2k + T2l;
 
968
               }
 
969
               {
 
970
                    E T8, T9, T13, T14;
 
971
                    T8 = rio[WS(ios, 4)];
 
972
                    T9 = iio[-WS(ios, 20)];
 
973
                    Ta = T8 + T9;
 
974
                    T5i = T8 - T9;
 
975
                    T13 = iio[-WS(ios, 4)];
 
976
                    T14 = rio[WS(ios, 20)];
 
977
                    T15 = T13 - T14;
 
978
                    T5h = T13 + T14;
 
979
               }
 
980
               {
 
981
                    E Tb, Tc, T16, T17;
 
982
                    Tb = iio[-WS(ios, 28)];
 
983
                    Tc = rio[WS(ios, 12)];
 
984
                    Td = Tb + Tc;
 
985
                    T5e = Tb - Tc;
 
986
                    T16 = iio[-WS(ios, 12)];
 
987
                    T17 = rio[WS(ios, 28)];
 
988
                    T18 = T16 - T17;
 
989
                    T5f = T17 + T16;
 
990
               }
 
991
               {
 
992
                    E T7, Te, T12, T19;
 
993
                    T5n = T5l - T5m;
 
994
                    T6y = T5t - T5s;
 
995
                    T77 = T5l + T5m;
 
996
                    T5u = T5s + T5t;
 
997
                    T7 = T3 + T6;
 
998
                    Te = Ta + Td;
 
999
                    Tf = T7 + Te;
 
1000
                    T3i = T7 - Te;
 
1001
                    {
 
1002
                         E T5v, T5w, T3E, T3F;
 
1003
                         T5v = T5i + T5h;
 
1004
                         T5w = T5e + T5f;
 
1005
                         T5x = KP707106781 * (T5v - T5w);
 
1006
                         T76 = KP707106781 * (T5v + T5w);
 
1007
                         T3E = T2j + T2m;
 
1008
                         T3F = T15 + T18;
 
1009
                         T3G = T3E - T3F;
 
1010
                         T47 = T3F + T3E;
 
1011
                    }
 
1012
                    T12 = T3 - T6;
 
1013
                    T19 = T15 - T18;
 
1014
                    T1a = T12 + T19;
 
1015
                    T2I = T12 - T19;
 
1016
                    {
 
1017
                         E T5g, T5j, T2g, T2n;
 
1018
                         T5g = T5e - T5f;
 
1019
                         T5j = T5h - T5i;
 
1020
                         T5k = KP707106781 * (T5g - T5j);
 
1021
                         T6z = KP707106781 * (T5j + T5g);
 
1022
                         T2g = Td - Ta;
 
1023
                         T2n = T2j - T2m;
 
1024
                         T2o = T2g + T2n;
 
1025
                         T2Y = T2n - T2g;
 
1026
                    }
 
1027
               }
 
1028
          }
 
1029
          {
 
1030
               E Ti, T4Z, T1e, T53, Tl, T52, T1h, T50, Tp, T56, T1n, T5a, Ts, T59, T1q;
 
1031
               E T57;
 
1032
               {
 
1033
                    E Tg, Th, T1c, T1d;
 
1034
                    Tg = rio[WS(ios, 2)];
 
1035
                    Th = iio[-WS(ios, 18)];
 
1036
                    Ti = Tg + Th;
 
1037
                    T4Z = Tg - Th;
 
1038
                    T1c = iio[-WS(ios, 2)];
 
1039
                    T1d = rio[WS(ios, 18)];
 
1040
                    T1e = T1c - T1d;
 
1041
                    T53 = T1c + T1d;
 
1042
               }
 
1043
               {
 
1044
                    E Tj, Tk, T1f, T1g;
 
1045
                    Tj = rio[WS(ios, 10)];
 
1046
                    Tk = iio[-WS(ios, 26)];
 
1047
                    Tl = Tj + Tk;
 
1048
                    T52 = Tj - Tk;
 
1049
                    T1f = iio[-WS(ios, 10)];
 
1050
                    T1g = rio[WS(ios, 26)];
 
1051
                    T1h = T1f - T1g;
 
1052
                    T50 = T1f + T1g;
 
1053
               }
 
1054
               {
 
1055
                    E Tn, To, T1l, T1m;
 
1056
                    Tn = iio[-WS(ios, 30)];
 
1057
                    To = rio[WS(ios, 14)];
 
1058
                    Tp = Tn + To;
 
1059
                    T56 = Tn - To;
 
1060
                    T1l = iio[-WS(ios, 14)];
 
1061
                    T1m = rio[WS(ios, 30)];
 
1062
                    T1n = T1l - T1m;
 
1063
                    T5a = T1m + T1l;
 
1064
               }
 
1065
               {
 
1066
                    E Tq, Tr, T1o, T1p;
 
1067
                    Tq = rio[WS(ios, 6)];
 
1068
                    Tr = iio[-WS(ios, 22)];
 
1069
                    Ts = Tq + Tr;
 
1070
                    T59 = Tq - Tr;
 
1071
                    T1o = iio[-WS(ios, 6)];
 
1072
                    T1p = rio[WS(ios, 22)];
 
1073
                    T1q = T1o - T1p;
 
1074
                    T57 = T1o + T1p;
 
1075
               }
 
1076
               {
 
1077
                    E Tm, Tt, T6B, T6C;
 
1078
                    Tm = Ti + Tl;
 
1079
                    Tt = Tp + Ts;
 
1080
                    Tu = Tm + Tt;
 
1081
                    T3D = Tt - Tm;
 
1082
                    T6B = T53 - T52;
 
1083
                    T6C = T4Z + T50;
 
1084
                    T6D = FNMS(KP382683432, T6C, KP923879532 * T6B);
 
1085
                    T73 = FMA(KP382683432, T6B, KP923879532 * T6C);
 
1086
               }
 
1087
               {
 
1088
                    E T6E, T6F, T1b, T1i;
 
1089
                    T6E = T56 + T57;
 
1090
                    T6F = T59 + T5a;
 
1091
                    T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
 
1092
                    T74 = FMA(KP923879532, T6E, KP382683432 * T6F);
 
1093
                    T1b = Ti - Tl;
 
1094
                    T1i = T1e - T1h;
 
1095
                    T1j = T1b + T1i;
 
1096
                    T2d = T1i - T1b;
 
1097
               }
 
1098
               {
 
1099
                    E T1k, T1r, T51, T54;
 
1100
                    T1k = Tp - Ts;
 
1101
                    T1r = T1n - T1q;
 
1102
                    T1s = T1k - T1r;
 
1103
                    T2e = T1k + T1r;
 
1104
                    T51 = T4Z - T50;
 
1105
                    T54 = T52 + T53;
 
1106
                    T55 = FNMS(KP382683432, T54, KP923879532 * T51);
 
1107
                    T5z = FMA(KP923879532, T54, KP382683432 * T51);
 
1108
               }
 
1109
               {
 
1110
                    E T58, T5b, T3j, T3k;
 
1111
                    T58 = T56 - T57;
 
1112
                    T5b = T59 - T5a;
 
1113
                    T5c = FMA(KP923879532, T58, KP382683432 * T5b);
 
1114
                    T5A = FNMS(KP382683432, T58, KP923879532 * T5b);
 
1115
                    T3j = T1e + T1h;
 
1116
                    T3k = T1q + T1n;
 
1117
                    T3l = T3j - T3k;
 
1118
                    T48 = T3j + T3k;
 
1119
               }
 
1120
          }
 
1121
          {
 
1122
               E Ty, T4t, T1H, T4y, TB, T4x, T1K, T4u, TI, T4B, T1B, T4o, TF, T4A, T1y;
 
1123
               E T4r;
 
1124
               {
 
1125
                    E Tw, Tx, T1I, T1J;
 
1126
                    Tw = rio[WS(ios, 1)];
 
1127
                    Tx = iio[-WS(ios, 17)];
 
1128
                    Ty = Tw + Tx;
 
1129
                    T4t = Tw - Tx;
 
1130
                    {
 
1131
                         E T1F, T1G, Tz, TA;
 
1132
                         T1F = iio[-WS(ios, 1)];
 
1133
                         T1G = rio[WS(ios, 17)];
 
1134
                         T1H = T1F - T1G;
 
1135
                         T4y = T1F + T1G;
 
1136
                         Tz = rio[WS(ios, 9)];
 
1137
                         TA = iio[-WS(ios, 25)];
 
1138
                         TB = Tz + TA;
 
1139
                         T4x = Tz - TA;
 
1140
                    }
 
1141
                    T1I = iio[-WS(ios, 9)];
 
1142
                    T1J = rio[WS(ios, 25)];
 
1143
                    T1K = T1I - T1J;
 
1144
                    T4u = T1I + T1J;
 
1145
                    {
 
1146
                         E TG, TH, T4m, T1z, T1A, T4n;
 
1147
                         TG = iio[-WS(ios, 29)];
 
1148
                         TH = rio[WS(ios, 13)];
 
1149
                         T4m = TG - TH;
 
1150
                         T1z = iio[-WS(ios, 13)];
 
1151
                         T1A = rio[WS(ios, 29)];
 
1152
                         T4n = T1A + T1z;
 
1153
                         TI = TG + TH;
 
1154
                         T4B = T4m + T4n;
 
1155
                         T1B = T1z - T1A;
 
1156
                         T4o = T4m - T4n;
 
1157
                    }
 
1158
                    {
 
1159
                         E TD, TE, T4q, T1w, T1x, T4p;
 
1160
                         TD = rio[WS(ios, 5)];
 
1161
                         TE = iio[-WS(ios, 21)];
 
1162
                         T4q = TD - TE;
 
1163
                         T1w = iio[-WS(ios, 5)];
 
1164
                         T1x = rio[WS(ios, 21)];
 
1165
                         T4p = T1w + T1x;
 
1166
                         TF = TD + TE;
 
1167
                         T4A = T4q + T4p;
 
1168
                         T1y = T1w - T1x;
 
1169
                         T4r = T4p - T4q;
 
1170
                    }
 
1171
               }
 
1172
               {
 
1173
                    E TC, TJ, T6J, T6K;
 
1174
                    TC = Ty + TB;
 
1175
                    TJ = TF + TI;
 
1176
                    TK = TC + TJ;
 
1177
                    T3n = TC - TJ;
 
1178
                    T6J = T4y - T4x;
 
1179
                    T6K = KP707106781 * (T4r + T4o);
 
1180
                    T6L = T6J + T6K;
 
1181
                    T7t = T6J - T6K;
 
1182
               }
 
1183
               {
 
1184
                    E T6M, T6N, T1v, T1C;
 
1185
                    T6M = KP707106781 * (T4A + T4B);
 
1186
                    T6N = T4t + T4u;
 
1187
                    T6O = T6M + T6N;
 
1188
                    T7s = T6N - T6M;
 
1189
                    T1v = Ty - TB;
 
1190
                    T1C = T1y - T1B;
 
1191
                    T1D = T1v + T1C;
 
1192
                    T2L = T1v - T1C;
 
1193
               }
 
1194
               {
 
1195
                    E T1E, T1L, T4s, T4v;
 
1196
                    T1E = TI - TF;
 
1197
                    T1L = T1H - T1K;
 
1198
                    T1M = T1E + T1L;
 
1199
                    T2M = T1L - T1E;
 
1200
                    T4s = KP707106781 * (T4o - T4r);
 
1201
                    T4v = T4t - T4u;
 
1202
                    T4w = T4s + T4v;
 
1203
                    T62 = T4v - T4s;
 
1204
               }
 
1205
               {
 
1206
                    E T4z, T4C, T3o, T3p;
 
1207
                    T4z = T4x + T4y;
 
1208
                    T4C = KP707106781 * (T4A - T4B);
 
1209
                    T4D = T4z + T4C;
 
1210
                    T61 = T4z - T4C;
 
1211
                    T3o = T1H + T1K;
 
1212
                    T3p = T1y + T1B;
 
1213
                    T3q = T3o - T3p;
 
1214
                    T41 = T3p + T3o;
 
1215
               }
 
1216
          }
 
1217
          {
 
1218
               E TN, T4T, T20, T4N, TQ, T4M, T23, T4U, TX, T4Q, T1U, T4K, TU, T4R, T1R;
 
1219
               E T4H;
 
1220
               {
 
1221
                    E TL, TM, T21, T22;
 
1222
                    TL = iio[-WS(ios, 31)];
 
1223
                    TM = rio[WS(ios, 15)];
 
1224
                    TN = TL + TM;
 
1225
                    T4T = TL - TM;
 
1226
                    {
 
1227
                         E T1Y, T1Z, TO, TP;
 
1228
                         T1Y = iio[-WS(ios, 15)];
 
1229
                         T1Z = rio[WS(ios, 31)];
 
1230
                         T20 = T1Y - T1Z;
 
1231
                         T4N = T1Z + T1Y;
 
1232
                         TO = rio[WS(ios, 7)];
 
1233
                         TP = iio[-WS(ios, 23)];
 
1234
                         TQ = TO + TP;
 
1235
                         T4M = TO - TP;
 
1236
                    }
 
1237
                    T21 = iio[-WS(ios, 7)];
 
1238
                    T22 = rio[WS(ios, 23)];
 
1239
                    T23 = T21 - T22;
 
1240
                    T4U = T21 + T22;
 
1241
                    {
 
1242
                         E TV, TW, T4I, T1S, T1T, T4J;
 
1243
                         TV = iio[-WS(ios, 27)];
 
1244
                         TW = rio[WS(ios, 11)];
 
1245
                         T4I = TV - TW;
 
1246
                         T1S = iio[-WS(ios, 11)];
 
1247
                         T1T = rio[WS(ios, 27)];
 
1248
                         T4J = T1T + T1S;
 
1249
                         TX = TV + TW;
 
1250
                         T4Q = T4I - T4J;
 
1251
                         T1U = T1S - T1T;
 
1252
                         T4K = T4I + T4J;
 
1253
                    }
 
1254
                    {
 
1255
                         E TS, TT, T4F, T1P, T1Q, T4G;
 
1256
                         TS = rio[WS(ios, 3)];
 
1257
                         TT = iio[-WS(ios, 19)];
 
1258
                         T4F = TS - TT;
 
1259
                         T1P = iio[-WS(ios, 3)];
 
1260
                         T1Q = rio[WS(ios, 19)];
 
1261
                         T4G = T1P + T1Q;
 
1262
                         TU = TS + TT;
 
1263
                         T4R = T4G - T4F;
 
1264
                         T1R = T1P - T1Q;
 
1265
                         T4H = T4F + T4G;
 
1266
                    }
 
1267
               }
 
1268
               {
 
1269
                    E TR, TY, T6Q, T6R;
 
1270
                    TR = TN + TQ;
 
1271
                    TY = TU + TX;
 
1272
                    TZ = TR + TY;
 
1273
                    T3s = TR - TY;
 
1274
                    T6Q = KP707106781 * (T4R + T4Q);
 
1275
                    T6R = T4M + T4N;
 
1276
                    T6S = T6Q - T6R;
 
1277
                    T7w = T6Q + T6R;
 
1278
               }
 
1279
               {
 
1280
                    E T6T, T6U, T1O, T1V;
 
1281
                    T6T = KP707106781 * (T4H + T4K);
 
1282
                    T6U = T4T + T4U;
 
1283
                    T6V = T6T + T6U;
 
1284
                    T7v = T6U - T6T;
 
1285
                    T1O = TN - TQ;
 
1286
                    T1V = T1R - T1U;
 
1287
                    T1W = T1O + T1V;
 
1288
                    T2O = T1O - T1V;
 
1289
               }
 
1290
               {
 
1291
                    E T1X, T24, T4L, T4O;
 
1292
                    T1X = TX - TU;
 
1293
                    T24 = T20 - T23;
 
1294
                    T25 = T1X + T24;
 
1295
                    T2P = T24 - T1X;
 
1296
                    T4L = KP707106781 * (T4H - T4K);
 
1297
                    T4O = T4M - T4N;
 
1298
                    T4P = T4L + T4O;
 
1299
                    T64 = T4O - T4L;
 
1300
               }
 
1301
               {
 
1302
                    E T4S, T4V, T3t, T3u;
 
1303
                    T4S = KP707106781 * (T4Q - T4R);
 
1304
                    T4V = T4T - T4U;
 
1305
                    T4W = T4S + T4V;
 
1306
                    T65 = T4V - T4S;
 
1307
                    T3t = T20 + T23;
 
1308
                    T3u = T1R + T1U;
 
1309
                    T3v = T3t - T3u;
 
1310
                    T42 = T3u + T3t;
 
1311
               }
 
1312
          }
 
1313
          {
 
1314
               E Tv, T10, T4g, T4i, T4j, T4k, T4f, T4h;
 
1315
               Tv = Tf + Tu;
 
1316
               T10 = TK + TZ;
 
1317
               T4g = Tv - T10;
 
1318
               T4i = T48 + T47;
 
1319
               T4j = T41 + T42;
 
1320
               T4k = T4i - T4j;
 
1321
               rio[0] = Tv + T10;
 
1322
               iio[-WS(ios, 31)] = T4j + T4i;
 
1323
               T4f = W[30];
 
1324
               T4h = W[31];
 
1325
               rio[WS(ios, 16)] = FNMS(T4h, T4k, T4f * T4g);
 
1326
               iio[-WS(ios, 15)] = FMA(T4h, T4g, T4f * T4k);
 
1327
          }
 
1328
          {
 
1329
               E T44, T4c, T4a, T4e;
 
1330
               {
 
1331
                    E T40, T43, T46, T49;
 
1332
                    T40 = Tf - Tu;
 
1333
                    T43 = T41 - T42;
 
1334
                    T44 = T40 + T43;
 
1335
                    T4c = T40 - T43;
 
1336
                    T46 = TZ - TK;
 
1337
                    T49 = T47 - T48;
 
1338
                    T4a = T46 + T49;
 
1339
                    T4e = T49 - T46;
 
1340
               }
 
1341
               {
 
1342
                    E T3Z, T45, T4b, T4d;
 
1343
                    T3Z = W[46];
 
1344
                    T45 = W[47];
 
1345
                    rio[WS(ios, 24)] = FNMS(T45, T4a, T3Z * T44);
 
1346
                    iio[-WS(ios, 7)] = FMA(T45, T44, T3Z * T4a);
 
1347
                    T4b = W[14];
 
1348
                    T4d = W[15];
 
1349
                    rio[WS(ios, 8)] = FNMS(T4d, T4e, T4b * T4c);
 
1350
                    iio[-WS(ios, 23)] = FMA(T4d, T4c, T4b * T4e);
 
1351
               }
 
1352
          }
 
1353
          {
 
1354
               E T3m, T3H, T3T, T3O, T3C, T3P, T3x, T3S;
 
1355
               T3m = T3i + T3l;
 
1356
               T3H = T3D + T3G;
 
1357
               T3T = T3G - T3D;
 
1358
               T3O = T3i - T3l;
 
1359
               {
 
1360
                    E T3A, T3B, T3r, T3w;
 
1361
                    T3A = T3q - T3n;
 
1362
                    T3B = T3s + T3v;
 
1363
                    T3C = KP707106781 * (T3A + T3B);
 
1364
                    T3P = KP707106781 * (T3B - T3A);
 
1365
                    T3r = T3n + T3q;
 
1366
                    T3w = T3s - T3v;
 
1367
                    T3x = KP707106781 * (T3r + T3w);
 
1368
                    T3S = KP707106781 * (T3r - T3w);
 
1369
               }
 
1370
               {
 
1371
                    E T3y, T3I, T3h, T3z;
 
1372
                    T3y = T3m + T3x;
 
1373
                    T3I = T3C + T3H;
 
1374
                    T3h = W[54];
 
1375
                    T3z = W[55];
 
1376
                    rio[WS(ios, 28)] = FNMS(T3z, T3I, T3h * T3y);
 
1377
                    iio[-WS(ios, 3)] = FMA(T3z, T3y, T3h * T3I);
 
1378
               }
 
1379
               {
 
1380
                    E T3W, T3Y, T3V, T3X;
 
1381
                    T3W = T3O - T3P;
 
1382
                    T3Y = T3T - T3S;
 
1383
                    T3V = W[38];
 
1384
                    T3X = W[39];
 
1385
                    rio[WS(ios, 20)] = FNMS(T3X, T3Y, T3V * T3W);
 
1386
                    iio[-WS(ios, 11)] = FMA(T3X, T3W, T3V * T3Y);
 
1387
               }
 
1388
               {
 
1389
                    E T3K, T3M, T3J, T3L;
 
1390
                    T3K = T3m - T3x;
 
1391
                    T3M = T3H - T3C;
 
1392
                    T3J = W[22];
 
1393
                    T3L = W[23];
 
1394
                    rio[WS(ios, 12)] = FNMS(T3L, T3M, T3J * T3K);
 
1395
                    iio[-WS(ios, 19)] = FMA(T3L, T3K, T3J * T3M);
 
1396
               }
 
1397
               {
 
1398
                    E T3Q, T3U, T3N, T3R;
 
1399
                    T3Q = T3O + T3P;
 
1400
                    T3U = T3S + T3T;
 
1401
                    T3N = W[6];
 
1402
                    T3R = W[7];
 
1403
                    rio[WS(ios, 4)] = FNMS(T3R, T3U, T3N * T3Q);
 
1404
                    iio[-WS(ios, 27)] = FMA(T3R, T3Q, T3N * T3U);
 
1405
               }
 
1406
          }
 
1407
          {
 
1408
               E T2K, T36, T2Z, T3b, T2R, T3a, T2W, T37, T2J, T2X;
 
1409
               T2J = KP707106781 * (T2e - T2d);
 
1410
               T2K = T2I + T2J;
 
1411
               T36 = T2I - T2J;
 
1412
               T2X = KP707106781 * (T1j - T1s);
 
1413
               T2Z = T2X + T2Y;
 
1414
               T3b = T2Y - T2X;
 
1415
               {
 
1416
                    E T2N, T2Q, T2U, T2V;
 
1417
                    T2N = FNMS(KP382683432, T2M, KP923879532 * T2L);
 
1418
                    T2Q = FMA(KP923879532, T2O, KP382683432 * T2P);
 
1419
                    T2R = T2N + T2Q;
 
1420
                    T3a = T2Q - T2N;
 
1421
                    T2U = FMA(KP382683432, T2L, KP923879532 * T2M);
 
1422
                    T2V = FNMS(KP382683432, T2O, KP923879532 * T2P);
 
1423
                    T2W = T2U + T2V;
 
1424
                    T37 = T2U - T2V;
 
1425
               }
 
1426
               {
 
1427
                    E T2S, T30, T2H, T2T;
 
1428
                    T2S = T2K + T2R;
 
1429
                    T30 = T2W + T2Z;
 
1430
                    T2H = W[2];
 
1431
                    T2T = W[3];
 
1432
                    rio[WS(ios, 2)] = FNMS(T2T, T30, T2H * T2S);
 
1433
                    iio[-WS(ios, 29)] = FMA(T2T, T2S, T2H * T30);
 
1434
               }
 
1435
               {
 
1436
                    E T3e, T3g, T3d, T3f;
 
1437
                    T3e = T36 - T37;
 
1438
                    T3g = T3b - T3a;
 
1439
                    T3d = W[18];
 
1440
                    T3f = W[19];
 
1441
                    rio[WS(ios, 10)] = FNMS(T3f, T3g, T3d * T3e);
 
1442
                    iio[-WS(ios, 21)] = FMA(T3f, T3e, T3d * T3g);
 
1443
               }
 
1444
               {
 
1445
                    E T32, T34, T31, T33;
 
1446
                    T32 = T2K - T2R;
 
1447
                    T34 = T2Z - T2W;
 
1448
                    T31 = W[34];
 
1449
                    T33 = W[35];
 
1450
                    rio[WS(ios, 18)] = FNMS(T33, T34, T31 * T32);
 
1451
                    iio[-WS(ios, 13)] = FMA(T33, T32, T31 * T34);
 
1452
               }
 
1453
               {
 
1454
                    E T38, T3c, T35, T39;
 
1455
                    T38 = T36 + T37;
 
1456
                    T3c = T3a + T3b;
 
1457
                    T35 = W[50];
 
1458
                    T39 = W[51];
 
1459
                    rio[WS(ios, 26)] = FNMS(T39, T3c, T35 * T38);
 
1460
                    iio[-WS(ios, 5)] = FMA(T39, T38, T35 * T3c);
 
1461
               }
 
1462
          }
 
1463
          {
 
1464
               E T1u, T2w, T2p, T2B, T27, T2A, T2c, T2x, T1t, T2f;
 
1465
               T1t = KP707106781 * (T1j + T1s);
 
1466
               T1u = T1a + T1t;
 
1467
               T2w = T1a - T1t;
 
1468
               T2f = KP707106781 * (T2d + T2e);
 
1469
               T2p = T2f + T2o;
 
1470
               T2B = T2o - T2f;
 
1471
               {
 
1472
                    E T1N, T26, T2a, T2b;
 
1473
                    T1N = FMA(KP923879532, T1D, KP382683432 * T1M);
 
1474
                    T26 = FNMS(KP382683432, T25, KP923879532 * T1W);
 
1475
                    T27 = T1N + T26;
 
1476
                    T2A = T1N - T26;
 
1477
                    T2a = FNMS(KP382683432, T1D, KP923879532 * T1M);
 
1478
                    T2b = FMA(KP382683432, T1W, KP923879532 * T25);
 
1479
                    T2c = T2a + T2b;
 
1480
                    T2x = T2b - T2a;
 
1481
               }
 
1482
               {
 
1483
                    E T28, T2q, T11, T29;
 
1484
                    T28 = T1u + T27;
 
1485
                    T2q = T2c + T2p;
 
1486
                    T11 = W[58];
 
1487
                    T29 = W[59];
 
1488
                    rio[WS(ios, 30)] = FNMS(T29, T2q, T11 * T28);
 
1489
                    iio[-WS(ios, 1)] = FMA(T29, T28, T11 * T2q);
 
1490
               }
 
1491
               {
 
1492
                    E T2E, T2G, T2D, T2F;
 
1493
                    T2E = T2w - T2x;
 
1494
                    T2G = T2B - T2A;
 
1495
                    T2D = W[42];
 
1496
                    T2F = W[43];
 
1497
                    rio[WS(ios, 22)] = FNMS(T2F, T2G, T2D * T2E);
 
1498
                    iio[-WS(ios, 9)] = FMA(T2F, T2E, T2D * T2G);
 
1499
               }
 
1500
               {
 
1501
                    E T2s, T2u, T2r, T2t;
 
1502
                    T2s = T1u - T27;
 
1503
                    T2u = T2p - T2c;
 
1504
                    T2r = W[26];
 
1505
                    T2t = W[27];
 
1506
                    rio[WS(ios, 14)] = FNMS(T2t, T2u, T2r * T2s);
 
1507
                    iio[-WS(ios, 17)] = FMA(T2t, T2s, T2r * T2u);
 
1508
               }
 
1509
               {
 
1510
                    E T2y, T2C, T2v, T2z;
 
1511
                    T2y = T2w + T2x;
 
1512
                    T2C = T2A + T2B;
 
1513
                    T2v = W[10];
 
1514
                    T2z = W[11];
 
1515
                    rio[WS(ios, 6)] = FNMS(T2z, T2C, T2v * T2y);
 
1516
                    iio[-WS(ios, 25)] = FMA(T2z, T2y, T2v * T2C);
 
1517
               }
 
1518
          }
 
1519
          {
 
1520
               E T4Y, T5N, T5F, T5Q, T5p, T5R, T5C, T5M;
 
1521
               {
 
1522
                    E T4E, T4X, T5D, T5E;
 
1523
                    T4E = FNMS(KP195090322, T4D, KP980785280 * T4w);
 
1524
                    T4X = FMA(KP195090322, T4P, KP980785280 * T4W);
 
1525
                    T4Y = T4E + T4X;
 
1526
                    T5N = T4X - T4E;
 
1527
                    T5D = FMA(KP980785280, T4D, KP195090322 * T4w);
 
1528
                    T5E = FNMS(KP195090322, T4W, KP980785280 * T4P);
 
1529
                    T5F = T5D + T5E;
 
1530
                    T5Q = T5D - T5E;
 
1531
               }
 
1532
               {
 
1533
                    E T5d, T5o, T5y, T5B;
 
1534
                    T5d = T55 + T5c;
 
1535
                    T5o = T5k + T5n;
 
1536
                    T5p = T5d + T5o;
 
1537
                    T5R = T5o - T5d;
 
1538
                    T5y = T5u + T5x;
 
1539
                    T5B = T5z + T5A;
 
1540
                    T5C = T5y + T5B;
 
1541
                    T5M = T5y - T5B;
 
1542
               }
 
1543
               {
 
1544
                    E T5q, T5G, T4l, T5r;
 
1545
                    T5q = T4Y + T5p;
 
1546
                    T5G = T5C + T5F;
 
1547
                    T4l = W[0];
 
1548
                    T5r = W[1];
 
1549
                    rio[WS(ios, 1)] = FNMS(T5r, T5G, T4l * T5q);
 
1550
                    iio[-WS(ios, 30)] = FMA(T4l, T5G, T5r * T5q);
 
1551
               }
 
1552
               {
 
1553
                    E T5U, T5W, T5T, T5V;
 
1554
                    T5U = T5R - T5Q;
 
1555
                    T5W = T5M - T5N;
 
1556
                    T5T = W[16];
 
1557
                    T5V = W[17];
 
1558
                    rio[WS(ios, 9)] = FNMS(T5V, T5W, T5T * T5U);
 
1559
                    iio[-WS(ios, 22)] = FMA(T5T, T5W, T5V * T5U);
 
1560
               }
 
1561
               {
 
1562
                    E T5I, T5K, T5H, T5J;
 
1563
                    T5I = T5C - T5F;
 
1564
                    T5K = T5p - T4Y;
 
1565
                    T5H = W[32];
 
1566
                    T5J = W[33];
 
1567
                    iio[-WS(ios, 14)] = FMA(T5H, T5I, T5J * T5K);
 
1568
                    rio[WS(ios, 17)] = FNMS(T5J, T5I, T5H * T5K);
 
1569
               }
 
1570
               {
 
1571
                    E T5O, T5S, T5L, T5P;
 
1572
                    T5O = T5M + T5N;
 
1573
                    T5S = T5Q + T5R;
 
1574
                    T5L = W[48];
 
1575
                    T5P = W[49];
 
1576
                    iio[-WS(ios, 6)] = FMA(T5L, T5O, T5P * T5S);
 
1577
                    rio[WS(ios, 25)] = FNMS(T5P, T5O, T5L * T5S);
 
1578
               }
 
1579
          }
 
1580
          {
 
1581
               E T60, T6q, T6f, T6n, T67, T6m, T6c, T6r;
 
1582
               {
 
1583
                    E T5Y, T5Z, T6d, T6e;
 
1584
                    T5Y = T5u - T5x;
 
1585
                    T5Z = T5c - T55;
 
1586
                    T60 = T5Y + T5Z;
 
1587
                    T6q = T5Y - T5Z;
 
1588
                    T6d = T5z - T5A;
 
1589
                    T6e = T5n - T5k;
 
1590
                    T6f = T6d + T6e;
 
1591
                    T6n = T6e - T6d;
 
1592
               }
 
1593
               {
 
1594
                    E T63, T66, T6a, T6b;
 
1595
                    T63 = FNMS(KP555570233, T62, KP831469612 * T61);
 
1596
                    T66 = FMA(KP831469612, T64, KP555570233 * T65);
 
1597
                    T67 = T63 + T66;
 
1598
                    T6m = T66 - T63;
 
1599
                    T6a = FMA(KP555570233, T61, KP831469612 * T62);
 
1600
                    T6b = FNMS(KP555570233, T64, KP831469612 * T65);
 
1601
                    T6c = T6a + T6b;
 
1602
                    T6r = T6a - T6b;
 
1603
               }
 
1604
               {
 
1605
                    E T68, T6g, T5X, T69;
 
1606
                    T68 = T60 + T67;
 
1607
                    T6g = T6c + T6f;
 
1608
                    T5X = W[56];
 
1609
                    T69 = W[57];
 
1610
                    iio[-WS(ios, 2)] = FMA(T5X, T68, T69 * T6g);
 
1611
                    rio[WS(ios, 29)] = FNMS(T69, T68, T5X * T6g);
 
1612
               }
 
1613
               {
 
1614
                    E T6u, T6w, T6t, T6v;
 
1615
                    T6u = T6q - T6r;
 
1616
                    T6w = T6n - T6m;
 
1617
                    T6t = W[40];
 
1618
                    T6v = W[41];
 
1619
                    iio[-WS(ios, 10)] = FMA(T6t, T6u, T6v * T6w);
 
1620
                    rio[WS(ios, 21)] = FNMS(T6v, T6u, T6t * T6w);
 
1621
               }
 
1622
               {
 
1623
                    E T6i, T6k, T6h, T6j;
 
1624
                    T6i = T6f - T6c;
 
1625
                    T6k = T60 - T67;
 
1626
                    T6h = W[24];
 
1627
                    T6j = W[25];
 
1628
                    rio[WS(ios, 13)] = FNMS(T6j, T6k, T6h * T6i);
 
1629
                    iio[-WS(ios, 18)] = FMA(T6h, T6k, T6j * T6i);
 
1630
               }
 
1631
               {
 
1632
                    E T6o, T6s, T6l, T6p;
 
1633
                    T6o = T6m + T6n;
 
1634
                    T6s = T6q + T6r;
 
1635
                    T6l = W[8];
 
1636
                    T6p = W[9];
 
1637
                    rio[WS(ios, 5)] = FNMS(T6p, T6s, T6l * T6o);
 
1638
                    iio[-WS(ios, 26)] = FMA(T6l, T6s, T6p * T6o);
 
1639
               }
 
1640
          }
 
1641
          {
 
1642
               E T7y, T7R, T7J, T7U, T7B, T7V, T7G, T7Q;
 
1643
               {
 
1644
                    E T7u, T7x, T7H, T7I;
 
1645
                    T7u = FNMS(KP555570233, T7t, KP831469612 * T7s);
 
1646
                    T7x = FNMS(KP555570233, T7w, KP831469612 * T7v);
 
1647
                    T7y = T7u + T7x;
 
1648
                    T7R = T7x - T7u;
 
1649
                    T7H = FMA(KP831469612, T7t, KP555570233 * T7s);
 
1650
                    T7I = FMA(KP831469612, T7w, KP555570233 * T7v);
 
1651
                    T7J = T7H - T7I;
 
1652
                    T7U = T7H + T7I;
 
1653
               }
 
1654
               {
 
1655
                    E T7z, T7A, T7E, T7F;
 
1656
                    T7z = T6G - T6D;
 
1657
                    T7A = T77 - T76;
 
1658
                    T7B = T7z + T7A;
 
1659
                    T7V = T7A - T7z;
 
1660
                    T7E = T6y - T6z;
 
1661
                    T7F = T73 - T74;
 
1662
                    T7G = T7E + T7F;
 
1663
                    T7Q = T7E - T7F;
 
1664
               }
 
1665
               {
 
1666
                    E T7C, T7K, T7r, T7D;
 
1667
                    T7C = T7y + T7B;
 
1668
                    T7K = T7G + T7J;
 
1669
                    T7r = W[4];
 
1670
                    T7D = W[5];
 
1671
                    rio[WS(ios, 3)] = FNMS(T7D, T7K, T7r * T7C);
 
1672
                    iio[-WS(ios, 28)] = FMA(T7r, T7K, T7D * T7C);
 
1673
               }
 
1674
               {
 
1675
                    E T7Y, T80, T7X, T7Z;
 
1676
                    T7Y = T7V - T7U;
 
1677
                    T80 = T7Q - T7R;
 
1678
                    T7X = W[20];
 
1679
                    T7Z = W[21];
 
1680
                    rio[WS(ios, 11)] = FNMS(T7Z, T80, T7X * T7Y);
 
1681
                    iio[-WS(ios, 20)] = FMA(T7X, T80, T7Z * T7Y);
 
1682
               }
 
1683
               {
 
1684
                    E T7M, T7O, T7L, T7N;
 
1685
                    T7M = T7G - T7J;
 
1686
                    T7O = T7B - T7y;
 
1687
                    T7L = W[36];
 
1688
                    T7N = W[37];
 
1689
                    iio[-WS(ios, 12)] = FMA(T7L, T7M, T7N * T7O);
 
1690
                    rio[WS(ios, 19)] = FNMS(T7N, T7M, T7L * T7O);
 
1691
               }
 
1692
               {
 
1693
                    E T7S, T7W, T7P, T7T;
 
1694
                    T7S = T7Q + T7R;
 
1695
                    T7W = T7U + T7V;
 
1696
                    T7P = W[52];
 
1697
                    T7T = W[53];
 
1698
                    iio[-WS(ios, 4)] = FMA(T7P, T7S, T7T * T7W);
 
1699
                    rio[WS(ios, 27)] = FNMS(T7T, T7S, T7P * T7W);
 
1700
               }
 
1701
          }
 
1702
          {
 
1703
               E T6I, T7k, T79, T7h, T6X, T7g, T72, T7l;
 
1704
               {
 
1705
                    E T6A, T6H, T75, T78;
 
1706
                    T6A = T6y + T6z;
 
1707
                    T6H = T6D + T6G;
 
1708
                    T6I = T6A + T6H;
 
1709
                    T7k = T6A - T6H;
 
1710
                    T75 = T73 + T74;
 
1711
                    T78 = T76 + T77;
 
1712
                    T79 = T75 + T78;
 
1713
                    T7h = T78 - T75;
 
1714
               }
 
1715
               {
 
1716
                    E T6P, T6W, T70, T71;
 
1717
                    T6P = FNMS(KP195090322, T6O, KP980785280 * T6L);
 
1718
                    T6W = FMA(KP980785280, T6S, KP195090322 * T6V);
 
1719
                    T6X = T6P + T6W;
 
1720
                    T7g = T6W - T6P;
 
1721
                    T70 = FMA(KP195090322, T6L, KP980785280 * T6O);
 
1722
                    T71 = FNMS(KP195090322, T6S, KP980785280 * T6V);
 
1723
                    T72 = T70 + T71;
 
1724
                    T7l = T70 - T71;
 
1725
               }
 
1726
               {
 
1727
                    E T6Y, T7a, T6x, T6Z;
 
1728
                    T6Y = T6I + T6X;
 
1729
                    T7a = T72 + T79;
 
1730
                    T6x = W[60];
 
1731
                    T6Z = W[61];
 
1732
                    iio[0] = FMA(T6x, T6Y, T6Z * T7a);
 
1733
                    rio[WS(ios, 31)] = FNMS(T6Z, T6Y, T6x * T7a);
 
1734
               }
 
1735
               {
 
1736
                    E T7o, T7q, T7n, T7p;
 
1737
                    T7o = T7k - T7l;
 
1738
                    T7q = T7h - T7g;
 
1739
                    T7n = W[44];
 
1740
                    T7p = W[45];
 
1741
                    iio[-WS(ios, 8)] = FMA(T7n, T7o, T7p * T7q);
 
1742
                    rio[WS(ios, 23)] = FNMS(T7p, T7o, T7n * T7q);
 
1743
               }
 
1744
               {
 
1745
                    E T7c, T7e, T7b, T7d;
 
1746
                    T7c = T79 - T72;
 
1747
                    T7e = T6I - T6X;
 
1748
                    T7b = W[28];
 
1749
                    T7d = W[29];
 
1750
                    rio[WS(ios, 15)] = FNMS(T7d, T7e, T7b * T7c);
 
1751
                    iio[-WS(ios, 16)] = FMA(T7b, T7e, T7d * T7c);
 
1752
               }
 
1753
               {
 
1754
                    E T7i, T7m, T7f, T7j;
 
1755
                    T7i = T7g + T7h;
 
1756
                    T7m = T7k + T7l;
 
1757
                    T7f = W[12];
 
1758
                    T7j = W[13];
 
1759
                    rio[WS(ios, 7)] = FNMS(T7j, T7m, T7f * T7i);
 
1760
                    iio[-WS(ios, 24)] = FMA(T7f, T7m, T7j * T7i);
 
1761
               }
 
1762
          }
 
1763
     }
 
1764
     return W;
 
1765
}
 
1766
 
 
1767
static const tw_instr twinstr[] = {
 
1768
     {TW_FULL, 0, 32},
 
1769
     {TW_NEXT, 1, 0}
 
1770
};
 
1771
 
 
1772
/*
 * Descriptor passed to X(khc2hc_register) together with hb_32.
 * NOTE(review): fields are initialized positionally; judging by the values
 * they look like size (32), codelet name, twiddle instruction list, genus,
 * an operation-count tuple {340, 114, 94, 0}, and three trailing zeros --
 * confirm against the hc2hc_desc declaration.
 */
static const hc2hc_desc desc = {
     32,
     "hb_32",
     twinstr,
     &GENUS,
     { 340, 114, 94, 0 },
     0,
     0,
     0
};
 
1773
 
 
1774
/*
 * Registration entry point for this codelet: forwards the hb_32 kernel and
 * its descriptor to X(khc2hc_register) on the supplied planner `p`.
 */
void X(codelet_hb_32) (planner *p)
{
     X(khc2hc_register) (p, hb_32, &desc);
}
 
1777
#endif                          /* HAVE_FMA */