~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cf/hf_32.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:43:51 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:16:39 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */
 
28
/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */
29
29
 
30
30
/*
31
31
 * This function contains 434 FP additions, 260 FP multiplications,
43
43
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
44
44
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
45
45
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
46
 
     INT m;
47
 
     for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
48
 
          E T6D, T6A;
49
 
          {
50
 
               E T8y, T87, T8, T3w, T83, T3B, T8x, Tl, T6G, Tz, T3J, T5T, T6F, TM, T3Q;
51
 
               E T5U, T46, T5X, T7E, T6M, T5Y, T3Z, T6J, T1f, T7D, T6R, T61, T4e, T6O, T1G;
52
 
               E T60, T4l, T54, T6c, T7d, T7N, T32, T76, T6f, T5r, T4v, T65, T72, T7I, T29;
53
 
               E T6V, T68, T4S, T5t, T5b, T7O, T79, T7e, T3t, T5s, T5i, T4H, T2y, T4B, T6X;
54
 
               E T2m, T4w, T4F, T2s;
55
 
               {
56
 
                    E T44, T1d, T3X, T6K, T11, T40, T42, T17, T5h, T5c;
57
 
                    {
58
 
                         E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti;
59
 
                         {
60
 
                              E T1, T86, T3, T6, T2, T5;
61
 
                              T1 = cr[0];
62
 
                              T86 = ci[0];
63
 
                              T3 = cr[WS(rs, 16)];
64
 
                              T6 = ci[WS(rs, 16)];
65
 
                              T2 = W[30];
66
 
                              T5 = W[31];
67
 
                              {
68
 
                                   E T84, T4, T9, T85, T7;
69
 
                                   Ta = cr[WS(rs, 8)];
70
 
                                   Td = ci[WS(rs, 8)];
71
 
                                   T84 = T2 * T6;
72
 
                                   T4 = T2 * T3;
73
 
                                   T9 = W[14];
74
 
                                   Tg = cr[WS(rs, 24)];
75
 
                                   T85 = FNMS(T5, T3, T84);
76
 
                                   T7 = FMA(T5, T6, T4);
77
 
                                   T3x = T9 * Td;
78
 
                                   Tb = T9 * Ta;
79
 
                                   T8y = T86 - T85;
80
 
                                   T87 = T85 + T86;
81
 
                                   T8 = T1 + T7;
82
 
                                   T3w = T1 - T7;
83
 
                                   Tj = ci[WS(rs, 24)];
84
 
                                   Tf = W[46];
85
 
                              }
86
 
                              Tc = W[15];
87
 
                              Ti = W[47];
88
 
                         }
89
 
                         {
90
 
                              E Tu, Tx, T3F, Ts, Tw, T3G, Tv;
91
 
                              {
92
 
                                   E To, Tr, Tp, T3E, Tq, Tt;
93
 
                                   {
94
 
                                        E T3y, Te, T3A, Tk, T3z, Th, Tn;
95
 
                                        To = cr[WS(rs, 4)];
96
 
                                        T3z = Tf * Tj;
97
 
                                        Th = Tf * Tg;
98
 
                                        T3y = FNMS(Tc, Ta, T3x);
99
 
                                        Te = FMA(Tc, Td, Tb);
100
 
                                        T3A = FNMS(Ti, Tg, T3z);
101
 
                                        Tk = FMA(Ti, Tj, Th);
102
 
                                        Tr = ci[WS(rs, 4)];
103
 
                                        Tn = W[6];
104
 
                                        T83 = T3y + T3A;
105
 
                                        T3B = T3y - T3A;
106
 
                                        T8x = Te - Tk;
107
 
                                        Tl = Te + Tk;
108
 
                                        Tp = Tn * To;
109
 
                                        T3E = Tn * Tr;
110
 
                                   }
111
 
                                   Tq = W[7];
112
 
                                   Tu = cr[WS(rs, 20)];
113
 
                                   Tx = ci[WS(rs, 20)];
114
 
                                   Tt = W[38];
115
 
                                   T3F = FNMS(Tq, To, T3E);
116
 
                                   Ts = FMA(Tq, Tr, Tp);
117
 
                                   Tw = W[39];
118
 
                                   T3G = Tt * Tx;
119
 
                                   Tv = Tt * Tu;
120
 
                              }
121
 
                              {
122
 
                                   E T3M, TF, TH, TK, TG, TJ, TE, TD, TC;
123
 
                                   {
124
 
                                        E TB, T3H, Ty, TA, T3I, T3D, T3L;
125
 
                                        TB = cr[WS(rs, 28)];
126
 
                                        TE = ci[WS(rs, 28)];
127
 
                                        T3H = FNMS(Tw, Tu, T3G);
128
 
                                        Ty = FMA(Tw, Tx, Tv);
129
 
                                        TA = W[54];
130
 
                                        TD = W[55];
131
 
                                        T6G = T3F + T3H;
132
 
                                        T3I = T3F - T3H;
133
 
                                        Tz = Ts + Ty;
134
 
                                        T3D = Ts - Ty;
135
 
                                        T3L = TA * TE;
136
 
                                        TC = TA * TB;
137
 
                                        T3J = T3D - T3I;
138
 
                                        T5T = T3D + T3I;
139
 
                                        T3M = FNMS(TD, TB, T3L);
140
 
                                   }
141
 
                                   TF = FMA(TD, TE, TC);
142
 
                                   TH = cr[WS(rs, 12)];
143
 
                                   TK = ci[WS(rs, 12)];
144
 
                                   TG = W[22];
145
 
                                   TJ = W[23];
146
 
                                   {
147
 
                                        E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14;
148
 
                                        {
149
 
                                             E T19, T1c, T18, T1b, T3P, T3K;
150
 
                                             {
151
 
                                                  E TQ, TT, T3N, TI, TP, TS;
152
 
                                                  TQ = cr[WS(rs, 2)];
153
 
                                                  TT = ci[WS(rs, 2)];
154
 
                                                  T3N = TG * TK;
155
 
                                                  TI = TG * TH;
156
 
                                                  TP = W[2];
157
 
                                                  TS = W[3];
158
 
                                                  {
159
 
                                                       E T3O, TL, T3T, TR;
160
 
                                                       T3O = FNMS(TJ, TH, T3N);
161
 
                                                       TL = FMA(TJ, TK, TI);
162
 
                                                       T3T = TP * TT;
163
 
                                                       TR = TP * TQ;
164
 
                                                       T6F = T3M + T3O;
165
 
                                                       T3P = T3M - T3O;
166
 
                                                       TM = TF + TL;
167
 
                                                       T3K = TF - TL;
168
 
                                                       TU = FMA(TS, TT, TR);
169
 
                                                       T3U = FNMS(TS, TQ, T3T);
170
 
                                                  }
171
 
                                             }
172
 
                                             T3Q = T3K + T3P;
173
 
                                             T5U = T3K - T3P;
174
 
                                             T19 = cr[WS(rs, 26)];
175
 
                                             T1c = ci[WS(rs, 26)];
176
 
                                             T18 = W[50];
177
 
                                             T1b = W[51];
178
 
                                             {
179
 
                                                  E TW, TZ, TY, T3V, TX, T43, T1a, TV;
180
 
                                                  TW = cr[WS(rs, 18)];
181
 
                                                  TZ = ci[WS(rs, 18)];
182
 
                                                  T43 = T18 * T1c;
183
 
                                                  T1a = T18 * T19;
184
 
                                                  TV = W[34];
185
 
                                                  TY = W[35];
186
 
                                                  T44 = FNMS(T1b, T19, T43);
187
 
                                                  T1d = FMA(T1b, T1c, T1a);
188
 
                                                  T3V = TV * TZ;
189
 
                                                  TX = TV * TW;
190
 
                                                  T13 = cr[WS(rs, 10)];
191
 
                                                  T16 = ci[WS(rs, 10)];
192
 
                                                  T3W = FNMS(TY, TW, T3V);
193
 
                                                  T10 = FMA(TY, TZ, TX);
194
 
                                                  T12 = W[18];
195
 
                                                  T15 = W[19];
196
 
                                             }
197
 
                                        }
198
 
                                        T3X = T3U - T3W;
199
 
                                        T6K = T3U + T3W;
200
 
                                        T11 = TU + T10;
201
 
                                        T40 = TU - T10;
202
 
                                        T41 = T12 * T16;
203
 
                                        T14 = T12 * T13;
204
 
                                        T42 = FNMS(T15, T13, T41);
205
 
                                        T17 = FMA(T15, T16, T14);
206
 
                                   }
207
 
                              }
208
 
                         }
209
 
                    }
210
 
                    {
211
 
                         E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v;
212
 
                         {
213
 
                              E T1A, T1D, T1C, T4i, T1B;
214
 
                              {
215
 
                                   E T1h, T1k, T1g, T1j, T48, T1i, T1z;
216
 
                                   T1h = cr[WS(rs, 30)];
217
 
                                   T1k = ci[WS(rs, 30)];
218
 
                                   {
219
 
                                        E T6L, T45, T1e, T3Y;
220
 
                                        T6L = T42 + T44;
221
 
                                        T45 = T42 - T44;
222
 
                                        T1e = T17 + T1d;
223
 
                                        T3Y = T17 - T1d;
224
 
                                        T46 = T40 - T45;
225
 
                                        T5X = T40 + T45;
226
 
                                        T7E = T6K + T6L;
227
 
                                        T6M = T6K - T6L;
228
 
                                        T5Y = T3X - T3Y;
229
 
                                        T3Z = T3X + T3Y;
230
 
                                        T6J = T11 - T1e;
231
 
                                        T1f = T11 + T1e;
232
 
                                        T1g = W[58];
233
 
                                   }
234
 
                                   T1j = W[59];
235
 
                                   T1A = cr[WS(rs, 22)];
236
 
                                   T1D = ci[WS(rs, 22)];
237
 
                                   T48 = T1g * T1k;
238
 
                                   T1i = T1g * T1h;
239
 
                                   T1z = W[42];
240
 
                                   T1C = W[43];
241
 
                                   T49 = FNMS(T1j, T1h, T48);
242
 
                                   T1l = FMA(T1j, T1k, T1i);
243
 
                                   T4i = T1z * T1D;
244
 
                                   T1B = T1z * T1A;
245
 
                              }
246
 
                              {
247
 
                                   E T1n, T1q, T1m, T1p, T4a, T1o, T1t;
248
 
                                   T1n = cr[WS(rs, 14)];
249
 
                                   T1q = ci[WS(rs, 14)];
250
 
                                   T4j = FNMS(T1C, T1A, T4i);
251
 
                                   T1E = FMA(T1C, T1D, T1B);
252
 
                                   T1m = W[26];
253
 
                                   T1p = W[27];
254
 
                                   T1u = cr[WS(rs, 6)];
255
 
                                   T1x = ci[WS(rs, 6)];
256
 
                                   T4a = T1m * T1q;
257
 
                                   T1o = T1m * T1n;
258
 
                                   T1t = W[10];
259
 
                                   T1w = W[11];
260
 
                                   T4b = FNMS(T1p, T1n, T4a);
261
 
                                   T1r = FMA(T1p, T1q, T1o);
262
 
                                   T4g = T1t * T1x;
263
 
                                   T1v = T1t * T1u;
264
 
                              }
265
 
                         }
266
 
                         {
267
 
                              E T4c, T6P, T1s, T4f, T4h, T1y;
268
 
                              T4c = T49 - T4b;
269
 
                              T6P = T49 + T4b;
270
 
                              T1s = T1l + T1r;
271
 
                              T4f = T1l - T1r;
272
 
                              T4h = FNMS(T1w, T1u, T4g);
273
 
                              T1y = FMA(T1w, T1x, T1v);
274
 
                              {
275
 
                                   E T4k, T6Q, T4d, T1F;
276
 
                                   T4k = T4h - T4j;
277
 
                                   T6Q = T4h + T4j;
278
 
                                   T4d = T1y - T1E;
279
 
                                   T1F = T1y + T1E;
280
 
                                   T7D = T6P + T6Q;
281
 
                                   T6R = T6P - T6Q;
282
 
                                   T61 = T4c - T4d;
283
 
                                   T4e = T4c + T4d;
284
 
                                   T6O = T1s - T1F;
285
 
                                   T1G = T1s + T1F;
286
 
                                   T60 = T4f + T4k;
287
 
                                   T4l = T4f - T4k;
288
 
                              }
289
 
                         }
290
 
                    }
291
 
                    {
292
 
                         E T5n, T2H, T52, T30, T2Q, T2T, T2S, T5p, T2N, T4Z, T2R;
293
 
                         {
294
 
                              E T2W, T2Z, T2Y, T51, T2X;
295
 
                              {
296
 
                                   E T2D, T2G, T2C, T2F, T5m, T2E, T2V;
297
 
                                   T2D = cr[WS(rs, 31)];
298
 
                                   T2G = ci[WS(rs, 31)];
299
 
                                   T2C = W[60];
300
 
                                   T2F = W[61];
301
 
                                   T2W = cr[WS(rs, 23)];
302
 
                                   T2Z = ci[WS(rs, 23)];
303
 
                                   T5m = T2C * T2G;
304
 
                                   T2E = T2C * T2D;
305
 
                                   T2V = W[44];
306
 
                                   T2Y = W[45];
307
 
                                   T5n = FNMS(T2F, T2D, T5m);
308
 
                                   T2H = FMA(T2F, T2G, T2E);
309
 
                                   T51 = T2V * T2Z;
310
 
                                   T2X = T2V * T2W;
311
 
                              }
312
 
                              {
313
 
                                   E T2J, T2M, T2I, T2L, T5o, T2K, T2P;
314
 
                                   T2J = cr[WS(rs, 15)];
315
 
                                   T2M = ci[WS(rs, 15)];
316
 
                                   T52 = FNMS(T2Y, T2W, T51);
317
 
                                   T30 = FMA(T2Y, T2Z, T2X);
318
 
                                   T2I = W[28];
319
 
                                   T2L = W[29];
320
 
                                   T2Q = cr[WS(rs, 7)];
321
 
                                   T2T = ci[WS(rs, 7)];
322
 
                                   T5o = T2I * T2M;
323
 
                                   T2K = T2I * T2J;
324
 
                                   T2P = W[12];
325
 
                                   T2S = W[13];
326
 
                                   T5p = FNMS(T2L, T2J, T5o);
327
 
                                   T2N = FMA(T2L, T2M, T2K);
328
 
                                   T4Z = T2P * T2T;
329
 
                                   T2R = T2P * T2Q;
330
 
                              }
331
 
                         }
332
 
                         {
333
 
                              E T5q, T7b, T2O, T4Y, T50, T2U;
334
 
                              T5q = T5n - T5p;
335
 
                              T7b = T5n + T5p;
336
 
                              T2O = T2H + T2N;
337
 
                              T4Y = T2H - T2N;
338
 
                              T50 = FNMS(T2S, T2Q, T4Z);
339
 
                              T2U = FMA(T2S, T2T, T2R);
340
 
                              {
341
 
                                   E T7c, T53, T31, T5l;
342
 
                                   T7c = T50 + T52;
343
 
                                   T53 = T50 - T52;
344
 
                                   T31 = T2U + T30;
345
 
                                   T5l = T30 - T2U;
346
 
                                   T54 = T4Y - T53;
347
 
                                   T6c = T4Y + T53;
348
 
                                   T7d = T7b - T7c;
349
 
                                   T7N = T7b + T7c;
350
 
                                   T32 = T2O + T31;
351
 
                                   T76 = T2O - T31;
352
 
                                   T6f = T5q + T5l;
353
 
                                   T5r = T5l - T5q;
354
 
                              }
355
 
                         }
356
 
                    }
357
 
                    {
358
 
                         E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y;
359
 
                         {
360
 
                              E T23, T26, T25, T4s, T24;
361
 
                              {
362
 
                                   E T1K, T1N, T1J, T1M, T4M, T1L, T22;
363
 
                                   T1K = cr[WS(rs, 1)];
364
 
                                   T1N = ci[WS(rs, 1)];
365
 
                                   T1J = W[0];
366
 
                                   T1M = W[1];
367
 
                                   T23 = cr[WS(rs, 25)];
368
 
                                   T26 = ci[WS(rs, 25)];
369
 
                                   T4M = T1J * T1N;
370
 
                                   T1L = T1J * T1K;
371
 
                                   T22 = W[48];
372
 
                                   T25 = W[49];
373
 
                                   T4N = FNMS(T1M, T1K, T4M);
374
 
                                   T1O = FMA(T1M, T1N, T1L);
375
 
                                   T4s = T22 * T26;
376
 
                                   T24 = T22 * T23;
377
 
                              }
378
 
                              {
379
 
                                   E T1Q, T1T, T1P, T1S, T4O, T1R, T1W;
380
 
                                   T1Q = cr[WS(rs, 17)];
381
 
                                   T1T = ci[WS(rs, 17)];
382
 
                                   T4t = FNMS(T25, T23, T4s);
383
 
                                   T27 = FMA(T25, T26, T24);
384
 
                                   T1P = W[32];
385
 
                                   T1S = W[33];
386
 
                                   T1X = cr[WS(rs, 9)];
387
 
                                   T20 = ci[WS(rs, 9)];
388
 
                                   T4O = T1P * T1T;
389
 
                                   T1R = T1P * T1Q;
390
 
                                   T1W = W[16];
391
 
                                   T1Z = W[17];
392
 
                                   T4P = FNMS(T1S, T1Q, T4O);
393
 
                                   T1U = FMA(T1S, T1T, T1R);
394
 
                                   T4q = T1W * T20;
395
 
                                   T1Y = T1W * T1X;
396
 
                              }
397
 
                         }
398
 
                         {
399
 
                              E T4Q, T70, T1V, T4p, T4r, T21;
400
 
                              T4Q = T4N - T4P;
401
 
                              T70 = T4N + T4P;
402
 
                              T1V = T1O + T1U;
403
 
                              T4p = T1O - T1U;
404
 
                              T4r = FNMS(T1Z, T1X, T4q);
405
 
                              T21 = FMA(T1Z, T20, T1Y);
406
 
                              {
407
 
                                   E T71, T4u, T4R, T28;
408
 
                                   T71 = T4r + T4t;
409
 
                                   T4u = T4r - T4t;
410
 
                                   T4R = T21 - T27;
411
 
                                   T28 = T21 + T27;
412
 
                                   T4v = T4p - T4u;
413
 
                                   T65 = T4p + T4u;
414
 
                                   T72 = T70 - T71;
415
 
                                   T7I = T70 + T71;
416
 
                                   T29 = T1V + T28;
417
 
                                   T6V = T1V - T28;
418
 
                                   T68 = T4Q - T4R;
419
 
                                   T4S = T4Q + T4R;
420
 
                              }
421
 
                         }
422
 
                    }
423
 
                    {
424
 
                         E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i;
425
 
                         {
426
 
                              E T3n, T3q, T3p, T5f, T3o;
427
 
                              {
428
 
                                   E T34, T37, T33, T36, T56, T35, T3m;
429
 
                                   T34 = cr[WS(rs, 3)];
430
 
                                   T37 = ci[WS(rs, 3)];
431
 
                                   T33 = W[4];
432
 
                                   T36 = W[5];
433
 
                                   T3n = cr[WS(rs, 11)];
434
 
                                   T3q = ci[WS(rs, 11)];
435
 
                                   T56 = T33 * T37;
436
 
                                   T35 = T33 * T34;
437
 
                                   T3m = W[20];
438
 
                                   T3p = W[21];
439
 
                                   T57 = FNMS(T36, T34, T56);
440
 
                                   T38 = FMA(T36, T37, T35);
441
 
                                   T5f = T3m * T3q;
442
 
                                   T3o = T3m * T3n;
443
 
                              }
444
 
                              {
445
 
                                   E T3a, T3d, T39, T3c, T58, T3b, T3g;
446
 
                                   T3a = cr[WS(rs, 19)];
447
 
                                   T3d = ci[WS(rs, 19)];
448
 
                                   T5g = FNMS(T3p, T3n, T5f);
449
 
                                   T3r = FMA(T3p, T3q, T3o);
450
 
                                   T39 = W[36];
451
 
                                   T3c = W[37];
452
 
                                   T3h = cr[WS(rs, 27)];
453
 
                                   T3k = ci[WS(rs, 27)];
454
 
                                   T58 = T39 * T3d;
455
 
                                   T3b = T39 * T3a;
456
 
                                   T3g = W[52];
457
 
                                   T3j = W[53];
458
 
                                   T59 = FNMS(T3c, T3a, T58);
459
 
                                   T3e = FMA(T3c, T3d, T3b);
460
 
                                   T5d = T3g * T3k;
461
 
                                   T3i = T3g * T3h;
462
 
                              }
463
 
                         }
464
 
                         {
465
 
                              E T5a, T78, T3f, T55, T5e, T3l, T77, T3s;
466
 
                              T5a = T57 - T59;
467
 
                              T78 = T57 + T59;
468
 
                              T3f = T38 + T3e;
469
 
                              T55 = T38 - T3e;
470
 
                              T5e = FNMS(T3j, T3h, T5d);
471
 
                              T3l = FMA(T3j, T3k, T3i);
472
 
                              T5h = T5e - T5g;
473
 
                              T77 = T5e + T5g;
474
 
                              T3s = T3l + T3r;
475
 
                              T5c = T3l - T3r;
476
 
                              T5t = T55 + T5a;
477
 
                              T5b = T55 - T5a;
478
 
                              T7O = T78 + T77;
479
 
                              T79 = T77 - T78;
480
 
                              T7e = T3s - T3f;
481
 
                              T3t = T3f + T3s;
482
 
                         }
483
 
                    }
484
 
                    {
485
 
                         E T4y, T2f, T2o, T2r, T4A, T2l, T2n, T2q, T4E, T2p;
486
 
                         {
487
 
                              E T2u, T2x, T2t, T2w;
488
 
                              {
489
 
                                   E T2b, T2e, T2d, T4x, T2c, T2a;
490
 
                                   T2b = cr[WS(rs, 5)];
491
 
                                   T2e = ci[WS(rs, 5)];
492
 
                                   T2a = W[8];
493
 
                                   T5s = T5c - T5h;
494
 
                                   T5i = T5c + T5h;
495
 
                                   T2d = W[9];
496
 
                                   T4x = T2a * T2e;
497
 
                                   T2c = T2a * T2b;
498
 
                                   T2u = cr[WS(rs, 13)];
499
 
                                   T2x = ci[WS(rs, 13)];
500
 
                                   T4y = FNMS(T2d, T2b, T4x);
501
 
                                   T2f = FMA(T2d, T2e, T2c);
502
 
                                   T2t = W[24];
503
 
                                   T2w = W[25];
504
 
                              }
505
 
                              {
506
 
                                   E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g;
507
 
                                   T2h = cr[WS(rs, 21)];
508
 
                                   T2k = ci[WS(rs, 21)];
509
 
                                   T4G = T2t * T2x;
510
 
                                   T2v = T2t * T2u;
511
 
                                   T2g = W[40];
512
 
                                   T2j = W[41];
513
 
                                   T4H = FNMS(T2w, T2u, T4G);
514
 
                                   T2y = FMA(T2w, T2x, T2v);
515
 
                                   T4z = T2g * T2k;
516
 
                                   T2i = T2g * T2h;
517
 
                                   T2o = cr[WS(rs, 29)];
518
 
                                   T2r = ci[WS(rs, 29)];
519
 
                                   T4A = FNMS(T2j, T2h, T4z);
520
 
                                   T2l = FMA(T2j, T2k, T2i);
521
 
                                   T2n = W[56];
522
 
                                   T2q = W[57];
523
 
                              }
524
 
                         }
525
 
                         T4B = T4y - T4A;
526
 
                         T6X = T4y + T4A;
527
 
                         T2m = T2f + T2l;
528
 
                         T4w = T2f - T2l;
529
 
                         T4E = T2n * T2r;
530
 
                         T2p = T2n * T2o;
531
 
                         T4F = FNMS(T2q, T2o, T4E);
532
 
                         T2s = FMA(T2q, T2r, T2p);
533
 
                    }
534
 
               }
535
 
               {
536
 
                    E T6E, T8j, T6Y, T73, T6H, T8k, T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T5O;
537
 
                    E T5R;
538
 
                    {
539
 
                         E T4T, T4C, T4J, T4U, T7S, T7V;
540
 
                         {
541
 
                              E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A;
542
 
                              E T7P, T7K, T7W, T1I;
543
 
                              {
544
 
                                   E T7X, T7Y, T7J, T82, T88;
545
 
                                   {
546
 
                                        E Tm, T4I, T6W, T4D, T2z, TN;
547
 
                                        T6E = T8 - Tl;
548
 
                                        Tm = T8 + Tl;
549
 
                                        T4T = T4w + T4B;
550
 
                                        T4C = T4w - T4B;
551
 
                                        T4I = T4F - T4H;
552
 
                                        T6W = T4F + T4H;
553
 
                                        T4D = T2s - T2y;
554
 
                                        T2z = T2s + T2y;
555
 
                                        TN = Tz + TM;
556
 
                                        T8j = Tz - TM;
557
 
                                        T6Y = T6W - T6X;
558
 
                                        T7J = T6X + T6W;
559
 
                                        T4J = T4D + T4I;
560
 
                                        T4U = T4I - T4D;
561
 
                                        T2A = T2m + T2z;
562
 
                                        T73 = T2m - T2z;
563
 
                                        T7C = Tm - TN;
564
 
                                        TO = Tm + TN;
565
 
                                   }
566
 
                                   T7P = T7N - T7O;
567
 
                                   T7X = T7N + T7O;
568
 
                                   T7Y = T7I + T7J;
569
 
                                   T7K = T7I - T7J;
570
 
                                   T6H = T6F - T6G;
571
 
                                   T82 = T6G + T6F;
572
 
                                   T88 = T83 + T87;
573
 
                                   T8k = T87 - T83;
574
 
                                   T80 = T7Y + T7X;
575
 
                                   T7Z = T7X - T7Y;
576
 
                                   T8e = T88 - T82;
577
 
                                   T89 = T82 + T88;
578
 
                              }
579
 
                              {
580
 
                                   E T7H, T7M, T2B, T3u;
581
 
                                   T7H = T29 - T2A;
582
 
                                   T2B = T29 + T2A;
583
 
                                   T3u = T32 + T3t;
584
 
                                   T7M = T32 - T3t;
585
 
                                   T8d = T1f - T1G;
586
 
                                   T1H = T1f + T1G;
587
 
                                   T8b = T3u - T2B;
588
 
                                   T3v = T2B + T3u;
589
 
                                   T7T = T7H - T7K;
590
 
                                   T7L = T7H + T7K;
591
 
                                   T7U = T7M + T7P;
592
 
                                   T7Q = T7M - T7P;
593
 
                              }
594
 
                              T7W = TO - T1H;
595
 
                              T1I = TO + T1H;
596
 
                              {
597
 
                                   E T8g, T8h, T8f, T8i;
598
 
                                   {
599
 
                                        E T7R, T8c, T8a, T7G, T81, T7F;
600
 
                                        T8g = T7Q - T7L;
601
 
                                        T7R = T7L + T7Q;
602
 
                                        T81 = T7E + T7D;
603
 
                                        T7F = T7D - T7E;
604
 
                                        cr[0] = T1I + T3v;
605
 
                                        ci[WS(rs, 15)] = T1I - T3v;
606
 
                                        ci[WS(rs, 7)] = T7W + T7Z;
607
 
                                        cr[WS(rs, 8)] = T7W - T7Z;
608
 
                                        T8c = T89 - T81;
609
 
                                        T8a = T81 + T89;
610
 
                                        T7G = T7C - T7F;
611
 
                                        T7S = T7C + T7F;
612
 
                                        T8h = T8e - T8d;
613
 
                                        T8f = T8d + T8e;
614
 
                                        ci[WS(rs, 23)] = T8b + T8c;
615
 
                                        cr[WS(rs, 24)] = T8b - T8c;
616
 
                                        ci[WS(rs, 31)] = T80 + T8a;
617
 
                                        cr[WS(rs, 16)] = T80 - T8a;
618
 
                                        cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
619
 
                                        ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
620
 
                                   }
621
 
                                   T8i = T7U - T7T;
622
 
                                   T7V = T7T + T7U;
623
 
                                   ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f);
624
 
                                   cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f);
625
 
                                   ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h);
626
 
                                   cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h);
627
 
                              }
628
 
                         }
629
 
                         {
630
 
                              E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T4L, T5H, T5M, T5Q, T5A, T5w, T4V;
631
 
                              {
632
 
                                   E T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u;
633
 
                                   T5S = T3w + T3B;
634
 
                                   T3C = T3w - T3B;
635
 
                                   T3R = T3J + T3Q;
636
 
                                   T8O = T3Q - T3J;
637
 
                                   T5D = FNMS(KP414213562, T3Z, T46);
638
 
                                   T47 = FMA(KP414213562, T46, T3Z);
639
 
                                   ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S);
640
 
                                   cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S);
641
 
                                   T5C = FMA(KP707106781, T3R, T3C);
642
 
                                   T3S = FNMS(KP707106781, T3R, T3C);
643
 
                                   T4m = FNMS(KP414213562, T4l, T4e);
644
 
                                   T5E = FMA(KP414213562, T4e, T4l);
645
 
                                   T8N = T8y - T8x;
646
 
                                   T8z = T8x + T8y;
647
 
                                   T8A = T5T - T5U;
648
 
                                   T5V = T5T + T5U;
649
 
                                   T8C = T47 + T4m;
650
 
                                   T4n = T47 - T4m;
651
 
                                   T8H = FNMS(KP707106781, T8A, T8z);
652
 
                                   T8B = FMA(KP707106781, T8A, T8z);
653
 
                                   T6g = T5i - T5b;
654
 
                                   T5j = T5b + T5i;
655
 
                                   T5u = T5s - T5t;
656
 
                                   T6d = T5t + T5s;
657
 
                                   {
658
 
                                        E T5K, T5k, T5L, T5v, T4K;
659
 
                                        T69 = T4J - T4C;
660
 
                                        T4K = T4C + T4J;
661
 
                                        T8I = T5E - T5D;
662
 
                                        T5F = T5D + T5E;
663
 
                                        T5K = FMA(KP707106781, T5j, T54);
664
 
                                        T5k = FNMS(KP707106781, T5j, T54);
665
 
                                        T5L = FMA(KP707106781, T5u, T5r);
666
 
                                        T5v = FNMS(KP707106781, T5u, T5r);
667
 
                                        T4L = FNMS(KP707106781, T4K, T4v);
668
 
                                        T5H = FMA(KP707106781, T4K, T4v);
669
 
                                        T5M = FNMS(KP198912367, T5L, T5K);
670
 
                                        T5Q = FMA(KP198912367, T5K, T5L);
671
 
                                        T5A = FNMS(KP668178637, T5k, T5v);
672
 
                                        T5w = FMA(KP668178637, T5v, T5k);
673
 
                                        T4V = T4T + T4U;
674
 
                                        T66 = T4T - T4U;
675
 
                                   }
676
 
                              }
677
 
                              {
678
 
                                   E T5y, T4o, T8J, T8L, T5I, T4W;
679
 
                                   T5y = FNMS(KP923879532, T4n, T3S);
680
 
                                   T4o = FMA(KP923879532, T4n, T3S);
681
 
                                   T8J = FMA(KP923879532, T8I, T8H);
682
 
                                   T8L = FNMS(KP923879532, T8I, T8H);
683
 
                                   T5I = FMA(KP707106781, T4V, T4S);
684
 
                                   T4W = FNMS(KP707106781, T4V, T4S);
685
 
                                   {
686
 
                                        E T8G, T8F, T8D, T8E;
687
 
                                        {
688
 
                                             E T5G, T5P, T5z, T4X, T5N, T5J;
689
 
                                             T5O = FNMS(KP923879532, T5F, T5C);
690
 
                                             T5G = FMA(KP923879532, T5F, T5C);
691
 
                                             T5J = FNMS(KP198912367, T5I, T5H);
692
 
                                             T5P = FMA(KP198912367, T5H, T5I);
693
 
                                             T5z = FNMS(KP668178637, T4L, T4W);
694
 
                                             T4X = FMA(KP668178637, T4W, T4L);
695
 
                                             T5N = T5J + T5M;
696
 
                                             T8G = T5M - T5J;
697
 
                                             T8F = FNMS(KP923879532, T8C, T8B);
698
 
                                             T8D = FMA(KP923879532, T8C, T8B);
699
 
                                             {
700
 
                                                  E T5B, T8K, T8M, T5x;
701
 
                                                  T5B = T5z + T5A;
702
 
                                                  T8K = T5z - T5A;
703
 
                                                  T8M = T5w - T4X;
704
 
                                                  T5x = T4X + T5w;
705
 
                                                  ci[0] = FMA(KP980785280, T5N, T5G);
706
 
                                                  cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G);
707
 
                                                  ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y);
708
 
                                                  cr[WS(rs, 11)] = FMA(KP831469612, T5B, T5y);
709
 
                                                  ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J);
710
 
                                                  cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J);
711
 
                                                  ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L);
712
 
                                                  cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L);
713
 
                                                  cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o);
714
 
                                                  ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o);
715
 
                                                  T8E = T5Q - T5P;
716
 
                                                  T5R = T5P + T5Q;
717
 
                                             }
718
 
                                        }
719
 
                                        ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D);
720
 
                                        cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D);
721
 
                                        ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F);
722
 
                                        cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F);
723
 
                                   }
724
 
                              }
725
 
                         }
726
 
                    }
727
 
                    {
728
 
                         E T7y, T8q, T8p, T7B;
729
 
                         {
730
 
                              E T7a, T7m, T6I, T7f, T7A, T7w, T8r, T8l, T8m, T6T, T7k, T75, T8s, T7p, T7z;
731
 
                              E T7t;
732
 
                              {
733
 
                                   E T7n, T6N, T6S, T7o, T7u, T7v;
734
 
                                   T7a = T76 - T79;
735
 
                                   T7u = T76 + T79;
736
 
                                   cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O);
737
 
                                   ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O);
738
 
                                   T7m = T6E + T6H;
739
 
                                   T6I = T6E - T6H;
740
 
                                   T7v = T7e - T7d;
741
 
                                   T7f = T7d + T7e;
742
 
                                   T7n = T6J - T6M;
743
 
                                   T6N = T6J + T6M;
744
 
                                   T7A = FMA(KP414213562, T7u, T7v);
745
 
                                   T7w = FNMS(KP414213562, T7v, T7u);
746
 
                                   T8r = T8k - T8j;
747
 
                                   T8l = T8j + T8k;
748
 
                                   T6S = T6O - T6R;
749
 
                                   T7o = T6O + T6R;
750
 
                                   {
751
 
                                        E T7r, T7s, T6Z, T74;
752
 
                                        T7r = T6V + T6Y;
753
 
                                        T6Z = T6V - T6Y;
754
 
                                        T74 = T72 - T73;
755
 
                                        T7s = T72 + T73;
756
 
                                        T8m = T6N - T6S;
757
 
                                        T6T = T6N + T6S;
758
 
                                        T7k = FNMS(KP414213562, T6Z, T74);
759
 
                                        T75 = FMA(KP414213562, T74, T6Z);
760
 
                                        T8s = T7o - T7n;
761
 
                                        T7p = T7n + T7o;
762
 
                                        T7z = FMA(KP414213562, T7r, T7s);
763
 
                                        T7t = FNMS(KP414213562, T7s, T7r);
764
 
                                   }
765
 
                              }
766
 
                              {
767
 
                                   E T7i, T6U, T8t, T8v, T7j, T7g;
768
 
                                   T7i = FNMS(KP707106781, T6T, T6I);
769
 
                                   T6U = FMA(KP707106781, T6T, T6I);
770
 
                                   T8t = FMA(KP707106781, T8s, T8r);
771
 
                                   T8v = FNMS(KP707106781, T8s, T8r);
772
 
                                   T7j = FMA(KP414213562, T7a, T7f);
773
 
                                   T7g = FNMS(KP414213562, T7f, T7a);
774
 
                                   {
775
 
                                        E T7q, T7x, T8n, T8o;
776
 
                                        T7y = FNMS(KP707106781, T7p, T7m);
777
 
                                        T7q = FMA(KP707106781, T7p, T7m);
778
 
                                        {
779
 
                                             E T7l, T8u, T8w, T7h;
780
 
                                             T7l = T7j - T7k;
781
 
                                             T8u = T7k + T7j;
782
 
                                             T8w = T7g - T75;
783
 
                                             T7h = T75 + T7g;
784
 
                                             ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i);
785
 
                                             cr[WS(rs, 10)] = FNMS(KP923879532, T7l, T7i);
786
 
                                             ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t);
787
 
                                             cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t);
788
 
                                             ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v);
789
 
                                             cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v);
790
 
                                             cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U);
791
 
                                             ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U);
792
 
                                             T7x = T7t + T7w;
793
 
                                             T8q = T7w - T7t;
794
 
                                        }
795
 
                                        T8p = FNMS(KP707106781, T8m, T8l);
796
 
                                        T8n = FMA(KP707106781, T8m, T8l);
797
 
                                        T8o = T7A - T7z;
798
 
                                        T7B = T7z + T7A;
799
 
                                        ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q);
800
 
                                        cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q);
801
 
                                        ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n);
802
 
                                        cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n);
803
 
                                   }
804
 
                              }
805
 
                         }
806
 
                         {
807
 
                              E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T6e, T6w;
808
 
                              {
809
 
                                   E T6q, T6p, T5Z, T62;
810
 
                                   ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p);
811
 
                                   cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p);
812
 
                                   cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y);
813
 
                                   ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y);
814
 
                                   T6q = FNMS(KP414213562, T5X, T5Y);
815
 
                                   T5Z = FMA(KP414213562, T5Y, T5X);
816
 
                                   T62 = FNMS(KP414213562, T61, T60);
817
 
                                   T6p = FMA(KP414213562, T60, T61);
818
 
                                   T6o = FNMS(KP707106781, T5V, T5S);
819
 
                                   T5W = FMA(KP707106781, T5V, T5S);
820
 
                                   T8W = T5Z - T62;
821
 
                                   T63 = T5Z + T62;
822
 
                                   T8V = FNMS(KP707106781, T8O, T8N);
823
 
                                   T8P = FMA(KP707106781, T8O, T8N);
824
 
                                   T8Q = T6q + T6p;
825
 
                                   T6r = T6p - T6q;
826
 
                                   T6e = FMA(KP707106781, T6d, T6c);
827
 
                                   T6w = FNMS(KP707106781, T6d, T6c);
828
 
                              }
829
 
                              {
830
 
                                   E T6k, T8U, T6z, T6n, T8S, T8T, T8R, T6s;
831
 
                                   {
832
 
                                        E T64, T6y, T6l, T6i, T6v, T6m, T6b, T8X, T8Z, T8Y, T6j, T90;
833
 
                                        {
834
 
                                             E T6C, T6B, T6x, T6h;
835
 
                                             T6k = FNMS(KP923879532, T63, T5W);
836
 
                                             T64 = FMA(KP923879532, T63, T5W);
837
 
                                             T6x = FNMS(KP707106781, T6g, T6f);
838
 
                                             T6h = FMA(KP707106781, T6g, T6f);
839
 
                                             {
840
 
                                                  E T6t, T67, T6u, T6a;
841
 
                                                  T6t = FNMS(KP707106781, T66, T65);
842
 
                                                  T67 = FMA(KP707106781, T66, T65);
843
 
                                                  T6u = FNMS(KP707106781, T69, T68);
844
 
                                                  T6a = FMA(KP707106781, T69, T68);
845
 
                                                  T6y = FMA(KP668178637, T6x, T6w);
846
 
                                                  T6C = FNMS(KP668178637, T6w, T6x);
847
 
                                                  T6l = FMA(KP198912367, T6e, T6h);
848
 
                                                  T6i = FNMS(KP198912367, T6h, T6e);
849
 
                                                  T6v = FNMS(KP668178637, T6u, T6t);
850
 
                                                  T6B = FMA(KP668178637, T6t, T6u);
851
 
                                                  T6m = FNMS(KP198912367, T67, T6a);
852
 
                                                  T6b = FMA(KP198912367, T6a, T67);
853
 
                                             }
854
 
                                             T8X = FMA(KP923879532, T8W, T8V);
855
 
                                             T8Z = FNMS(KP923879532, T8W, T8V);
856
 
                                             T6D = T6B - T6C;
857
 
                                             T8Y = T6B + T6C;
858
 
                                        }
859
 
                                        T8U = T6i - T6b;
860
 
                                        T6j = T6b + T6i;
861
 
                                        T90 = T6y - T6v;
862
 
                                        T6z = T6v + T6y;
863
 
                                        ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X);
864
 
                                        cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X));
865
 
                                        cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64);
866
 
                                        ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64);
867
 
                                        cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z);
868
 
                                        ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z);
869
 
                                        T6n = T6l - T6m;
870
 
                                        T8S = T6m + T6l;
871
 
                                   }
872
 
                                   T6A = FNMS(KP923879532, T6r, T6o);
873
 
                                   T6s = FMA(KP923879532, T6r, T6o);
874
 
                                   T8T = FNMS(KP923879532, T8Q, T8P);
875
 
                                   T8R = FMA(KP923879532, T8Q, T8P);
876
 
                                   ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k);
877
 
                                   cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k);
878
 
                                   ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s);
879
 
                                   cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s);
880
 
                                   ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R);
881
 
                                   cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R);
882
 
                                   ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T);
883
 
                                   cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T);
884
 
                              }
885
 
                         }
886
 
                    }
887
 
               }
 
46
     {
 
47
          INT m;
 
48
          for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
 
49
               E T6D, T6A;
 
50
               {
 
51
                    E T8y, T87, T8, T3w, T83, T3B, T8x, Tl, T6G, Tz, T3J, T5T, T6F, TM, T3Q;
 
52
                    E T5U, T46, T5X, T7E, T6M, T5Y, T3Z, T6J, T1f, T7D, T6R, T61, T4e, T6O, T1G;
 
53
                    E T60, T4l, T54, T6c, T7d, T7N, T32, T76, T6f, T5r, T4v, T65, T72, T7I, T29;
 
54
                    E T6V, T68, T4S, T5t, T5b, T7O, T79, T7e, T3t, T5s, T5i, T4H, T2y, T4B, T6X;
 
55
                    E T2m, T4w, T4F, T2s;
 
56
                    {
 
57
                         E T44, T1d, T3X, T6K, T11, T40, T42, T17, T5h, T5c;
 
58
                         {
 
59
                              E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti;
 
60
                              {
 
61
                                   E T1, T86, T3, T6, T2, T5;
 
62
                                   T1 = cr[0];
 
63
                                   T86 = ci[0];
 
64
                                   T3 = cr[WS(rs, 16)];
 
65
                                   T6 = ci[WS(rs, 16)];
 
66
                                   T2 = W[30];
 
67
                                   T5 = W[31];
 
68
                                   {
 
69
                                        E T84, T4, T9, T85, T7;
 
70
                                        Ta = cr[WS(rs, 8)];
 
71
                                        Td = ci[WS(rs, 8)];
 
72
                                        T84 = T2 * T6;
 
73
                                        T4 = T2 * T3;
 
74
                                        T9 = W[14];
 
75
                                        Tg = cr[WS(rs, 24)];
 
76
                                        T85 = FNMS(T5, T3, T84);
 
77
                                        T7 = FMA(T5, T6, T4);
 
78
                                        T3x = T9 * Td;
 
79
                                        Tb = T9 * Ta;
 
80
                                        T8y = T86 - T85;
 
81
                                        T87 = T85 + T86;
 
82
                                        T8 = T1 + T7;
 
83
                                        T3w = T1 - T7;
 
84
                                        Tj = ci[WS(rs, 24)];
 
85
                                        Tf = W[46];
 
86
                                   }
 
87
                                   Tc = W[15];
 
88
                                   Ti = W[47];
 
89
                              }
 
90
                              {
 
91
                                   E Tu, Tx, T3F, Ts, Tw, T3G, Tv;
 
92
                                   {
 
93
                                        E To, Tr, Tp, T3E, Tq, Tt;
 
94
                                        {
 
95
                                             E T3y, Te, T3A, Tk, T3z, Th, Tn;
 
96
                                             To = cr[WS(rs, 4)];
 
97
                                             T3z = Tf * Tj;
 
98
                                             Th = Tf * Tg;
 
99
                                             T3y = FNMS(Tc, Ta, T3x);
 
100
                                             Te = FMA(Tc, Td, Tb);
 
101
                                             T3A = FNMS(Ti, Tg, T3z);
 
102
                                             Tk = FMA(Ti, Tj, Th);
 
103
                                             Tr = ci[WS(rs, 4)];
 
104
                                             Tn = W[6];
 
105
                                             T83 = T3y + T3A;
 
106
                                             T3B = T3y - T3A;
 
107
                                             T8x = Te - Tk;
 
108
                                             Tl = Te + Tk;
 
109
                                             Tp = Tn * To;
 
110
                                             T3E = Tn * Tr;
 
111
                                        }
 
112
                                        Tq = W[7];
 
113
                                        Tu = cr[WS(rs, 20)];
 
114
                                        Tx = ci[WS(rs, 20)];
 
115
                                        Tt = W[38];
 
116
                                        T3F = FNMS(Tq, To, T3E);
 
117
                                        Ts = FMA(Tq, Tr, Tp);
 
118
                                        Tw = W[39];
 
119
                                        T3G = Tt * Tx;
 
120
                                        Tv = Tt * Tu;
 
121
                                   }
 
122
                                   {
 
123
                                        E T3M, TF, TH, TK, TG, TJ, TE, TD, TC;
 
124
                                        {
 
125
                                             E TB, T3H, Ty, TA, T3I, T3D, T3L;
 
126
                                             TB = cr[WS(rs, 28)];
 
127
                                             TE = ci[WS(rs, 28)];
 
128
                                             T3H = FNMS(Tw, Tu, T3G);
 
129
                                             Ty = FMA(Tw, Tx, Tv);
 
130
                                             TA = W[54];
 
131
                                             TD = W[55];
 
132
                                             T6G = T3F + T3H;
 
133
                                             T3I = T3F - T3H;
 
134
                                             Tz = Ts + Ty;
 
135
                                             T3D = Ts - Ty;
 
136
                                             T3L = TA * TE;
 
137
                                             TC = TA * TB;
 
138
                                             T3J = T3D - T3I;
 
139
                                             T5T = T3D + T3I;
 
140
                                             T3M = FNMS(TD, TB, T3L);
 
141
                                        }
 
142
                                        TF = FMA(TD, TE, TC);
 
143
                                        TH = cr[WS(rs, 12)];
 
144
                                        TK = ci[WS(rs, 12)];
 
145
                                        TG = W[22];
 
146
                                        TJ = W[23];
 
147
                                        {
 
148
                                             E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14;
 
149
                                             {
 
150
                                                  E T19, T1c, T18, T1b, T3P, T3K;
 
151
                                                  {
 
152
                                                       E TQ, TT, T3N, TI, TP, TS;
 
153
                                                       TQ = cr[WS(rs, 2)];
 
154
                                                       TT = ci[WS(rs, 2)];
 
155
                                                       T3N = TG * TK;
 
156
                                                       TI = TG * TH;
 
157
                                                       TP = W[2];
 
158
                                                       TS = W[3];
 
159
                                                       {
 
160
                                                            E T3O, TL, T3T, TR;
 
161
                                                            T3O = FNMS(TJ, TH, T3N);
 
162
                                                            TL = FMA(TJ, TK, TI);
 
163
                                                            T3T = TP * TT;
 
164
                                                            TR = TP * TQ;
 
165
                                                            T6F = T3M + T3O;
 
166
                                                            T3P = T3M - T3O;
 
167
                                                            TM = TF + TL;
 
168
                                                            T3K = TF - TL;
 
169
                                                            TU = FMA(TS, TT, TR);
 
170
                                                            T3U = FNMS(TS, TQ, T3T);
 
171
                                                       }
 
172
                                                  }
 
173
                                                  T3Q = T3K + T3P;
 
174
                                                  T5U = T3K - T3P;
 
175
                                                  T19 = cr[WS(rs, 26)];
 
176
                                                  T1c = ci[WS(rs, 26)];
 
177
                                                  T18 = W[50];
 
178
                                                  T1b = W[51];
 
179
                                                  {
 
180
                                                       E TW, TZ, TY, T3V, TX, T43, T1a, TV;
 
181
                                                       TW = cr[WS(rs, 18)];
 
182
                                                       TZ = ci[WS(rs, 18)];
 
183
                                                       T43 = T18 * T1c;
 
184
                                                       T1a = T18 * T19;
 
185
                                                       TV = W[34];
 
186
                                                       TY = W[35];
 
187
                                                       T44 = FNMS(T1b, T19, T43);
 
188
                                                       T1d = FMA(T1b, T1c, T1a);
 
189
                                                       T3V = TV * TZ;
 
190
                                                       TX = TV * TW;
 
191
                                                       T13 = cr[WS(rs, 10)];
 
192
                                                       T16 = ci[WS(rs, 10)];
 
193
                                                       T3W = FNMS(TY, TW, T3V);
 
194
                                                       T10 = FMA(TY, TZ, TX);
 
195
                                                       T12 = W[18];
 
196
                                                       T15 = W[19];
 
197
                                                  }
 
198
                                             }
 
199
                                             T3X = T3U - T3W;
 
200
                                             T6K = T3U + T3W;
 
201
                                             T11 = TU + T10;
 
202
                                             T40 = TU - T10;
 
203
                                             T41 = T12 * T16;
 
204
                                             T14 = T12 * T13;
 
205
                                             T42 = FNMS(T15, T13, T41);
 
206
                                             T17 = FMA(T15, T16, T14);
 
207
                                        }
 
208
                                   }
 
209
                              }
 
210
                         }
 
211
                         {
 
212
                              E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v;
 
213
                              {
 
214
                                   E T1A, T1D, T1C, T4i, T1B;
 
215
                                   {
 
216
                                        E T1h, T1k, T1g, T1j, T48, T1i, T1z;
 
217
                                        T1h = cr[WS(rs, 30)];
 
218
                                        T1k = ci[WS(rs, 30)];
 
219
                                        {
 
220
                                             E T6L, T45, T1e, T3Y;
 
221
                                             T6L = T42 + T44;
 
222
                                             T45 = T42 - T44;
 
223
                                             T1e = T17 + T1d;
 
224
                                             T3Y = T17 - T1d;
 
225
                                             T46 = T40 - T45;
 
226
                                             T5X = T40 + T45;
 
227
                                             T7E = T6K + T6L;
 
228
                                             T6M = T6K - T6L;
 
229
                                             T5Y = T3X - T3Y;
 
230
                                             T3Z = T3X + T3Y;
 
231
                                             T6J = T11 - T1e;
 
232
                                             T1f = T11 + T1e;
 
233
                                             T1g = W[58];
 
234
                                        }
 
235
                                        T1j = W[59];
 
236
                                        T1A = cr[WS(rs, 22)];
 
237
                                        T1D = ci[WS(rs, 22)];
 
238
                                        T48 = T1g * T1k;
 
239
                                        T1i = T1g * T1h;
 
240
                                        T1z = W[42];
 
241
                                        T1C = W[43];
 
242
                                        T49 = FNMS(T1j, T1h, T48);
 
243
                                        T1l = FMA(T1j, T1k, T1i);
 
244
                                        T4i = T1z * T1D;
 
245
                                        T1B = T1z * T1A;
 
246
                                   }
 
247
                                   {
 
248
                                        E T1n, T1q, T1m, T1p, T4a, T1o, T1t;
 
249
                                        T1n = cr[WS(rs, 14)];
 
250
                                        T1q = ci[WS(rs, 14)];
 
251
                                        T4j = FNMS(T1C, T1A, T4i);
 
252
                                        T1E = FMA(T1C, T1D, T1B);
 
253
                                        T1m = W[26];
 
254
                                        T1p = W[27];
 
255
                                        T1u = cr[WS(rs, 6)];
 
256
                                        T1x = ci[WS(rs, 6)];
 
257
                                        T4a = T1m * T1q;
 
258
                                        T1o = T1m * T1n;
 
259
                                        T1t = W[10];
 
260
                                        T1w = W[11];
 
261
                                        T4b = FNMS(T1p, T1n, T4a);
 
262
                                        T1r = FMA(T1p, T1q, T1o);
 
263
                                        T4g = T1t * T1x;
 
264
                                        T1v = T1t * T1u;
 
265
                                   }
 
266
                              }
 
267
                              {
 
268
                                   E T4c, T6P, T1s, T4f, T4h, T1y;
 
269
                                   T4c = T49 - T4b;
 
270
                                   T6P = T49 + T4b;
 
271
                                   T1s = T1l + T1r;
 
272
                                   T4f = T1l - T1r;
 
273
                                   T4h = FNMS(T1w, T1u, T4g);
 
274
                                   T1y = FMA(T1w, T1x, T1v);
 
275
                                   {
 
276
                                        E T4k, T6Q, T4d, T1F;
 
277
                                        T4k = T4h - T4j;
 
278
                                        T6Q = T4h + T4j;
 
279
                                        T4d = T1y - T1E;
 
280
                                        T1F = T1y + T1E;
 
281
                                        T7D = T6P + T6Q;
 
282
                                        T6R = T6P - T6Q;
 
283
                                        T61 = T4c - T4d;
 
284
                                        T4e = T4c + T4d;
 
285
                                        T6O = T1s - T1F;
 
286
                                        T1G = T1s + T1F;
 
287
                                        T60 = T4f + T4k;
 
288
                                        T4l = T4f - T4k;
 
289
                                   }
 
290
                              }
 
291
                         }
 
292
                         {
 
293
                              E T5n, T2H, T52, T30, T2Q, T2T, T2S, T5p, T2N, T4Z, T2R;
 
294
                              {
 
295
                                   E T2W, T2Z, T2Y, T51, T2X;
 
296
                                   {
 
297
                                        E T2D, T2G, T2C, T2F, T5m, T2E, T2V;
 
298
                                        T2D = cr[WS(rs, 31)];
 
299
                                        T2G = ci[WS(rs, 31)];
 
300
                                        T2C = W[60];
 
301
                                        T2F = W[61];
 
302
                                        T2W = cr[WS(rs, 23)];
 
303
                                        T2Z = ci[WS(rs, 23)];
 
304
                                        T5m = T2C * T2G;
 
305
                                        T2E = T2C * T2D;
 
306
                                        T2V = W[44];
 
307
                                        T2Y = W[45];
 
308
                                        T5n = FNMS(T2F, T2D, T5m);
 
309
                                        T2H = FMA(T2F, T2G, T2E);
 
310
                                        T51 = T2V * T2Z;
 
311
                                        T2X = T2V * T2W;
 
312
                                   }
 
313
                                   {
 
314
                                        E T2J, T2M, T2I, T2L, T5o, T2K, T2P;
 
315
                                        T2J = cr[WS(rs, 15)];
 
316
                                        T2M = ci[WS(rs, 15)];
 
317
                                        T52 = FNMS(T2Y, T2W, T51);
 
318
                                        T30 = FMA(T2Y, T2Z, T2X);
 
319
                                        T2I = W[28];
 
320
                                        T2L = W[29];
 
321
                                        T2Q = cr[WS(rs, 7)];
 
322
                                        T2T = ci[WS(rs, 7)];
 
323
                                        T5o = T2I * T2M;
 
324
                                        T2K = T2I * T2J;
 
325
                                        T2P = W[12];
 
326
                                        T2S = W[13];
 
327
                                        T5p = FNMS(T2L, T2J, T5o);
 
328
                                        T2N = FMA(T2L, T2M, T2K);
 
329
                                        T4Z = T2P * T2T;
 
330
                                        T2R = T2P * T2Q;
 
331
                                   }
 
332
                              }
 
333
                              {
 
334
                                   E T5q, T7b, T2O, T4Y, T50, T2U;
 
335
                                   T5q = T5n - T5p;
 
336
                                   T7b = T5n + T5p;
 
337
                                   T2O = T2H + T2N;
 
338
                                   T4Y = T2H - T2N;
 
339
                                   T50 = FNMS(T2S, T2Q, T4Z);
 
340
                                   T2U = FMA(T2S, T2T, T2R);
 
341
                                   {
 
342
                                        E T7c, T53, T31, T5l;
 
343
                                        T7c = T50 + T52;
 
344
                                        T53 = T50 - T52;
 
345
                                        T31 = T2U + T30;
 
346
                                        T5l = T30 - T2U;
 
347
                                        T54 = T4Y - T53;
 
348
                                        T6c = T4Y + T53;
 
349
                                        T7d = T7b - T7c;
 
350
                                        T7N = T7b + T7c;
 
351
                                        T32 = T2O + T31;
 
352
                                        T76 = T2O - T31;
 
353
                                        T6f = T5q + T5l;
 
354
                                        T5r = T5l - T5q;
 
355
                                   }
 
356
                              }
 
357
                         }
 
358
                         {
 
359
                              E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y;
 
360
                              {
 
361
                                   E T23, T26, T25, T4s, T24;
 
362
                                   {
 
363
                                        E T1K, T1N, T1J, T1M, T4M, T1L, T22;
 
364
                                        T1K = cr[WS(rs, 1)];
 
365
                                        T1N = ci[WS(rs, 1)];
 
366
                                        T1J = W[0];
 
367
                                        T1M = W[1];
 
368
                                        T23 = cr[WS(rs, 25)];
 
369
                                        T26 = ci[WS(rs, 25)];
 
370
                                        T4M = T1J * T1N;
 
371
                                        T1L = T1J * T1K;
 
372
                                        T22 = W[48];
 
373
                                        T25 = W[49];
 
374
                                        T4N = FNMS(T1M, T1K, T4M);
 
375
                                        T1O = FMA(T1M, T1N, T1L);
 
376
                                        T4s = T22 * T26;
 
377
                                        T24 = T22 * T23;
 
378
                                   }
 
379
                                   {
 
380
                                        E T1Q, T1T, T1P, T1S, T4O, T1R, T1W;
 
381
                                        T1Q = cr[WS(rs, 17)];
 
382
                                        T1T = ci[WS(rs, 17)];
 
383
                                        T4t = FNMS(T25, T23, T4s);
 
384
                                        T27 = FMA(T25, T26, T24);
 
385
                                        T1P = W[32];
 
386
                                        T1S = W[33];
 
387
                                        T1X = cr[WS(rs, 9)];
 
388
                                        T20 = ci[WS(rs, 9)];
 
389
                                        T4O = T1P * T1T;
 
390
                                        T1R = T1P * T1Q;
 
391
                                        T1W = W[16];
 
392
                                        T1Z = W[17];
 
393
                                        T4P = FNMS(T1S, T1Q, T4O);
 
394
                                        T1U = FMA(T1S, T1T, T1R);
 
395
                                        T4q = T1W * T20;
 
396
                                        T1Y = T1W * T1X;
 
397
                                   }
 
398
                              }
 
399
                              {
 
400
                                   E T4Q, T70, T1V, T4p, T4r, T21;
 
401
                                   T4Q = T4N - T4P;
 
402
                                   T70 = T4N + T4P;
 
403
                                   T1V = T1O + T1U;
 
404
                                   T4p = T1O - T1U;
 
405
                                   T4r = FNMS(T1Z, T1X, T4q);
 
406
                                   T21 = FMA(T1Z, T20, T1Y);
 
407
                                   {
 
408
                                        E T71, T4u, T4R, T28;
 
409
                                        T71 = T4r + T4t;
 
410
                                        T4u = T4r - T4t;
 
411
                                        T4R = T21 - T27;
 
412
                                        T28 = T21 + T27;
 
413
                                        T4v = T4p - T4u;
 
414
                                        T65 = T4p + T4u;
 
415
                                        T72 = T70 - T71;
 
416
                                        T7I = T70 + T71;
 
417
                                        T29 = T1V + T28;
 
418
                                        T6V = T1V - T28;
 
419
                                        T68 = T4Q - T4R;
 
420
                                        T4S = T4Q + T4R;
 
421
                                   }
 
422
                              }
 
423
                         }
 
424
                         {
 
425
                              E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i;
 
426
                              {
 
427
                                   E T3n, T3q, T3p, T5f, T3o;
 
428
                                   {
 
429
                                        E T34, T37, T33, T36, T56, T35, T3m;
 
430
                                        T34 = cr[WS(rs, 3)];
 
431
                                        T37 = ci[WS(rs, 3)];
 
432
                                        T33 = W[4];
 
433
                                        T36 = W[5];
 
434
                                        T3n = cr[WS(rs, 11)];
 
435
                                        T3q = ci[WS(rs, 11)];
 
436
                                        T56 = T33 * T37;
 
437
                                        T35 = T33 * T34;
 
438
                                        T3m = W[20];
 
439
                                        T3p = W[21];
 
440
                                        T57 = FNMS(T36, T34, T56);
 
441
                                        T38 = FMA(T36, T37, T35);
 
442
                                        T5f = T3m * T3q;
 
443
                                        T3o = T3m * T3n;
 
444
                                   }
 
445
                                   {
 
446
                                        E T3a, T3d, T39, T3c, T58, T3b, T3g;
 
447
                                        T3a = cr[WS(rs, 19)];
 
448
                                        T3d = ci[WS(rs, 19)];
 
449
                                        T5g = FNMS(T3p, T3n, T5f);
 
450
                                        T3r = FMA(T3p, T3q, T3o);
 
451
                                        T39 = W[36];
 
452
                                        T3c = W[37];
 
453
                                        T3h = cr[WS(rs, 27)];
 
454
                                        T3k = ci[WS(rs, 27)];
 
455
                                        T58 = T39 * T3d;
 
456
                                        T3b = T39 * T3a;
 
457
                                        T3g = W[52];
 
458
                                        T3j = W[53];
 
459
                                        T59 = FNMS(T3c, T3a, T58);
 
460
                                        T3e = FMA(T3c, T3d, T3b);
 
461
                                        T5d = T3g * T3k;
 
462
                                        T3i = T3g * T3h;
 
463
                                   }
 
464
                              }
 
465
                              {
 
466
                                   E T5a, T78, T3f, T55, T5e, T3l, T77, T3s;
 
467
                                   T5a = T57 - T59;
 
468
                                   T78 = T57 + T59;
 
469
                                   T3f = T38 + T3e;
 
470
                                   T55 = T38 - T3e;
 
471
                                   T5e = FNMS(T3j, T3h, T5d);
 
472
                                   T3l = FMA(T3j, T3k, T3i);
 
473
                                   T5h = T5e - T5g;
 
474
                                   T77 = T5e + T5g;
 
475
                                   T3s = T3l + T3r;
 
476
                                   T5c = T3l - T3r;
 
477
                                   T5t = T55 + T5a;
 
478
                                   T5b = T55 - T5a;
 
479
                                   T7O = T78 + T77;
 
480
                                   T79 = T77 - T78;
 
481
                                   T7e = T3s - T3f;
 
482
                                   T3t = T3f + T3s;
 
483
                              }
 
484
                         }
 
485
                         {
 
486
                              E T4y, T2f, T2o, T2r, T4A, T2l, T2n, T2q, T4E, T2p;
 
487
                              {
 
488
                                   E T2u, T2x, T2t, T2w;
 
489
                                   {
 
490
                                        E T2b, T2e, T2d, T4x, T2c, T2a;
 
491
                                        T2b = cr[WS(rs, 5)];
 
492
                                        T2e = ci[WS(rs, 5)];
 
493
                                        T2a = W[8];
 
494
                                        T5s = T5c - T5h;
 
495
                                        T5i = T5c + T5h;
 
496
                                        T2d = W[9];
 
497
                                        T4x = T2a * T2e;
 
498
                                        T2c = T2a * T2b;
 
499
                                        T2u = cr[WS(rs, 13)];
 
500
                                        T2x = ci[WS(rs, 13)];
 
501
                                        T4y = FNMS(T2d, T2b, T4x);
 
502
                                        T2f = FMA(T2d, T2e, T2c);
 
503
                                        T2t = W[24];
 
504
                                        T2w = W[25];
 
505
                                   }
 
506
                                   {
 
507
                                        E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g;
 
508
                                        T2h = cr[WS(rs, 21)];
 
509
                                        T2k = ci[WS(rs, 21)];
 
510
                                        T4G = T2t * T2x;
 
511
                                        T2v = T2t * T2u;
 
512
                                        T2g = W[40];
 
513
                                        T2j = W[41];
 
514
                                        T4H = FNMS(T2w, T2u, T4G);
 
515
                                        T2y = FMA(T2w, T2x, T2v);
 
516
                                        T4z = T2g * T2k;
 
517
                                        T2i = T2g * T2h;
 
518
                                        T2o = cr[WS(rs, 29)];
 
519
                                        T2r = ci[WS(rs, 29)];
 
520
                                        T4A = FNMS(T2j, T2h, T4z);
 
521
                                        T2l = FMA(T2j, T2k, T2i);
 
522
                                        T2n = W[56];
 
523
                                        T2q = W[57];
 
524
                                   }
 
525
                              }
 
526
                              T4B = T4y - T4A;
 
527
                              T6X = T4y + T4A;
 
528
                              T2m = T2f + T2l;
 
529
                              T4w = T2f - T2l;
 
530
                              T4E = T2n * T2r;
 
531
                              T2p = T2n * T2o;
 
532
                              T4F = FNMS(T2q, T2o, T4E);
 
533
                              T2s = FMA(T2q, T2r, T2p);
 
534
                         }
 
535
                    }
 
536
                    {
 
537
                         E T6E, T8j, T6Y, T73, T6H, T8k, T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T5O;
 
538
                         E T5R;
 
539
                         {
 
540
                              E T4T, T4C, T4J, T4U, T7S, T7V;
 
541
                              {
 
542
                                   E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A;
 
543
                                   E T7P, T7K, T7W, T1I;
 
544
                                   {
 
545
                                        E T7X, T7Y, T7J, T82, T88;
 
546
                                        {
 
547
                                             E Tm, T4I, T6W, T4D, T2z, TN;
 
548
                                             T6E = T8 - Tl;
 
549
                                             Tm = T8 + Tl;
 
550
                                             T4T = T4w + T4B;
 
551
                                             T4C = T4w - T4B;
 
552
                                             T4I = T4F - T4H;
 
553
                                             T6W = T4F + T4H;
 
554
                                             T4D = T2s - T2y;
 
555
                                             T2z = T2s + T2y;
 
556
                                             TN = Tz + TM;
 
557
                                             T8j = Tz - TM;
 
558
                                             T6Y = T6W - T6X;
 
559
                                             T7J = T6X + T6W;
 
560
                                             T4J = T4D + T4I;
 
561
                                             T4U = T4I - T4D;
 
562
                                             T2A = T2m + T2z;
 
563
                                             T73 = T2m - T2z;
 
564
                                             T7C = Tm - TN;
 
565
                                             TO = Tm + TN;
 
566
                                        }
 
567
                                        T7P = T7N - T7O;
 
568
                                        T7X = T7N + T7O;
 
569
                                        T7Y = T7I + T7J;
 
570
                                        T7K = T7I - T7J;
 
571
                                        T6H = T6F - T6G;
 
572
                                        T82 = T6G + T6F;
 
573
                                        T88 = T83 + T87;
 
574
                                        T8k = T87 - T83;
 
575
                                        T80 = T7Y + T7X;
 
576
                                        T7Z = T7X - T7Y;
 
577
                                        T8e = T88 - T82;
 
578
                                        T89 = T82 + T88;
 
579
                                   }
 
580
                                   {
 
581
                                        E T7H, T7M, T2B, T3u;
 
582
                                        T7H = T29 - T2A;
 
583
                                        T2B = T29 + T2A;
 
584
                                        T3u = T32 + T3t;
 
585
                                        T7M = T32 - T3t;
 
586
                                        T8d = T1f - T1G;
 
587
                                        T1H = T1f + T1G;
 
588
                                        T8b = T3u - T2B;
 
589
                                        T3v = T2B + T3u;
 
590
                                        T7T = T7H - T7K;
 
591
                                        T7L = T7H + T7K;
 
592
                                        T7U = T7M + T7P;
 
593
                                        T7Q = T7M - T7P;
 
594
                                   }
 
595
                                   T7W = TO - T1H;
 
596
                                   T1I = TO + T1H;
 
597
                                   {
 
598
                                        E T8g, T8h, T8f, T8i;
 
599
                                        {
 
600
                                             E T7R, T8c, T8a, T7G, T81, T7F;
 
601
                                             T8g = T7Q - T7L;
 
602
                                             T7R = T7L + T7Q;
 
603
                                             T81 = T7E + T7D;
 
604
                                             T7F = T7D - T7E;
 
605
                                             cr[0] = T1I + T3v;
 
606
                                             ci[WS(rs, 15)] = T1I - T3v;
 
607
                                             ci[WS(rs, 7)] = T7W + T7Z;
 
608
                                             cr[WS(rs, 8)] = T7W - T7Z;
 
609
                                             T8c = T89 - T81;
 
610
                                             T8a = T81 + T89;
 
611
                                             T7G = T7C - T7F;
 
612
                                             T7S = T7C + T7F;
 
613
                                             T8h = T8e - T8d;
 
614
                                             T8f = T8d + T8e;
 
615
                                             ci[WS(rs, 23)] = T8b + T8c;
 
616
                                             cr[WS(rs, 24)] = T8b - T8c;
 
617
                                             ci[WS(rs, 31)] = T80 + T8a;
 
618
                                             cr[WS(rs, 16)] = T80 - T8a;
 
619
                                             cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
 
620
                                             ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
 
621
                                        }
 
622
                                        T8i = T7U - T7T;
 
623
                                        T7V = T7T + T7U;
 
624
                                        ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f);
 
625
                                        cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f);
 
626
                                        ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h);
 
627
                                        cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h);
 
628
                                   }
 
629
                              }
 
630
                              {
 
631
                                   E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T4L, T5H, T5M, T5Q, T5A, T5w, T4V;
 
632
                                   {
 
633
                                        E T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u;
 
634
                                        T5S = T3w + T3B;
 
635
                                        T3C = T3w - T3B;
 
636
                                        T3R = T3J + T3Q;
 
637
                                        T8O = T3Q - T3J;
 
638
                                        T5D = FNMS(KP414213562, T3Z, T46);
 
639
                                        T47 = FMA(KP414213562, T46, T3Z);
 
640
                                        ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S);
 
641
                                        cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S);
 
642
                                        T5C = FMA(KP707106781, T3R, T3C);
 
643
                                        T3S = FNMS(KP707106781, T3R, T3C);
 
644
                                        T4m = FNMS(KP414213562, T4l, T4e);
 
645
                                        T5E = FMA(KP414213562, T4e, T4l);
 
646
                                        T8N = T8y - T8x;
 
647
                                        T8z = T8x + T8y;
 
648
                                        T8A = T5T - T5U;
 
649
                                        T5V = T5T + T5U;
 
650
                                        T8C = T47 + T4m;
 
651
                                        T4n = T47 - T4m;
 
652
                                        T8H = FNMS(KP707106781, T8A, T8z);
 
653
                                        T8B = FMA(KP707106781, T8A, T8z);
 
654
                                        T6g = T5i - T5b;
 
655
                                        T5j = T5b + T5i;
 
656
                                        T5u = T5s - T5t;
 
657
                                        T6d = T5t + T5s;
 
658
                                        {
 
659
                                             E T5K, T5k, T5L, T5v, T4K;
 
660
                                             T69 = T4J - T4C;
 
661
                                             T4K = T4C + T4J;
 
662
                                             T8I = T5E - T5D;
 
663
                                             T5F = T5D + T5E;
 
664
                                             T5K = FMA(KP707106781, T5j, T54);
 
665
                                             T5k = FNMS(KP707106781, T5j, T54);
 
666
                                             T5L = FMA(KP707106781, T5u, T5r);
 
667
                                             T5v = FNMS(KP707106781, T5u, T5r);
 
668
                                             T4L = FNMS(KP707106781, T4K, T4v);
 
669
                                             T5H = FMA(KP707106781, T4K, T4v);
 
670
                                             T5M = FNMS(KP198912367, T5L, T5K);
 
671
                                             T5Q = FMA(KP198912367, T5K, T5L);
 
672
                                             T5A = FNMS(KP668178637, T5k, T5v);
 
673
                                             T5w = FMA(KP668178637, T5v, T5k);
 
674
                                             T4V = T4T + T4U;
 
675
                                             T66 = T4T - T4U;
 
676
                                        }
 
677
                                   }
 
678
                                   {
 
679
                                        E T5y, T4o, T8J, T8L, T5I, T4W;
 
680
                                        T5y = FNMS(KP923879532, T4n, T3S);
 
681
                                        T4o = FMA(KP923879532, T4n, T3S);
 
682
                                        T8J = FMA(KP923879532, T8I, T8H);
 
683
                                        T8L = FNMS(KP923879532, T8I, T8H);
 
684
                                        T5I = FMA(KP707106781, T4V, T4S);
 
685
                                        T4W = FNMS(KP707106781, T4V, T4S);
 
686
                                        {
 
687
                                             E T8G, T8F, T8D, T8E;
 
688
                                             {
 
689
                                                  E T5G, T5P, T5z, T4X, T5N, T5J;
 
690
                                                  T5O = FNMS(KP923879532, T5F, T5C);
 
691
                                                  T5G = FMA(KP923879532, T5F, T5C);
 
692
                                                  T5J = FNMS(KP198912367, T5I, T5H);
 
693
                                                  T5P = FMA(KP198912367, T5H, T5I);
 
694
                                                  T5z = FNMS(KP668178637, T4L, T4W);
 
695
                                                  T4X = FMA(KP668178637, T4W, T4L);
 
696
                                                  T5N = T5J + T5M;
 
697
                                                  T8G = T5M - T5J;
 
698
                                                  T8F = FNMS(KP923879532, T8C, T8B);
 
699
                                                  T8D = FMA(KP923879532, T8C, T8B);
 
700
                                                  {
 
701
                                                       E T5B, T8K, T8M, T5x;
 
702
                                                       T5B = T5z + T5A;
 
703
                                                       T8K = T5z - T5A;
 
704
                                                       T8M = T5w - T4X;
 
705
                                                       T5x = T4X + T5w;
 
706
                                                       ci[0] = FMA(KP980785280, T5N, T5G);
 
707
                                                       cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G);
 
708
                                                       ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y);
 
709
                                                       cr[WS(rs, 11)] = FMA(KP831469612, T5B, T5y);
 
710
                                                       ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J);
 
711
                                                       cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J);
 
712
                                                       ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L);
 
713
                                                       cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L);
 
714
                                                       cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o);
 
715
                                                       ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o);
 
716
                                                       T8E = T5Q - T5P;
 
717
                                                       T5R = T5P + T5Q;
 
718
                                                  }
 
719
                                             }
 
720
                                             ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D);
 
721
                                             cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D);
 
722
                                             ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F);
 
723
                                             cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F);
 
724
                                        }
 
725
                                   }
 
726
                              }
 
727
                         }
 
728
                         {
 
729
                              E T7y, T8q, T8p, T7B;
 
730
                              {
 
731
                                   E T7a, T7m, T6I, T7f, T7A, T7w, T8r, T8l, T8m, T6T, T7k, T75, T8s, T7p, T7z;
 
732
                                   E T7t;
 
733
                                   {
 
734
                                        E T7n, T6N, T6S, T7o, T7u, T7v;
 
735
                                        T7a = T76 - T79;
 
736
                                        T7u = T76 + T79;
 
737
                                        cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O);
 
738
                                        ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O);
 
739
                                        T7m = T6E + T6H;
 
740
                                        T6I = T6E - T6H;
 
741
                                        T7v = T7e - T7d;
 
742
                                        T7f = T7d + T7e;
 
743
                                        T7n = T6J - T6M;
 
744
                                        T6N = T6J + T6M;
 
745
                                        T7A = FMA(KP414213562, T7u, T7v);
 
746
                                        T7w = FNMS(KP414213562, T7v, T7u);
 
747
                                        T8r = T8k - T8j;
 
748
                                        T8l = T8j + T8k;
 
749
                                        T6S = T6O - T6R;
 
750
                                        T7o = T6O + T6R;
 
751
                                        {
 
752
                                             E T7r, T7s, T6Z, T74;
 
753
                                             T7r = T6V + T6Y;
 
754
                                             T6Z = T6V - T6Y;
 
755
                                             T74 = T72 - T73;
 
756
                                             T7s = T72 + T73;
 
757
                                             T8m = T6N - T6S;
 
758
                                             T6T = T6N + T6S;
 
759
                                             T7k = FNMS(KP414213562, T6Z, T74);
 
760
                                             T75 = FMA(KP414213562, T74, T6Z);
 
761
                                             T8s = T7o - T7n;
 
762
                                             T7p = T7n + T7o;
 
763
                                             T7z = FMA(KP414213562, T7r, T7s);
 
764
                                             T7t = FNMS(KP414213562, T7s, T7r);
 
765
                                        }
 
766
                                   }
 
767
                                   {
 
768
                                        E T7i, T6U, T8t, T8v, T7j, T7g;
 
769
                                        T7i = FNMS(KP707106781, T6T, T6I);
 
770
                                        T6U = FMA(KP707106781, T6T, T6I);
 
771
                                        T8t = FMA(KP707106781, T8s, T8r);
 
772
                                        T8v = FNMS(KP707106781, T8s, T8r);
 
773
                                        T7j = FMA(KP414213562, T7a, T7f);
 
774
                                        T7g = FNMS(KP414213562, T7f, T7a);
 
775
                                        {
 
776
                                             E T7q, T7x, T8n, T8o;
 
777
                                             T7y = FNMS(KP707106781, T7p, T7m);
 
778
                                             T7q = FMA(KP707106781, T7p, T7m);
 
779
                                             {
 
780
                                                  E T7l, T8u, T8w, T7h;
 
781
                                                  T7l = T7j - T7k;
 
782
                                                  T8u = T7k + T7j;
 
783
                                                  T8w = T7g - T75;
 
784
                                                  T7h = T75 + T7g;
 
785
                                                  ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i);
 
786
                                                  cr[WS(rs, 10)] = FNMS(KP923879532, T7l, T7i);
 
787
                                                  ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t);
 
788
                                                  cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t);
 
789
                                                  ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v);
 
790
                                                  cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v);
 
791
                                                  cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U);
 
792
                                                  ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U);
 
793
                                                  T7x = T7t + T7w;
 
794
                                                  T8q = T7w - T7t;
 
795
                                             }
 
796
                                             T8p = FNMS(KP707106781, T8m, T8l);
 
797
                                             T8n = FMA(KP707106781, T8m, T8l);
 
798
                                             T8o = T7A - T7z;
 
799
                                             T7B = T7z + T7A;
 
800
                                             ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q);
 
801
                                             cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q);
 
802
                                             ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n);
 
803
                                             cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n);
 
804
                                        }
 
805
                                   }
 
806
                              }
 
807
                              {
 
808
                                   E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T6e, T6w;
 
809
                                   {
 
810
                                        E T6q, T6p, T5Z, T62;
 
811
                                        ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p);
 
812
                                        cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p);
 
813
                                        cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y);
 
814
                                        ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y);
 
815
                                        T6q = FNMS(KP414213562, T5X, T5Y);
 
816
                                        T5Z = FMA(KP414213562, T5Y, T5X);
 
817
                                        T62 = FNMS(KP414213562, T61, T60);
 
818
                                        T6p = FMA(KP414213562, T60, T61);
 
819
                                        T6o = FNMS(KP707106781, T5V, T5S);
 
820
                                        T5W = FMA(KP707106781, T5V, T5S);
 
821
                                        T8W = T5Z - T62;
 
822
                                        T63 = T5Z + T62;
 
823
                                        T8V = FNMS(KP707106781, T8O, T8N);
 
824
                                        T8P = FMA(KP707106781, T8O, T8N);
 
825
                                        T8Q = T6q + T6p;
 
826
                                        T6r = T6p - T6q;
 
827
                                        T6e = FMA(KP707106781, T6d, T6c);
 
828
                                        T6w = FNMS(KP707106781, T6d, T6c);
 
829
                                   }
 
830
                                   {
 
831
                                        E T6k, T8U, T6z, T6n, T8S, T8T, T8R, T6s;
 
832
                                        {
 
833
                                             E T64, T6y, T6l, T6i, T6v, T6m, T6b, T8X, T8Z, T8Y, T6j, T90;
 
834
                                             {
 
835
                                                  E T6C, T6B, T6x, T6h;
 
836
                                                  T6k = FNMS(KP923879532, T63, T5W);
 
837
                                                  T64 = FMA(KP923879532, T63, T5W);
 
838
                                                  T6x = FNMS(KP707106781, T6g, T6f);
 
839
                                                  T6h = FMA(KP707106781, T6g, T6f);
 
840
                                                  {
 
841
                                                       E T6t, T67, T6u, T6a;
 
842
                                                       T6t = FNMS(KP707106781, T66, T65);
 
843
                                                       T67 = FMA(KP707106781, T66, T65);
 
844
                                                       T6u = FNMS(KP707106781, T69, T68);
 
845
                                                       T6a = FMA(KP707106781, T69, T68);
 
846
                                                       T6y = FMA(KP668178637, T6x, T6w);
 
847
                                                       T6C = FNMS(KP668178637, T6w, T6x);
 
848
                                                       T6l = FMA(KP198912367, T6e, T6h);
 
849
                                                       T6i = FNMS(KP198912367, T6h, T6e);
 
850
                                                       T6v = FNMS(KP668178637, T6u, T6t);
 
851
                                                       T6B = FMA(KP668178637, T6t, T6u);
 
852
                                                       T6m = FNMS(KP198912367, T67, T6a);
 
853
                                                       T6b = FMA(KP198912367, T6a, T67);
 
854
                                                  }
 
855
                                                  T8X = FMA(KP923879532, T8W, T8V);
 
856
                                                  T8Z = FNMS(KP923879532, T8W, T8V);
 
857
                                                  T6D = T6B - T6C;
 
858
                                                  T8Y = T6B + T6C;
 
859
                                             }
 
860
                                             T8U = T6i - T6b;
 
861
                                             T6j = T6b + T6i;
 
862
                                             T90 = T6y - T6v;
 
863
                                             T6z = T6v + T6y;
 
864
                                             ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X);
 
865
                                             cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X));
 
866
                                             cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64);
 
867
                                             ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64);
 
868
                                             cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z);
 
869
                                             ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z);
 
870
                                             T6n = T6l - T6m;
 
871
                                             T8S = T6m + T6l;
 
872
                                        }
 
873
                                        T6A = FNMS(KP923879532, T6r, T6o);
 
874
                                        T6s = FMA(KP923879532, T6r, T6o);
 
875
                                        T8T = FNMS(KP923879532, T8Q, T8P);
 
876
                                        T8R = FMA(KP923879532, T8Q, T8P);
 
877
                                        ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k);
 
878
                                        cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k);
 
879
                                        ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s);
 
880
                                        cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s);
 
881
                                        ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R);
 
882
                                        cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R);
 
883
                                        ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T);
 
884
                                        cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T);
 
885
                                   }
 
886
                              }
 
887
                         }
 
888
                    }
 
889
               }
 
890
               cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);
 
891
               ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);
888
892
          }
889
 
          cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);
890
 
          ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);
891
893
     }
892
894
}
893
895
 
903
905
}
904
906
#else                           /* HAVE_FMA */
905
907
 
906
 
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */
 
908
/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */
907
909
 
908
910
/*
909
911
 * This function contains 434 FP additions, 208 FP multiplications,
921
923
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
922
924
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
923
925
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
924
 
     INT m;
925
 
     for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
926
 
          E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41;
927
 
          E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;
928
 
          E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;
929
 
          E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;
930
 
          E T4m, T5e, T4v, T5h;
931
 
          {
932
 
               E T1, T76, T6, T75, Tc, T32, Th, T33;
933
 
               T1 = cr[0];
934
 
               T76 = ci[0];
935
 
               {
936
 
                    E T3, T5, T2, T4;
937
 
                    T3 = cr[WS(rs, 16)];
938
 
                    T5 = ci[WS(rs, 16)];
939
 
                    T2 = W[30];
940
 
                    T4 = W[31];
941
 
                    T6 = FMA(T2, T3, T4 * T5);
942
 
                    T75 = FNMS(T4, T3, T2 * T5);
943
 
               }
944
 
               {
945
 
                    E T9, Tb, T8, Ta;
946
 
                    T9 = cr[WS(rs, 8)];
947
 
                    Tb = ci[WS(rs, 8)];
948
 
                    T8 = W[14];
949
 
                    Ta = W[15];
950
 
                    Tc = FMA(T8, T9, Ta * Tb);
951
 
                    T32 = FNMS(Ta, T9, T8 * Tb);
952
 
               }
953
 
               {
954
 
                    E Te, Tg, Td, Tf;
955
 
                    Te = cr[WS(rs, 24)];
956
 
                    Tg = ci[WS(rs, 24)];
957
 
                    Td = W[46];
958
 
                    Tf = W[47];
959
 
                    Th = FMA(Td, Te, Tf * Tg);
960
 
                    T33 = FNMS(Tf, Te, Td * Tg);
961
 
               }
962
 
               {
963
 
                    E T7, Ti, T7A, T7B;
964
 
                    T7 = T1 + T6;
965
 
                    Ti = Tc + Th;
966
 
                    Tj = T7 + Ti;
967
 
                    T5F = T7 - Ti;
968
 
                    T7A = Tc - Th;
969
 
                    T7B = T76 - T75;
970
 
                    T7C = T7A + T7B;
971
 
                    T7Q = T7B - T7A;
972
 
               }
973
 
               {
974
 
                    E T31, T34, T74, T77;
975
 
                    T31 = T1 - T6;
976
 
                    T34 = T32 - T33;
977
 
                    T35 = T31 + T34;
978
 
                    T4T = T31 - T34;
979
 
                    T74 = T32 + T33;
980
 
                    T77 = T75 + T76;
981
 
                    T78 = T74 + T77;
982
 
                    T7m = T77 - T74;
983
 
               }
984
 
          }
985
 
          {
986
 
               E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;
987
 
               {
988
 
                    E T1v, T1x, T1u, T1w;
989
 
                    T1v = cr[WS(rs, 1)];
990
 
                    T1x = ci[WS(rs, 1)];
991
 
                    T1u = W[0];
992
 
                    T1w = W[1];
993
 
                    T1y = FMA(T1u, T1v, T1w * T1x);
994
 
                    T3X = FNMS(T1w, T1v, T1u * T1x);
995
 
               }
996
 
               {
997
 
                    E T1L, T1N, T1K, T1M;
998
 
                    T1L = cr[WS(rs, 25)];
999
 
                    T1N = ci[WS(rs, 25)];
1000
 
                    T1K = W[48];
1001
 
                    T1M = W[49];
1002
 
                    T1O = FMA(T1K, T1L, T1M * T1N);
1003
 
                    T3I = FNMS(T1M, T1L, T1K * T1N);
1004
 
               }
1005
 
               {
1006
 
                    E T1A, T1C, T1z, T1B;
1007
 
                    T1A = cr[WS(rs, 17)];
1008
 
                    T1C = ci[WS(rs, 17)];
1009
 
                    T1z = W[32];
1010
 
                    T1B = W[33];
1011
 
                    T1D = FMA(T1z, T1A, T1B * T1C);
1012
 
                    T3Y = FNMS(T1B, T1A, T1z * T1C);
1013
 
               }
1014
 
               {
1015
 
                    E T1G, T1I, T1F, T1H;
1016
 
                    T1G = cr[WS(rs, 9)];
1017
 
                    T1I = ci[WS(rs, 9)];
1018
 
                    T1F = W[16];
1019
 
                    T1H = W[17];
1020
 
                    T1J = FMA(T1F, T1G, T1H * T1I);
1021
 
                    T3H = FNMS(T1H, T1G, T1F * T1I);
1022
 
               }
1023
 
               {
1024
 
                    E T1E, T1P, T5W, T5X;
1025
 
                    T1E = T1y + T1D;
1026
 
                    T1P = T1J + T1O;
1027
 
                    T1Q = T1E + T1P;
1028
 
                    T61 = T1E - T1P;
1029
 
                    T5W = T3X + T3Y;
1030
 
                    T5X = T3H + T3I;
1031
 
                    T5Y = T5W - T5X;
1032
 
                    T6J = T5W + T5X;
1033
 
               }
1034
 
               {
1035
 
                    E T3G, T3J, T3Z, T40;
1036
 
                    T3G = T1y - T1D;
1037
 
                    T3J = T3H - T3I;
1038
 
                    T3K = T3G + T3J;
1039
 
                    T56 = T3G - T3J;
1040
 
                    T3Z = T3X - T3Y;
1041
 
                    T40 = T1J - T1O;
1042
 
                    T41 = T3Z - T40;
1043
 
                    T59 = T3Z + T40;
1044
 
               }
1045
 
          }
1046
 
          {
1047
 
               E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;
1048
 
               {
1049
 
                    E T2g, T2i, T2f, T2h;
1050
 
                    T2g = cr[WS(rs, 31)];
1051
 
                    T2i = ci[WS(rs, 31)];
1052
 
                    T2f = W[60];
1053
 
                    T2h = W[61];
1054
 
                    T2j = FMA(T2f, T2g, T2h * T2i);
1055
 
                    T47 = FNMS(T2h, T2g, T2f * T2i);
1056
 
               }
1057
 
               {
1058
 
                    E T2w, T2y, T2v, T2x;
1059
 
                    T2w = cr[WS(rs, 23)];
1060
 
                    T2y = ci[WS(rs, 23)];
1061
 
                    T2v = W[44];
1062
 
                    T2x = W[45];
1063
 
                    T2z = FMA(T2v, T2w, T2x * T2y);
1064
 
                    T4q = FNMS(T2x, T2w, T2v * T2y);
1065
 
               }
1066
 
               {
1067
 
                    E T2l, T2n, T2k, T2m;
1068
 
                    T2l = cr[WS(rs, 15)];
1069
 
                    T2n = ci[WS(rs, 15)];
1070
 
                    T2k = W[28];
1071
 
                    T2m = W[29];
1072
 
                    T2o = FMA(T2k, T2l, T2m * T2n);
1073
 
                    T48 = FNMS(T2m, T2l, T2k * T2n);
1074
 
               }
1075
 
               {
1076
 
                    E T2r, T2t, T2q, T2s;
1077
 
                    T2r = cr[WS(rs, 7)];
1078
 
                    T2t = ci[WS(rs, 7)];
1079
 
                    T2q = W[12];
1080
 
                    T2s = W[13];
1081
 
                    T2u = FMA(T2q, T2r, T2s * T2t);
1082
 
                    T4p = FNMS(T2s, T2r, T2q * T2t);
1083
 
               }
1084
 
               {
1085
 
                    E T2p, T2A, T6c, T6d;
1086
 
                    T2p = T2j + T2o;
1087
 
                    T2A = T2u + T2z;
1088
 
                    T2B = T2p + T2A;
1089
 
                    T67 = T2p - T2A;
1090
 
                    T6c = T47 + T48;
1091
 
                    T6d = T4p + T4q;
1092
 
                    T6e = T6c - T6d;
1093
 
                    T6O = T6c + T6d;
1094
 
               }
1095
 
               {
1096
 
                    E T49, T4a, T4o, T4r;
1097
 
                    T49 = T47 - T48;
1098
 
                    T4a = T2u - T2z;
1099
 
                    T4b = T49 - T4a;
1100
 
                    T5g = T49 + T4a;
1101
 
                    T4o = T2j - T2o;
1102
 
                    T4r = T4p - T4q;
1103
 
                    T4s = T4o + T4r;
1104
 
                    T5d = T4o - T4r;
1105
 
               }
1106
 
          }
1107
 
          {
1108
 
               E To, T37, TE, T3d, Tt, T38, Tz, T3c;
1109
 
               {
1110
 
                    E Tl, Tn, Tk, Tm;
1111
 
                    Tl = cr[WS(rs, 4)];
1112
 
                    Tn = ci[WS(rs, 4)];
1113
 
                    Tk = W[6];
1114
 
                    Tm = W[7];
1115
 
                    To = FMA(Tk, Tl, Tm * Tn);
1116
 
                    T37 = FNMS(Tm, Tl, Tk * Tn);
1117
 
               }
1118
 
               {
1119
 
                    E TB, TD, TA, TC;
1120
 
                    TB = cr[WS(rs, 12)];
1121
 
                    TD = ci[WS(rs, 12)];
1122
 
                    TA = W[22];
1123
 
                    TC = W[23];
1124
 
                    TE = FMA(TA, TB, TC * TD);
1125
 
                    T3d = FNMS(TC, TB, TA * TD);
1126
 
               }
1127
 
               {
1128
 
                    E Tq, Ts, Tp, Tr;
1129
 
                    Tq = cr[WS(rs, 20)];
1130
 
                    Ts = ci[WS(rs, 20)];
1131
 
                    Tp = W[38];
1132
 
                    Tr = W[39];
1133
 
                    Tt = FMA(Tp, Tq, Tr * Ts);
1134
 
                    T38 = FNMS(Tr, Tq, Tp * Ts);
1135
 
               }
1136
 
               {
1137
 
                    E Tw, Ty, Tv, Tx;
1138
 
                    Tw = cr[WS(rs, 28)];
1139
 
                    Ty = ci[WS(rs, 28)];
1140
 
                    Tv = W[54];
1141
 
                    Tx = W[55];
1142
 
                    Tz = FMA(Tv, Tw, Tx * Ty);
1143
 
                    T3c = FNMS(Tx, Tw, Tv * Ty);
1144
 
               }
1145
 
               {
1146
 
                    E Tu, TF, T5G, T5H;
1147
 
                    Tu = To + Tt;
1148
 
                    TF = Tz + TE;
1149
 
                    TG = Tu + TF;
1150
 
                    T7l = Tu - TF;
1151
 
                    T5G = T3c + T3d;
1152
 
                    T5H = T37 + T38;
1153
 
                    T5I = T5G - T5H;
1154
 
                    T73 = T5H + T5G;
1155
 
               }
1156
 
               {
1157
 
                    E T36, T39, T3b, T3e;
1158
 
                    T36 = To - Tt;
1159
 
                    T39 = T37 - T38;
1160
 
                    T3a = T36 + T39;
1161
 
                    T4U = T36 - T39;
1162
 
                    T3b = Tz - TE;
1163
 
                    T3e = T3c - T3d;
1164
 
                    T3f = T3b - T3e;
1165
 
                    T4V = T3b + T3e;
1166
 
               }
1167
 
          }
1168
 
          {
1169
 
               E TM, T3n, T12, T3k, TR, T3o, TX, T3j;
1170
 
               {
1171
 
                    E TJ, TL, TI, TK;
1172
 
                    TJ = cr[WS(rs, 2)];
1173
 
                    TL = ci[WS(rs, 2)];
1174
 
                    TI = W[2];
1175
 
                    TK = W[3];
1176
 
                    TM = FMA(TI, TJ, TK * TL);
1177
 
                    T3n = FNMS(TK, TJ, TI * TL);
1178
 
               }
1179
 
               {
1180
 
                    E TZ, T11, TY, T10;
1181
 
                    TZ = cr[WS(rs, 26)];
1182
 
                    T11 = ci[WS(rs, 26)];
1183
 
                    TY = W[50];
1184
 
                    T10 = W[51];
1185
 
                    T12 = FMA(TY, TZ, T10 * T11);
1186
 
                    T3k = FNMS(T10, TZ, TY * T11);
1187
 
               }
1188
 
               {
1189
 
                    E TO, TQ, TN, TP;
1190
 
                    TO = cr[WS(rs, 18)];
1191
 
                    TQ = ci[WS(rs, 18)];
1192
 
                    TN = W[34];
1193
 
                    TP = W[35];
1194
 
                    TR = FMA(TN, TO, TP * TQ);
1195
 
                    T3o = FNMS(TP, TO, TN * TQ);
1196
 
               }
1197
 
               {
1198
 
                    E TU, TW, TT, TV;
1199
 
                    TU = cr[WS(rs, 10)];
1200
 
                    TW = ci[WS(rs, 10)];
1201
 
                    TT = W[18];
1202
 
                    TV = W[19];
1203
 
                    TX = FMA(TT, TU, TV * TW);
1204
 
                    T3j = FNMS(TV, TU, TT * TW);
1205
 
               }
1206
 
               {
1207
 
                    E TS, T13, T5L, T5M;
1208
 
                    TS = TM + TR;
1209
 
                    T13 = TX + T12;
1210
 
                    T14 = TS + T13;
1211
 
                    T5K = TS - T13;
1212
 
                    T5L = T3n + T3o;
1213
 
                    T5M = T3j + T3k;
1214
 
                    T5N = T5L - T5M;
1215
 
                    T6F = T5L + T5M;
1216
 
               }
1217
 
               {
1218
 
                    E T3i, T3l, T3p, T3q;
1219
 
                    T3i = TM - TR;
1220
 
                    T3l = T3j - T3k;
1221
 
                    T3m = T3i + T3l;
1222
 
                    T4Z = T3i - T3l;
1223
 
                    T3p = T3n - T3o;
1224
 
                    T3q = TX - T12;
1225
 
                    T3r = T3p - T3q;
1226
 
                    T4Y = T3p + T3q;
1227
 
               }
1228
 
          }
1229
 
          {
1230
 
               E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
1231
 
               {
1232
 
                    E T16, T18, T15, T17;
1233
 
                    T16 = cr[WS(rs, 30)];
1234
 
                    T18 = ci[WS(rs, 30)];
1235
 
                    T15 = W[58];
1236
 
                    T17 = W[59];
1237
 
                    T19 = FMA(T15, T16, T17 * T18);
1238
 
                    T3t = FNMS(T17, T16, T15 * T18);
1239
 
               }
1240
 
               {
1241
 
                    E T1m, T1o, T1l, T1n;
1242
 
                    T1m = cr[WS(rs, 22)];
1243
 
                    T1o = ci[WS(rs, 22)];
1244
 
                    T1l = W[42];
1245
 
                    T1n = W[43];
1246
 
                    T1p = FMA(T1l, T1m, T1n * T1o);
1247
 
                    T3A = FNMS(T1n, T1m, T1l * T1o);
1248
 
               }
1249
 
               {
1250
 
                    E T1b, T1d, T1a, T1c;
1251
 
                    T1b = cr[WS(rs, 14)];
1252
 
                    T1d = ci[WS(rs, 14)];
1253
 
                    T1a = W[26];
1254
 
                    T1c = W[27];
1255
 
                    T1e = FMA(T1a, T1b, T1c * T1d);
1256
 
                    T3u = FNMS(T1c, T1b, T1a * T1d);
1257
 
               }
1258
 
               {
1259
 
                    E T1h, T1j, T1g, T1i;
1260
 
                    T1h = cr[WS(rs, 6)];
1261
 
                    T1j = ci[WS(rs, 6)];
1262
 
                    T1g = W[10];
1263
 
                    T1i = W[11];
1264
 
                    T1k = FMA(T1g, T1h, T1i * T1j);
1265
 
                    T3z = FNMS(T1i, T1h, T1g * T1j);
1266
 
               }
1267
 
               {
1268
 
                    E T1f, T1q, T5Q, T5R;
1269
 
                    T1f = T19 + T1e;
1270
 
                    T1q = T1k + T1p;
1271
 
                    T1r = T1f + T1q;
1272
 
                    T5P = T1f - T1q;
1273
 
                    T5Q = T3t + T3u;
1274
 
                    T5R = T3z + T3A;
1275
 
                    T5S = T5Q - T5R;
1276
 
                    T6E = T5Q + T5R;
1277
 
               }
1278
 
               {
1279
 
                    E T3v, T3w, T3y, T3B;
1280
 
                    T3v = T3t - T3u;
1281
 
                    T3w = T1k - T1p;
1282
 
                    T3x = T3v - T3w;
1283
 
                    T52 = T3v + T3w;
1284
 
                    T3y = T19 - T1e;
1285
 
                    T3B = T3z - T3A;
1286
 
                    T3C = T3y + T3B;
1287
 
                    T51 = T3y - T3B;
1288
 
               }
1289
 
          }
1290
 
          {
1291
 
               E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;
1292
 
               {
1293
 
                    E T1S, T1U, T1R, T1T;
1294
 
                    T1S = cr[WS(rs, 5)];
1295
 
                    T1U = ci[WS(rs, 5)];
1296
 
                    T1R = W[8];
1297
 
                    T1T = W[9];
1298
 
                    T1V = FMA(T1R, T1S, T1T * T1U);
1299
 
                    T3M = FNMS(T1T, T1S, T1R * T1U);
1300
 
               }
1301
 
               {
1302
 
                    E T1X, T1Z, T1W, T1Y;
1303
 
                    T1X = cr[WS(rs, 21)];
1304
 
                    T1Z = ci[WS(rs, 21)];
1305
 
                    T1W = W[40];
1306
 
                    T1Y = W[41];
1307
 
                    T20 = FMA(T1W, T1X, T1Y * T1Z);
1308
 
                    T3N = FNMS(T1Y, T1X, T1W * T1Z);
1309
 
               }
1310
 
               T3L = T1V - T20;
1311
 
               T3O = T3M - T3N;
1312
 
               {
1313
 
                    E T23, T25, T22, T24;
1314
 
                    T23 = cr[WS(rs, 29)];
1315
 
                    T25 = ci[WS(rs, 29)];
1316
 
                    T22 = W[56];
1317
 
                    T24 = W[57];
1318
 
                    T26 = FMA(T22, T23, T24 * T25);
1319
 
                    T3Q = FNMS(T24, T23, T22 * T25);
1320
 
               }
1321
 
               {
1322
 
                    E T28, T2a, T27, T29;
1323
 
                    T28 = cr[WS(rs, 13)];
1324
 
                    T2a = ci[WS(rs, 13)];
1325
 
                    T27 = W[24];
1326
 
                    T29 = W[25];
1327
 
                    T2b = FMA(T27, T28, T29 * T2a);
1328
 
                    T3R = FNMS(T29, T28, T27 * T2a);
1329
 
               }
1330
 
               T3S = T3Q - T3R;
1331
 
               T3T = T26 - T2b;
1332
 
               {
1333
 
                    E T21, T2c, T62, T63;
1334
 
                    T21 = T1V + T20;
1335
 
                    T2c = T26 + T2b;
1336
 
                    T2d = T21 + T2c;
1337
 
                    T5Z = T21 - T2c;
1338
 
                    T62 = T3Q + T3R;
1339
 
                    T63 = T3M + T3N;
1340
 
                    T64 = T62 - T63;
1341
 
                    T6K = T63 + T62;
1342
 
               }
1343
 
               {
1344
 
                    E T3P, T3U, T42, T43;
1345
 
                    T3P = T3L + T3O;
1346
 
                    T3U = T3S - T3T;
1347
 
                    T3V = KP707106781 * (T3P - T3U);
1348
 
                    T5a = KP707106781 * (T3P + T3U);
1349
 
                    T42 = T3T + T3S;
1350
 
                    T43 = T3L - T3O;
1351
 
                    T44 = KP707106781 * (T42 - T43);
1352
 
                    T57 = KP707106781 * (T43 + T42);
1353
 
               }
1354
 
          }
1355
 
          {
1356
 
               E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f;
1357
 
               {
1358
 
                    E T2D, T2F, T2C, T2E;
1359
 
                    T2D = cr[WS(rs, 3)];
1360
 
                    T2F = ci[WS(rs, 3)];
1361
 
                    T2C = W[4];
1362
 
                    T2E = W[5];
1363
 
                    T2G = FMA(T2C, T2D, T2E * T2F);
1364
 
                    T4i = FNMS(T2E, T2D, T2C * T2F);
1365
 
               }
1366
 
               {
1367
 
                    E T2I, T2K, T2H, T2J;
1368
 
                    T2I = cr[WS(rs, 19)];
1369
 
                    T2K = ci[WS(rs, 19)];
1370
 
                    T2H = W[36];
1371
 
                    T2J = W[37];
1372
 
                    T2L = FMA(T2H, T2I, T2J * T2K);
1373
 
                    T4j = FNMS(T2J, T2I, T2H * T2K);
1374
 
               }
1375
 
               T4h = T2G - T2L;
1376
 
               T4k = T4i - T4j;
1377
 
               {
1378
 
                    E T2O, T2Q, T2N, T2P;
1379
 
                    T2O = cr[WS(rs, 27)];
1380
 
                    T2Q = ci[WS(rs, 27)];
1381
 
                    T2N = W[52];
1382
 
                    T2P = W[53];
1383
 
                    T2R = FMA(T2N, T2O, T2P * T2Q);
1384
 
                    T4d = FNMS(T2P, T2O, T2N * T2Q);
1385
 
               }
1386
 
               {
1387
 
                    E T2T, T2V, T2S, T2U;
1388
 
                    T2T = cr[WS(rs, 11)];
1389
 
                    T2V = ci[WS(rs, 11)];
1390
 
                    T2S = W[20];
1391
 
                    T2U = W[21];
1392
 
                    T2W = FMA(T2S, T2T, T2U * T2V);
1393
 
                    T4e = FNMS(T2U, T2T, T2S * T2V);
1394
 
               }
1395
 
               T4c = T2R - T2W;
1396
 
               T4f = T4d - T4e;
1397
 
               {
1398
 
                    E T2M, T2X, T68, T69;
1399
 
                    T2M = T2G + T2L;
1400
 
                    T2X = T2R + T2W;
1401
 
                    T2Y = T2M + T2X;
1402
 
                    T6f = T2M - T2X;
1403
 
                    T68 = T4d + T4e;
1404
 
                    T69 = T4i + T4j;
1405
 
                    T6a = T68 - T69;
1406
 
                    T6P = T69 + T68;
1407
 
               }
1408
 
               {
1409
 
                    E T4g, T4l, T4t, T4u;
1410
 
                    T4g = T4c + T4f;
1411
 
                    T4l = T4h - T4k;
1412
 
                    T4m = KP707106781 * (T4g - T4l);
1413
 
                    T5e = KP707106781 * (T4l + T4g);
1414
 
                    T4t = T4h + T4k;
1415
 
                    T4u = T4f - T4c;
1416
 
                    T4v = KP707106781 * (T4t - T4u);
1417
 
                    T5h = KP707106781 * (T4t + T4u);
1418
 
               }
1419
 
          }
1420
 
          {
1421
 
               E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
1422
 
               {
1423
 
                    E TH, T1s, T72, T79;
1424
 
                    TH = Tj + TG;
1425
 
                    T1s = T14 + T1r;
1426
 
                    T1t = TH + T1s;
1427
 
                    T6X = TH - T1s;
1428
 
                    T72 = T6F + T6E;
1429
 
                    T79 = T73 + T78;
1430
 
                    T7a = T72 + T79;
1431
 
                    T7c = T79 - T72;
1432
 
               }
1433
 
               {
1434
 
                    E T2e, T2Z, T6Y, T6Z;
1435
 
                    T2e = T1Q + T2d;
1436
 
                    T2Z = T2B + T2Y;
1437
 
                    T30 = T2e + T2Z;
1438
 
                    T7b = T2Z - T2e;
1439
 
                    T6Y = T6O + T6P;
1440
 
                    T6Z = T6J + T6K;
1441
 
                    T70 = T6Y - T6Z;
1442
 
                    T71 = T6Z + T6Y;
1443
 
               }
1444
 
               ci[WS(rs, 15)] = T1t - T30;
1445
 
               cr[WS(rs, 24)] = T7b - T7c;
1446
 
               ci[WS(rs, 23)] = T7b + T7c;
1447
 
               cr[0] = T1t + T30;
1448
 
               cr[WS(rs, 8)] = T6X - T70;
1449
 
               cr[WS(rs, 16)] = T71 - T7a;
1450
 
               ci[WS(rs, 31)] = T71 + T7a;
1451
 
               ci[WS(rs, 7)] = T6X + T70;
1452
 
          }
1453
 
          {
1454
 
               E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
1455
 
               E T5n, T4W, T7z;
1456
 
               T4W = KP707106781 * (T4U + T4V);
1457
 
               T4X = T4T - T4W;
1458
 
               T5p = T4T + T4W;
1459
 
               T7z = KP707106781 * (T3a - T3f);
1460
 
               T7D = T7z + T7C;
1461
 
               T7J = T7C - T7z;
1462
 
               {
1463
 
                    E T50, T53, T5x, T5y;
1464
 
                    T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z);
1465
 
                    T53 = FNMS(KP923879532, T52, KP382683432 * T51);
1466
 
                    T54 = T50 + T53;
1467
 
                    T7y = T50 - T53;
1468
 
                    T5x = T5d + T5e;
1469
 
                    T5y = T5g + T5h;
1470
 
                    T5z = FNMS(KP980785280, T5y, KP195090322 * T5x);
1471
 
                    T5D = FMA(KP980785280, T5x, KP195090322 * T5y);
1472
 
               }
1473
 
               {
1474
 
                    E T58, T5b, T5q, T5r;
1475
 
                    T58 = T56 - T57;
1476
 
                    T5b = T59 - T5a;
1477
 
                    T5c = FMA(KP831469612, T58, KP555570233 * T5b);
1478
 
                    T5m = FNMS(KP831469612, T5b, KP555570233 * T58);
1479
 
                    T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z);
1480
 
                    T5r = FMA(KP382683432, T52, KP923879532 * T51);
1481
 
                    T5s = T5q + T5r;
1482
 
                    T7I = T5r - T5q;
1483
 
               }
1484
 
               {
1485
 
                    E T5u, T5v, T5f, T5i;
1486
 
                    T5u = T56 + T57;
1487
 
                    T5v = T59 + T5a;
1488
 
                    T5w = FMA(KP195090322, T5u, KP980785280 * T5v);
1489
 
                    T5C = FNMS(KP195090322, T5v, KP980785280 * T5u);
1490
 
                    T5f = T5d - T5e;
1491
 
                    T5i = T5g - T5h;
1492
 
                    T5j = FNMS(KP555570233, T5i, KP831469612 * T5f);
1493
 
                    T5n = FMA(KP555570233, T5f, KP831469612 * T5i);
1494
 
               }
1495
 
               {
1496
 
                    E T55, T5k, T7H, T7K;
1497
 
                    T55 = T4X + T54;
1498
 
                    T5k = T5c + T5j;
1499
 
                    ci[WS(rs, 12)] = T55 - T5k;
1500
 
                    cr[WS(rs, 3)] = T55 + T5k;
1501
 
                    T7H = T5n - T5m;
1502
 
                    T7K = T7I + T7J;
1503
 
                    cr[WS(rs, 19)] = T7H - T7K;
1504
 
                    ci[WS(rs, 28)] = T7H + T7K;
1505
 
               }
1506
 
               {
1507
 
                    E T7L, T7M, T5l, T5o;
1508
 
                    T7L = T5j - T5c;
1509
 
                    T7M = T7J - T7I;
1510
 
                    cr[WS(rs, 27)] = T7L - T7M;
1511
 
                    ci[WS(rs, 20)] = T7L + T7M;
1512
 
                    T5l = T4X - T54;
1513
 
                    T5o = T5m + T5n;
1514
 
                    cr[WS(rs, 11)] = T5l - T5o;
1515
 
                    ci[WS(rs, 4)] = T5l + T5o;
1516
 
               }
1517
 
               {
1518
 
                    E T5t, T5A, T7x, T7E;
1519
 
                    T5t = T5p - T5s;
1520
 
                    T5A = T5w + T5z;
1521
 
                    ci[WS(rs, 8)] = T5t - T5A;
1522
 
                    cr[WS(rs, 7)] = T5t + T5A;
1523
 
                    T7x = T5z - T5w;
1524
 
                    T7E = T7y + T7D;
1525
 
                    cr[WS(rs, 31)] = T7x - T7E;
1526
 
                    ci[WS(rs, 16)] = T7x + T7E;
1527
 
               }
1528
 
               {
1529
 
                    E T7F, T7G, T5B, T5E;
1530
 
                    T7F = T5D - T5C;
1531
 
                    T7G = T7D - T7y;
1532
 
                    cr[WS(rs, 23)] = T7F - T7G;
1533
 
                    ci[WS(rs, 24)] = T7F + T7G;
1534
 
                    T5B = T5p + T5s;
1535
 
                    T5E = T5C + T5D;
1536
 
                    cr[WS(rs, 15)] = T5B - T5E;
1537
 
                    ci[0] = T5B + T5E;
1538
 
               }
1539
 
          }
1540
 
          {
1541
 
               E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
1542
 
               {
1543
 
                    E T6D, T6G, T7e, T7f;
1544
 
                    T6D = Tj - TG;
1545
 
                    T6G = T6E - T6F;
1546
 
                    T6H = T6D - T6G;
1547
 
                    T6T = T6D + T6G;
1548
 
                    T7e = T14 - T1r;
1549
 
                    T7f = T78 - T73;
1550
 
                    T7g = T7e + T7f;
1551
 
                    T7i = T7f - T7e;
1552
 
               }
1553
 
               {
1554
 
                    E T6I, T6L, T6N, T6Q;
1555
 
                    T6I = T1Q - T2d;
1556
 
                    T6L = T6J - T6K;
1557
 
                    T6M = T6I + T6L;
1558
 
                    T6U = T6I - T6L;
1559
 
                    T6N = T2B - T2Y;
1560
 
                    T6Q = T6O - T6P;
1561
 
                    T6R = T6N - T6Q;
1562
 
                    T6V = T6N + T6Q;
1563
 
               }
1564
 
               {
1565
 
                    E T6S, T7h, T6W, T7d;
1566
 
                    T6S = KP707106781 * (T6M + T6R);
1567
 
                    ci[WS(rs, 11)] = T6H - T6S;
1568
 
                    cr[WS(rs, 4)] = T6H + T6S;
1569
 
                    T7h = KP707106781 * (T6V - T6U);
1570
 
                    cr[WS(rs, 20)] = T7h - T7i;
1571
 
                    ci[WS(rs, 27)] = T7h + T7i;
1572
 
                    T6W = KP707106781 * (T6U + T6V);
1573
 
                    cr[WS(rs, 12)] = T6T - T6W;
1574
 
                    ci[WS(rs, 3)] = T6T + T6W;
1575
 
                    T7d = KP707106781 * (T6R - T6M);
1576
 
                    cr[WS(rs, 28)] = T7d - T7g;
1577
 
                    ci[WS(rs, 19)] = T7d + T7g;
1578
 
               }
1579
 
          }
1580
 
          {
1581
 
               E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
1582
 
               E T6l;
1583
 
               {
1584
 
                    E T5O, T5T, T60, T65;
1585
 
                    T5J = T5F - T5I;
1586
 
                    T7n = T7l + T7m;
1587
 
                    T7t = T7m - T7l;
1588
 
                    T6n = T5F + T5I;
1589
 
                    T5O = T5K + T5N;
1590
 
                    T5T = T5P - T5S;
1591
 
                    T5U = KP707106781 * (T5O + T5T);
1592
 
                    T7k = KP707106781 * (T5O - T5T);
1593
 
                    {
1594
 
                         E T6v, T6w, T6o, T6p;
1595
 
                         T6v = T6e + T6f;
1596
 
                         T6w = T67 + T6a;
1597
 
                         T6x = FMA(KP382683432, T6v, KP923879532 * T6w);
1598
 
                         T6B = FNMS(KP923879532, T6v, KP382683432 * T6w);
1599
 
                         T6o = T5K - T5N;
1600
 
                         T6p = T5P + T5S;
1601
 
                         T6q = KP707106781 * (T6o + T6p);
1602
 
                         T7s = KP707106781 * (T6p - T6o);
1603
 
                    }
1604
 
                    T60 = T5Y - T5Z;
1605
 
                    T65 = T61 - T64;
1606
 
                    T66 = FMA(KP382683432, T60, KP923879532 * T65);
1607
 
                    T6k = FNMS(KP923879532, T60, KP382683432 * T65);
1608
 
                    {
1609
 
                         E T6s, T6t, T6b, T6g;
1610
 
                         T6s = T61 + T64;
1611
 
                         T6t = T5Y + T5Z;
1612
 
                         T6u = FNMS(KP382683432, T6t, KP923879532 * T6s);
1613
 
                         T6A = FMA(KP923879532, T6t, KP382683432 * T6s);
1614
 
                         T6b = T67 - T6a;
1615
 
                         T6g = T6e - T6f;
1616
 
                         T6h = FNMS(KP382683432, T6g, KP923879532 * T6b);
1617
 
                         T6l = FMA(KP923879532, T6g, KP382683432 * T6b);
1618
 
                    }
1619
 
               }
1620
 
               {
1621
 
                    E T5V, T6i, T7r, T7u;
1622
 
                    T5V = T5J + T5U;
1623
 
                    T6i = T66 + T6h;
1624
 
                    ci[WS(rs, 13)] = T5V - T6i;
1625
 
                    cr[WS(rs, 2)] = T5V + T6i;
1626
 
                    T7r = T6l - T6k;
1627
 
                    T7u = T7s + T7t;
1628
 
                    cr[WS(rs, 18)] = T7r - T7u;
1629
 
                    ci[WS(rs, 29)] = T7r + T7u;
1630
 
               }
1631
 
               {
1632
 
                    E T7v, T7w, T6j, T6m;
1633
 
                    T7v = T6h - T66;
1634
 
                    T7w = T7t - T7s;
1635
 
                    cr[WS(rs, 26)] = T7v - T7w;
1636
 
                    ci[WS(rs, 21)] = T7v + T7w;
1637
 
                    T6j = T5J - T5U;
1638
 
                    T6m = T6k + T6l;
1639
 
                    cr[WS(rs, 10)] = T6j - T6m;
1640
 
                    ci[WS(rs, 5)] = T6j + T6m;
1641
 
               }
1642
 
               {
1643
 
                    E T6r, T6y, T7j, T7o;
1644
 
                    T6r = T6n + T6q;
1645
 
                    T6y = T6u + T6x;
1646
 
                    cr[WS(rs, 14)] = T6r - T6y;
1647
 
                    ci[WS(rs, 1)] = T6r + T6y;
1648
 
                    T7j = T6B - T6A;
1649
 
                    T7o = T7k + T7n;
1650
 
                    cr[WS(rs, 30)] = T7j - T7o;
1651
 
                    ci[WS(rs, 17)] = T7j + T7o;
1652
 
               }
1653
 
               {
1654
 
                    E T7p, T7q, T6z, T6C;
1655
 
                    T7p = T6x - T6u;
1656
 
                    T7q = T7n - T7k;
1657
 
                    cr[WS(rs, 22)] = T7p - T7q;
1658
 
                    ci[WS(rs, 25)] = T7p + T7q;
1659
 
                    T6z = T6n - T6q;
1660
 
                    T6C = T6A + T6B;
1661
 
                    ci[WS(rs, 9)] = T6z - T6C;
1662
 
                    cr[WS(rs, 6)] = T6z + T6C;
1663
 
               }
1664
 
          }
1665
 
          {
1666
 
               E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
1667
 
               E T4B, T3g, T7P;
1668
 
               T3g = KP707106781 * (T3a + T3f);
1669
 
               T3h = T35 - T3g;
1670
 
               T4D = T35 + T3g;
1671
 
               T7P = KP707106781 * (T4V - T4U);
1672
 
               T7R = T7P + T7Q;
1673
 
               T7X = T7Q - T7P;
1674
 
               {
1675
 
                    E T3s, T3D, T4L, T4M;
1676
 
                    T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
1677
 
                    T3D = FMA(KP923879532, T3x, KP382683432 * T3C);
1678
 
                    T3E = T3s + T3D;
1679
 
                    T7O = T3D - T3s;
1680
 
                    T4L = T4s + T4v;
1681
 
                    T4M = T4b + T4m;
1682
 
                    T4N = FNMS(KP195090322, T4M, KP980785280 * T4L);
1683
 
                    T4R = FMA(KP980785280, T4M, KP195090322 * T4L);
1684
 
               }
1685
 
               {
1686
 
                    E T3W, T45, T4E, T4F;
1687
 
                    T3W = T3K - T3V;
1688
 
                    T45 = T41 - T44;
1689
 
                    T46 = FNMS(KP555570233, T45, KP831469612 * T3W);
1690
 
                    T4A = FMA(KP831469612, T45, KP555570233 * T3W);
1691
 
                    T4E = FMA(KP382683432, T3r, KP923879532 * T3m);
1692
 
                    T4F = FNMS(KP382683432, T3x, KP923879532 * T3C);
1693
 
                    T4G = T4E + T4F;
1694
 
                    T7W = T4E - T4F;
1695
 
               }
1696
 
               {
1697
 
                    E T4I, T4J, T4n, T4w;
1698
 
                    T4I = T41 + T44;
1699
 
                    T4J = T3K + T3V;
1700
 
                    T4K = FMA(KP195090322, T4I, KP980785280 * T4J);
1701
 
                    T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J);
1702
 
                    T4n = T4b - T4m;
1703
 
                    T4w = T4s - T4v;
1704
 
                    T4x = FMA(KP555570233, T4n, KP831469612 * T4w);
1705
 
                    T4B = FNMS(KP831469612, T4n, KP555570233 * T4w);
1706
 
               }
1707
 
               {
1708
 
                    E T3F, T4y, T7V, T7Y;
1709
 
                    T3F = T3h + T3E;
1710
 
                    T4y = T46 + T4x;
1711
 
                    cr[WS(rs, 13)] = T3F - T4y;
1712
 
                    ci[WS(rs, 2)] = T3F + T4y;
1713
 
                    T7V = T4B - T4A;
1714
 
                    T7Y = T7W + T7X;
1715
 
                    cr[WS(rs, 29)] = T7V - T7Y;
1716
 
                    ci[WS(rs, 18)] = T7V + T7Y;
1717
 
               }
1718
 
               {
1719
 
                    E T7Z, T80, T4z, T4C;
1720
 
                    T7Z = T4x - T46;
1721
 
                    T80 = T7X - T7W;
1722
 
                    cr[WS(rs, 21)] = T7Z - T80;
1723
 
                    ci[WS(rs, 26)] = T7Z + T80;
1724
 
                    T4z = T3h - T3E;
1725
 
                    T4C = T4A + T4B;
1726
 
                    ci[WS(rs, 10)] = T4z - T4C;
1727
 
                    cr[WS(rs, 5)] = T4z + T4C;
1728
 
               }
1729
 
               {
1730
 
                    E T4H, T4O, T7N, T7S;
1731
 
                    T4H = T4D + T4G;
1732
 
                    T4O = T4K + T4N;
1733
 
                    ci[WS(rs, 14)] = T4H - T4O;
1734
 
                    cr[WS(rs, 1)] = T4H + T4O;
1735
 
                    T7N = T4R - T4Q;
1736
 
                    T7S = T7O + T7R;
1737
 
                    cr[WS(rs, 17)] = T7N - T7S;
1738
 
                    ci[WS(rs, 30)] = T7N + T7S;
1739
 
               }
1740
 
               {
1741
 
                    E T7T, T7U, T4P, T4S;
1742
 
                    T7T = T4N - T4K;
1743
 
                    T7U = T7R - T7O;
1744
 
                    cr[WS(rs, 25)] = T7T - T7U;
1745
 
                    ci[WS(rs, 22)] = T7T + T7U;
1746
 
                    T4P = T4D - T4G;
1747
 
                    T4S = T4Q + T4R;
1748
 
                    cr[WS(rs, 9)] = T4P - T4S;
1749
 
                    ci[WS(rs, 6)] = T4P + T4S;
 
926
     {
 
927
          INT m;
 
928
          for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
 
929
               E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41;
 
930
               E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;
 
931
               E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;
 
932
               E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;
 
933
               E T4m, T5e, T4v, T5h;
 
934
               {
 
935
                    E T1, T76, T6, T75, Tc, T32, Th, T33;
 
936
                    T1 = cr[0];
 
937
                    T76 = ci[0];
 
938
                    {
 
939
                         E T3, T5, T2, T4;
 
940
                         T3 = cr[WS(rs, 16)];
 
941
                         T5 = ci[WS(rs, 16)];
 
942
                         T2 = W[30];
 
943
                         T4 = W[31];
 
944
                         T6 = FMA(T2, T3, T4 * T5);
 
945
                         T75 = FNMS(T4, T3, T2 * T5);
 
946
                    }
 
947
                    {
 
948
                         E T9, Tb, T8, Ta;
 
949
                         T9 = cr[WS(rs, 8)];
 
950
                         Tb = ci[WS(rs, 8)];
 
951
                         T8 = W[14];
 
952
                         Ta = W[15];
 
953
                         Tc = FMA(T8, T9, Ta * Tb);
 
954
                         T32 = FNMS(Ta, T9, T8 * Tb);
 
955
                    }
 
956
                    {
 
957
                         E Te, Tg, Td, Tf;
 
958
                         Te = cr[WS(rs, 24)];
 
959
                         Tg = ci[WS(rs, 24)];
 
960
                         Td = W[46];
 
961
                         Tf = W[47];
 
962
                         Th = FMA(Td, Te, Tf * Tg);
 
963
                         T33 = FNMS(Tf, Te, Td * Tg);
 
964
                    }
 
965
                    {
 
966
                         E T7, Ti, T7A, T7B;
 
967
                         T7 = T1 + T6;
 
968
                         Ti = Tc + Th;
 
969
                         Tj = T7 + Ti;
 
970
                         T5F = T7 - Ti;
 
971
                         T7A = Tc - Th;
 
972
                         T7B = T76 - T75;
 
973
                         T7C = T7A + T7B;
 
974
                         T7Q = T7B - T7A;
 
975
                    }
 
976
                    {
 
977
                         E T31, T34, T74, T77;
 
978
                         T31 = T1 - T6;
 
979
                         T34 = T32 - T33;
 
980
                         T35 = T31 + T34;
 
981
                         T4T = T31 - T34;
 
982
                         T74 = T32 + T33;
 
983
                         T77 = T75 + T76;
 
984
                         T78 = T74 + T77;
 
985
                         T7m = T77 - T74;
 
986
                    }
 
987
               }
 
988
               {
 
989
                    E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;
 
990
                    {
 
991
                         E T1v, T1x, T1u, T1w;
 
992
                         T1v = cr[WS(rs, 1)];
 
993
                         T1x = ci[WS(rs, 1)];
 
994
                         T1u = W[0];
 
995
                         T1w = W[1];
 
996
                         T1y = FMA(T1u, T1v, T1w * T1x);
 
997
                         T3X = FNMS(T1w, T1v, T1u * T1x);
 
998
                    }
 
999
                    {
 
1000
                         E T1L, T1N, T1K, T1M;
 
1001
                         T1L = cr[WS(rs, 25)];
 
1002
                         T1N = ci[WS(rs, 25)];
 
1003
                         T1K = W[48];
 
1004
                         T1M = W[49];
 
1005
                         T1O = FMA(T1K, T1L, T1M * T1N);
 
1006
                         T3I = FNMS(T1M, T1L, T1K * T1N);
 
1007
                    }
 
1008
                    {
 
1009
                         E T1A, T1C, T1z, T1B;
 
1010
                         T1A = cr[WS(rs, 17)];
 
1011
                         T1C = ci[WS(rs, 17)];
 
1012
                         T1z = W[32];
 
1013
                         T1B = W[33];
 
1014
                         T1D = FMA(T1z, T1A, T1B * T1C);
 
1015
                         T3Y = FNMS(T1B, T1A, T1z * T1C);
 
1016
                    }
 
1017
                    {
 
1018
                         E T1G, T1I, T1F, T1H;
 
1019
                         T1G = cr[WS(rs, 9)];
 
1020
                         T1I = ci[WS(rs, 9)];
 
1021
                         T1F = W[16];
 
1022
                         T1H = W[17];
 
1023
                         T1J = FMA(T1F, T1G, T1H * T1I);
 
1024
                         T3H = FNMS(T1H, T1G, T1F * T1I);
 
1025
                    }
 
1026
                    {
 
1027
                         E T1E, T1P, T5W, T5X;
 
1028
                         T1E = T1y + T1D;
 
1029
                         T1P = T1J + T1O;
 
1030
                         T1Q = T1E + T1P;
 
1031
                         T61 = T1E - T1P;
 
1032
                         T5W = T3X + T3Y;
 
1033
                         T5X = T3H + T3I;
 
1034
                         T5Y = T5W - T5X;
 
1035
                         T6J = T5W + T5X;
 
1036
                    }
 
1037
                    {
 
1038
                         E T3G, T3J, T3Z, T40;
 
1039
                         T3G = T1y - T1D;
 
1040
                         T3J = T3H - T3I;
 
1041
                         T3K = T3G + T3J;
 
1042
                         T56 = T3G - T3J;
 
1043
                         T3Z = T3X - T3Y;
 
1044
                         T40 = T1J - T1O;
 
1045
                         T41 = T3Z - T40;
 
1046
                         T59 = T3Z + T40;
 
1047
                    }
 
1048
               }
 
1049
               {
 
1050
                    E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;
 
1051
                    {
 
1052
                         E T2g, T2i, T2f, T2h;
 
1053
                         T2g = cr[WS(rs, 31)];
 
1054
                         T2i = ci[WS(rs, 31)];
 
1055
                         T2f = W[60];
 
1056
                         T2h = W[61];
 
1057
                         T2j = FMA(T2f, T2g, T2h * T2i);
 
1058
                         T47 = FNMS(T2h, T2g, T2f * T2i);
 
1059
                    }
 
1060
                    {
 
1061
                         E T2w, T2y, T2v, T2x;
 
1062
                         T2w = cr[WS(rs, 23)];
 
1063
                         T2y = ci[WS(rs, 23)];
 
1064
                         T2v = W[44];
 
1065
                         T2x = W[45];
 
1066
                         T2z = FMA(T2v, T2w, T2x * T2y);
 
1067
                         T4q = FNMS(T2x, T2w, T2v * T2y);
 
1068
                    }
 
1069
                    {
 
1070
                         E T2l, T2n, T2k, T2m;
 
1071
                         T2l = cr[WS(rs, 15)];
 
1072
                         T2n = ci[WS(rs, 15)];
 
1073
                         T2k = W[28];
 
1074
                         T2m = W[29];
 
1075
                         T2o = FMA(T2k, T2l, T2m * T2n);
 
1076
                         T48 = FNMS(T2m, T2l, T2k * T2n);
 
1077
                    }
 
1078
                    {
 
1079
                         E T2r, T2t, T2q, T2s;
 
1080
                         T2r = cr[WS(rs, 7)];
 
1081
                         T2t = ci[WS(rs, 7)];
 
1082
                         T2q = W[12];
 
1083
                         T2s = W[13];
 
1084
                         T2u = FMA(T2q, T2r, T2s * T2t);
 
1085
                         T4p = FNMS(T2s, T2r, T2q * T2t);
 
1086
                    }
 
1087
                    {
 
1088
                         E T2p, T2A, T6c, T6d;
 
1089
                         T2p = T2j + T2o;
 
1090
                         T2A = T2u + T2z;
 
1091
                         T2B = T2p + T2A;
 
1092
                         T67 = T2p - T2A;
 
1093
                         T6c = T47 + T48;
 
1094
                         T6d = T4p + T4q;
 
1095
                         T6e = T6c - T6d;
 
1096
                         T6O = T6c + T6d;
 
1097
                    }
 
1098
                    {
 
1099
                         E T49, T4a, T4o, T4r;
 
1100
                         T49 = T47 - T48;
 
1101
                         T4a = T2u - T2z;
 
1102
                         T4b = T49 - T4a;
 
1103
                         T5g = T49 + T4a;
 
1104
                         T4o = T2j - T2o;
 
1105
                         T4r = T4p - T4q;
 
1106
                         T4s = T4o + T4r;
 
1107
                         T5d = T4o - T4r;
 
1108
                    }
 
1109
               }
 
1110
               {
 
1111
                    E To, T37, TE, T3d, Tt, T38, Tz, T3c;
 
1112
                    {
 
1113
                         E Tl, Tn, Tk, Tm;
 
1114
                         Tl = cr[WS(rs, 4)];
 
1115
                         Tn = ci[WS(rs, 4)];
 
1116
                         Tk = W[6];
 
1117
                         Tm = W[7];
 
1118
                         To = FMA(Tk, Tl, Tm * Tn);
 
1119
                         T37 = FNMS(Tm, Tl, Tk * Tn);
 
1120
                    }
 
1121
                    {
 
1122
                         E TB, TD, TA, TC;
 
1123
                         TB = cr[WS(rs, 12)];
 
1124
                         TD = ci[WS(rs, 12)];
 
1125
                         TA = W[22];
 
1126
                         TC = W[23];
 
1127
                         TE = FMA(TA, TB, TC * TD);
 
1128
                         T3d = FNMS(TC, TB, TA * TD);
 
1129
                    }
 
1130
                    {
 
1131
                         E Tq, Ts, Tp, Tr;
 
1132
                         Tq = cr[WS(rs, 20)];
 
1133
                         Ts = ci[WS(rs, 20)];
 
1134
                         Tp = W[38];
 
1135
                         Tr = W[39];
 
1136
                         Tt = FMA(Tp, Tq, Tr * Ts);
 
1137
                         T38 = FNMS(Tr, Tq, Tp * Ts);
 
1138
                    }
 
1139
                    {
 
1140
                         E Tw, Ty, Tv, Tx;
 
1141
                         Tw = cr[WS(rs, 28)];
 
1142
                         Ty = ci[WS(rs, 28)];
 
1143
                         Tv = W[54];
 
1144
                         Tx = W[55];
 
1145
                         Tz = FMA(Tv, Tw, Tx * Ty);
 
1146
                         T3c = FNMS(Tx, Tw, Tv * Ty);
 
1147
                    }
 
1148
                    {
 
1149
                         E Tu, TF, T5G, T5H;
 
1150
                         Tu = To + Tt;
 
1151
                         TF = Tz + TE;
 
1152
                         TG = Tu + TF;
 
1153
                         T7l = Tu - TF;
 
1154
                         T5G = T3c + T3d;
 
1155
                         T5H = T37 + T38;
 
1156
                         T5I = T5G - T5H;
 
1157
                         T73 = T5H + T5G;
 
1158
                    }
 
1159
                    {
 
1160
                         E T36, T39, T3b, T3e;
 
1161
                         T36 = To - Tt;
 
1162
                         T39 = T37 - T38;
 
1163
                         T3a = T36 + T39;
 
1164
                         T4U = T36 - T39;
 
1165
                         T3b = Tz - TE;
 
1166
                         T3e = T3c - T3d;
 
1167
                         T3f = T3b - T3e;
 
1168
                         T4V = T3b + T3e;
 
1169
                    }
 
1170
               }
 
1171
               {
 
1172
                    E TM, T3n, T12, T3k, TR, T3o, TX, T3j;
 
1173
                    {
 
1174
                         E TJ, TL, TI, TK;
 
1175
                         TJ = cr[WS(rs, 2)];
 
1176
                         TL = ci[WS(rs, 2)];
 
1177
                         TI = W[2];
 
1178
                         TK = W[3];
 
1179
                         TM = FMA(TI, TJ, TK * TL);
 
1180
                         T3n = FNMS(TK, TJ, TI * TL);
 
1181
                    }
 
1182
                    {
 
1183
                         E TZ, T11, TY, T10;
 
1184
                         TZ = cr[WS(rs, 26)];
 
1185
                         T11 = ci[WS(rs, 26)];
 
1186
                         TY = W[50];
 
1187
                         T10 = W[51];
 
1188
                         T12 = FMA(TY, TZ, T10 * T11);
 
1189
                         T3k = FNMS(T10, TZ, TY * T11);
 
1190
                    }
 
1191
                    {
 
1192
                         E TO, TQ, TN, TP;
 
1193
                         TO = cr[WS(rs, 18)];
 
1194
                         TQ = ci[WS(rs, 18)];
 
1195
                         TN = W[34];
 
1196
                         TP = W[35];
 
1197
                         TR = FMA(TN, TO, TP * TQ);
 
1198
                         T3o = FNMS(TP, TO, TN * TQ);
 
1199
                    }
 
1200
                    {
 
1201
                         E TU, TW, TT, TV;
 
1202
                         TU = cr[WS(rs, 10)];
 
1203
                         TW = ci[WS(rs, 10)];
 
1204
                         TT = W[18];
 
1205
                         TV = W[19];
 
1206
                         TX = FMA(TT, TU, TV * TW);
 
1207
                         T3j = FNMS(TV, TU, TT * TW);
 
1208
                    }
 
1209
                    {
 
1210
                         E TS, T13, T5L, T5M;
 
1211
                         TS = TM + TR;
 
1212
                         T13 = TX + T12;
 
1213
                         T14 = TS + T13;
 
1214
                         T5K = TS - T13;
 
1215
                         T5L = T3n + T3o;
 
1216
                         T5M = T3j + T3k;
 
1217
                         T5N = T5L - T5M;
 
1218
                         T6F = T5L + T5M;
 
1219
                    }
 
1220
                    {
 
1221
                         E T3i, T3l, T3p, T3q;
 
1222
                         T3i = TM - TR;
 
1223
                         T3l = T3j - T3k;
 
1224
                         T3m = T3i + T3l;
 
1225
                         T4Z = T3i - T3l;
 
1226
                         T3p = T3n - T3o;
 
1227
                         T3q = TX - T12;
 
1228
                         T3r = T3p - T3q;
 
1229
                         T4Y = T3p + T3q;
 
1230
                    }
 
1231
               }
 
1232
               {
 
1233
                    E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
 
1234
                    {
 
1235
                         E T16, T18, T15, T17;
 
1236
                         T16 = cr[WS(rs, 30)];
 
1237
                         T18 = ci[WS(rs, 30)];
 
1238
                         T15 = W[58];
 
1239
                         T17 = W[59];
 
1240
                         T19 = FMA(T15, T16, T17 * T18);
 
1241
                         T3t = FNMS(T17, T16, T15 * T18);
 
1242
                    }
 
1243
                    {
 
1244
                         E T1m, T1o, T1l, T1n;
 
1245
                         T1m = cr[WS(rs, 22)];
 
1246
                         T1o = ci[WS(rs, 22)];
 
1247
                         T1l = W[42];
 
1248
                         T1n = W[43];
 
1249
                         T1p = FMA(T1l, T1m, T1n * T1o);
 
1250
                         T3A = FNMS(T1n, T1m, T1l * T1o);
 
1251
                    }
 
1252
                    {
 
1253
                         E T1b, T1d, T1a, T1c;
 
1254
                         T1b = cr[WS(rs, 14)];
 
1255
                         T1d = ci[WS(rs, 14)];
 
1256
                         T1a = W[26];
 
1257
                         T1c = W[27];
 
1258
                         T1e = FMA(T1a, T1b, T1c * T1d);
 
1259
                         T3u = FNMS(T1c, T1b, T1a * T1d);
 
1260
                    }
 
1261
                    {
 
1262
                         E T1h, T1j, T1g, T1i;
 
1263
                         T1h = cr[WS(rs, 6)];
 
1264
                         T1j = ci[WS(rs, 6)];
 
1265
                         T1g = W[10];
 
1266
                         T1i = W[11];
 
1267
                         T1k = FMA(T1g, T1h, T1i * T1j);
 
1268
                         T3z = FNMS(T1i, T1h, T1g * T1j);
 
1269
                    }
 
1270
                    {
 
1271
                         E T1f, T1q, T5Q, T5R;
 
1272
                         T1f = T19 + T1e;
 
1273
                         T1q = T1k + T1p;
 
1274
                         T1r = T1f + T1q;
 
1275
                         T5P = T1f - T1q;
 
1276
                         T5Q = T3t + T3u;
 
1277
                         T5R = T3z + T3A;
 
1278
                         T5S = T5Q - T5R;
 
1279
                         T6E = T5Q + T5R;
 
1280
                    }
 
1281
                    {
 
1282
                         E T3v, T3w, T3y, T3B;
 
1283
                         T3v = T3t - T3u;
 
1284
                         T3w = T1k - T1p;
 
1285
                         T3x = T3v - T3w;
 
1286
                         T52 = T3v + T3w;
 
1287
                         T3y = T19 - T1e;
 
1288
                         T3B = T3z - T3A;
 
1289
                         T3C = T3y + T3B;
 
1290
                         T51 = T3y - T3B;
 
1291
                    }
 
1292
               }
 
1293
               {
 
1294
                    E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;
 
1295
                    {
 
1296
                         E T1S, T1U, T1R, T1T;
 
1297
                         T1S = cr[WS(rs, 5)];
 
1298
                         T1U = ci[WS(rs, 5)];
 
1299
                         T1R = W[8];
 
1300
                         T1T = W[9];
 
1301
                         T1V = FMA(T1R, T1S, T1T * T1U);
 
1302
                         T3M = FNMS(T1T, T1S, T1R * T1U);
 
1303
                    }
 
1304
                    {
 
1305
                         E T1X, T1Z, T1W, T1Y;
 
1306
                         T1X = cr[WS(rs, 21)];
 
1307
                         T1Z = ci[WS(rs, 21)];
 
1308
                         T1W = W[40];
 
1309
                         T1Y = W[41];
 
1310
                         T20 = FMA(T1W, T1X, T1Y * T1Z);
 
1311
                         T3N = FNMS(T1Y, T1X, T1W * T1Z);
 
1312
                    }
 
1313
                    T3L = T1V - T20;
 
1314
                    T3O = T3M - T3N;
 
1315
                    {
 
1316
                         E T23, T25, T22, T24;
 
1317
                         T23 = cr[WS(rs, 29)];
 
1318
                         T25 = ci[WS(rs, 29)];
 
1319
                         T22 = W[56];
 
1320
                         T24 = W[57];
 
1321
                         T26 = FMA(T22, T23, T24 * T25);
 
1322
                         T3Q = FNMS(T24, T23, T22 * T25);
 
1323
                    }
 
1324
                    {
 
1325
                         E T28, T2a, T27, T29;
 
1326
                         T28 = cr[WS(rs, 13)];
 
1327
                         T2a = ci[WS(rs, 13)];
 
1328
                         T27 = W[24];
 
1329
                         T29 = W[25];
 
1330
                         T2b = FMA(T27, T28, T29 * T2a);
 
1331
                         T3R = FNMS(T29, T28, T27 * T2a);
 
1332
                    }
 
1333
                    T3S = T3Q - T3R;
 
1334
                    T3T = T26 - T2b;
 
1335
                    {
 
1336
                         E T21, T2c, T62, T63;
 
1337
                         T21 = T1V + T20;
 
1338
                         T2c = T26 + T2b;
 
1339
                         T2d = T21 + T2c;
 
1340
                         T5Z = T21 - T2c;
 
1341
                         T62 = T3Q + T3R;
 
1342
                         T63 = T3M + T3N;
 
1343
                         T64 = T62 - T63;
 
1344
                         T6K = T63 + T62;
 
1345
                    }
 
1346
                    {
 
1347
                         E T3P, T3U, T42, T43;
 
1348
                         T3P = T3L + T3O;
 
1349
                         T3U = T3S - T3T;
 
1350
                         T3V = KP707106781 * (T3P - T3U);
 
1351
                         T5a = KP707106781 * (T3P + T3U);
 
1352
                         T42 = T3T + T3S;
 
1353
                         T43 = T3L - T3O;
 
1354
                         T44 = KP707106781 * (T42 - T43);
 
1355
                         T57 = KP707106781 * (T43 + T42);
 
1356
                    }
 
1357
               }
 
1358
               {
 
1359
                    E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f;
 
1360
                    {
 
1361
                         E T2D, T2F, T2C, T2E;
 
1362
                         T2D = cr[WS(rs, 3)];
 
1363
                         T2F = ci[WS(rs, 3)];
 
1364
                         T2C = W[4];
 
1365
                         T2E = W[5];
 
1366
                         T2G = FMA(T2C, T2D, T2E * T2F);
 
1367
                         T4i = FNMS(T2E, T2D, T2C * T2F);
 
1368
                    }
 
1369
                    {
 
1370
                         E T2I, T2K, T2H, T2J;
 
1371
                         T2I = cr[WS(rs, 19)];
 
1372
                         T2K = ci[WS(rs, 19)];
 
1373
                         T2H = W[36];
 
1374
                         T2J = W[37];
 
1375
                         T2L = FMA(T2H, T2I, T2J * T2K);
 
1376
                         T4j = FNMS(T2J, T2I, T2H * T2K);
 
1377
                    }
 
1378
                    T4h = T2G - T2L;
 
1379
                    T4k = T4i - T4j;
 
1380
                    {
 
1381
                         E T2O, T2Q, T2N, T2P;
 
1382
                         T2O = cr[WS(rs, 27)];
 
1383
                         T2Q = ci[WS(rs, 27)];
 
1384
                         T2N = W[52];
 
1385
                         T2P = W[53];
 
1386
                         T2R = FMA(T2N, T2O, T2P * T2Q);
 
1387
                         T4d = FNMS(T2P, T2O, T2N * T2Q);
 
1388
                    }
 
1389
                    {
 
1390
                         E T2T, T2V, T2S, T2U;
 
1391
                         T2T = cr[WS(rs, 11)];
 
1392
                         T2V = ci[WS(rs, 11)];
 
1393
                         T2S = W[20];
 
1394
                         T2U = W[21];
 
1395
                         T2W = FMA(T2S, T2T, T2U * T2V);
 
1396
                         T4e = FNMS(T2U, T2T, T2S * T2V);
 
1397
                    }
 
1398
                    T4c = T2R - T2W;
 
1399
                    T4f = T4d - T4e;
 
1400
                    {
 
1401
                         E T2M, T2X, T68, T69;
 
1402
                         T2M = T2G + T2L;
 
1403
                         T2X = T2R + T2W;
 
1404
                         T2Y = T2M + T2X;
 
1405
                         T6f = T2M - T2X;
 
1406
                         T68 = T4d + T4e;
 
1407
                         T69 = T4i + T4j;
 
1408
                         T6a = T68 - T69;
 
1409
                         T6P = T69 + T68;
 
1410
                    }
 
1411
                    {
 
1412
                         E T4g, T4l, T4t, T4u;
 
1413
                         T4g = T4c + T4f;
 
1414
                         T4l = T4h - T4k;
 
1415
                         T4m = KP707106781 * (T4g - T4l);
 
1416
                         T5e = KP707106781 * (T4l + T4g);
 
1417
                         T4t = T4h + T4k;
 
1418
                         T4u = T4f - T4c;
 
1419
                         T4v = KP707106781 * (T4t - T4u);
 
1420
                         T5h = KP707106781 * (T4t + T4u);
 
1421
                    }
 
1422
               }
 
1423
               {
 
1424
                    E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
 
1425
                    {
 
1426
                         E TH, T1s, T72, T79;
 
1427
                         TH = Tj + TG;
 
1428
                         T1s = T14 + T1r;
 
1429
                         T1t = TH + T1s;
 
1430
                         T6X = TH - T1s;
 
1431
                         T72 = T6F + T6E;
 
1432
                         T79 = T73 + T78;
 
1433
                         T7a = T72 + T79;
 
1434
                         T7c = T79 - T72;
 
1435
                    }
 
1436
                    {
 
1437
                         E T2e, T2Z, T6Y, T6Z;
 
1438
                         T2e = T1Q + T2d;
 
1439
                         T2Z = T2B + T2Y;
 
1440
                         T30 = T2e + T2Z;
 
1441
                         T7b = T2Z - T2e;
 
1442
                         T6Y = T6O + T6P;
 
1443
                         T6Z = T6J + T6K;
 
1444
                         T70 = T6Y - T6Z;
 
1445
                         T71 = T6Z + T6Y;
 
1446
                    }
 
1447
                    ci[WS(rs, 15)] = T1t - T30;
 
1448
                    cr[WS(rs, 24)] = T7b - T7c;
 
1449
                    ci[WS(rs, 23)] = T7b + T7c;
 
1450
                    cr[0] = T1t + T30;
 
1451
                    cr[WS(rs, 8)] = T6X - T70;
 
1452
                    cr[WS(rs, 16)] = T71 - T7a;
 
1453
                    ci[WS(rs, 31)] = T71 + T7a;
 
1454
                    ci[WS(rs, 7)] = T6X + T70;
 
1455
               }
 
1456
               {
 
1457
                    E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
 
1458
                    E T5n, T4W, T7z;
 
1459
                    T4W = KP707106781 * (T4U + T4V);
 
1460
                    T4X = T4T - T4W;
 
1461
                    T5p = T4T + T4W;
 
1462
                    T7z = KP707106781 * (T3a - T3f);
 
1463
                    T7D = T7z + T7C;
 
1464
                    T7J = T7C - T7z;
 
1465
                    {
 
1466
                         E T50, T53, T5x, T5y;
 
1467
                         T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z);
 
1468
                         T53 = FNMS(KP923879532, T52, KP382683432 * T51);
 
1469
                         T54 = T50 + T53;
 
1470
                         T7y = T50 - T53;
 
1471
                         T5x = T5d + T5e;
 
1472
                         T5y = T5g + T5h;
 
1473
                         T5z = FNMS(KP980785280, T5y, KP195090322 * T5x);
 
1474
                         T5D = FMA(KP980785280, T5x, KP195090322 * T5y);
 
1475
                    }
 
1476
                    {
 
1477
                         E T58, T5b, T5q, T5r;
 
1478
                         T58 = T56 - T57;
 
1479
                         T5b = T59 - T5a;
 
1480
                         T5c = FMA(KP831469612, T58, KP555570233 * T5b);
 
1481
                         T5m = FNMS(KP831469612, T5b, KP555570233 * T58);
 
1482
                         T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z);
 
1483
                         T5r = FMA(KP382683432, T52, KP923879532 * T51);
 
1484
                         T5s = T5q + T5r;
 
1485
                         T7I = T5r - T5q;
 
1486
                    }
 
1487
                    {
 
1488
                         E T5u, T5v, T5f, T5i;
 
1489
                         T5u = T56 + T57;
 
1490
                         T5v = T59 + T5a;
 
1491
                         T5w = FMA(KP195090322, T5u, KP980785280 * T5v);
 
1492
                         T5C = FNMS(KP195090322, T5v, KP980785280 * T5u);
 
1493
                         T5f = T5d - T5e;
 
1494
                         T5i = T5g - T5h;
 
1495
                         T5j = FNMS(KP555570233, T5i, KP831469612 * T5f);
 
1496
                         T5n = FMA(KP555570233, T5f, KP831469612 * T5i);
 
1497
                    }
 
1498
                    {
 
1499
                         E T55, T5k, T7H, T7K;
 
1500
                         T55 = T4X + T54;
 
1501
                         T5k = T5c + T5j;
 
1502
                         ci[WS(rs, 12)] = T55 - T5k;
 
1503
                         cr[WS(rs, 3)] = T55 + T5k;
 
1504
                         T7H = T5n - T5m;
 
1505
                         T7K = T7I + T7J;
 
1506
                         cr[WS(rs, 19)] = T7H - T7K;
 
1507
                         ci[WS(rs, 28)] = T7H + T7K;
 
1508
                    }
 
1509
                    {
 
1510
                         E T7L, T7M, T5l, T5o;
 
1511
                         T7L = T5j - T5c;
 
1512
                         T7M = T7J - T7I;
 
1513
                         cr[WS(rs, 27)] = T7L - T7M;
 
1514
                         ci[WS(rs, 20)] = T7L + T7M;
 
1515
                         T5l = T4X - T54;
 
1516
                         T5o = T5m + T5n;
 
1517
                         cr[WS(rs, 11)] = T5l - T5o;
 
1518
                         ci[WS(rs, 4)] = T5l + T5o;
 
1519
                    }
 
1520
                    {
 
1521
                         E T5t, T5A, T7x, T7E;
 
1522
                         T5t = T5p - T5s;
 
1523
                         T5A = T5w + T5z;
 
1524
                         ci[WS(rs, 8)] = T5t - T5A;
 
1525
                         cr[WS(rs, 7)] = T5t + T5A;
 
1526
                         T7x = T5z - T5w;
 
1527
                         T7E = T7y + T7D;
 
1528
                         cr[WS(rs, 31)] = T7x - T7E;
 
1529
                         ci[WS(rs, 16)] = T7x + T7E;
 
1530
                    }
 
1531
                    {
 
1532
                         E T7F, T7G, T5B, T5E;
 
1533
                         T7F = T5D - T5C;
 
1534
                         T7G = T7D - T7y;
 
1535
                         cr[WS(rs, 23)] = T7F - T7G;
 
1536
                         ci[WS(rs, 24)] = T7F + T7G;
 
1537
                         T5B = T5p + T5s;
 
1538
                         T5E = T5C + T5D;
 
1539
                         cr[WS(rs, 15)] = T5B - T5E;
 
1540
                         ci[0] = T5B + T5E;
 
1541
                    }
 
1542
               }
 
1543
               {
 
1544
                    E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
 
1545
                    {
 
1546
                         E T6D, T6G, T7e, T7f;
 
1547
                         T6D = Tj - TG;
 
1548
                         T6G = T6E - T6F;
 
1549
                         T6H = T6D - T6G;
 
1550
                         T6T = T6D + T6G;
 
1551
                         T7e = T14 - T1r;
 
1552
                         T7f = T78 - T73;
 
1553
                         T7g = T7e + T7f;
 
1554
                         T7i = T7f - T7e;
 
1555
                    }
 
1556
                    {
 
1557
                         E T6I, T6L, T6N, T6Q;
 
1558
                         T6I = T1Q - T2d;
 
1559
                         T6L = T6J - T6K;
 
1560
                         T6M = T6I + T6L;
 
1561
                         T6U = T6I - T6L;
 
1562
                         T6N = T2B - T2Y;
 
1563
                         T6Q = T6O - T6P;
 
1564
                         T6R = T6N - T6Q;
 
1565
                         T6V = T6N + T6Q;
 
1566
                    }
 
1567
                    {
 
1568
                         E T6S, T7h, T6W, T7d;
 
1569
                         T6S = KP707106781 * (T6M + T6R);
 
1570
                         ci[WS(rs, 11)] = T6H - T6S;
 
1571
                         cr[WS(rs, 4)] = T6H + T6S;
 
1572
                         T7h = KP707106781 * (T6V - T6U);
 
1573
                         cr[WS(rs, 20)] = T7h - T7i;
 
1574
                         ci[WS(rs, 27)] = T7h + T7i;
 
1575
                         T6W = KP707106781 * (T6U + T6V);
 
1576
                         cr[WS(rs, 12)] = T6T - T6W;
 
1577
                         ci[WS(rs, 3)] = T6T + T6W;
 
1578
                         T7d = KP707106781 * (T6R - T6M);
 
1579
                         cr[WS(rs, 28)] = T7d - T7g;
 
1580
                         ci[WS(rs, 19)] = T7d + T7g;
 
1581
                    }
 
1582
               }
 
1583
               {
 
1584
                    E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
 
1585
                    E T6l;
 
1586
                    {
 
1587
                         E T5O, T5T, T60, T65;
 
1588
                         T5J = T5F - T5I;
 
1589
                         T7n = T7l + T7m;
 
1590
                         T7t = T7m - T7l;
 
1591
                         T6n = T5F + T5I;
 
1592
                         T5O = T5K + T5N;
 
1593
                         T5T = T5P - T5S;
 
1594
                         T5U = KP707106781 * (T5O + T5T);
 
1595
                         T7k = KP707106781 * (T5O - T5T);
 
1596
                         {
 
1597
                              E T6v, T6w, T6o, T6p;
 
1598
                              T6v = T6e + T6f;
 
1599
                              T6w = T67 + T6a;
 
1600
                              T6x = FMA(KP382683432, T6v, KP923879532 * T6w);
 
1601
                              T6B = FNMS(KP923879532, T6v, KP382683432 * T6w);
 
1602
                              T6o = T5K - T5N;
 
1603
                              T6p = T5P + T5S;
 
1604
                              T6q = KP707106781 * (T6o + T6p);
 
1605
                              T7s = KP707106781 * (T6p - T6o);
 
1606
                         }
 
1607
                         T60 = T5Y - T5Z;
 
1608
                         T65 = T61 - T64;
 
1609
                         T66 = FMA(KP382683432, T60, KP923879532 * T65);
 
1610
                         T6k = FNMS(KP923879532, T60, KP382683432 * T65);
 
1611
                         {
 
1612
                              E T6s, T6t, T6b, T6g;
 
1613
                              T6s = T61 + T64;
 
1614
                              T6t = T5Y + T5Z;
 
1615
                              T6u = FNMS(KP382683432, T6t, KP923879532 * T6s);
 
1616
                              T6A = FMA(KP923879532, T6t, KP382683432 * T6s);
 
1617
                              T6b = T67 - T6a;
 
1618
                              T6g = T6e - T6f;
 
1619
                              T6h = FNMS(KP382683432, T6g, KP923879532 * T6b);
 
1620
                              T6l = FMA(KP923879532, T6g, KP382683432 * T6b);
 
1621
                         }
 
1622
                    }
 
1623
                    {
 
1624
                         E T5V, T6i, T7r, T7u;
 
1625
                         T5V = T5J + T5U;
 
1626
                         T6i = T66 + T6h;
 
1627
                         ci[WS(rs, 13)] = T5V - T6i;
 
1628
                         cr[WS(rs, 2)] = T5V + T6i;
 
1629
                         T7r = T6l - T6k;
 
1630
                         T7u = T7s + T7t;
 
1631
                         cr[WS(rs, 18)] = T7r - T7u;
 
1632
                         ci[WS(rs, 29)] = T7r + T7u;
 
1633
                    }
 
1634
                    {
 
1635
                         E T7v, T7w, T6j, T6m;
 
1636
                         T7v = T6h - T66;
 
1637
                         T7w = T7t - T7s;
 
1638
                         cr[WS(rs, 26)] = T7v - T7w;
 
1639
                         ci[WS(rs, 21)] = T7v + T7w;
 
1640
                         T6j = T5J - T5U;
 
1641
                         T6m = T6k + T6l;
 
1642
                         cr[WS(rs, 10)] = T6j - T6m;
 
1643
                         ci[WS(rs, 5)] = T6j + T6m;
 
1644
                    }
 
1645
                    {
 
1646
                         E T6r, T6y, T7j, T7o;
 
1647
                         T6r = T6n + T6q;
 
1648
                         T6y = T6u + T6x;
 
1649
                         cr[WS(rs, 14)] = T6r - T6y;
 
1650
                         ci[WS(rs, 1)] = T6r + T6y;
 
1651
                         T7j = T6B - T6A;
 
1652
                         T7o = T7k + T7n;
 
1653
                         cr[WS(rs, 30)] = T7j - T7o;
 
1654
                         ci[WS(rs, 17)] = T7j + T7o;
 
1655
                    }
 
1656
                    {
 
1657
                         E T7p, T7q, T6z, T6C;
 
1658
                         T7p = T6x - T6u;
 
1659
                         T7q = T7n - T7k;
 
1660
                         cr[WS(rs, 22)] = T7p - T7q;
 
1661
                         ci[WS(rs, 25)] = T7p + T7q;
 
1662
                         T6z = T6n - T6q;
 
1663
                         T6C = T6A + T6B;
 
1664
                         ci[WS(rs, 9)] = T6z - T6C;
 
1665
                         cr[WS(rs, 6)] = T6z + T6C;
 
1666
                    }
 
1667
               }
 
1668
               {
 
1669
                    E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
 
1670
                    E T4B, T3g, T7P;
 
1671
                    T3g = KP707106781 * (T3a + T3f);
 
1672
                    T3h = T35 - T3g;
 
1673
                    T4D = T35 + T3g;
 
1674
                    T7P = KP707106781 * (T4V - T4U);
 
1675
                    T7R = T7P + T7Q;
 
1676
                    T7X = T7Q - T7P;
 
1677
                    {
 
1678
                         E T3s, T3D, T4L, T4M;
 
1679
                         T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
 
1680
                         T3D = FMA(KP923879532, T3x, KP382683432 * T3C);
 
1681
                         T3E = T3s + T3D;
 
1682
                         T7O = T3D - T3s;
 
1683
                         T4L = T4s + T4v;
 
1684
                         T4M = T4b + T4m;
 
1685
                         T4N = FNMS(KP195090322, T4M, KP980785280 * T4L);
 
1686
                         T4R = FMA(KP980785280, T4M, KP195090322 * T4L);
 
1687
                    }
 
1688
                    {
 
1689
                         E T3W, T45, T4E, T4F;
 
1690
                         T3W = T3K - T3V;
 
1691
                         T45 = T41 - T44;
 
1692
                         T46 = FNMS(KP555570233, T45, KP831469612 * T3W);
 
1693
                         T4A = FMA(KP831469612, T45, KP555570233 * T3W);
 
1694
                         T4E = FMA(KP382683432, T3r, KP923879532 * T3m);
 
1695
                         T4F = FNMS(KP382683432, T3x, KP923879532 * T3C);
 
1696
                         T4G = T4E + T4F;
 
1697
                         T7W = T4E - T4F;
 
1698
                    }
 
1699
                    {
 
1700
                         E T4I, T4J, T4n, T4w;
 
1701
                         T4I = T41 + T44;
 
1702
                         T4J = T3K + T3V;
 
1703
                         T4K = FMA(KP195090322, T4I, KP980785280 * T4J);
 
1704
                         T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J);
 
1705
                         T4n = T4b - T4m;
 
1706
                         T4w = T4s - T4v;
 
1707
                         T4x = FMA(KP555570233, T4n, KP831469612 * T4w);
 
1708
                         T4B = FNMS(KP831469612, T4n, KP555570233 * T4w);
 
1709
                    }
 
1710
                    {
 
1711
                         E T3F, T4y, T7V, T7Y;
 
1712
                         T3F = T3h + T3E;
 
1713
                         T4y = T46 + T4x;
 
1714
                         cr[WS(rs, 13)] = T3F - T4y;
 
1715
                         ci[WS(rs, 2)] = T3F + T4y;
 
1716
                         T7V = T4B - T4A;
 
1717
                         T7Y = T7W + T7X;
 
1718
                         cr[WS(rs, 29)] = T7V - T7Y;
 
1719
                         ci[WS(rs, 18)] = T7V + T7Y;
 
1720
                    }
 
1721
                    {
 
1722
                         E T7Z, T80, T4z, T4C;
 
1723
                         T7Z = T4x - T46;
 
1724
                         T80 = T7X - T7W;
 
1725
                         cr[WS(rs, 21)] = T7Z - T80;
 
1726
                         ci[WS(rs, 26)] = T7Z + T80;
 
1727
                         T4z = T3h - T3E;
 
1728
                         T4C = T4A + T4B;
 
1729
                         ci[WS(rs, 10)] = T4z - T4C;
 
1730
                         cr[WS(rs, 5)] = T4z + T4C;
 
1731
                    }
 
1732
                    {
 
1733
                         E T4H, T4O, T7N, T7S;
 
1734
                         T4H = T4D + T4G;
 
1735
                         T4O = T4K + T4N;
 
1736
                         ci[WS(rs, 14)] = T4H - T4O;
 
1737
                         cr[WS(rs, 1)] = T4H + T4O;
 
1738
                         T7N = T4R - T4Q;
 
1739
                         T7S = T7O + T7R;
 
1740
                         cr[WS(rs, 17)] = T7N - T7S;
 
1741
                         ci[WS(rs, 30)] = T7N + T7S;
 
1742
                    }
 
1743
                    {
 
1744
                         E T7T, T7U, T4P, T4S;
 
1745
                         T7T = T4N - T4K;
 
1746
                         T7U = T7R - T7O;
 
1747
                         cr[WS(rs, 25)] = T7T - T7U;
 
1748
                         ci[WS(rs, 22)] = T7T + T7U;
 
1749
                         T4P = T4D - T4G;
 
1750
                         T4S = T4Q + T4R;
 
1751
                         cr[WS(rs, 9)] = T4P - T4S;
 
1752
                         ci[WS(rs, 6)] = T4P + T4S;
 
1753
                    }
1750
1754
               }
1751
1755
          }
1752
1756
     }