~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cf/r2cfII_32.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added

removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:44:25 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:17:06 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_r2cf -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
 
28
/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
29
29
 
30
30
/*
31
31
 * This function contains 174 FP additions, 128 FP multiplications,
51
51
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
52
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
53
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
54
 
     INT i;
55
 
     for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
56
 
          E T23, T1S, T21, T1L, T2z, T2x, T1Z, T22;
57
 
          {
58
 
               E T2n, T2B, T1z, T5, T1C, T2C, T2o, Tc, T27, T1J, T1l, Tm, T26, T1G, T1k;
59
 
               E Tv, T1s, T1c, T2e, T1Y, T1r, T15, T2d, T1V, TP, TF, T1M, TC, T1P, TN;
60
 
               E TO, TI;
61
 
               {
62
 
                    E T1A, T8, Te, Tj, Tf, T1B, Tb, Tg;
63
 
                    {
64
 
                         E T1, T2l, T2, T3, T9, Ta;
65
 
                         T1 = R0[0];
66
 
                         T2l = R0[WS(rs, 8)];
67
 
                         T2 = R0[WS(rs, 4)];
68
 
                         T3 = R0[WS(rs, 12)];
69
 
                         {
70
 
                              E T6, T7, T2m, T4;
71
 
                              T6 = R0[WS(rs, 10)];
72
 
                              T7 = R0[WS(rs, 2)];
73
 
                              T9 = R0[WS(rs, 6)];
74
 
                              T2m = T2 + T3;
75
 
                              T4 = T2 - T3;
76
 
                              T1A = FNMS(KP414213562, T6, T7);
77
 
                              T8 = FMA(KP414213562, T7, T6);
78
 
                              T2n = FMA(KP707106781, T2m, T2l);
79
 
                              T2B = FNMS(KP707106781, T2m, T2l);
80
 
                              T1z = FMA(KP707106781, T4, T1);
81
 
                              T5 = FNMS(KP707106781, T4, T1);
82
 
                              Ta = R0[WS(rs, 14)];
83
 
                         }
84
 
                         Te = R0[WS(rs, 7)];
85
 
                         Tj = R0[WS(rs, 15)];
86
 
                         Tf = R0[WS(rs, 3)];
87
 
                         T1B = FMS(KP414213562, T9, Ta);
88
 
                         Tb = FMA(KP414213562, Ta, T9);
89
 
                         Tg = R0[WS(rs, 11)];
90
 
                    }
91
 
                    {
92
 
                         E Tn, Ts, To, T1I, Tl, T1H, Ti, Tp, Tk, Th, T1T, T1U;
93
 
                         Tn = R0[WS(rs, 9)];
94
 
                         T1C = T1A + T1B;
95
 
                         T2C = T1B - T1A;
96
 
                         T2o = T8 + Tb;
97
 
                         Tc = T8 - Tb;
98
 
                         Tk = Tg - Tf;
99
 
                         Th = Tf + Tg;
100
 
                         Ts = R0[WS(rs, 1)];
101
 
                         To = R0[WS(rs, 5)];
102
 
                         T1I = FMA(KP707106781, Tk, Tj);
103
 
                         Tl = FNMS(KP707106781, Tk, Tj);
104
 
                         T1H = FMA(KP707106781, Th, Te);
105
 
                         Ti = FNMS(KP707106781, Th, Te);
106
 
                         Tp = R0[WS(rs, 13)];
107
 
                         {
108
 
                              E TT, T16, TY, T17, TW, TZ, T11, T12, Tt, Tq;
109
 
                              TT = R1[WS(rs, 15)];
110
 
                              T27 = FNMS(KP198912367, T1H, T1I);
111
 
                              T1J = FMA(KP198912367, T1I, T1H);
112
 
                              T1l = FMA(KP668178637, Ti, Tl);
113
 
                              Tm = FNMS(KP668178637, Tl, Ti);
114
 
                              Tt = To - Tp;
115
 
                              Tq = To + Tp;
116
 
                              T16 = R1[WS(rs, 7)];
117
 
                              {
118
 
                                   E TU, T1F, Tu, T1E, Tr, TV;
119
 
                                   TU = R1[WS(rs, 3)];
120
 
                                   T1F = FMA(KP707106781, Tt, Ts);
121
 
                                   Tu = FNMS(KP707106781, Tt, Ts);
122
 
                                   T1E = FMA(KP707106781, Tq, Tn);
123
 
                                   Tr = FNMS(KP707106781, Tq, Tn);
124
 
                                   TV = R1[WS(rs, 11)];
125
 
                                   TY = R1[WS(rs, 9)];
126
 
                                   T26 = FNMS(KP198912367, T1E, T1F);
127
 
                                   T1G = FMA(KP198912367, T1F, T1E);
128
 
                                   T1k = FMA(KP668178637, Tr, Tu);
129
 
                                   Tv = FNMS(KP668178637, Tu, Tr);
130
 
                                   T17 = TU + TV;
131
 
                                   TW = TU - TV;
132
 
                                   TZ = R1[WS(rs, 1)];
133
 
                                   T11 = R1[WS(rs, 5)];
134
 
                                   T12 = R1[WS(rs, 13)];
135
 
                              }
136
 
                              {
137
 
                                   E TX, T1a, T10, T19, T13, T1W, T18, T1b, T14, T1X;
138
 
                                   T1T = FMS(KP707106781, TW, TT);
139
 
                                   TX = FMA(KP707106781, TW, TT);
140
 
                                   T1a = FNMS(KP414213562, TY, TZ);
141
 
                                   T10 = FMA(KP414213562, TZ, TY);
142
 
                                   T19 = FMS(KP414213562, T11, T12);
143
 
                                   T13 = FMA(KP414213562, T12, T11);
144
 
                                   T1W = FMA(KP707106781, T17, T16);
145
 
                                   T18 = FNMS(KP707106781, T17, T16);
146
 
                                   T1b = T19 - T1a;
147
 
                                   T1U = T1a + T19;
148
 
                                   T14 = T10 - T13;
149
 
                                   T1X = T10 + T13;
150
 
                                   T1s = FMA(KP923879532, T1b, T18);
151
 
                                   T1c = FNMS(KP923879532, T1b, T18);
152
 
                                   T2e = FMA(KP923879532, T1X, T1W);
153
 
                                   T1Y = FNMS(KP923879532, T1X, T1W);
154
 
                                   T1r = FNMS(KP923879532, T14, TX);
155
 
                                   T15 = FMA(KP923879532, T14, TX);
156
 
                              }
157
 
                         }
158
 
                         {
159
 
                              E Ty, TL, TG, TM, TB, TH;
160
 
                              Ty = R1[0];
161
 
                              TL = R1[WS(rs, 8)];
162
 
                              {
163
 
                                   E Tz, TA, TD, TE;
164
 
                                   Tz = R1[WS(rs, 4)];
165
 
                                   T2d = FMA(KP923879532, T1U, T1T);
166
 
                                   T1V = FNMS(KP923879532, T1U, T1T);
167
 
                                   TA = R1[WS(rs, 12)];
168
 
                                   TD = R1[WS(rs, 10)];
169
 
                                   TE = R1[WS(rs, 2)];
170
 
                                   TG = R1[WS(rs, 6)];
171
 
                                   TM = Tz + TA;
172
 
                                   TB = Tz - TA;
173
 
                                   TP = FNMS(KP414213562, TD, TE);
174
 
                                   TF = FMA(KP414213562, TE, TD);
175
 
                                   TH = R1[WS(rs, 14)];
176
 
                              }
177
 
                              T1M = FMA(KP707106781, TB, Ty);
178
 
                              TC = FNMS(KP707106781, TB, Ty);
179
 
                              T1P = FMA(KP707106781, TM, TL);
180
 
                              TN = FNMS(KP707106781, TM, TL);
181
 
                              TO = FMS(KP414213562, TG, TH);
182
 
                              TI = FMA(KP414213562, TH, TG);
183
 
                         }
184
 
                    }
185
 
               }
186
 
               {
187
 
                    E T1j, T1O, T1p, T1R, T1o, T2E, T2D, T1m, T1D, T2w, T2v, T1K, T2i, T2c, T2h;
188
 
                    E T29, T2t, T2r, T2f, T2j;
189
 
                    {
190
 
                         E T2a, T2b, T1g, TS, T1f, Tx, T2N, T2L, T1d, T1h;
191
 
                         {
192
 
                              E Td, TR, TK, Tw, T2J, T2K;
193
 
                              T1j = FMA(KP923879532, Tc, T5);
194
 
                              Td = FNMS(KP923879532, Tc, T5);
195
 
                              {
196
 
                                   E T1N, TQ, T1Q, TJ;
197
 
                                   T1N = TP + TO;
198
 
                                   TQ = TO - TP;
199
 
                                   T1Q = TF + TI;
200
 
                                   TJ = TF - TI;
201
 
                                   T2a = FMA(KP923879532, T1N, T1M);
202
 
                                   T1O = FNMS(KP923879532, T1N, T1M);
203
 
                                   T1p = FMA(KP923879532, TQ, TN);
204
 
                                   TR = FNMS(KP923879532, TQ, TN);
205
 
                                   T2b = FMA(KP923879532, T1Q, T1P);
206
 
                                   T1R = FNMS(KP923879532, T1Q, T1P);
207
 
                                   T1o = FMA(KP923879532, TJ, TC);
208
 
                                   TK = FNMS(KP923879532, TJ, TC);
209
 
                                   Tw = Tm - Tv;
210
 
                                   T2E = Tv + Tm;
211
 
                              }
212
 
                              T2D = FMA(KP923879532, T2C, T2B);
213
 
                              T2J = FNMS(KP923879532, T2C, T2B);
214
 
                              T2K = T1k + T1l;
215
 
                              T1m = T1k - T1l;
216
 
                              T1g = FMA(KP534511135, TK, TR);
217
 
                              TS = FNMS(KP534511135, TR, TK);
218
 
                              T1f = FNMS(KP831469612, Tw, Td);
219
 
                              Tx = FMA(KP831469612, Tw, Td);
220
 
                              T2N = FNMS(KP831469612, T2K, T2J);
221
 
                              T2L = FMA(KP831469612, T2K, T2J);
222
 
                              T1d = FNMS(KP534511135, T1c, T15);
223
 
                              T1h = FMA(KP534511135, T15, T1c);
224
 
                         }
225
 
                         {
226
 
                              E T25, T28, T2p, T2q;
227
 
                              T1D = FNMS(KP923879532, T1C, T1z);
228
 
                              T25 = FMA(KP923879532, T1C, T1z);
229
 
                              {
230
 
                                   E T2O, T1e, T2M, T1i;
231
 
                                   T2O = TS + T1d;
232
 
                                   T1e = TS - T1d;
233
 
                                   T2M = T1g + T1h;
234
 
                                   T1i = T1g - T1h;
235
 
                                   Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N);
236
 
                                   Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N));
237
 
                                   Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx);
238
 
                                   Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx);
239
 
                                   Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L));
240
 
                                   Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L);
241
 
                                   Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f);
242
 
                                   Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f);
243
 
                                   T28 = T26 - T27;
244
 
                                   T2w = T26 + T27;
245
 
                              }
246
 
                              T2v = FNMS(KP923879532, T2o, T2n);
247
 
                              T2p = FMA(KP923879532, T2o, T2n);
248
 
                              T2q = T1G + T1J;
249
 
                              T1K = T1G - T1J;
250
 
                              T2i = FMA(KP098491403, T2a, T2b);
251
 
                              T2c = FNMS(KP098491403, T2b, T2a);
252
 
                              T2h = FNMS(KP980785280, T28, T25);
253
 
                              T29 = FMA(KP980785280, T28, T25);
254
 
                              T2t = FNMS(KP980785280, T2q, T2p);
255
 
                              T2r = FMA(KP980785280, T2q, T2p);
256
 
                              T2f = FMA(KP098491403, T2e, T2d);
257
 
                              T2j = FNMS(KP098491403, T2d, T2e);
258
 
                         }
259
 
                    }
260
 
                    {
261
 
                         E T1x, T1q, T1v, T1n, T2H, T2F, T1t, T1w;
262
 
                         {
263
 
                              E T2u, T2g, T2s, T2k;
264
 
                              T2u = T2f - T2c;
265
 
                              T2g = T2c + T2f;
266
 
                              T2s = T2i + T2j;
267
 
                              T2k = T2i - T2j;
268
 
                              Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t);
269
 
                              Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t);
270
 
                              Cr[0] = FMA(KP995184726, T2g, T29);
271
 
                              Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29);
272
 
                              Ci[0] = -(FMA(KP995184726, T2s, T2r));
273
 
                              Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, T2r);
274
 
                              Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h);
275
 
                              Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h);
276
 
                         }
277
 
                         T1x = FNMS(KP303346683, T1o, T1p);
278
 
                         T1q = FMA(KP303346683, T1p, T1o);
279
 
                         T1v = FNMS(KP831469612, T1m, T1j);
280
 
                         T1n = FMA(KP831469612, T1m, T1j);
281
 
                         T2H = FNMS(KP831469612, T2E, T2D);
282
 
                         T2F = FMA(KP831469612, T2E, T2D);
283
 
                         T1t = FMA(KP303346683, T1s, T1r);
284
 
                         T1w = FNMS(KP303346683, T1r, T1s);
285
 
                         {
286
 
                              E T2I, T1u, T2G, T1y;
287
 
                              T2I = T1q + T1t;
288
 
                              T1u = T1q - T1t;
289
 
                              T2G = T1x + T1w;
290
 
                              T1y = T1w - T1x;
291
 
                              Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H));
292
 
                              Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H);
293
 
                              Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n);
294
 
                              Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n);
295
 
                              Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F);
296
 
                              Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F);
297
 
                              Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v);
298
 
                              Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v);
299
 
                         }
300
 
                         T23 = FNMS(KP820678790, T1O, T1R);
301
 
                         T1S = FMA(KP820678790, T1R, T1O);
302
 
                         T21 = FNMS(KP980785280, T1K, T1D);
303
 
                         T1L = FMA(KP980785280, T1K, T1D);
304
 
                         T2z = FMA(KP980785280, T2w, T2v);
305
 
                         T2x = FNMS(KP980785280, T2w, T2v);
306
 
                         T1Z = FNMS(KP820678790, T1Y, T1V);
307
 
                         T22 = FMA(KP820678790, T1V, T1Y);
308
 
                    }
309
 
               }
310
 
          }
311
 
          {
312
 
               E T20, T2A, T24, T2y;
313
 
               T20 = T1S + T1Z;
314
 
               T2A = T1Z - T1S;
315
 
               T24 = T22 - T23;
316
 
               T2y = T23 + T22;
317
 
               Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z);
318
 
               Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z);
319
 
               Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L);
320
 
               Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L);
321
 
               Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x);
322
 
               Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x);
323
 
               Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21);
324
 
               Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21);
 
54
     {
 
55
          INT i;
 
56
          for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
 
57
               E T23, T1S, T21, T1L, T2z, T2x, T1Z, T22;
 
58
               {
 
59
                    E T2n, T2B, T1z, T5, T1C, T2C, T2o, Tc, T27, T1J, T1l, Tm, T26, T1G, T1k;
 
60
                    E Tv, T1s, T1c, T2e, T1Y, T1r, T15, T2d, T1V, TP, TF, T1M, TC, T1P, TN;
 
61
                    E TO, TI;
 
62
                    {
 
63
                         E T1A, T8, Te, Tj, Tf, T1B, Tb, Tg;
 
64
                         {
 
65
                              E T1, T2l, T2, T3, T9, Ta;
 
66
                              T1 = R0[0];
 
67
                              T2l = R0[WS(rs, 8)];
 
68
                              T2 = R0[WS(rs, 4)];
 
69
                              T3 = R0[WS(rs, 12)];
 
70
                              {
 
71
                                   E T6, T7, T2m, T4;
 
72
                                   T6 = R0[WS(rs, 10)];
 
73
                                   T7 = R0[WS(rs, 2)];
 
74
                                   T9 = R0[WS(rs, 6)];
 
75
                                   T2m = T2 + T3;
 
76
                                   T4 = T2 - T3;
 
77
                                   T1A = FNMS(KP414213562, T6, T7);
 
78
                                   T8 = FMA(KP414213562, T7, T6);
 
79
                                   T2n = FMA(KP707106781, T2m, T2l);
 
80
                                   T2B = FNMS(KP707106781, T2m, T2l);
 
81
                                   T1z = FMA(KP707106781, T4, T1);
 
82
                                   T5 = FNMS(KP707106781, T4, T1);
 
83
                                   Ta = R0[WS(rs, 14)];
 
84
                              }
 
85
                              Te = R0[WS(rs, 7)];
 
86
                              Tj = R0[WS(rs, 15)];
 
87
                              Tf = R0[WS(rs, 3)];
 
88
                              T1B = FMS(KP414213562, T9, Ta);
 
89
                              Tb = FMA(KP414213562, Ta, T9);
 
90
                              Tg = R0[WS(rs, 11)];
 
91
                         }
 
92
                         {
 
93
                              E Tn, Ts, To, T1I, Tl, T1H, Ti, Tp, Tk, Th, T1T, T1U;
 
94
                              Tn = R0[WS(rs, 9)];
 
95
                              T1C = T1A + T1B;
 
96
                              T2C = T1B - T1A;
 
97
                              T2o = T8 + Tb;
 
98
                              Tc = T8 - Tb;
 
99
                              Tk = Tg - Tf;
 
100
                              Th = Tf + Tg;
 
101
                              Ts = R0[WS(rs, 1)];
 
102
                              To = R0[WS(rs, 5)];
 
103
                              T1I = FMA(KP707106781, Tk, Tj);
 
104
                              Tl = FNMS(KP707106781, Tk, Tj);
 
105
                              T1H = FMA(KP707106781, Th, Te);
 
106
                              Ti = FNMS(KP707106781, Th, Te);
 
107
                              Tp = R0[WS(rs, 13)];
 
108
                              {
 
109
                                   E TT, T16, TY, T17, TW, TZ, T11, T12, Tt, Tq;
 
110
                                   TT = R1[WS(rs, 15)];
 
111
                                   T27 = FNMS(KP198912367, T1H, T1I);
 
112
                                   T1J = FMA(KP198912367, T1I, T1H);
 
113
                                   T1l = FMA(KP668178637, Ti, Tl);
 
114
                                   Tm = FNMS(KP668178637, Tl, Ti);
 
115
                                   Tt = To - Tp;
 
116
                                   Tq = To + Tp;
 
117
                                   T16 = R1[WS(rs, 7)];
 
118
                                   {
 
119
                                        E TU, T1F, Tu, T1E, Tr, TV;
 
120
                                        TU = R1[WS(rs, 3)];
 
121
                                        T1F = FMA(KP707106781, Tt, Ts);
 
122
                                        Tu = FNMS(KP707106781, Tt, Ts);
 
123
                                        T1E = FMA(KP707106781, Tq, Tn);
 
124
                                        Tr = FNMS(KP707106781, Tq, Tn);
 
125
                                        TV = R1[WS(rs, 11)];
 
126
                                        TY = R1[WS(rs, 9)];
 
127
                                        T26 = FNMS(KP198912367, T1E, T1F);
 
128
                                        T1G = FMA(KP198912367, T1F, T1E);
 
129
                                        T1k = FMA(KP668178637, Tr, Tu);
 
130
                                        Tv = FNMS(KP668178637, Tu, Tr);
 
131
                                        T17 = TU + TV;
 
132
                                        TW = TU - TV;
 
133
                                        TZ = R1[WS(rs, 1)];
 
134
                                        T11 = R1[WS(rs, 5)];
 
135
                                        T12 = R1[WS(rs, 13)];
 
136
                                   }
 
137
                                   {
 
138
                                        E TX, T1a, T10, T19, T13, T1W, T18, T1b, T14, T1X;
 
139
                                        T1T = FMS(KP707106781, TW, TT);
 
140
                                        TX = FMA(KP707106781, TW, TT);
 
141
                                        T1a = FNMS(KP414213562, TY, TZ);
 
142
                                        T10 = FMA(KP414213562, TZ, TY);
 
143
                                        T19 = FMS(KP414213562, T11, T12);
 
144
                                        T13 = FMA(KP414213562, T12, T11);
 
145
                                        T1W = FMA(KP707106781, T17, T16);
 
146
                                        T18 = FNMS(KP707106781, T17, T16);
 
147
                                        T1b = T19 - T1a;
 
148
                                        T1U = T1a + T19;
 
149
                                        T14 = T10 - T13;
 
150
                                        T1X = T10 + T13;
 
151
                                        T1s = FMA(KP923879532, T1b, T18);
 
152
                                        T1c = FNMS(KP923879532, T1b, T18);
 
153
                                        T2e = FMA(KP923879532, T1X, T1W);
 
154
                                        T1Y = FNMS(KP923879532, T1X, T1W);
 
155
                                        T1r = FNMS(KP923879532, T14, TX);
 
156
                                        T15 = FMA(KP923879532, T14, TX);
 
157
                                   }
 
158
                              }
 
159
                              {
 
160
                                   E Ty, TL, TG, TM, TB, TH;
 
161
                                   Ty = R1[0];
 
162
                                   TL = R1[WS(rs, 8)];
 
163
                                   {
 
164
                                        E Tz, TA, TD, TE;
 
165
                                        Tz = R1[WS(rs, 4)];
 
166
                                        T2d = FMA(KP923879532, T1U, T1T);
 
167
                                        T1V = FNMS(KP923879532, T1U, T1T);
 
168
                                        TA = R1[WS(rs, 12)];
 
169
                                        TD = R1[WS(rs, 10)];
 
170
                                        TE = R1[WS(rs, 2)];
 
171
                                        TG = R1[WS(rs, 6)];
 
172
                                        TM = Tz + TA;
 
173
                                        TB = Tz - TA;
 
174
                                        TP = FNMS(KP414213562, TD, TE);
 
175
                                        TF = FMA(KP414213562, TE, TD);
 
176
                                        TH = R1[WS(rs, 14)];
 
177
                                   }
 
178
                                   T1M = FMA(KP707106781, TB, Ty);
 
179
                                   TC = FNMS(KP707106781, TB, Ty);
 
180
                                   T1P = FMA(KP707106781, TM, TL);
 
181
                                   TN = FNMS(KP707106781, TM, TL);
 
182
                                   TO = FMS(KP414213562, TG, TH);
 
183
                                   TI = FMA(KP414213562, TH, TG);
 
184
                              }
 
185
                         }
 
186
                    }
 
187
                    {
 
188
                         E T1j, T1O, T1p, T1R, T1o, T2E, T2D, T1m, T1D, T2w, T2v, T1K, T2i, T2c, T2h;
 
189
                         E T29, T2t, T2r, T2f, T2j;
 
190
                         {
 
191
                              E T2a, T2b, T1g, TS, T1f, Tx, T2N, T2L, T1d, T1h;
 
192
                              {
 
193
                                   E Td, TR, TK, Tw, T2J, T2K;
 
194
                                   T1j = FMA(KP923879532, Tc, T5);
 
195
                                   Td = FNMS(KP923879532, Tc, T5);
 
196
                                   {
 
197
                                        E T1N, TQ, T1Q, TJ;
 
198
                                        T1N = TP + TO;
 
199
                                        TQ = TO - TP;
 
200
                                        T1Q = TF + TI;
 
201
                                        TJ = TF - TI;
 
202
                                        T2a = FMA(KP923879532, T1N, T1M);
 
203
                                        T1O = FNMS(KP923879532, T1N, T1M);
 
204
                                        T1p = FMA(KP923879532, TQ, TN);
 
205
                                        TR = FNMS(KP923879532, TQ, TN);
 
206
                                        T2b = FMA(KP923879532, T1Q, T1P);
 
207
                                        T1R = FNMS(KP923879532, T1Q, T1P);
 
208
                                        T1o = FMA(KP923879532, TJ, TC);
 
209
                                        TK = FNMS(KP923879532, TJ, TC);
 
210
                                        Tw = Tm - Tv;
 
211
                                        T2E = Tv + Tm;
 
212
                                   }
 
213
                                   T2D = FMA(KP923879532, T2C, T2B);
 
214
                                   T2J = FNMS(KP923879532, T2C, T2B);
 
215
                                   T2K = T1k + T1l;
 
216
                                   T1m = T1k - T1l;
 
217
                                   T1g = FMA(KP534511135, TK, TR);
 
218
                                   TS = FNMS(KP534511135, TR, TK);
 
219
                                   T1f = FNMS(KP831469612, Tw, Td);
 
220
                                   Tx = FMA(KP831469612, Tw, Td);
 
221
                                   T2N = FNMS(KP831469612, T2K, T2J);
 
222
                                   T2L = FMA(KP831469612, T2K, T2J);
 
223
                                   T1d = FNMS(KP534511135, T1c, T15);
 
224
                                   T1h = FMA(KP534511135, T15, T1c);
 
225
                              }
 
226
                              {
 
227
                                   E T25, T28, T2p, T2q;
 
228
                                   T1D = FNMS(KP923879532, T1C, T1z);
 
229
                                   T25 = FMA(KP923879532, T1C, T1z);
 
230
                                   {
 
231
                                        E T2O, T1e, T2M, T1i;
 
232
                                        T2O = TS + T1d;
 
233
                                        T1e = TS - T1d;
 
234
                                        T2M = T1g + T1h;
 
235
                                        T1i = T1g - T1h;
 
236
                                        Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N);
 
237
                                        Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N));
 
238
                                        Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx);
 
239
                                        Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx);
 
240
                                        Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L));
 
241
                                        Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L);
 
242
                                        Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f);
 
243
                                        Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f);
 
244
                                        T28 = T26 - T27;
 
245
                                        T2w = T26 + T27;
 
246
                                   }
 
247
                                   T2v = FNMS(KP923879532, T2o, T2n);
 
248
                                   T2p = FMA(KP923879532, T2o, T2n);
 
249
                                   T2q = T1G + T1J;
 
250
                                   T1K = T1G - T1J;
 
251
                                   T2i = FMA(KP098491403, T2a, T2b);
 
252
                                   T2c = FNMS(KP098491403, T2b, T2a);
 
253
                                   T2h = FNMS(KP980785280, T28, T25);
 
254
                                   T29 = FMA(KP980785280, T28, T25);
 
255
                                   T2t = FNMS(KP980785280, T2q, T2p);
 
256
                                   T2r = FMA(KP980785280, T2q, T2p);
 
257
                                   T2f = FMA(KP098491403, T2e, T2d);
 
258
                                   T2j = FNMS(KP098491403, T2d, T2e);
 
259
                              }
 
260
                         }
 
261
                         {
 
262
                              E T1x, T1q, T1v, T1n, T2H, T2F, T1t, T1w;
 
263
                              {
 
264
                                   E T2u, T2g, T2s, T2k;
 
265
                                   T2u = T2f - T2c;
 
266
                                   T2g = T2c + T2f;
 
267
                                   T2s = T2i + T2j;
 
268
                                   T2k = T2i - T2j;
 
269
                                   Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t);
 
270
                                   Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t);
 
271
                                   Cr[0] = FMA(KP995184726, T2g, T29);
 
272
                                   Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29);
 
273
                                   Ci[0] = -(FMA(KP995184726, T2s, T2r));
 
274
                                   Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, T2r);
 
275
                                   Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h);
 
276
                                   Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h);
 
277
                              }
 
278
                              T1x = FNMS(KP303346683, T1o, T1p);
 
279
                              T1q = FMA(KP303346683, T1p, T1o);
 
280
                              T1v = FNMS(KP831469612, T1m, T1j);
 
281
                              T1n = FMA(KP831469612, T1m, T1j);
 
282
                              T2H = FNMS(KP831469612, T2E, T2D);
 
283
                              T2F = FMA(KP831469612, T2E, T2D);
 
284
                              T1t = FMA(KP303346683, T1s, T1r);
 
285
                              T1w = FNMS(KP303346683, T1r, T1s);
 
286
                              {
 
287
                                   E T2I, T1u, T2G, T1y;
 
288
                                   T2I = T1q + T1t;
 
289
                                   T1u = T1q - T1t;
 
290
                                   T2G = T1x + T1w;
 
291
                                   T1y = T1w - T1x;
 
292
                                   Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H));
 
293
                                   Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H);
 
294
                                   Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n);
 
295
                                   Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n);
 
296
                                   Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F);
 
297
                                   Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F);
 
298
                                   Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v);
 
299
                                   Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v);
 
300
                              }
 
301
                              T23 = FNMS(KP820678790, T1O, T1R);
 
302
                              T1S = FMA(KP820678790, T1R, T1O);
 
303
                              T21 = FNMS(KP980785280, T1K, T1D);
 
304
                              T1L = FMA(KP980785280, T1K, T1D);
 
305
                              T2z = FMA(KP980785280, T2w, T2v);
 
306
                              T2x = FNMS(KP980785280, T2w, T2v);
 
307
                              T1Z = FNMS(KP820678790, T1Y, T1V);
 
308
                              T22 = FMA(KP820678790, T1V, T1Y);
 
309
                         }
 
310
                    }
 
311
               }
 
312
               {
 
313
                    E T20, T2A, T24, T2y;
 
314
                    T20 = T1S + T1Z;
 
315
                    T2A = T1Z - T1S;
 
316
                    T24 = T22 - T23;
 
317
                    T2y = T23 + T22;
 
318
                    Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z);
 
319
                    Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z);
 
320
                    Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L);
 
321
                    Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L);
 
322
                    Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x);
 
323
                    Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x);
 
324
                    Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21);
 
325
                    Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21);
 
326
               }
325
327
          }
326
328
     }
327
329
}
334
336
 
335
337
#else                           /* HAVE_FMA */
336
338
 
337
 
/* Generated by: ../../../genfft/gen_r2cf -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
 
339
/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
338
340
 
339
341
/*
340
342
 * This function contains 174 FP additions, 82 FP multiplications,
360
362
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
361
363
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
362
364
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
363
 
     INT i;
364
 
     for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
365
 
          E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G;
366
 
          E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
367
 
          E T1O, T2a;
368
 
          {
369
 
               E T1, T2p, T4, T2o, T2, T3;
370
 
               T1 = R0[0];
371
 
               T2p = R0[WS(rs, 8)];
372
 
               T2 = R0[WS(rs, 4)];
373
 
               T3 = R0[WS(rs, 12)];
374
 
               T4 = KP707106781 * (T2 - T3);
375
 
               T2o = KP707106781 * (T2 + T3);
376
 
               T5 = T1 + T4;
377
 
               T2D = T2p - T2o;
378
 
               T1z = T1 - T4;
379
 
               T2q = T2o + T2p;
380
 
          }
381
 
          {
382
 
               E T8, T1A, Tb, T1B;
383
 
               {
384
 
                    E T6, T7, T9, Ta;
385
 
                    T6 = R0[WS(rs, 2)];
386
 
                    T7 = R0[WS(rs, 10)];
387
 
                    T8 = FNMS(KP382683432, T7, KP923879532 * T6);
388
 
                    T1A = FMA(KP382683432, T6, KP923879532 * T7);
389
 
                    T9 = R0[WS(rs, 6)];
390
 
                    Ta = R0[WS(rs, 14)];
391
 
                    Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
392
 
                    T1B = FMA(KP923879532, T9, KP382683432 * Ta);
393
 
               }
394
 
               Tc = T8 + Tb;
395
 
               T2C = Tb - T8;
396
 
               T1C = T1A - T1B;
397
 
               T2n = T1A + T1B;
398
 
          }
399
 
          {
400
 
               E Te, Tk, Th, Tj, Tf, Tg;
401
 
               Te = R0[WS(rs, 1)];
402
 
               Tk = R0[WS(rs, 9)];
403
 
               Tf = R0[WS(rs, 5)];
404
 
               Tg = R0[WS(rs, 13)];
405
 
               Th = KP707106781 * (Tf - Tg);
406
 
               Tj = KP707106781 * (Tf + Tg);
407
 
               {
408
 
                    E Ti, Tl, T1H, T1I;
409
 
                    Ti = Te + Th;
410
 
                    Tl = Tj + Tk;
411
 
                    Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
412
 
                    T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
413
 
                    T1H = Tk - Tj;
414
 
                    T1I = Te - Th;
415
 
                    T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
416
 
                    T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
417
 
               }
418
 
          }
419
 
          {
420
 
               E Tq, Tt, Tp, Ts, Tn, To;
421
 
               Tq = R0[WS(rs, 15)];
422
 
               Tt = R0[WS(rs, 7)];
423
 
               Tn = R0[WS(rs, 3)];
424
 
               To = R0[WS(rs, 11)];
425
 
               Tp = KP707106781 * (Tn - To);
426
 
               Ts = KP707106781 * (Tn + To);
427
 
               {
428
 
                    E Tr, Tu, T1E, T1F;
429
 
                    Tr = Tp - Tq;
430
 
                    Tu = Ts + Tt;
431
 
                    Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
432
 
                    T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
433
 
                    T1E = Tt - Ts;
434
 
                    T1F = Tp + Tq;
435
 
                    T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
436
 
                    T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
437
 
               }
438
 
          }
439
 
          {
440
 
               E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
441
 
               TW = R1[WS(rs, 15)];
442
 
               T1a = R1[WS(rs, 7)];
443
 
               TT = R1[WS(rs, 3)];
444
 
               TU = R1[WS(rs, 11)];
445
 
               TV = KP707106781 * (TT - TU);
446
 
               T19 = KP707106781 * (TT + TU);
447
 
               {
448
 
                    E TY, TZ, T11, T12;
449
 
                    TY = R1[WS(rs, 1)];
450
 
                    TZ = R1[WS(rs, 9)];
451
 
                    T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
452
 
                    T16 = FMA(KP382683432, TY, KP923879532 * TZ);
453
 
                    T11 = R1[WS(rs, 5)];
454
 
                    T12 = R1[WS(rs, 13)];
455
 
                    T13 = FNMS(KP923879532, T12, KP382683432 * T11);
456
 
                    T17 = FMA(KP923879532, T11, KP382683432 * T12);
457
 
               }
458
 
               {
459
 
                    E TX, T14, T1W, T1X;
460
 
                    TX = TV - TW;
461
 
                    T14 = T10 + T13;
462
 
                    T15 = TX + T14;
463
 
                    T1r = TX - T14;
464
 
                    T1W = T13 - T10;
465
 
                    T1X = T1a - T19;
466
 
                    T1Y = T1W - T1X;
467
 
                    T2e = T1W + T1X;
468
 
               }
469
 
               {
470
 
                    E T18, T1b, T1T, T1U;
471
 
                    T18 = T16 + T17;
472
 
                    T1b = T19 + T1a;
473
 
                    T1c = T18 + T1b;
474
 
                    T1s = T1b - T18;
475
 
                    T1T = TV + TW;
476
 
                    T1U = T16 - T17;
477
 
                    T1V = T1T + T1U;
478
 
                    T2d = T1U - T1T;
479
 
               }
480
 
          }
481
 
          {
482
 
               E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
483
 
               Ty = R1[0];
484
 
               TP = R1[WS(rs, 8)];
485
 
               Tz = R1[WS(rs, 4)];
486
 
               TA = R1[WS(rs, 12)];
487
 
               TB = KP707106781 * (Tz - TA);
488
 
               TO = KP707106781 * (Tz + TA);
489
 
               {
490
 
                    E TD, TE, TG, TH;
491
 
                    TD = R1[WS(rs, 2)];
492
 
                    TE = R1[WS(rs, 10)];
493
 
                    TF = FNMS(KP382683432, TE, KP923879532 * TD);
494
 
                    TL = FMA(KP382683432, TD, KP923879532 * TE);
495
 
                    TG = R1[WS(rs, 6)];
496
 
                    TH = R1[WS(rs, 14)];
497
 
                    TI = FNMS(KP923879532, TH, KP382683432 * TG);
498
 
                    TM = FMA(KP923879532, TG, KP382683432 * TH);
499
 
               }
500
 
               {
501
 
                    E TC, TJ, T1P, T1Q;
502
 
                    TC = Ty + TB;
503
 
                    TJ = TF + TI;
504
 
                    TK = TC + TJ;
505
 
                    T1o = TC - TJ;
506
 
                    T1P = TI - TF;
507
 
                    T1Q = TP - TO;
508
 
                    T1R = T1P - T1Q;
509
 
                    T2b = T1P + T1Q;
510
 
               }
511
 
               {
512
 
                    E TN, TQ, T1M, T1N;
513
 
                    TN = TL + TM;
514
 
                    TQ = TO + TP;
515
 
                    TR = TN + TQ;
516
 
                    T1p = TQ - TN;
517
 
                    T1M = Ty - TB;
518
 
                    T1N = TL - TM;
519
 
                    T1O = T1M - T1N;
520
 
                    T2a = T1M + T1N;
521
 
               }
522
 
          }
523
 
          {
524
 
               E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
525
 
               {
526
 
                    E Td, Tw, T2m, T2r;
527
 
                    Td = T5 + Tc;
528
 
                    Tw = Tm + Tv;
529
 
                    Tx = Td - Tw;
530
 
                    T1f = Td + Tw;
531
 
                    T2m = T1l - T1k;
532
 
                    T2r = T2n + T2q;
533
 
                    T2s = T2m - T2r;
534
 
                    T2u = T2m + T2r;
535
 
               }
536
 
               {
537
 
                    E TS, T1d, T1g, T1h;
538
 
                    TS = FMA(KP098017140, TK, KP995184726 * TR);
539
 
                    T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
540
 
                    T1e = TS + T1d;
541
 
                    T2l = T1d - TS;
542
 
                    T1g = FNMS(KP098017140, TR, KP995184726 * TK);
543
 
                    T1h = FMA(KP995184726, T15, KP098017140 * T1c);
544
 
                    T1i = T1g + T1h;
545
 
                    T2t = T1h - T1g;
546
 
               }
547
 
               Cr[WS(csr, 8)] = Tx - T1e;
548
 
               Ci[WS(csi, 8)] = T2t - T2u;
549
 
               Cr[WS(csr, 7)] = Tx + T1e;
550
 
               Ci[WS(csi, 7)] = T2t + T2u;
551
 
               Cr[WS(csr, 15)] = T1f - T1i;
552
 
               Ci[WS(csi, 15)] = T2l - T2s;
553
 
               Cr[0] = T1f + T1i;
554
 
               Ci[0] = T2l + T2s;
555
 
          }
556
 
          {
557
 
               E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
558
 
               {
559
 
                    E T25, T28, T2K, T2L;
560
 
                    T25 = T1z + T1C;
561
 
                    T28 = T26 - T27;
562
 
                    T29 = T25 + T28;
563
 
                    T2h = T25 - T28;
564
 
                    T2K = T1J + T1G;
565
 
                    T2L = T2C + T2D;
566
 
                    T2M = T2K - T2L;
567
 
                    T2O = T2K + T2L;
568
 
               }
569
 
               {
570
 
                    E T2c, T2f, T2i, T2j;
571
 
                    T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
572
 
                    T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
573
 
                    T2g = T2c + T2f;
574
 
                    T2J = T2f - T2c;
575
 
                    T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
576
 
                    T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
577
 
                    T2k = T2i - T2j;
578
 
                    T2N = T2j + T2i;
579
 
               }
580
 
               Cr[WS(csr, 14)] = T29 - T2g;
581
 
               Ci[WS(csi, 14)] = T2N - T2O;
582
 
               Cr[WS(csr, 1)] = T29 + T2g;
583
 
               Ci[WS(csi, 1)] = T2N + T2O;
584
 
               Cr[WS(csr, 9)] = T2h - T2k;
585
 
               Ci[WS(csi, 9)] = T2J - T2M;
586
 
               Cr[WS(csr, 6)] = T2h + T2k;
587
 
               Ci[WS(csi, 6)] = T2J + T2M;
588
 
          }
589
 
          {
590
 
               E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
591
 
               {
592
 
                    E T1j, T1m, T2w, T2x;
593
 
                    T1j = T5 - Tc;
594
 
                    T1m = T1k + T1l;
595
 
                    T1n = T1j + T1m;
596
 
                    T1v = T1j - T1m;
597
 
                    T2w = Tv - Tm;
598
 
                    T2x = T2q - T2n;
599
 
                    T2y = T2w - T2x;
600
 
                    T2A = T2w + T2x;
601
 
               }
602
 
               {
603
 
                    E T1q, T1t, T1w, T1x;
604
 
                    T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
605
 
                    T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
606
 
                    T1u = T1q + T1t;
607
 
                    T2v = T1t - T1q;
608
 
                    T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
609
 
                    T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
610
 
                    T1y = T1w - T1x;
611
 
                    T2z = T1x + T1w;
612
 
               }
613
 
               Cr[WS(csr, 12)] = T1n - T1u;
614
 
               Ci[WS(csi, 12)] = T2z - T2A;
615
 
               Cr[WS(csr, 3)] = T1n + T1u;
616
 
               Ci[WS(csi, 3)] = T2z + T2A;
617
 
               Cr[WS(csr, 11)] = T1v - T1y;
618
 
               Ci[WS(csi, 11)] = T2v - T2y;
619
 
               Cr[WS(csr, 4)] = T1v + T1y;
620
 
               Ci[WS(csi, 4)] = T2v + T2y;
621
 
          }
622
 
          {
623
 
               E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
624
 
               {
625
 
                    E T1D, T1K, T2E, T2F;
626
 
                    T1D = T1z - T1C;
627
 
                    T1K = T1G - T1J;
628
 
                    T1L = T1D + T1K;
629
 
                    T21 = T1D - T1K;
630
 
                    T2E = T2C - T2D;
631
 
                    T2F = T26 + T27;
632
 
                    T2G = T2E - T2F;
633
 
                    T2I = T2F + T2E;
634
 
               }
635
 
               {
636
 
                    E T1S, T1Z, T22, T23;
637
 
                    T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
638
 
                    T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
639
 
                    T20 = T1S - T1Z;
640
 
                    T2H = T1S + T1Z;
641
 
                    T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
642
 
                    T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
643
 
                    T24 = T22 - T23;
644
 
                    T2B = T23 + T22;
645
 
               }
646
 
               Cr[WS(csr, 13)] = T1L - T20;
647
 
               Ci[WS(csi, 13)] = T2B - T2G;
648
 
               Cr[WS(csr, 2)] = T1L + T20;
649
 
               Ci[WS(csi, 2)] = T2B + T2G;
650
 
               Cr[WS(csr, 10)] = T21 - T24;
651
 
               Ci[WS(csi, 10)] = T2I - T2H;
652
 
               Cr[WS(csr, 5)] = T21 + T24;
653
 
               Ci[WS(csi, 5)] = -(T2H + T2I);
 
365
     {
 
366
          INT i;
 
367
          for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
 
368
               E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G;
 
369
               E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
 
370
               E T1O, T2a;
 
371
               {
 
372
                    E T1, T2p, T4, T2o, T2, T3;
 
373
                    T1 = R0[0];
 
374
                    T2p = R0[WS(rs, 8)];
 
375
                    T2 = R0[WS(rs, 4)];
 
376
                    T3 = R0[WS(rs, 12)];
 
377
                    T4 = KP707106781 * (T2 - T3);
 
378
                    T2o = KP707106781 * (T2 + T3);
 
379
                    T5 = T1 + T4;
 
380
                    T2D = T2p - T2o;
 
381
                    T1z = T1 - T4;
 
382
                    T2q = T2o + T2p;
 
383
               }
 
384
               {
 
385
                    E T8, T1A, Tb, T1B;
 
386
                    {
 
387
                         E T6, T7, T9, Ta;
 
388
                         T6 = R0[WS(rs, 2)];
 
389
                         T7 = R0[WS(rs, 10)];
 
390
                         T8 = FNMS(KP382683432, T7, KP923879532 * T6);
 
391
                         T1A = FMA(KP382683432, T6, KP923879532 * T7);
 
392
                         T9 = R0[WS(rs, 6)];
 
393
                         Ta = R0[WS(rs, 14)];
 
394
                         Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
 
395
                         T1B = FMA(KP923879532, T9, KP382683432 * Ta);
 
396
                    }
 
397
                    Tc = T8 + Tb;
 
398
                    T2C = Tb - T8;
 
399
                    T1C = T1A - T1B;
 
400
                    T2n = T1A + T1B;
 
401
               }
 
402
               {
 
403
                    E Te, Tk, Th, Tj, Tf, Tg;
 
404
                    Te = R0[WS(rs, 1)];
 
405
                    Tk = R0[WS(rs, 9)];
 
406
                    Tf = R0[WS(rs, 5)];
 
407
                    Tg = R0[WS(rs, 13)];
 
408
                    Th = KP707106781 * (Tf - Tg);
 
409
                    Tj = KP707106781 * (Tf + Tg);
 
410
                    {
 
411
                         E Ti, Tl, T1H, T1I;
 
412
                         Ti = Te + Th;
 
413
                         Tl = Tj + Tk;
 
414
                         Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
 
415
                         T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
 
416
                         T1H = Tk - Tj;
 
417
                         T1I = Te - Th;
 
418
                         T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
 
419
                         T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
 
420
                    }
 
421
               }
 
422
               {
 
423
                    E Tq, Tt, Tp, Ts, Tn, To;
 
424
                    Tq = R0[WS(rs, 15)];
 
425
                    Tt = R0[WS(rs, 7)];
 
426
                    Tn = R0[WS(rs, 3)];
 
427
                    To = R0[WS(rs, 11)];
 
428
                    Tp = KP707106781 * (Tn - To);
 
429
                    Ts = KP707106781 * (Tn + To);
 
430
                    {
 
431
                         E Tr, Tu, T1E, T1F;
 
432
                         Tr = Tp - Tq;
 
433
                         Tu = Ts + Tt;
 
434
                         Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
 
435
                         T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
 
436
                         T1E = Tt - Ts;
 
437
                         T1F = Tp + Tq;
 
438
                         T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
 
439
                         T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
 
440
                    }
 
441
               }
 
442
               {
 
443
                    E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
 
444
                    TW = R1[WS(rs, 15)];
 
445
                    T1a = R1[WS(rs, 7)];
 
446
                    TT = R1[WS(rs, 3)];
 
447
                    TU = R1[WS(rs, 11)];
 
448
                    TV = KP707106781 * (TT - TU);
 
449
                    T19 = KP707106781 * (TT + TU);
 
450
                    {
 
451
                         E TY, TZ, T11, T12;
 
452
                         TY = R1[WS(rs, 1)];
 
453
                         TZ = R1[WS(rs, 9)];
 
454
                         T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
 
455
                         T16 = FMA(KP382683432, TY, KP923879532 * TZ);
 
456
                         T11 = R1[WS(rs, 5)];
 
457
                         T12 = R1[WS(rs, 13)];
 
458
                         T13 = FNMS(KP923879532, T12, KP382683432 * T11);
 
459
                         T17 = FMA(KP923879532, T11, KP382683432 * T12);
 
460
                    }
 
461
                    {
 
462
                         E TX, T14, T1W, T1X;
 
463
                         TX = TV - TW;
 
464
                         T14 = T10 + T13;
 
465
                         T15 = TX + T14;
 
466
                         T1r = TX - T14;
 
467
                         T1W = T13 - T10;
 
468
                         T1X = T1a - T19;
 
469
                         T1Y = T1W - T1X;
 
470
                         T2e = T1W + T1X;
 
471
                    }
 
472
                    {
 
473
                         E T18, T1b, T1T, T1U;
 
474
                         T18 = T16 + T17;
 
475
                         T1b = T19 + T1a;
 
476
                         T1c = T18 + T1b;
 
477
                         T1s = T1b - T18;
 
478
                         T1T = TV + TW;
 
479
                         T1U = T16 - T17;
 
480
                         T1V = T1T + T1U;
 
481
                         T2d = T1U - T1T;
 
482
                    }
 
483
               }
 
484
               {
 
485
                    E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
 
486
                    Ty = R1[0];
 
487
                    TP = R1[WS(rs, 8)];
 
488
                    Tz = R1[WS(rs, 4)];
 
489
                    TA = R1[WS(rs, 12)];
 
490
                    TB = KP707106781 * (Tz - TA);
 
491
                    TO = KP707106781 * (Tz + TA);
 
492
                    {
 
493
                         E TD, TE, TG, TH;
 
494
                         TD = R1[WS(rs, 2)];
 
495
                         TE = R1[WS(rs, 10)];
 
496
                         TF = FNMS(KP382683432, TE, KP923879532 * TD);
 
497
                         TL = FMA(KP382683432, TD, KP923879532 * TE);
 
498
                         TG = R1[WS(rs, 6)];
 
499
                         TH = R1[WS(rs, 14)];
 
500
                         TI = FNMS(KP923879532, TH, KP382683432 * TG);
 
501
                         TM = FMA(KP923879532, TG, KP382683432 * TH);
 
502
                    }
 
503
                    {
 
504
                         E TC, TJ, T1P, T1Q;
 
505
                         TC = Ty + TB;
 
506
                         TJ = TF + TI;
 
507
                         TK = TC + TJ;
 
508
                         T1o = TC - TJ;
 
509
                         T1P = TI - TF;
 
510
                         T1Q = TP - TO;
 
511
                         T1R = T1P - T1Q;
 
512
                         T2b = T1P + T1Q;
 
513
                    }
 
514
                    {
 
515
                         E TN, TQ, T1M, T1N;
 
516
                         TN = TL + TM;
 
517
                         TQ = TO + TP;
 
518
                         TR = TN + TQ;
 
519
                         T1p = TQ - TN;
 
520
                         T1M = Ty - TB;
 
521
                         T1N = TL - TM;
 
522
                         T1O = T1M - T1N;
 
523
                         T2a = T1M + T1N;
 
524
                    }
 
525
               }
 
526
               {
 
527
                    E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
 
528
                    {
 
529
                         E Td, Tw, T2m, T2r;
 
530
                         Td = T5 + Tc;
 
531
                         Tw = Tm + Tv;
 
532
                         Tx = Td - Tw;
 
533
                         T1f = Td + Tw;
 
534
                         T2m = T1l - T1k;
 
535
                         T2r = T2n + T2q;
 
536
                         T2s = T2m - T2r;
 
537
                         T2u = T2m + T2r;
 
538
                    }
 
539
                    {
 
540
                         E TS, T1d, T1g, T1h;
 
541
                         TS = FMA(KP098017140, TK, KP995184726 * TR);
 
542
                         T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
 
543
                         T1e = TS + T1d;
 
544
                         T2l = T1d - TS;
 
545
                         T1g = FNMS(KP098017140, TR, KP995184726 * TK);
 
546
                         T1h = FMA(KP995184726, T15, KP098017140 * T1c);
 
547
                         T1i = T1g + T1h;
 
548
                         T2t = T1h - T1g;
 
549
                    }
 
550
                    Cr[WS(csr, 8)] = Tx - T1e;
 
551
                    Ci[WS(csi, 8)] = T2t - T2u;
 
552
                    Cr[WS(csr, 7)] = Tx + T1e;
 
553
                    Ci[WS(csi, 7)] = T2t + T2u;
 
554
                    Cr[WS(csr, 15)] = T1f - T1i;
 
555
                    Ci[WS(csi, 15)] = T2l - T2s;
 
556
                    Cr[0] = T1f + T1i;
 
557
                    Ci[0] = T2l + T2s;
 
558
               }
 
559
               {
 
560
                    E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
 
561
                    {
 
562
                         E T25, T28, T2K, T2L;
 
563
                         T25 = T1z + T1C;
 
564
                         T28 = T26 - T27;
 
565
                         T29 = T25 + T28;
 
566
                         T2h = T25 - T28;
 
567
                         T2K = T1J + T1G;
 
568
                         T2L = T2C + T2D;
 
569
                         T2M = T2K - T2L;
 
570
                         T2O = T2K + T2L;
 
571
                    }
 
572
                    {
 
573
                         E T2c, T2f, T2i, T2j;
 
574
                         T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
 
575
                         T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
 
576
                         T2g = T2c + T2f;
 
577
                         T2J = T2f - T2c;
 
578
                         T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
 
579
                         T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
 
580
                         T2k = T2i - T2j;
 
581
                         T2N = T2j + T2i;
 
582
                    }
 
583
                    Cr[WS(csr, 14)] = T29 - T2g;
 
584
                    Ci[WS(csi, 14)] = T2N - T2O;
 
585
                    Cr[WS(csr, 1)] = T29 + T2g;
 
586
                    Ci[WS(csi, 1)] = T2N + T2O;
 
587
                    Cr[WS(csr, 9)] = T2h - T2k;
 
588
                    Ci[WS(csi, 9)] = T2J - T2M;
 
589
                    Cr[WS(csr, 6)] = T2h + T2k;
 
590
                    Ci[WS(csi, 6)] = T2J + T2M;
 
591
               }
 
592
               {
 
593
                    E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
 
594
                    {
 
595
                         E T1j, T1m, T2w, T2x;
 
596
                         T1j = T5 - Tc;
 
597
                         T1m = T1k + T1l;
 
598
                         T1n = T1j + T1m;
 
599
                         T1v = T1j - T1m;
 
600
                         T2w = Tv - Tm;
 
601
                         T2x = T2q - T2n;
 
602
                         T2y = T2w - T2x;
 
603
                         T2A = T2w + T2x;
 
604
                    }
 
605
                    {
 
606
                         E T1q, T1t, T1w, T1x;
 
607
                         T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
 
608
                         T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
 
609
                         T1u = T1q + T1t;
 
610
                         T2v = T1t - T1q;
 
611
                         T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
 
612
                         T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
 
613
                         T1y = T1w - T1x;
 
614
                         T2z = T1x + T1w;
 
615
                    }
 
616
                    Cr[WS(csr, 12)] = T1n - T1u;
 
617
                    Ci[WS(csi, 12)] = T2z - T2A;
 
618
                    Cr[WS(csr, 3)] = T1n + T1u;
 
619
                    Ci[WS(csi, 3)] = T2z + T2A;
 
620
                    Cr[WS(csr, 11)] = T1v - T1y;
 
621
                    Ci[WS(csi, 11)] = T2v - T2y;
 
622
                    Cr[WS(csr, 4)] = T1v + T1y;
 
623
                    Ci[WS(csi, 4)] = T2v + T2y;
 
624
               }
 
625
               {
 
626
                    E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
 
627
                    {
 
628
                         E T1D, T1K, T2E, T2F;
 
629
                         T1D = T1z - T1C;
 
630
                         T1K = T1G - T1J;
 
631
                         T1L = T1D + T1K;
 
632
                         T21 = T1D - T1K;
 
633
                         T2E = T2C - T2D;
 
634
                         T2F = T26 + T27;
 
635
                         T2G = T2E - T2F;
 
636
                         T2I = T2F + T2E;
 
637
                    }
 
638
                    {
 
639
                         E T1S, T1Z, T22, T23;
 
640
                         T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
 
641
                         T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
 
642
                         T20 = T1S - T1Z;
 
643
                         T2H = T1S + T1Z;
 
644
                         T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
 
645
                         T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
 
646
                         T24 = T22 - T23;
 
647
                         T2B = T23 + T22;
 
648
                    }
 
649
                    Cr[WS(csr, 13)] = T1L - T20;
 
650
                    Ci[WS(csi, 13)] = T2B - T2G;
 
651
                    Cr[WS(csr, 2)] = T1L + T20;
 
652
                    Ci[WS(csi, 2)] = T2B + T2G;
 
653
                    Cr[WS(csr, 10)] = T21 - T24;
 
654
                    Ci[WS(csi, 10)] = T2I - T2H;
 
655
                    Cr[WS(csr, 5)] = T21 + T24;
 
656
                    Ci[WS(csi, 5)] = -(T2H + T2I);
 
657
               }
654
658
          }
655
659
     }
656
660
}