~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cb/hc2cbdft_12.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:47:05 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:19:40 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
 
28
/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
29
29
 
30
30
/*
31
31
 * This function contains 142 FP additions, 68 FP multiplications,
38
38
{
39
39
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
40
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
41
 
     INT m;
42
 
     for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(rs)) {
43
 
          E T2S, T2V, T2w, T2Z, T2T, T2I, T2Q, T2Y, T2U, T2K, T2G, T30, T2W;
44
 
          {
45
 
               E Tb, T1Z, T2D, T1E, T1N, T2y, TD, T2t, T1U, T1e, T2o, TY, T1f, TI, T1g;
46
 
               E TN, Tm, T1V, T2z, T1H, T1Q, T2E, T19, T2u;
47
 
               {
48
 
                    E T1c, TU, T1d, TX;
49
 
                    {
50
 
                         E Tu, T6, TT, TS, T5, Tt, Tw, Tx, TB, T9, Ty;
51
 
                         {
52
 
                              E T1, Tp, Tq, Tr, T4, T2, T3, T7, T8, Ts;
53
 
                              T1 = Rp[0];
54
 
                              T2 = Rp[WS(rs, 4)];
55
 
                              T3 = Rm[WS(rs, 3)];
56
 
                              Tp = Ip[0];
57
 
                              Tq = Ip[WS(rs, 4)];
58
 
                              Tr = Im[WS(rs, 3)];
59
 
                              T4 = T2 + T3;
60
 
                              Tu = T2 - T3;
61
 
                              T6 = Rm[WS(rs, 5)];
62
 
                              TT = Tr + Tq;
63
 
                              Ts = Tq - Tr;
64
 
                              TS = FNMS(KP500000000, T4, T1);
 
41
     {
 
42
          INT m;
 
43
          for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(rs)) {
 
44
               E T2S, T2V, T2w, T2Z, T2T, T2I, T2Q, T2Y, T2U, T2K, T2G, T30, T2W;
 
45
               {
 
46
                    E Tb, T1Z, T2D, T1E, T1N, T2y, TD, T2t, T1U, T1e, T2o, TY, T1f, TI, T1g;
 
47
                    E TN, Tm, T1V, T2z, T1H, T1Q, T2E, T19, T2u;
 
48
                    {
 
49
                         E T1c, TU, T1d, TX;
 
50
                         {
 
51
                              E Tu, T6, TT, TS, T5, Tt, Tw, Tx, TB, T9, Ty;
 
52
                              {
 
53
                                   E T1, Tp, Tq, Tr, T4, T2, T3, T7, T8, Ts;
 
54
                                   T1 = Rp[0];
 
55
                                   T2 = Rp[WS(rs, 4)];
 
56
                                   T3 = Rm[WS(rs, 3)];
 
57
                                   Tp = Ip[0];
 
58
                                   Tq = Ip[WS(rs, 4)];
 
59
                                   Tr = Im[WS(rs, 3)];
 
60
                                   T4 = T2 + T3;
 
61
                                   Tu = T2 - T3;
 
62
                                   T6 = Rm[WS(rs, 5)];
 
63
                                   TT = Tr + Tq;
 
64
                                   Ts = Tq - Tr;
 
65
                                   TS = FNMS(KP500000000, T4, T1);
 
66
                                   T5 = T1 + T4;
 
67
                                   T7 = Rm[WS(rs, 1)];
 
68
                                   T8 = Rp[WS(rs, 2)];
 
69
                                   T1c = Tp + Ts;
 
70
                                   Tt = FNMS(KP500000000, Ts, Tp);
 
71
                                   Tw = Im[WS(rs, 5)];
 
72
                                   Tx = Im[WS(rs, 1)];
 
73
                                   TB = T7 - T8;
 
74
                                   T9 = T7 + T8;
 
75
                                   Ty = Ip[WS(rs, 2)];
 
76
                              }
 
77
                              {
 
78
                                   E T1L, Tv, Ta, TV, TW, Tz;
 
79
                                   T1L = FNMS(KP866025403, Tu, Tt);
 
80
                                   Tv = FMA(KP866025403, Tu, Tt);
 
81
                                   Ta = T6 + T9;
 
82
                                   TV = FNMS(KP500000000, T9, T6);
 
83
                                   TW = Tx + Ty;
 
84
                                   Tz = Tx - Ty;
 
85
                                   {
 
86
                                        E TC, T1M, T1C, TA, T1D;
 
87
                                        T1C = FMA(KP866025403, TT, TS);
 
88
                                        TU = FNMS(KP866025403, TT, TS);
 
89
                                        T1d = Tw + Tz;
 
90
                                        TA = FNMS(KP500000000, Tz, Tw);
 
91
                                        T1D = FNMS(KP866025403, TW, TV);
 
92
                                        TX = FMA(KP866025403, TW, TV);
 
93
                                        Tb = T5 + Ta;
 
94
                                        T1Z = T5 - Ta;
 
95
                                        TC = FNMS(KP866025403, TB, TA);
 
96
                                        T1M = FMA(KP866025403, TB, TA);
 
97
                                        T2D = T1C - T1D;
 
98
                                        T1E = T1C + T1D;
 
99
                                        T1N = T1L - T1M;
 
100
                                        T2y = T1L + T1M;
 
101
                                        TD = Tv + TC;
 
102
                                        T2t = Tv - TC;
 
103
                                   }
 
104
                              }
 
105
                         }
 
106
                         {
 
107
                              E T12, Th, TH, TE, Tg, T11, T14, TK, T17, Tk, TL;
 
108
                              {
 
109
                                   E Tc, TZ, TF, TG, Tf, Td, Te, Ti, Tj, T10;
 
110
                                   Tc = Rp[WS(rs, 3)];
 
111
                                   T1U = T1c + T1d;
 
112
                                   T1e = T1c - T1d;
 
113
                                   T2o = TU + TX;
 
114
                                   TY = TU - TX;
 
115
                                   Td = Rm[WS(rs, 4)];
 
116
                                   Te = Rm[0];
 
117
                                   TZ = Ip[WS(rs, 3)];
 
118
                                   TF = Im[WS(rs, 4)];
 
119
                                   TG = Im[0];
 
120
                                   Tf = Td + Te;
 
121
                                   T12 = Td - Te;
 
122
                                   Th = Rm[WS(rs, 2)];
 
123
                                   TH = TF - TG;
 
124
                                   T10 = TF + TG;
 
125
                                   TE = FNMS(KP500000000, Tf, Tc);
 
126
                                   Tg = Tc + Tf;
 
127
                                   Ti = Rp[WS(rs, 1)];
 
128
                                   Tj = Rp[WS(rs, 5)];
 
129
                                   T1f = TZ - T10;
 
130
                                   T11 = FMA(KP500000000, T10, TZ);
 
131
                                   T14 = Im[WS(rs, 2)];
 
132
                                   TK = Ip[WS(rs, 5)];
 
133
                                   T17 = Ti - Tj;
 
134
                                   Tk = Ti + Tj;
 
135
                                   TL = Ip[WS(rs, 1)];
 
136
                              }
 
137
                              {
 
138
                                   E T1O, T13, Tl, TJ, TM, T15;
 
139
                                   T1O = FNMS(KP866025403, T12, T11);
 
140
                                   T13 = FMA(KP866025403, T12, T11);
 
141
                                   Tl = Th + Tk;
 
142
                                   TJ = FNMS(KP500000000, Tk, Th);
 
143
                                   TM = TK - TL;
 
144
                                   T15 = TK + TL;
 
145
                                   {
 
146
                                        E T18, T1P, T1F, T16, T1G;
 
147
                                        T1F = FNMS(KP866025403, TH, TE);
 
148
                                        TI = FMA(KP866025403, TH, TE);
 
149
                                        T1g = T15 - T14;
 
150
                                        T16 = FMA(KP500000000, T15, T14);
 
151
                                        T1G = FNMS(KP866025403, TM, TJ);
 
152
                                        TN = FMA(KP866025403, TM, TJ);
 
153
                                        Tm = Tg + Tl;
 
154
                                        T1V = Tg - Tl;
 
155
                                        T18 = FNMS(KP866025403, T17, T16);
 
156
                                        T1P = FMA(KP866025403, T17, T16);
 
157
                                        T2z = T1F - T1G;
 
158
                                        T1H = T1F + T1G;
 
159
                                        T1Q = T1O - T1P;
 
160
                                        T2E = T1O + T1P;
 
161
                                        T19 = T13 + T18;
 
162
                                        T2u = T13 - T18;
 
163
                                   }
 
164
                              }
 
165
                         }
 
166
                    }
 
167
                    {
 
168
                         E T20, T2p, T1v, T1s, T1q, T1y, T1u, T1z, T1t;
 
169
                         {
 
170
                              E T1m, Tn, T1a, T1p, T1i, To, TP, TR, T1h, TO;
 
171
                              T1m = Tb - Tm;
 
172
                              Tn = Tb + Tm;
 
173
                              T20 = T1f - T1g;
 
174
                              T1h = T1f + T1g;
 
175
                              T2p = TI + TN;
 
176
                              TO = TI - TN;
 
177
                              T1a = TY - T19;
 
178
                              T1v = TY + T19;
 
179
                              T1p = T1e - T1h;
 
180
                              T1i = T1e + T1h;
 
181
                              To = W[0];
 
182
                              T1s = TD - TO;
 
183
                              TP = TD + TO;
 
184
                              TR = W[1];
 
185
                              {
 
186
                                   E T1l, T1o, T1n, T1x, T1r;
 
187
                                   {
 
188
                                        E T1j, TQ, T1k, T1b;
 
189
                                        T1j = To * T1a;
 
190
                                        TQ = To * TP;
 
191
                                        T1l = W[10];
 
192
                                        T1k = FNMS(TR, TP, T1j);
 
193
                                        T1b = FMA(TR, T1a, TQ);
 
194
                                        T1o = W[11];
 
195
                                        T1n = T1l * T1m;
 
196
                                        Im[0] = T1k - T1i;
 
197
                                        Ip[0] = T1i + T1k;
 
198
                                        Rm[0] = Tn + T1b;
 
199
                                        Rp[0] = Tn - T1b;
 
200
                                        T1x = T1o * T1m;
 
201
                                        T1r = W[12];
 
202
                                   }
 
203
                                   T1q = FNMS(T1o, T1p, T1n);
 
204
                                   T1y = FMA(T1l, T1p, T1x);
 
205
                                   T1u = W[13];
 
206
                                   T1z = T1r * T1v;
 
207
                                   T1t = T1r * T1s;
 
208
                              }
 
209
                         }
 
210
                         {
 
211
                              E T2e, T2h, T1S, T2j, T2f, T26, T2c, T2m, T2g, T24, T22;
 
212
                              {
 
213
                                   E T2b, T1R, T27, T2a, T1B, T29, T2l, T1K, T1J, T1W, T21, T25, T2d, T23, T1X;
 
214
                                   E T1Y;
 
215
                                   {
 
216
                                        E T1I, T28, T1A, T1w, T1T;
 
217
                                        T1A = FNMS(T1u, T1s, T1z);
 
218
                                        T1w = FMA(T1u, T1v, T1t);
 
219
                                        T1I = T1E - T1H;
 
220
                                        T28 = T1E + T1H;
 
221
                                        T2b = T1N + T1Q;
 
222
                                        T1R = T1N - T1Q;
 
223
                                        Im[WS(rs, 3)] = T1A - T1y;
 
224
                                        Ip[WS(rs, 3)] = T1y + T1A;
 
225
                                        Rm[WS(rs, 3)] = T1q + T1w;
 
226
                                        Rp[WS(rs, 3)] = T1q - T1w;
 
227
                                        T27 = W[14];
 
228
                                        T2a = W[15];
 
229
                                        T1B = W[2];
 
230
                                        T29 = T27 * T28;
 
231
                                        T2l = T2a * T28;
 
232
                                        T1K = W[3];
 
233
                                        T1J = T1B * T1I;
 
234
                                        T1W = T1U - T1V;
 
235
                                        T2e = T1V + T1U;
 
236
                                        T2h = T1Z - T20;
 
237
                                        T21 = T1Z + T20;
 
238
                                        T25 = T1K * T1I;
 
239
                                        T1T = W[4];
 
240
                                        T2d = W[16];
 
241
                                        T23 = T1T * T21;
 
242
                                        T1X = T1T * T1W;
 
243
                                   }
 
244
                                   T1S = FNMS(T1K, T1R, T1J);
 
245
                                   T2j = T2d * T2h;
 
246
                                   T2f = T2d * T2e;
 
247
                                   T26 = FMA(T1B, T1R, T25);
 
248
                                   T1Y = W[5];
 
249
                                   T2c = FNMS(T2a, T2b, T29);
 
250
                                   T2m = FMA(T27, T2b, T2l);
 
251
                                   T2g = W[17];
 
252
                                   T24 = FNMS(T1Y, T1W, T23);
 
253
                                   T22 = FMA(T1Y, T21, T1X);
 
254
                              }
 
255
                              {
 
256
                                   E T2L, T2O, T2P, T2v, T2N, T2X, T2n, T2s, T2A, T2F, T2r, T2H, T2R, T2J, T2B;
 
257
                                   E T2C;
 
258
                                   {
 
259
                                        E T2q, T2k, T2i, T2M, T2x;
 
260
                                        T2k = FNMS(T2g, T2e, T2j);
 
261
                                        T2i = FMA(T2g, T2h, T2f);
 
262
                                        Im[WS(rs, 1)] = T24 - T26;
 
263
                                        Ip[WS(rs, 1)] = T24 + T26;
 
264
                                        Rm[WS(rs, 1)] = T22 + T1S;
 
265
                                        Rp[WS(rs, 1)] = T1S - T22;
 
266
                                        Im[WS(rs, 4)] = T2k - T2m;
 
267
                                        Ip[WS(rs, 4)] = T2k + T2m;
 
268
                                        Rm[WS(rs, 4)] = T2i + T2c;
 
269
                                        Rp[WS(rs, 4)] = T2c - T2i;
 
270
                                        T2q = T2o + T2p;
 
271
                                        T2M = T2o - T2p;
 
272
                                        T2L = W[18];
 
273
                                        T2O = W[19];
 
274
                                        T2P = T2t - T2u;
 
275
                                        T2v = T2t + T2u;
 
276
                                        T2N = T2L * T2M;
 
277
                                        T2X = T2O * T2M;
 
278
                                        T2n = W[6];
 
279
                                        T2s = W[7];
 
280
                                        T2S = T2y - T2z;
 
281
                                        T2A = T2y + T2z;
 
282
                                        T2F = T2D - T2E;
 
283
                                        T2V = T2D + T2E;
 
284
                                        T2r = T2n * T2q;
 
285
                                        T2H = T2s * T2q;
 
286
                                        T2x = W[8];
 
287
                                        T2R = W[20];
 
288
                                        T2J = T2x * T2F;
 
289
                                        T2B = T2x * T2A;
 
290
                                   }
 
291
                                   T2w = FNMS(T2s, T2v, T2r);
 
292
                                   T2Z = T2R * T2V;
 
293
                                   T2T = T2R * T2S;
 
294
                                   T2I = FMA(T2n, T2v, T2H);
 
295
                                   T2C = W[9];
 
296
                                   T2Q = FNMS(T2O, T2P, T2N);
 
297
                                   T2Y = FMA(T2L, T2P, T2X);
 
298
                                   T2U = W[21];
 
299
                                   T2K = FNMS(T2C, T2A, T2J);
 
300
                                   T2G = FMA(T2C, T2F, T2B);
 
301
                              }
 
302
                         }
 
303
                    }
 
304
               }
 
305
               T30 = FNMS(T2U, T2S, T2Z);
 
306
               T2W = FMA(T2U, T2V, T2T);
 
307
               Im[WS(rs, 2)] = T2K - T2I;
 
308
               Ip[WS(rs, 2)] = T2I + T2K;
 
309
               Rm[WS(rs, 2)] = T2w + T2G;
 
310
               Rp[WS(rs, 2)] = T2w - T2G;
 
311
               Im[WS(rs, 5)] = T30 - T2Y;
 
312
               Ip[WS(rs, 5)] = T2Y + T30;
 
313
               Rm[WS(rs, 5)] = T2Q + T2W;
 
314
               Rp[WS(rs, 5)] = T2Q - T2W;
 
315
          }
 
316
     }
 
317
}
 
318
 
 
319
/*
 * Twiddle-instruction table referenced by the codelet descriptor `desc`.
 * It holds one TW_FULL entry with arguments (1, 12) followed by a TW_NEXT
 * entry with arguments (1, 0).
 * NOTE(review): the kernel reads W[0]..W[21] and advances W by 22 per loop
 * iteration, which is consistent with 11 complex twiddle factors for n = 12;
 * confirm against the definitions of TW_FULL / TW_NEXT, which are not
 * visible here.
 */
static const tw_instr twinstr[] = {
 
320
     {TW_FULL, 1, 12},
 
321
     {TW_NEXT, 1, 0}
 
322
};
 
323
 
 
324
/*
 * Descriptor handed to the planner when this codelet is registered:
 * the value 12, the codelet name string, the twiddle-instruction table,
 * a pointer to GENUS, and the four-element count tuple {96, 22, 46, 0}.
 * NOTE(review): the tuple is presumably {adds, muls, fmas, other}; that
 * reading matches the header comment of the FMA variant (96 + 46 = 142
 * FP additions, 22 + 46 = 68 FP multiplications) -- confirm against the
 * hc2c_desc declaration, which is not visible here.
 */
static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {96, 22, 46, 0} };
 
325
 
 
326
/*
 * Registration entry point for this codelet: passes the planner `p`, the
 * hc2cbdft_12 kernel, its descriptor `desc`, and HC2C_VIA_DFT to
 * X(khc2c_register).
 * NOTE(review): X(...) is a macro defined outside this file (presumably
 * the library's name-prefixing macro) -- confirm.
 */
void X(codelet_hc2cbdft_12) (planner *p) {
 
327
     X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
 
328
}
 
329
#else                           /* HAVE_FMA */
 
330
 
 
331
/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
 
332
 
 
333
/*
 
334
 * This function contains 142 FP additions, 60 FP multiplications,
 
335
 * (or, 112 additions, 30 multiplications, 30 fused multiply/add),
 
336
 * 47 stack variables, 2 constants, and 48 memory accesses
 
337
 */
 
338
#include "hc2cb.h"
 
339
 
 
340
static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
 
341
{
 
342
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
 
343
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
 
344
     {
 
345
          INT m;
 
346
          for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(rs)) {
 
347
               E Tv, T1E, TC, T1F, TW, T1x, TT, T1w, T1d, T1N, Tb, T1R, TI, T1z, TN;
 
348
               E T1A, T17, T1I, T12, T1H, T1g, T1S, Tm, T1O;
 
349
               {
 
350
                    E T1, Tq, T6, TA, T4, Tp, Tt, TS, T9, Tw, Tz, TV;
 
351
                    T1 = Rp[0];
 
352
                    Tq = Ip[0];
 
353
                    T6 = Rm[WS(rs, 5)];
 
354
                    TA = Im[WS(rs, 5)];
 
355
                    {
 
356
                         E T2, T3, Tr, Ts;
 
357
                         T2 = Rp[WS(rs, 4)];
 
358
                         T3 = Rm[WS(rs, 3)];
 
359
                         T4 = T2 + T3;
 
360
                         Tp = KP866025403 * (T2 - T3);
 
361
                         Tr = Im[WS(rs, 3)];
 
362
                         Ts = Ip[WS(rs, 4)];
 
363
                         Tt = Tr - Ts;
 
364
                         TS = KP866025403 * (Tr + Ts);
 
365
                    }
 
366
                    {
 
367
                         E T7, T8, Tx, Ty;
 
368
                         T7 = Rm[WS(rs, 1)];
 
369
                         T8 = Rp[WS(rs, 2)];
 
370
                         T9 = T7 + T8;
 
371
                         Tw = KP866025403 * (T7 - T8);
 
372
                         Tx = Im[WS(rs, 1)];
 
373
                         Ty = Ip[WS(rs, 2)];
 
374
                         Tz = Tx - Ty;
 
375
                         TV = KP866025403 * (Tx + Ty);
 
376
                    }
 
377
                    {
 
378
                         E Tu, TB, TU, TR;
 
379
                         Tu = FMA(KP500000000, Tt, Tq);
 
380
                         Tv = Tp + Tu;
 
381
                         T1E = Tu - Tp;
 
382
                         TB = FMS(KP500000000, Tz, TA);
 
383
                         TC = Tw + TB;
 
384
                         T1F = TB - Tw;
 
385
                         TU = FNMS(KP500000000, T9, T6);
 
386
                         TW = TU + TV;
 
387
                         T1x = TU - TV;
 
388
                         TR = FNMS(KP500000000, T4, T1);
 
389
                         TT = TR - TS;
 
390
                         T1w = TR + TS;
 
391
                         {
 
392
                              E T1b, T1c, T5, Ta;
 
393
                              T1b = Tq - Tt;
 
394
                              T1c = Tz + TA;
 
395
                              T1d = T1b - T1c;
 
396
                              T1N = T1b + T1c;
65
397
                              T5 = T1 + T4;
66
 
                              T7 = Rm[WS(rs, 1)];
67
 
                              T8 = Rp[WS(rs, 2)];
68
 
                              T1c = Tp + Ts;
69
 
                              Tt = FNMS(KP500000000, Ts, Tp);
70
 
                              Tw = Im[WS(rs, 5)];
71
 
                              Tx = Im[WS(rs, 1)];
72
 
                              TB = T7 - T8;
73
 
                              T9 = T7 + T8;
74
 
                              Ty = Ip[WS(rs, 2)];
75
 
                         }
76
 
                         {
77
 
                              E T1L, Tv, Ta, TV, TW, Tz;
78
 
                              T1L = FNMS(KP866025403, Tu, Tt);
79
 
                              Tv = FMA(KP866025403, Tu, Tt);
80
398
                              Ta = T6 + T9;
81
 
                              TV = FNMS(KP500000000, T9, T6);
82
 
                              TW = Tx + Ty;
83
 
                              Tz = Tx - Ty;
84
 
                              {
85
 
                                   E TC, T1M, T1C, TA, T1D;
86
 
                                   T1C = FMA(KP866025403, TT, TS);
87
 
                                   TU = FNMS(KP866025403, TT, TS);
88
 
                                   T1d = Tw + Tz;
89
 
                                   TA = FNMS(KP500000000, Tz, Tw);
90
 
                                   T1D = FNMS(KP866025403, TW, TV);
91
 
                                   TX = FMA(KP866025403, TW, TV);
92
 
                                   Tb = T5 + Ta;
93
 
                                   T1Z = T5 - Ta;
94
 
                                   TC = FNMS(KP866025403, TB, TA);
95
 
                                   T1M = FMA(KP866025403, TB, TA);
96
 
                                   T2D = T1C - T1D;
97
 
                                   T1E = T1C + T1D;
98
 
                                   T1N = T1L - T1M;
99
 
                                   T2y = T1L + T1M;
100
 
                                   TD = Tv + TC;
101
 
                                   T2t = Tv - TC;
102
 
                              }
 
399
                              Tb = T5 + Ta;
 
400
                              T1R = T5 - Ta;
103
401
                         }
104
402
                    }
105
 
                    {
106
 
                         E T12, Th, TH, TE, Tg, T11, T14, TK, T17, Tk, TL;
 
403
               }
 
404
               {
 
405
                    E Tc, T10, Th, T15, Tf, TY, TH, TZ, Tk, T13, TM, T14;
 
406
                    Tc = Rp[WS(rs, 3)];
 
407
                    T10 = Ip[WS(rs, 3)];
 
408
                    Th = Rm[WS(rs, 2)];
 
409
                    T15 = Im[WS(rs, 2)];
 
410
                    {
 
411
                         E Td, Te, TF, TG;
 
412
                         Td = Rm[WS(rs, 4)];
 
413
                         Te = Rm[0];
 
414
                         Tf = Td + Te;
 
415
                         TY = KP866025403 * (Td - Te);
 
416
                         TF = Im[WS(rs, 4)];
 
417
                         TG = Im[0];
 
418
                         TH = KP866025403 * (TF - TG);
 
419
                         TZ = TF + TG;
 
420
                    }
 
421
                    {
 
422
                         E Ti, Tj, TK, TL;
 
423
                         Ti = Rp[WS(rs, 1)];
 
424
                         Tj = Rp[WS(rs, 5)];
 
425
                         Tk = Ti + Tj;
 
426
                         T13 = KP866025403 * (Ti - Tj);
 
427
                         TK = Ip[WS(rs, 5)];
 
428
                         TL = Ip[WS(rs, 1)];
 
429
                         TM = KP866025403 * (TK - TL);
 
430
                         T14 = TK + TL;
 
431
                    }
 
432
                    {
 
433
                         E TE, TJ, T16, T11;
 
434
                         TE = FNMS(KP500000000, Tf, Tc);
 
435
                         TI = TE + TH;
 
436
                         T1z = TE - TH;
 
437
                         TJ = FNMS(KP500000000, Tk, Th);
 
438
                         TN = TJ + TM;
 
439
                         T1A = TJ - TM;
 
440
                         T16 = FMA(KP500000000, T14, T15);
 
441
                         T17 = T13 - T16;
 
442
                         T1I = T13 + T16;
 
443
                         T11 = FMA(KP500000000, TZ, T10);
 
444
                         T12 = TY + T11;
 
445
                         T1H = T11 - TY;
107
446
                         {
108
 
                              E Tc, TZ, TF, TG, Tf, Td, Te, Ti, Tj, T10;
109
 
                              Tc = Rp[WS(rs, 3)];
110
 
                              T1U = T1c + T1d;
111
 
                              T1e = T1c - T1d;
112
 
                              T2o = TU + TX;
113
 
                              TY = TU - TX;
114
 
                              Td = Rm[WS(rs, 4)];
115
 
                              Te = Rm[0];
116
 
                              TZ = Ip[WS(rs, 3)];
117
 
                              TF = Im[WS(rs, 4)];
118
 
                              TG = Im[0];
119
 
                              Tf = Td + Te;
120
 
                              T12 = Td - Te;
121
 
                              Th = Rm[WS(rs, 2)];
122
 
                              TH = TF - TG;
123
 
                              T10 = TF + TG;
124
 
                              TE = FNMS(KP500000000, Tf, Tc);
 
447
                              E T1e, T1f, Tg, Tl;
 
448
                              T1e = T10 - TZ;
 
449
                              T1f = T14 - T15;
 
450
                              T1g = T1e + T1f;
 
451
                              T1S = T1e - T1f;
125
452
                              Tg = Tc + Tf;
126
 
                              Ti = Rp[WS(rs, 1)];
127
 
                              Tj = Rp[WS(rs, 5)];
128
 
                              T1f = TZ - T10;
129
 
                              T11 = FMA(KP500000000, T10, TZ);
130
 
                              T14 = Im[WS(rs, 2)];
131
 
                              TK = Ip[WS(rs, 5)];
132
 
                              T17 = Ti - Tj;
133
 
                              Tk = Ti + Tj;
134
 
                              TL = Ip[WS(rs, 1)];
135
 
                         }
136
 
                         {
137
 
                              E T1O, T13, Tl, TJ, TM, T15;
138
 
                              T1O = FNMS(KP866025403, T12, T11);
139
 
                              T13 = FMA(KP866025403, T12, T11);
140
453
                              Tl = Th + Tk;
141
 
                              TJ = FNMS(KP500000000, Tk, Th);
142
 
                              TM = TK - TL;
143
 
                              T15 = TK + TL;
144
 
                              {
145
 
                                   E T18, T1P, T1F, T16, T1G;
146
 
                                   T1F = FNMS(KP866025403, TH, TE);
147
 
                                   TI = FMA(KP866025403, TH, TE);
148
 
                                   T1g = T15 - T14;
149
 
                                   T16 = FMA(KP500000000, T15, T14);
150
 
                                   T1G = FNMS(KP866025403, TM, TJ);
151
 
                                   TN = FMA(KP866025403, TM, TJ);
152
 
                                   Tm = Tg + Tl;
153
 
                                   T1V = Tg - Tl;
154
 
                                   T18 = FNMS(KP866025403, T17, T16);
155
 
                                   T1P = FMA(KP866025403, T17, T16);
156
 
                                   T2z = T1F - T1G;
157
 
                                   T1H = T1F + T1G;
158
 
                                   T1Q = T1O - T1P;
159
 
                                   T2E = T1O + T1P;
160
 
                                   T19 = T13 + T18;
161
 
                                   T2u = T13 - T18;
162
 
                              }
 
454
                              Tm = Tg + Tl;
 
455
                              T1O = Tg - Tl;
163
456
                         }
164
457
                    }
165
458
               }
166
459
               {
167
 
                    E T20, T2p, T1v, T1s, T1q, T1y, T1u, T1z, T1t;
 
460
                    E Tn, T1h, TP, T1p, T19, T1r, T1n, T1t;
 
461
                    Tn = Tb + Tm;
 
462
                    T1h = T1d + T1g;
168
463
                    {
169
 
                         E T1m, Tn, T1a, T1p, T1i, To, TP, TR, T1h, TO;
170
 
                         T1m = Tb - Tm;
171
 
                         Tn = Tb + Tm;
172
 
                         T20 = T1f - T1g;
173
 
                         T1h = T1f + T1g;
174
 
                         T2p = TI + TN;
 
464
                         E TD, TO, TX, T18;
 
465
                         TD = Tv - TC;
175
466
                         TO = TI - TN;
176
 
                         T1a = TY - T19;
177
 
                         T1v = TY + T19;
178
 
                         T1p = T1e - T1h;
179
 
                         T1i = T1e + T1h;
 
467
                         TP = TD + TO;
 
468
                         T1p = TD - TO;
 
469
                         TX = TT - TW;
 
470
                         T18 = T12 - T17;
 
471
                         T19 = TX - T18;
 
472
                         T1r = TX + T18;
 
473
                         {
 
474
                              E T1k, T1m, T1j, T1l;
 
475
                              T1k = Tb - Tm;
 
476
                              T1m = T1d - T1g;
 
477
                              T1j = W[10];
 
478
                              T1l = W[11];
 
479
                              T1n = FNMS(T1l, T1m, T1j * T1k);
 
480
                              T1t = FMA(T1l, T1k, T1j * T1m);
 
481
                         }
 
482
                    }
 
483
                    {
 
484
                         E T1a, T1i, To, TQ;
180
485
                         To = W[0];
181
 
                         T1s = TD - TO;
182
 
                         TP = TD + TO;
183
 
                         TR = W[1];
184
 
                         {
185
 
                              E T1l, T1o, T1n, T1x, T1r;
186
 
                              {
187
 
                                   E T1j, TQ, T1k, T1b;
188
 
                                   T1j = To * T1a;
189
 
                                   TQ = To * TP;
190
 
                                   T1l = W[10];
191
 
                                   T1k = FNMS(TR, TP, T1j);
192
 
                                   T1b = FMA(TR, T1a, TQ);
193
 
                                   T1o = W[11];
194
 
                                   T1n = T1l * T1m;
195
 
                                   Im[0] = T1k - T1i;
196
 
                                   Ip[0] = T1i + T1k;
197
 
                                   Rm[0] = Tn + T1b;
198
 
                                   Rp[0] = Tn - T1b;
199
 
                                   T1x = T1o * T1m;
200
 
                                   T1r = W[12];
201
 
                              }
202
 
                              T1q = FNMS(T1o, T1p, T1n);
203
 
                              T1y = FMA(T1l, T1p, T1x);
204
 
                              T1u = W[13];
205
 
                              T1z = T1r * T1v;
206
 
                              T1t = T1r * T1s;
207
 
                         }
208
 
                    }
209
 
                    {
210
 
                         E T2e, T2h, T1S, T2j, T2f, T26, T2c, T2m, T2g, T24, T22;
211
 
                         {
212
 
                              E T2b, T1R, T27, T2a, T1B, T29, T2l, T1K, T1J, T1W, T21, T25, T2d, T23, T1X;
213
 
                              E T1Y;
214
 
                              {
215
 
                                   E T1I, T28, T1A, T1w, T1T;
216
 
                                   T1A = FNMS(T1u, T1s, T1z);
217
 
                                   T1w = FMA(T1u, T1v, T1t);
218
 
                                   T1I = T1E - T1H;
219
 
                                   T28 = T1E + T1H;
220
 
                                   T2b = T1N + T1Q;
221
 
                                   T1R = T1N - T1Q;
222
 
                                   Im[WS(rs, 3)] = T1A - T1y;
223
 
                                   Ip[WS(rs, 3)] = T1y + T1A;
224
 
                                   Rm[WS(rs, 3)] = T1q + T1w;
225
 
                                   Rp[WS(rs, 3)] = T1q - T1w;
226
 
                                   T27 = W[14];
227
 
                                   T2a = W[15];
228
 
                                   T1B = W[2];
229
 
                                   T29 = T27 * T28;
230
 
                                   T2l = T2a * T28;
231
 
                                   T1K = W[3];
232
 
                                   T1J = T1B * T1I;
233
 
                                   T1W = T1U - T1V;
234
 
                                   T2e = T1V + T1U;
235
 
                                   T2h = T1Z - T20;
236
 
                                   T21 = T1Z + T20;
237
 
                                   T25 = T1K * T1I;
238
 
                                   T1T = W[4];
239
 
                                   T2d = W[16];
240
 
                                   T23 = T1T * T21;
241
 
                                   T1X = T1T * T1W;
242
 
                              }
243
 
                              T1S = FNMS(T1K, T1R, T1J);
244
 
                              T2j = T2d * T2h;
245
 
                              T2f = T2d * T2e;
246
 
                              T26 = FMA(T1B, T1R, T25);
247
 
                              T1Y = W[5];
248
 
                              T2c = FNMS(T2a, T2b, T29);
249
 
                              T2m = FMA(T27, T2b, T2l);
250
 
                              T2g = W[17];
251
 
                              T24 = FNMS(T1Y, T1W, T23);
252
 
                              T22 = FMA(T1Y, T21, T1X);
253
 
                         }
254
 
                         {
255
 
                              E T2L, T2O, T2P, T2v, T2N, T2X, T2n, T2s, T2A, T2F, T2r, T2H, T2R, T2J, T2B;
256
 
                              E T2C;
257
 
                              {
258
 
                                   E T2q, T2k, T2i, T2M, T2x;
259
 
                                   T2k = FNMS(T2g, T2e, T2j);
260
 
                                   T2i = FMA(T2g, T2h, T2f);
261
 
                                   Im[WS(rs, 1)] = T24 - T26;
262
 
                                   Ip[WS(rs, 1)] = T24 + T26;
263
 
                                   Rm[WS(rs, 1)] = T22 + T1S;
264
 
                                   Rp[WS(rs, 1)] = T1S - T22;
265
 
                                   Im[WS(rs, 4)] = T2k - T2m;
266
 
                                   Ip[WS(rs, 4)] = T2k + T2m;
267
 
                                   Rm[WS(rs, 4)] = T2i + T2c;
268
 
                                   Rp[WS(rs, 4)] = T2c - T2i;
269
 
                                   T2q = T2o + T2p;
270
 
                                   T2M = T2o - T2p;
271
 
                                   T2L = W[18];
272
 
                                   T2O = W[19];
273
 
                                   T2P = T2t - T2u;
274
 
                                   T2v = T2t + T2u;
275
 
                                   T2N = T2L * T2M;
276
 
                                   T2X = T2O * T2M;
277
 
                                   T2n = W[6];
278
 
                                   T2s = W[7];
279
 
                                   T2S = T2y - T2z;
280
 
                                   T2A = T2y + T2z;
281
 
                                   T2F = T2D - T2E;
282
 
                                   T2V = T2D + T2E;
283
 
                                   T2r = T2n * T2q;
284
 
                                   T2H = T2s * T2q;
285
 
                                   T2x = W[8];
286
 
                                   T2R = W[20];
287
 
                                   T2J = T2x * T2F;
288
 
                                   T2B = T2x * T2A;
289
 
                              }
290
 
                              T2w = FNMS(T2s, T2v, T2r);
291
 
                              T2Z = T2R * T2V;
292
 
                              T2T = T2R * T2S;
293
 
                              T2I = FMA(T2n, T2v, T2H);
294
 
                              T2C = W[9];
295
 
                              T2Q = FNMS(T2O, T2P, T2N);
296
 
                              T2Y = FMA(T2L, T2P, T2X);
297
 
                              T2U = W[21];
298
 
                              T2K = FNMS(T2C, T2A, T2J);
299
 
                              T2G = FMA(T2C, T2F, T2B);
300
 
                         }
301
 
                    }
302
 
               }
303
 
          }
304
 
          T30 = FNMS(T2U, T2S, T2Z);
305
 
          T2W = FMA(T2U, T2V, T2T);
306
 
          Im[WS(rs, 2)] = T2K - T2I;
307
 
          Ip[WS(rs, 2)] = T2I + T2K;
308
 
          Rm[WS(rs, 2)] = T2w + T2G;
309
 
          Rp[WS(rs, 2)] = T2w - T2G;
310
 
          Im[WS(rs, 5)] = T30 - T2Y;
311
 
          Ip[WS(rs, 5)] = T2Y + T30;
312
 
          Rm[WS(rs, 5)] = T2Q + T2W;
313
 
          Rp[WS(rs, 5)] = T2Q - T2W;
314
 
     }
315
 
}
316
 
 
317
 
/*
 * Twiddle-instruction table referenced by the codelet descriptor `desc`.
 * It holds one {TW_FULL, 1, 12} entry followed by one {TW_NEXT, 1, 0} entry.
 *
 * NOTE(review): the codelet loop reads W[0] .. W[21] and advances W by 22
 * per iteration, which is consistent with 11 complex twiddle factors for a
 * size-12 transform -- confirm against the tw_instr / TW_FULL definitions,
 * which are not visible in this file.
 * NOTE(review): the TW_NEXT entry presumably terminates the table -- verify.
 */
static const tw_instr twinstr[] = {
318
 
     {TW_FULL, 1, 12},
319
 
     {TW_NEXT, 1, 0}
320
 
};
321
 
 
322
 
/*
 * Descriptor passed to the planner when the codelet is registered.
 * Initializer fields, in order: 12, the codelet name string "hc2cbdft_12",
 * the twiddle-instruction table `twinstr`, the address of GENUS, and the
 * four-number group {96, 22, 46, 0}.
 *
 * NOTE(review): the leading 12 presumably is the transform size (the
 * generator was invoked with `-n 12`) -- confirm against hc2c_desc.
 * NOTE(review): {96, 22, 46, 0} presumably holds operation counts in the
 * order (adds, muls, fmas, other): 96 + 46 = 142 and 22 + 46 = 68 agree with
 * the "142 FP additions, 68 FP multiplications" quoted in the generated
 * header comment of the FMA variant -- confirm against the declaration.
 */
static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {96, 22, 46, 0} };
323
 
 
324
 
/*
 * Registration entry point for this codelet.
 *
 * p: planner handle, forwarded unchanged to X(khc2c_register).
 *
 * Hands the hc2cbdft_12 kernel, its descriptor `desc` and the HC2C_VIA_DFT
 * flag to X(khc2c_register).  The function returns nothing; whatever the
 * registration call does with the planner is defined outside this file.
 */
void X(codelet_hc2cbdft_12) (planner *p) {
325
 
     X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
326
 
}
327
 
#else                           /* HAVE_FMA */
328
 
 
329
 
/* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
330
 
 
331
 
/*
332
 
 * This function contains 142 FP additions, 60 FP multiplications,
333
 
 * (or, 112 additions, 30 multiplications, 30 fused multiply/add),
334
 
 * 47 stack variables, 2 constants, and 48 memory accesses
335
 
 */
336
 
#include "hc2cb.h"
337
 
 
338
 
static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
339
 
{
340
 
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
341
 
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
342
 
     INT m;
343
 
     for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(rs)) {
344
 
          E Tv, T1E, TC, T1F, TW, T1x, TT, T1w, T1d, T1N, Tb, T1R, TI, T1z, TN;
345
 
          E T1A, T17, T1I, T12, T1H, T1g, T1S, Tm, T1O;
346
 
          {
347
 
               E T1, Tq, T6, TA, T4, Tp, Tt, TS, T9, Tw, Tz, TV;
348
 
               T1 = Rp[0];
349
 
               Tq = Ip[0];
350
 
               T6 = Rm[WS(rs, 5)];
351
 
               TA = Im[WS(rs, 5)];
352
 
               {
353
 
                    E T2, T3, Tr, Ts;
354
 
                    T2 = Rp[WS(rs, 4)];
355
 
                    T3 = Rm[WS(rs, 3)];
356
 
                    T4 = T2 + T3;
357
 
                    Tp = KP866025403 * (T2 - T3);
358
 
                    Tr = Im[WS(rs, 3)];
359
 
                    Ts = Ip[WS(rs, 4)];
360
 
                    Tt = Tr - Ts;
361
 
                    TS = KP866025403 * (Tr + Ts);
362
 
               }
363
 
               {
364
 
                    E T7, T8, Tx, Ty;
365
 
                    T7 = Rm[WS(rs, 1)];
366
 
                    T8 = Rp[WS(rs, 2)];
367
 
                    T9 = T7 + T8;
368
 
                    Tw = KP866025403 * (T7 - T8);
369
 
                    Tx = Im[WS(rs, 1)];
370
 
                    Ty = Ip[WS(rs, 2)];
371
 
                    Tz = Tx - Ty;
372
 
                    TV = KP866025403 * (Tx + Ty);
373
 
               }
374
 
               {
375
 
                    E Tu, TB, TU, TR;
376
 
                    Tu = FMA(KP500000000, Tt, Tq);
377
 
                    Tv = Tp + Tu;
378
 
                    T1E = Tu - Tp;
379
 
                    TB = FMS(KP500000000, Tz, TA);
380
 
                    TC = Tw + TB;
381
 
                    T1F = TB - Tw;
382
 
                    TU = FNMS(KP500000000, T9, T6);
383
 
                    TW = TU + TV;
384
 
                    T1x = TU - TV;
385
 
                    TR = FNMS(KP500000000, T4, T1);
386
 
                    TT = TR - TS;
387
 
                    T1w = TR + TS;
388
 
                    {
389
 
                         E T1b, T1c, T5, Ta;
390
 
                         T1b = Tq - Tt;
391
 
                         T1c = Tz + TA;
392
 
                         T1d = T1b - T1c;
393
 
                         T1N = T1b + T1c;
394
 
                         T5 = T1 + T4;
395
 
                         Ta = T6 + T9;
396
 
                         Tb = T5 + Ta;
397
 
                         T1R = T5 - Ta;
398
 
                    }
399
 
               }
400
 
          }
401
 
          {
402
 
               E Tc, T10, Th, T15, Tf, TY, TH, TZ, Tk, T13, TM, T14;
403
 
               Tc = Rp[WS(rs, 3)];
404
 
               T10 = Ip[WS(rs, 3)];
405
 
               Th = Rm[WS(rs, 2)];
406
 
               T15 = Im[WS(rs, 2)];
407
 
               {
408
 
                    E Td, Te, TF, TG;
409
 
                    Td = Rm[WS(rs, 4)];
410
 
                    Te = Rm[0];
411
 
                    Tf = Td + Te;
412
 
                    TY = KP866025403 * (Td - Te);
413
 
                    TF = Im[WS(rs, 4)];
414
 
                    TG = Im[0];
415
 
                    TH = KP866025403 * (TF - TG);
416
 
                    TZ = TF + TG;
417
 
               }
418
 
               {
419
 
                    E Ti, Tj, TK, TL;
420
 
                    Ti = Rp[WS(rs, 1)];
421
 
                    Tj = Rp[WS(rs, 5)];
422
 
                    Tk = Ti + Tj;
423
 
                    T13 = KP866025403 * (Ti - Tj);
424
 
                    TK = Ip[WS(rs, 5)];
425
 
                    TL = Ip[WS(rs, 1)];
426
 
                    TM = KP866025403 * (TK - TL);
427
 
                    T14 = TK + TL;
428
 
               }
429
 
               {
430
 
                    E TE, TJ, T16, T11;
431
 
                    TE = FNMS(KP500000000, Tf, Tc);
432
 
                    TI = TE + TH;
433
 
                    T1z = TE - TH;
434
 
                    TJ = FNMS(KP500000000, Tk, Th);
435
 
                    TN = TJ + TM;
436
 
                    T1A = TJ - TM;
437
 
                    T16 = FMA(KP500000000, T14, T15);
438
 
                    T17 = T13 - T16;
439
 
                    T1I = T13 + T16;
440
 
                    T11 = FMA(KP500000000, TZ, T10);
441
 
                    T12 = TY + T11;
442
 
                    T1H = T11 - TY;
443
 
                    {
444
 
                         E T1e, T1f, Tg, Tl;
445
 
                         T1e = T10 - TZ;
446
 
                         T1f = T14 - T15;
447
 
                         T1g = T1e + T1f;
448
 
                         T1S = T1e - T1f;
449
 
                         Tg = Tc + Tf;
450
 
                         Tl = Th + Tk;
451
 
                         Tm = Tg + Tl;
452
 
                         T1O = Tg - Tl;
453
 
                    }
454
 
               }
455
 
          }
456
 
          {
457
 
               E Tn, T1h, TP, T1p, T19, T1r, T1n, T1t;
458
 
               Tn = Tb + Tm;
459
 
               T1h = T1d + T1g;
460
 
               {
461
 
                    E TD, TO, TX, T18;
462
 
                    TD = Tv - TC;
463
 
                    TO = TI - TN;
464
 
                    TP = TD + TO;
465
 
                    T1p = TD - TO;
466
 
                    TX = TT - TW;
467
 
                    T18 = T12 - T17;
468
 
                    T19 = TX - T18;
469
 
                    T1r = TX + T18;
470
 
                    {
471
 
                         E T1k, T1m, T1j, T1l;
472
 
                         T1k = Tb - Tm;
473
 
                         T1m = T1d - T1g;
474
 
                         T1j = W[10];
475
 
                         T1l = W[11];
476
 
                         T1n = FNMS(T1l, T1m, T1j * T1k);
477
 
                         T1t = FMA(T1l, T1k, T1j * T1m);
478
 
                    }
479
 
               }
480
 
               {
481
 
                    E T1a, T1i, To, TQ;
482
 
                    To = W[0];
483
 
                    TQ = W[1];
484
 
                    T1a = FMA(To, TP, TQ * T19);
485
 
                    T1i = FNMS(TQ, TP, To * T19);
486
 
                    Rp[0] = Tn - T1a;
487
 
                    Ip[0] = T1h + T1i;
488
 
                    Rm[0] = Tn + T1a;
489
 
                    Im[0] = T1i - T1h;
490
 
               }
491
 
               {
492
 
                    E T1s, T1u, T1o, T1q;
493
 
                    T1o = W[12];
494
 
                    T1q = W[13];
495
 
                    T1s = FMA(T1o, T1p, T1q * T1r);
496
 
                    T1u = FNMS(T1q, T1p, T1o * T1r);
497
 
                    Rp[WS(rs, 3)] = T1n - T1s;
498
 
                    Ip[WS(rs, 3)] = T1t + T1u;
499
 
                    Rm[WS(rs, 3)] = T1n + T1s;
500
 
                    Im[WS(rs, 3)] = T1u - T1t;
501
 
               }
502
 
          }
503
 
          {
504
 
               E T1C, T1Y, T1K, T20, T1U, T1V, T26, T27;
505
 
               {
506
 
                    E T1y, T1B, T1G, T1J;
507
 
                    T1y = T1w + T1x;
508
 
                    T1B = T1z + T1A;
509
 
                    T1C = T1y - T1B;
510
 
                    T1Y = T1y + T1B;
511
 
                    T1G = T1E + T1F;
512
 
                    T1J = T1H - T1I;
513
 
                    T1K = T1G - T1J;
514
 
                    T20 = T1G + T1J;
515
 
               }
516
 
               {
517
 
                    E T1P, T1T, T1M, T1Q;
518
 
                    T1P = T1N - T1O;
519
 
                    T1T = T1R + T1S;
520
 
                    T1M = W[4];
521
 
                    T1Q = W[5];
522
 
                    T1U = FMA(T1M, T1P, T1Q * T1T);
523
 
                    T1V = FNMS(T1Q, T1P, T1M * T1T);
524
 
               }
525
 
               {
526
 
                    E T23, T25, T22, T24;
527
 
                    T23 = T1O + T1N;
528
 
                    T25 = T1R - T1S;
529
 
                    T22 = W[16];
530
 
                    T24 = W[17];
531
 
                    T26 = FMA(T22, T23, T24 * T25);
532
 
                    T27 = FNMS(T24, T23, T22 * T25);
533
 
               }
534
 
               {
535
 
                    E T1L, T1W, T1v, T1D;
536
 
                    T1v = W[2];
537
 
                    T1D = W[3];
538
 
                    T1L = FNMS(T1D, T1K, T1v * T1C);
539
 
                    T1W = FMA(T1D, T1C, T1v * T1K);
540
 
                    Rp[WS(rs, 1)] = T1L - T1U;
541
 
                    Ip[WS(rs, 1)] = T1V + T1W;
542
 
                    Rm[WS(rs, 1)] = T1U + T1L;
543
 
                    Im[WS(rs, 1)] = T1V - T1W;
544
 
               }
545
 
               {
546
 
                    E T21, T28, T1X, T1Z;
547
 
                    T1X = W[14];
548
 
                    T1Z = W[15];
549
 
                    T21 = FNMS(T1Z, T20, T1X * T1Y);
550
 
                    T28 = FMA(T1Z, T1Y, T1X * T20);
551
 
                    Rp[WS(rs, 4)] = T21 - T26;
552
 
                    Ip[WS(rs, 4)] = T27 + T28;
553
 
                    Rm[WS(rs, 4)] = T26 + T21;
554
 
                    Im[WS(rs, 4)] = T27 - T28;
555
 
               }
556
 
          }
557
 
          {
558
 
               E T2c, T2u, T2p, T2B, T2g, T2w, T2l, T2z;
559
 
               {
560
 
                    E T2a, T2b, T2n, T2o;
561
 
                    T2a = TT + TW;
562
 
                    T2b = TI + TN;
563
 
                    T2c = T2a + T2b;
564
 
                    T2u = T2a - T2b;
565
 
                    T2n = T1w - T1x;
566
 
                    T2o = T1H + T1I;
567
 
                    T2p = T2n - T2o;
568
 
                    T2B = T2n + T2o;
569
 
               }
570
 
               {
571
 
                    E T2e, T2f, T2j, T2k;
572
 
                    T2e = Tv + TC;
573
 
                    T2f = T12 + T17;
574
 
                    T2g = T2e + T2f;
575
 
                    T2w = T2e - T2f;
576
 
                    T2j = T1E - T1F;
577
 
                    T2k = T1z - T1A;
578
 
                    T2l = T2j + T2k;
579
 
                    T2z = T2j - T2k;
580
 
               }
581
 
               {
582
 
                    E T2h, T2r, T2q, T2s;
583
 
                    {
584
 
                         E T29, T2d, T2i, T2m;
585
 
                         T29 = W[6];
586
 
                         T2d = W[7];
587
 
                         T2h = FNMS(T2d, T2g, T29 * T2c);
588
 
                         T2r = FMA(T2d, T2c, T29 * T2g);
589
 
                         T2i = W[8];
590
 
                         T2m = W[9];
591
 
                         T2q = FMA(T2i, T2l, T2m * T2p);
592
 
                         T2s = FNMS(T2m, T2l, T2i * T2p);
593
 
                    }
594
 
                    Rp[WS(rs, 2)] = T2h - T2q;
595
 
                    Ip[WS(rs, 2)] = T2r + T2s;
596
 
                    Rm[WS(rs, 2)] = T2h + T2q;
597
 
                    Im[WS(rs, 2)] = T2s - T2r;
598
 
               }
599
 
               {
600
 
                    E T2x, T2D, T2C, T2E;
601
 
                    {
602
 
                         E T2t, T2v, T2y, T2A;
603
 
                         T2t = W[18];
604
 
                         T2v = W[19];
605
 
                         T2x = FNMS(T2v, T2w, T2t * T2u);
606
 
                         T2D = FMA(T2v, T2u, T2t * T2w);
607
 
                         T2y = W[20];
608
 
                         T2A = W[21];
609
 
                         T2C = FMA(T2y, T2z, T2A * T2B);
610
 
                         T2E = FNMS(T2A, T2z, T2y * T2B);
611
 
                    }
612
 
                    Rp[WS(rs, 5)] = T2x - T2C;
613
 
                    Ip[WS(rs, 5)] = T2D + T2E;
614
 
                    Rm[WS(rs, 5)] = T2x + T2C;
615
 
                    Im[WS(rs, 5)] = T2E - T2D;
 
486
                         TQ = W[1];
 
487
                         T1a = FMA(To, TP, TQ * T19);
 
488
                         T1i = FNMS(TQ, TP, To * T19);
 
489
                         Rp[0] = Tn - T1a;
 
490
                         Ip[0] = T1h + T1i;
 
491
                         Rm[0] = Tn + T1a;
 
492
                         Im[0] = T1i - T1h;
 
493
                    }
 
494
                    {
 
495
                         E T1s, T1u, T1o, T1q;
 
496
                         T1o = W[12];
 
497
                         T1q = W[13];
 
498
                         T1s = FMA(T1o, T1p, T1q * T1r);
 
499
                         T1u = FNMS(T1q, T1p, T1o * T1r);
 
500
                         Rp[WS(rs, 3)] = T1n - T1s;
 
501
                         Ip[WS(rs, 3)] = T1t + T1u;
 
502
                         Rm[WS(rs, 3)] = T1n + T1s;
 
503
                         Im[WS(rs, 3)] = T1u - T1t;
 
504
                    }
 
505
               }
 
506
               {
 
507
                    E T1C, T1Y, T1K, T20, T1U, T1V, T26, T27;
 
508
                    {
 
509
                         E T1y, T1B, T1G, T1J;
 
510
                         T1y = T1w + T1x;
 
511
                         T1B = T1z + T1A;
 
512
                         T1C = T1y - T1B;
 
513
                         T1Y = T1y + T1B;
 
514
                         T1G = T1E + T1F;
 
515
                         T1J = T1H - T1I;
 
516
                         T1K = T1G - T1J;
 
517
                         T20 = T1G + T1J;
 
518
                    }
 
519
                    {
 
520
                         E T1P, T1T, T1M, T1Q;
 
521
                         T1P = T1N - T1O;
 
522
                         T1T = T1R + T1S;
 
523
                         T1M = W[4];
 
524
                         T1Q = W[5];
 
525
                         T1U = FMA(T1M, T1P, T1Q * T1T);
 
526
                         T1V = FNMS(T1Q, T1P, T1M * T1T);
 
527
                    }
 
528
                    {
 
529
                         E T23, T25, T22, T24;
 
530
                         T23 = T1O + T1N;
 
531
                         T25 = T1R - T1S;
 
532
                         T22 = W[16];
 
533
                         T24 = W[17];
 
534
                         T26 = FMA(T22, T23, T24 * T25);
 
535
                         T27 = FNMS(T24, T23, T22 * T25);
 
536
                    }
 
537
                    {
 
538
                         E T1L, T1W, T1v, T1D;
 
539
                         T1v = W[2];
 
540
                         T1D = W[3];
 
541
                         T1L = FNMS(T1D, T1K, T1v * T1C);
 
542
                         T1W = FMA(T1D, T1C, T1v * T1K);
 
543
                         Rp[WS(rs, 1)] = T1L - T1U;
 
544
                         Ip[WS(rs, 1)] = T1V + T1W;
 
545
                         Rm[WS(rs, 1)] = T1U + T1L;
 
546
                         Im[WS(rs, 1)] = T1V - T1W;
 
547
                    }
 
548
                    {
 
549
                         E T21, T28, T1X, T1Z;
 
550
                         T1X = W[14];
 
551
                         T1Z = W[15];
 
552
                         T21 = FNMS(T1Z, T20, T1X * T1Y);
 
553
                         T28 = FMA(T1Z, T1Y, T1X * T20);
 
554
                         Rp[WS(rs, 4)] = T21 - T26;
 
555
                         Ip[WS(rs, 4)] = T27 + T28;
 
556
                         Rm[WS(rs, 4)] = T26 + T21;
 
557
                         Im[WS(rs, 4)] = T27 - T28;
 
558
                    }
 
559
               }
 
560
               {
 
561
                    E T2c, T2u, T2p, T2B, T2g, T2w, T2l, T2z;
 
562
                    {
 
563
                         E T2a, T2b, T2n, T2o;
 
564
                         T2a = TT + TW;
 
565
                         T2b = TI + TN;
 
566
                         T2c = T2a + T2b;
 
567
                         T2u = T2a - T2b;
 
568
                         T2n = T1w - T1x;
 
569
                         T2o = T1H + T1I;
 
570
                         T2p = T2n - T2o;
 
571
                         T2B = T2n + T2o;
 
572
                    }
 
573
                    {
 
574
                         E T2e, T2f, T2j, T2k;
 
575
                         T2e = Tv + TC;
 
576
                         T2f = T12 + T17;
 
577
                         T2g = T2e + T2f;
 
578
                         T2w = T2e - T2f;
 
579
                         T2j = T1E - T1F;
 
580
                         T2k = T1z - T1A;
 
581
                         T2l = T2j + T2k;
 
582
                         T2z = T2j - T2k;
 
583
                    }
 
584
                    {
 
585
                         E T2h, T2r, T2q, T2s;
 
586
                         {
 
587
                              E T29, T2d, T2i, T2m;
 
588
                              T29 = W[6];
 
589
                              T2d = W[7];
 
590
                              T2h = FNMS(T2d, T2g, T29 * T2c);
 
591
                              T2r = FMA(T2d, T2c, T29 * T2g);
 
592
                              T2i = W[8];
 
593
                              T2m = W[9];
 
594
                              T2q = FMA(T2i, T2l, T2m * T2p);
 
595
                              T2s = FNMS(T2m, T2l, T2i * T2p);
 
596
                         }
 
597
                         Rp[WS(rs, 2)] = T2h - T2q;
 
598
                         Ip[WS(rs, 2)] = T2r + T2s;
 
599
                         Rm[WS(rs, 2)] = T2h + T2q;
 
600
                         Im[WS(rs, 2)] = T2s - T2r;
 
601
                    }
 
602
                    {
 
603
                         E T2x, T2D, T2C, T2E;
 
604
                         {
 
605
                              E T2t, T2v, T2y, T2A;
 
606
                              T2t = W[18];
 
607
                              T2v = W[19];
 
608
                              T2x = FNMS(T2v, T2w, T2t * T2u);
 
609
                              T2D = FMA(T2v, T2u, T2t * T2w);
 
610
                              T2y = W[20];
 
611
                              T2A = W[21];
 
612
                              T2C = FMA(T2y, T2z, T2A * T2B);
 
613
                              T2E = FNMS(T2A, T2z, T2y * T2B);
 
614
                         }
 
615
                         Rp[WS(rs, 5)] = T2x - T2C;
 
616
                         Ip[WS(rs, 5)] = T2D + T2E;
 
617
                         Rm[WS(rs, 5)] = T2x + T2C;
 
618
                         Im[WS(rs, 5)] = T2E - T2D;
 
619
                    }
616
620
               }
617
621
          }
618
622
     }