~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cf/hf_16.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added

removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:43:50 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:16:38 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */
 
28
/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */
29
29
 
30
30
/*
31
31
 * This function contains 174 FP additions, 100 FP multiplications,
39
39
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
40
40
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
41
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
42
 
     INT m;
43
 
     for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
44
 
          E T2T, T2Q;
45
 
          {
46
 
               E T3A, T3o, T8, T1I, T2w, T35, T2k, T1s, T2p, T36, T2r, T1F, T3k, T1N, T3z;
47
 
               E Tl, T1U, T2W, T1P, Tz, T2g, T30, T25, T11, TB, TE, T2a, T31, T2h, T1e;
48
 
               E TC, T1X, TH, TK, TG, TD, TJ;
49
 
               {
50
 
                    E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti;
51
 
                    {
52
 
                         E T1h, T1k, T1n, T2s, T1i, T1q, T1m, T1j, T1p;
53
 
                         {
54
 
                              E T1, T3n, T3, T6, T2, T5;
55
 
                              T1 = cr[0];
56
 
                              T3n = ci[0];
57
 
                              T3 = cr[WS(rs, 8)];
58
 
                              T6 = ci[WS(rs, 8)];
59
 
                              T2 = W[14];
60
 
                              T5 = W[15];
61
 
                              {
62
 
                                   E T3l, T4, T1g, T3m, T7;
63
 
                                   T1h = cr[WS(rs, 15)];
64
 
                                   T1k = ci[WS(rs, 15)];
65
 
                                   T3l = T2 * T6;
66
 
                                   T4 = T2 * T3;
67
 
                                   T1g = W[28];
68
 
                                   T1n = cr[WS(rs, 7)];
69
 
                                   T3m = FNMS(T5, T3, T3l);
70
 
                                   T7 = FMA(T5, T6, T4);
71
 
                                   T2s = T1g * T1k;
72
 
                                   T1i = T1g * T1h;
73
 
                                   T3A = T3n - T3m;
74
 
                                   T3o = T3m + T3n;
75
 
                                   T8 = T1 + T7;
76
 
                                   T1I = T1 - T7;
77
 
                                   T1q = ci[WS(rs, 7)];
78
 
                                   T1m = W[12];
79
 
                              }
80
 
                              T1j = W[29];
81
 
                              T1p = W[13];
82
 
                         }
83
 
                         {
84
 
                              E T1u, T1x, T1v, T2l, T1A, T1D, T1z, T1w, T1C;
85
 
                              {
86
 
                                   E T2t, T1l, T2v, T1r, T2u, T1o, T1t;
87
 
                                   T1u = cr[WS(rs, 3)];
88
 
                                   T2u = T1m * T1q;
89
 
                                   T1o = T1m * T1n;
90
 
                                   T2t = FNMS(T1j, T1h, T2s);
91
 
                                   T1l = FMA(T1j, T1k, T1i);
92
 
                                   T2v = FNMS(T1p, T1n, T2u);
93
 
                                   T1r = FMA(T1p, T1q, T1o);
94
 
                                   T1x = ci[WS(rs, 3)];
95
 
                                   T1t = W[4];
96
 
                                   T2w = T2t - T2v;
97
 
                                   T35 = T2t + T2v;
98
 
                                   T2k = T1l - T1r;
99
 
                                   T1s = T1l + T1r;
100
 
                                   T1v = T1t * T1u;
101
 
                                   T2l = T1t * T1x;
102
 
                              }
103
 
                              T1A = cr[WS(rs, 11)];
104
 
                              T1D = ci[WS(rs, 11)];
105
 
                              T1z = W[20];
106
 
                              T1w = W[5];
107
 
                              T1C = W[21];
108
 
                              {
109
 
                                   E T2m, T1y, T2o, T1E, T2n, T1B, T9;
110
 
                                   Ta = cr[WS(rs, 4)];
111
 
                                   T2n = T1z * T1D;
112
 
                                   T1B = T1z * T1A;
113
 
                                   T2m = FNMS(T1w, T1u, T2l);
114
 
                                   T1y = FMA(T1w, T1x, T1v);
115
 
                                   T2o = FNMS(T1C, T1A, T2n);
116
 
                                   T1E = FMA(T1C, T1D, T1B);
117
 
                                   Td = ci[WS(rs, 4)];
118
 
                                   T9 = W[6];
119
 
                                   T2p = T2m - T2o;
120
 
                                   T36 = T2m + T2o;
121
 
                                   T2r = T1E - T1y;
122
 
                                   T1F = T1y + T1E;
123
 
                                   Tb = T9 * Ta;
124
 
                                   T1J = T9 * Td;
125
 
                              }
126
 
                              Tg = cr[WS(rs, 12)];
127
 
                              Tj = ci[WS(rs, 12)];
128
 
                              Tf = W[22];
129
 
                              Tc = W[7];
130
 
                              Ti = W[23];
131
 
                         }
132
 
                    }
133
 
                    {
134
 
                         E TQ, TT, TR, T2c, TW, TZ, TV, TS, TY;
135
 
                         {
136
 
                              E To, Tr, Tp, T1Q, Tu, Tx, Tt, Tq, Tw;
137
 
                              {
138
 
                                   E T1K, Te, T1M, Tk, T1L, Th, Tn;
139
 
                                   To = cr[WS(rs, 2)];
140
 
                                   T1L = Tf * Tj;
141
 
                                   Th = Tf * Tg;
142
 
                                   T1K = FNMS(Tc, Ta, T1J);
143
 
                                   Te = FMA(Tc, Td, Tb);
144
 
                                   T1M = FNMS(Ti, Tg, T1L);
145
 
                                   Tk = FMA(Ti, Tj, Th);
146
 
                                   Tr = ci[WS(rs, 2)];
147
 
                                   Tn = W[2];
148
 
                                   T3k = T1K + T1M;
149
 
                                   T1N = T1K - T1M;
150
 
                                   T3z = Te - Tk;
151
 
                                   Tl = Te + Tk;
152
 
                                   Tp = Tn * To;
153
 
                                   T1Q = Tn * Tr;
154
 
                              }
155
 
                              Tu = cr[WS(rs, 10)];
156
 
                              Tx = ci[WS(rs, 10)];
157
 
                              Tt = W[18];
158
 
                              Tq = W[3];
159
 
                              Tw = W[19];
160
 
                              {
161
 
                                   E T1R, Ts, T1T, Ty, T1S, Tv, TP;
162
 
                                   TQ = cr[WS(rs, 1)];
163
 
                                   T1S = Tt * Tx;
164
 
                                   Tv = Tt * Tu;
165
 
                                   T1R = FNMS(Tq, To, T1Q);
166
 
                                   Ts = FMA(Tq, Tr, Tp);
167
 
                                   T1T = FNMS(Tw, Tu, T1S);
168
 
                                   Ty = FMA(Tw, Tx, Tv);
169
 
                                   TT = ci[WS(rs, 1)];
170
 
                                   TP = W[0];
171
 
                                   T1U = T1R - T1T;
172
 
                                   T2W = T1R + T1T;
173
 
                                   T1P = Ts - Ty;
174
 
                                   Tz = Ts + Ty;
175
 
                                   TR = TP * TQ;
176
 
                                   T2c = TP * TT;
177
 
                              }
178
 
                              TW = cr[WS(rs, 9)];
179
 
                              TZ = ci[WS(rs, 9)];
180
 
                              TV = W[16];
181
 
                              TS = W[1];
182
 
                              TY = W[17];
183
 
                         }
184
 
                         {
185
 
                              E T13, T16, T14, T26, T19, T1c, T18, T15, T1b;
186
 
                              {
187
 
                                   E T2d, TU, T2f, T10, T2e, TX, T12;
188
 
                                   T13 = cr[WS(rs, 5)];
189
 
                                   T2e = TV * TZ;
190
 
                                   TX = TV * TW;
191
 
                                   T2d = FNMS(TS, TQ, T2c);
192
 
                                   TU = FMA(TS, TT, TR);
193
 
                                   T2f = FNMS(TY, TW, T2e);
194
 
                                   T10 = FMA(TY, TZ, TX);
195
 
                                   T16 = ci[WS(rs, 5)];
196
 
                                   T12 = W[8];
197
 
                                   T2g = T2d - T2f;
198
 
                                   T30 = T2d + T2f;
199
 
                                   T25 = TU - T10;
200
 
                                   T11 = TU + T10;
201
 
                                   T14 = T12 * T13;
202
 
                                   T26 = T12 * T16;
203
 
                              }
204
 
                              T19 = cr[WS(rs, 13)];
205
 
                              T1c = ci[WS(rs, 13)];
206
 
                              T18 = W[24];
207
 
                              T15 = W[9];
208
 
                              T1b = W[25];
209
 
                              {
210
 
                                   E T27, T17, T29, T1d, T28, T1a, TA;
211
 
                                   TB = cr[WS(rs, 14)];
212
 
                                   T28 = T18 * T1c;
213
 
                                   T1a = T18 * T19;
214
 
                                   T27 = FNMS(T15, T13, T26);
215
 
                                   T17 = FMA(T15, T16, T14);
216
 
                                   T29 = FNMS(T1b, T19, T28);
217
 
                                   T1d = FMA(T1b, T1c, T1a);
218
 
                                   TE = ci[WS(rs, 14)];
219
 
                                   TA = W[26];
220
 
                                   T2a = T27 - T29;
221
 
                                   T31 = T27 + T29;
222
 
                                   T2h = T17 - T1d;
223
 
                                   T1e = T17 + T1d;
224
 
                                   TC = TA * TB;
225
 
                                   T1X = TA * TE;
226
 
                              }
227
 
                              TH = cr[WS(rs, 6)];
228
 
                              TK = ci[WS(rs, 6)];
229
 
                              TG = W[10];
230
 
                              TD = W[27];
231
 
                              TJ = W[11];
232
 
                         }
233
 
                    }
234
 
               }
235
 
               {
236
 
                    E T2U, T3u, T2Z, T21, T1W, T34, T2X, T37, T3t, T3q, T3e, T32, T3i, T3h;
237
 
                    {
238
 
                         E T3f, T3r, T1H, T3s, TO, T3g;
239
 
                         {
240
 
                              E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI;
241
 
                              T2U = T8 - Tl;
242
 
                              Tm = T8 + Tl;
243
 
                              T1Z = TG * TK;
244
 
                              TI = TG * TH;
245
 
                              T1Y = FNMS(TD, TB, T1X);
246
 
                              TF = FMA(TD, TE, TC);
247
 
                              T20 = FNMS(TJ, TH, T1Z);
248
 
                              TL = FMA(TJ, TK, TI);
249
 
                              T3p = T3k + T3o;
250
 
                              T3u = T3o - T3k;
251
 
                              {
252
 
                                   E T1f, TM, T1G, T3j, T2V, TN;
253
 
                                   T2Z = T11 - T1e;
254
 
                                   T1f = T11 + T1e;
255
 
                                   T21 = T1Y - T20;
256
 
                                   T2V = T1Y + T20;
257
 
                                   T1W = TF - TL;
258
 
                                   TM = TF + TL;
259
 
                                   T1G = T1s + T1F;
260
 
                                   T34 = T1s - T1F;
261
 
                                   T2X = T2V - T2W;
262
 
                                   T3j = T2W + T2V;
263
 
                                   T3f = T35 + T36;
264
 
                                   T37 = T35 - T36;
265
 
                                   T3t = Tz - TM;
266
 
                                   TN = Tz + TM;
267
 
                                   T3r = T1G - T1f;
268
 
                                   T1H = T1f + T1G;
269
 
                                   T3s = T3p - T3j;
270
 
                                   T3q = T3j + T3p;
271
 
                                   T3e = Tm - TN;
272
 
                                   TO = Tm + TN;
273
 
                                   T3g = T30 + T31;
274
 
                                   T32 = T30 - T31;
275
 
                              }
276
 
                         }
277
 
                         cr[WS(rs, 12)] = T3r - T3s;
278
 
                         ci[WS(rs, 11)] = T3r + T3s;
279
 
                         ci[WS(rs, 7)] = TO - T1H;
280
 
                         T3i = T3g + T3f;
281
 
                         T3h = T3f - T3g;
282
 
                         cr[0] = TO + T1H;
283
 
                    }
284
 
                    {
285
 
                         E T3a, T2Y, T3x, T3v;
286
 
                         ci[WS(rs, 15)] = T3i + T3q;
287
 
                         cr[WS(rs, 8)] = T3i - T3q;
288
 
                         ci[WS(rs, 3)] = T3e + T3h;
289
 
                         cr[WS(rs, 4)] = T3e - T3h;
290
 
                         T3a = T2U + T2X;
291
 
                         T2Y = T2U - T2X;
292
 
                         T3x = T3u - T3t;
293
 
                         T3v = T3t + T3u;
294
 
                         {
295
 
                              E T2E, T1O, T3B, T3H, T2q, T2x, T3I, T23, T2R, T2O, T2J, T2K, T3C, T2H, T2B;
296
 
                              E T2j;
297
 
                              {
298
 
                                   E T2F, T1V, T22, T2G;
299
 
                                   {
300
 
                                        E T3b, T33, T3c, T38;
301
 
                                        T2E = T1I + T1N;
302
 
                                        T1O = T1I - T1N;
303
 
                                        T3b = T2Z - T32;
304
 
                                        T33 = T2Z + T32;
305
 
                                        T3c = T34 + T37;
306
 
                                        T38 = T34 - T37;
307
 
                                        T3B = T3z + T3A;
308
 
                                        T3H = T3A - T3z;
309
 
                                        {
310
 
                                             E T3d, T3y, T3w, T39;
311
 
                                             T3d = T3b + T3c;
312
 
                                             T3y = T3c - T3b;
313
 
                                             T3w = T38 - T33;
314
 
                                             T39 = T33 + T38;
315
 
                                             ci[WS(rs, 1)] = FMA(KP707106781, T3d, T3a);
316
 
                                             cr[WS(rs, 6)] = FNMS(KP707106781, T3d, T3a);
317
 
                                             ci[WS(rs, 13)] = FMA(KP707106781, T3y, T3x);
318
 
                                             cr[WS(rs, 10)] = FMS(KP707106781, T3y, T3x);
319
 
                                             ci[WS(rs, 9)] = FMA(KP707106781, T3w, T3v);
320
 
                                             cr[WS(rs, 14)] = FMS(KP707106781, T3w, T3v);
321
 
                                             cr[WS(rs, 2)] = FMA(KP707106781, T39, T2Y);
322
 
                                             ci[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y);
323
 
                                             T2F = T1P + T1U;
324
 
                                             T1V = T1P - T1U;
325
 
                                             T22 = T1W + T21;
326
 
                                             T2G = T1W - T21;
327
 
                                        }
328
 
                                   }
329
 
                                   {
330
 
                                        E T2M, T2N, T2b, T2i;
331
 
                                        T2q = T2k - T2p;
332
 
                                        T2M = T2k + T2p;
333
 
                                        T2N = T2w + T2r;
334
 
                                        T2x = T2r - T2w;
335
 
                                        T3I = T22 - T1V;
336
 
                                        T23 = T1V + T22;
337
 
                                        T2R = FMA(KP414213562, T2M, T2N);
338
 
                                        T2O = FNMS(KP414213562, T2N, T2M);
339
 
                                        T2J = T25 + T2a;
340
 
                                        T2b = T25 - T2a;
341
 
                                        T2i = T2g + T2h;
342
 
                                        T2K = T2g - T2h;
343
 
                                        T3C = T2F - T2G;
344
 
                                        T2H = T2F + T2G;
345
 
                                        T2B = FMA(KP414213562, T2b, T2i);
346
 
                                        T2j = FNMS(KP414213562, T2i, T2b);
347
 
                                   }
348
 
                              }
349
 
                              {
350
 
                                   E T2A, T3G, T2P, T2D, T3E, T3F, T3D, T2I;
351
 
                                   {
352
 
                                        E T24, T2L, T2C, T2y, T3J, T3L, T3K, T2S, T2z, T3M;
353
 
                                        T2A = FNMS(KP707106781, T23, T1O);
354
 
                                        T24 = FMA(KP707106781, T23, T1O);
355
 
                                        T2S = FNMS(KP414213562, T2J, T2K);
356
 
                                        T2L = FMA(KP414213562, T2K, T2J);
357
 
                                        T2C = FMA(KP414213562, T2q, T2x);
358
 
                                        T2y = FNMS(KP414213562, T2x, T2q);
359
 
                                        T3J = FMA(KP707106781, T3I, T3H);
360
 
                                        T3L = FNMS(KP707106781, T3I, T3H);
361
 
                                        T2T = T2R - T2S;
362
 
                                        T3K = T2S + T2R;
363
 
                                        T3G = T2y - T2j;
364
 
                                        T2z = T2j + T2y;
365
 
                                        T3M = T2O - T2L;
366
 
                                        T2P = T2L + T2O;
367
 
                                        ci[WS(rs, 14)] = FMA(KP923879532, T3K, T3J);
368
 
                                        cr[WS(rs, 9)] = FMS(KP923879532, T3K, T3J);
369
 
                                        ci[0] = FMA(KP923879532, T2z, T24);
370
 
                                        cr[WS(rs, 7)] = FNMS(KP923879532, T2z, T24);
371
 
                                        cr[WS(rs, 13)] = FMS(KP923879532, T3M, T3L);
372
 
                                        ci[WS(rs, 10)] = FMA(KP923879532, T3M, T3L);
373
 
                                        T2D = T2B + T2C;
374
 
                                        T3E = T2C - T2B;
375
 
                                   }
376
 
                                   T2Q = FNMS(KP707106781, T2H, T2E);
377
 
                                   T2I = FMA(KP707106781, T2H, T2E);
378
 
                                   T3F = FNMS(KP707106781, T3C, T3B);
379
 
                                   T3D = FMA(KP707106781, T3C, T3B);
380
 
                                   cr[WS(rs, 3)] = FMA(KP923879532, T2D, T2A);
381
 
                                   ci[WS(rs, 4)] = FNMS(KP923879532, T2D, T2A);
382
 
                                   cr[WS(rs, 1)] = FMA(KP923879532, T2P, T2I);
383
 
                                   ci[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I);
384
 
                                   ci[WS(rs, 8)] = FMA(KP923879532, T3E, T3D);
385
 
                                   cr[WS(rs, 15)] = FMS(KP923879532, T3E, T3D);
386
 
                                   ci[WS(rs, 12)] = FMA(KP923879532, T3G, T3F);
387
 
                                   cr[WS(rs, 11)] = FMS(KP923879532, T3G, T3F);
388
 
                              }
389
 
                         }
390
 
                    }
391
 
               }
 
42
     {
 
43
          INT m;
 
44
          for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
 
45
               E T2T, T2Q;
 
46
               {
 
47
                    E T3A, T3o, T8, T1I, T2w, T35, T2k, T1s, T2p, T36, T2r, T1F, T3k, T1N, T3z;
 
48
                    E Tl, T1U, T2W, T1P, Tz, T2g, T30, T25, T11, TB, TE, T2a, T31, T2h, T1e;
 
49
                    E TC, T1X, TH, TK, TG, TD, TJ;
 
50
                    {
 
51
                         E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti;
 
52
                         {
 
53
                              E T1h, T1k, T1n, T2s, T1i, T1q, T1m, T1j, T1p;
 
54
                              {
 
55
                                   E T1, T3n, T3, T6, T2, T5;
 
56
                                   T1 = cr[0];
 
57
                                   T3n = ci[0];
 
58
                                   T3 = cr[WS(rs, 8)];
 
59
                                   T6 = ci[WS(rs, 8)];
 
60
                                   T2 = W[14];
 
61
                                   T5 = W[15];
 
62
                                   {
 
63
                                        E T3l, T4, T1g, T3m, T7;
 
64
                                        T1h = cr[WS(rs, 15)];
 
65
                                        T1k = ci[WS(rs, 15)];
 
66
                                        T3l = T2 * T6;
 
67
                                        T4 = T2 * T3;
 
68
                                        T1g = W[28];
 
69
                                        T1n = cr[WS(rs, 7)];
 
70
                                        T3m = FNMS(T5, T3, T3l);
 
71
                                        T7 = FMA(T5, T6, T4);
 
72
                                        T2s = T1g * T1k;
 
73
                                        T1i = T1g * T1h;
 
74
                                        T3A = T3n - T3m;
 
75
                                        T3o = T3m + T3n;
 
76
                                        T8 = T1 + T7;
 
77
                                        T1I = T1 - T7;
 
78
                                        T1q = ci[WS(rs, 7)];
 
79
                                        T1m = W[12];
 
80
                                   }
 
81
                                   T1j = W[29];
 
82
                                   T1p = W[13];
 
83
                              }
 
84
                              {
 
85
                                   E T1u, T1x, T1v, T2l, T1A, T1D, T1z, T1w, T1C;
 
86
                                   {
 
87
                                        E T2t, T1l, T2v, T1r, T2u, T1o, T1t;
 
88
                                        T1u = cr[WS(rs, 3)];
 
89
                                        T2u = T1m * T1q;
 
90
                                        T1o = T1m * T1n;
 
91
                                        T2t = FNMS(T1j, T1h, T2s);
 
92
                                        T1l = FMA(T1j, T1k, T1i);
 
93
                                        T2v = FNMS(T1p, T1n, T2u);
 
94
                                        T1r = FMA(T1p, T1q, T1o);
 
95
                                        T1x = ci[WS(rs, 3)];
 
96
                                        T1t = W[4];
 
97
                                        T2w = T2t - T2v;
 
98
                                        T35 = T2t + T2v;
 
99
                                        T2k = T1l - T1r;
 
100
                                        T1s = T1l + T1r;
 
101
                                        T1v = T1t * T1u;
 
102
                                        T2l = T1t * T1x;
 
103
                                   }
 
104
                                   T1A = cr[WS(rs, 11)];
 
105
                                   T1D = ci[WS(rs, 11)];
 
106
                                   T1z = W[20];
 
107
                                   T1w = W[5];
 
108
                                   T1C = W[21];
 
109
                                   {
 
110
                                        E T2m, T1y, T2o, T1E, T2n, T1B, T9;
 
111
                                        Ta = cr[WS(rs, 4)];
 
112
                                        T2n = T1z * T1D;
 
113
                                        T1B = T1z * T1A;
 
114
                                        T2m = FNMS(T1w, T1u, T2l);
 
115
                                        T1y = FMA(T1w, T1x, T1v);
 
116
                                        T2o = FNMS(T1C, T1A, T2n);
 
117
                                        T1E = FMA(T1C, T1D, T1B);
 
118
                                        Td = ci[WS(rs, 4)];
 
119
                                        T9 = W[6];
 
120
                                        T2p = T2m - T2o;
 
121
                                        T36 = T2m + T2o;
 
122
                                        T2r = T1E - T1y;
 
123
                                        T1F = T1y + T1E;
 
124
                                        Tb = T9 * Ta;
 
125
                                        T1J = T9 * Td;
 
126
                                   }
 
127
                                   Tg = cr[WS(rs, 12)];
 
128
                                   Tj = ci[WS(rs, 12)];
 
129
                                   Tf = W[22];
 
130
                                   Tc = W[7];
 
131
                                   Ti = W[23];
 
132
                              }
 
133
                         }
 
134
                         {
 
135
                              E TQ, TT, TR, T2c, TW, TZ, TV, TS, TY;
 
136
                              {
 
137
                                   E To, Tr, Tp, T1Q, Tu, Tx, Tt, Tq, Tw;
 
138
                                   {
 
139
                                        E T1K, Te, T1M, Tk, T1L, Th, Tn;
 
140
                                        To = cr[WS(rs, 2)];
 
141
                                        T1L = Tf * Tj;
 
142
                                        Th = Tf * Tg;
 
143
                                        T1K = FNMS(Tc, Ta, T1J);
 
144
                                        Te = FMA(Tc, Td, Tb);
 
145
                                        T1M = FNMS(Ti, Tg, T1L);
 
146
                                        Tk = FMA(Ti, Tj, Th);
 
147
                                        Tr = ci[WS(rs, 2)];
 
148
                                        Tn = W[2];
 
149
                                        T3k = T1K + T1M;
 
150
                                        T1N = T1K - T1M;
 
151
                                        T3z = Te - Tk;
 
152
                                        Tl = Te + Tk;
 
153
                                        Tp = Tn * To;
 
154
                                        T1Q = Tn * Tr;
 
155
                                   }
 
156
                                   Tu = cr[WS(rs, 10)];
 
157
                                   Tx = ci[WS(rs, 10)];
 
158
                                   Tt = W[18];
 
159
                                   Tq = W[3];
 
160
                                   Tw = W[19];
 
161
                                   {
 
162
                                        E T1R, Ts, T1T, Ty, T1S, Tv, TP;
 
163
                                        TQ = cr[WS(rs, 1)];
 
164
                                        T1S = Tt * Tx;
 
165
                                        Tv = Tt * Tu;
 
166
                                        T1R = FNMS(Tq, To, T1Q);
 
167
                                        Ts = FMA(Tq, Tr, Tp);
 
168
                                        T1T = FNMS(Tw, Tu, T1S);
 
169
                                        Ty = FMA(Tw, Tx, Tv);
 
170
                                        TT = ci[WS(rs, 1)];
 
171
                                        TP = W[0];
 
172
                                        T1U = T1R - T1T;
 
173
                                        T2W = T1R + T1T;
 
174
                                        T1P = Ts - Ty;
 
175
                                        Tz = Ts + Ty;
 
176
                                        TR = TP * TQ;
 
177
                                        T2c = TP * TT;
 
178
                                   }
 
179
                                   TW = cr[WS(rs, 9)];
 
180
                                   TZ = ci[WS(rs, 9)];
 
181
                                   TV = W[16];
 
182
                                   TS = W[1];
 
183
                                   TY = W[17];
 
184
                              }
 
185
                              {
 
186
                                   E T13, T16, T14, T26, T19, T1c, T18, T15, T1b;
 
187
                                   {
 
188
                                        E T2d, TU, T2f, T10, T2e, TX, T12;
 
189
                                        T13 = cr[WS(rs, 5)];
 
190
                                        T2e = TV * TZ;
 
191
                                        TX = TV * TW;
 
192
                                        T2d = FNMS(TS, TQ, T2c);
 
193
                                        TU = FMA(TS, TT, TR);
 
194
                                        T2f = FNMS(TY, TW, T2e);
 
195
                                        T10 = FMA(TY, TZ, TX);
 
196
                                        T16 = ci[WS(rs, 5)];
 
197
                                        T12 = W[8];
 
198
                                        T2g = T2d - T2f;
 
199
                                        T30 = T2d + T2f;
 
200
                                        T25 = TU - T10;
 
201
                                        T11 = TU + T10;
 
202
                                        T14 = T12 * T13;
 
203
                                        T26 = T12 * T16;
 
204
                                   }
 
205
                                   T19 = cr[WS(rs, 13)];
 
206
                                   T1c = ci[WS(rs, 13)];
 
207
                                   T18 = W[24];
 
208
                                   T15 = W[9];
 
209
                                   T1b = W[25];
 
210
                                   {
 
211
                                        E T27, T17, T29, T1d, T28, T1a, TA;
 
212
                                        TB = cr[WS(rs, 14)];
 
213
                                        T28 = T18 * T1c;
 
214
                                        T1a = T18 * T19;
 
215
                                        T27 = FNMS(T15, T13, T26);
 
216
                                        T17 = FMA(T15, T16, T14);
 
217
                                        T29 = FNMS(T1b, T19, T28);
 
218
                                        T1d = FMA(T1b, T1c, T1a);
 
219
                                        TE = ci[WS(rs, 14)];
 
220
                                        TA = W[26];
 
221
                                        T2a = T27 - T29;
 
222
                                        T31 = T27 + T29;
 
223
                                        T2h = T17 - T1d;
 
224
                                        T1e = T17 + T1d;
 
225
                                        TC = TA * TB;
 
226
                                        T1X = TA * TE;
 
227
                                   }
 
228
                                   TH = cr[WS(rs, 6)];
 
229
                                   TK = ci[WS(rs, 6)];
 
230
                                   TG = W[10];
 
231
                                   TD = W[27];
 
232
                                   TJ = W[11];
 
233
                              }
 
234
                         }
 
235
                    }
 
236
                    {
 
237
                         E T2U, T3u, T2Z, T21, T1W, T34, T2X, T37, T3t, T3q, T3e, T32, T3i, T3h;
 
238
                         {
 
239
                              E T3f, T3r, T1H, T3s, TO, T3g;
 
240
                              {
 
241
                                   E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI;
 
242
                                   T2U = T8 - Tl;
 
243
                                   Tm = T8 + Tl;
 
244
                                   T1Z = TG * TK;
 
245
                                   TI = TG * TH;
 
246
                                   T1Y = FNMS(TD, TB, T1X);
 
247
                                   TF = FMA(TD, TE, TC);
 
248
                                   T20 = FNMS(TJ, TH, T1Z);
 
249
                                   TL = FMA(TJ, TK, TI);
 
250
                                   T3p = T3k + T3o;
 
251
                                   T3u = T3o - T3k;
 
252
                                   {
 
253
                                        E T1f, TM, T1G, T3j, T2V, TN;
 
254
                                        T2Z = T11 - T1e;
 
255
                                        T1f = T11 + T1e;
 
256
                                        T21 = T1Y - T20;
 
257
                                        T2V = T1Y + T20;
 
258
                                        T1W = TF - TL;
 
259
                                        TM = TF + TL;
 
260
                                        T1G = T1s + T1F;
 
261
                                        T34 = T1s - T1F;
 
262
                                        T2X = T2V - T2W;
 
263
                                        T3j = T2W + T2V;
 
264
                                        T3f = T35 + T36;
 
265
                                        T37 = T35 - T36;
 
266
                                        T3t = Tz - TM;
 
267
                                        TN = Tz + TM;
 
268
                                        T3r = T1G - T1f;
 
269
                                        T1H = T1f + T1G;
 
270
                                        T3s = T3p - T3j;
 
271
                                        T3q = T3j + T3p;
 
272
                                        T3e = Tm - TN;
 
273
                                        TO = Tm + TN;
 
274
                                        T3g = T30 + T31;
 
275
                                        T32 = T30 - T31;
 
276
                                   }
 
277
                              }
 
278
                              cr[WS(rs, 12)] = T3r - T3s;
 
279
                              ci[WS(rs, 11)] = T3r + T3s;
 
280
                              ci[WS(rs, 7)] = TO - T1H;
 
281
                              T3i = T3g + T3f;
 
282
                              T3h = T3f - T3g;
 
283
                              cr[0] = TO + T1H;
 
284
                         }
 
285
                         {
 
286
                              E T3a, T2Y, T3x, T3v;
 
287
                              ci[WS(rs, 15)] = T3i + T3q;
 
288
                              cr[WS(rs, 8)] = T3i - T3q;
 
289
                              ci[WS(rs, 3)] = T3e + T3h;
 
290
                              cr[WS(rs, 4)] = T3e - T3h;
 
291
                              T3a = T2U + T2X;
 
292
                              T2Y = T2U - T2X;
 
293
                              T3x = T3u - T3t;
 
294
                              T3v = T3t + T3u;
 
295
                              {
 
296
                                   E T2E, T1O, T3B, T3H, T2q, T2x, T3I, T23, T2R, T2O, T2J, T2K, T3C, T2H, T2B;
 
297
                                   E T2j;
 
298
                                   {
 
299
                                        E T2F, T1V, T22, T2G;
 
300
                                        {
 
301
                                             E T3b, T33, T3c, T38;
 
302
                                             T2E = T1I + T1N;
 
303
                                             T1O = T1I - T1N;
 
304
                                             T3b = T2Z - T32;
 
305
                                             T33 = T2Z + T32;
 
306
                                             T3c = T34 + T37;
 
307
                                             T38 = T34 - T37;
 
308
                                             T3B = T3z + T3A;
 
309
                                             T3H = T3A - T3z;
 
310
                                             {
 
311
                                                  E T3d, T3y, T3w, T39;
 
312
                                                  T3d = T3b + T3c;
 
313
                                                  T3y = T3c - T3b;
 
314
                                                  T3w = T38 - T33;
 
315
                                                  T39 = T33 + T38;
 
316
                                                  ci[WS(rs, 1)] = FMA(KP707106781, T3d, T3a);
 
317
                                                  cr[WS(rs, 6)] = FNMS(KP707106781, T3d, T3a);
 
318
                                                  ci[WS(rs, 13)] = FMA(KP707106781, T3y, T3x);
 
319
                                                  cr[WS(rs, 10)] = FMS(KP707106781, T3y, T3x);
 
320
                                                  ci[WS(rs, 9)] = FMA(KP707106781, T3w, T3v);
 
321
                                                  cr[WS(rs, 14)] = FMS(KP707106781, T3w, T3v);
 
322
                                                  cr[WS(rs, 2)] = FMA(KP707106781, T39, T2Y);
 
323
                                                  ci[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y);
 
324
                                                  T2F = T1P + T1U;
 
325
                                                  T1V = T1P - T1U;
 
326
                                                  T22 = T1W + T21;
 
327
                                                  T2G = T1W - T21;
 
328
                                             }
 
329
                                        }
 
330
                                        {
 
331
                                             E T2M, T2N, T2b, T2i;
 
332
                                             T2q = T2k - T2p;
 
333
                                             T2M = T2k + T2p;
 
334
                                             T2N = T2w + T2r;
 
335
                                             T2x = T2r - T2w;
 
336
                                             T3I = T22 - T1V;
 
337
                                             T23 = T1V + T22;
 
338
                                             T2R = FMA(KP414213562, T2M, T2N);
 
339
                                             T2O = FNMS(KP414213562, T2N, T2M);
 
340
                                             T2J = T25 + T2a;
 
341
                                             T2b = T25 - T2a;
 
342
                                             T2i = T2g + T2h;
 
343
                                             T2K = T2g - T2h;
 
344
                                             T3C = T2F - T2G;
 
345
                                             T2H = T2F + T2G;
 
346
                                             T2B = FMA(KP414213562, T2b, T2i);
 
347
                                             T2j = FNMS(KP414213562, T2i, T2b);
 
348
                                        }
 
349
                                   }
 
350
                                   {
 
351
                                        E T2A, T3G, T2P, T2D, T3E, T3F, T3D, T2I;
 
352
                                        {
 
353
                                             E T24, T2L, T2C, T2y, T3J, T3L, T3K, T2S, T2z, T3M;
 
354
                                             T2A = FNMS(KP707106781, T23, T1O);
 
355
                                             T24 = FMA(KP707106781, T23, T1O);
 
356
                                             T2S = FNMS(KP414213562, T2J, T2K);
 
357
                                             T2L = FMA(KP414213562, T2K, T2J);
 
358
                                             T2C = FMA(KP414213562, T2q, T2x);
 
359
                                             T2y = FNMS(KP414213562, T2x, T2q);
 
360
                                             T3J = FMA(KP707106781, T3I, T3H);
 
361
                                             T3L = FNMS(KP707106781, T3I, T3H);
 
362
                                             T2T = T2R - T2S;
 
363
                                             T3K = T2S + T2R;
 
364
                                             T3G = T2y - T2j;
 
365
                                             T2z = T2j + T2y;
 
366
                                             T3M = T2O - T2L;
 
367
                                             T2P = T2L + T2O;
 
368
                                             ci[WS(rs, 14)] = FMA(KP923879532, T3K, T3J);
 
369
                                             cr[WS(rs, 9)] = FMS(KP923879532, T3K, T3J);
 
370
                                             ci[0] = FMA(KP923879532, T2z, T24);
 
371
                                             cr[WS(rs, 7)] = FNMS(KP923879532, T2z, T24);
 
372
                                             cr[WS(rs, 13)] = FMS(KP923879532, T3M, T3L);
 
373
                                             ci[WS(rs, 10)] = FMA(KP923879532, T3M, T3L);
 
374
                                             T2D = T2B + T2C;
 
375
                                             T3E = T2C - T2B;
 
376
                                        }
 
377
                                        T2Q = FNMS(KP707106781, T2H, T2E);
 
378
                                        T2I = FMA(KP707106781, T2H, T2E);
 
379
                                        T3F = FNMS(KP707106781, T3C, T3B);
 
380
                                        T3D = FMA(KP707106781, T3C, T3B);
 
381
                                        cr[WS(rs, 3)] = FMA(KP923879532, T2D, T2A);
 
382
                                        ci[WS(rs, 4)] = FNMS(KP923879532, T2D, T2A);
 
383
                                        cr[WS(rs, 1)] = FMA(KP923879532, T2P, T2I);
 
384
                                        ci[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I);
 
385
                                        ci[WS(rs, 8)] = FMA(KP923879532, T3E, T3D);
 
386
                                        cr[WS(rs, 15)] = FMS(KP923879532, T3E, T3D);
 
387
                                        ci[WS(rs, 12)] = FMA(KP923879532, T3G, T3F);
 
388
                                        cr[WS(rs, 11)] = FMS(KP923879532, T3G, T3F);
 
389
                                   }
 
390
                              }
 
391
                         }
 
392
                    }
 
393
               }
 
394
               ci[WS(rs, 2)] = FMA(KP923879532, T2T, T2Q);
 
395
               cr[WS(rs, 5)] = FNMS(KP923879532, T2T, T2Q);
392
396
          }
393
 
          ci[WS(rs, 2)] = FMA(KP923879532, T2T, T2Q);
394
 
          cr[WS(rs, 5)] = FNMS(KP923879532, T2T, T2Q);
395
397
     }
396
398
}
397
399
 
407
409
}
408
410
#else                           /* HAVE_FMA */
409
411
 
410
 
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */
 
412
/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */
411
413
 
412
414
/*
413
415
 * This function contains 174 FP additions, 84 FP multiplications,
421
423
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
422
424
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
423
425
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
424
 
     INT m;
425
 
     for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
426
 
          E T7, T38, T1t, T2U, Ti, T37, T1w, T2R, Tu, T2t, T1C, T2c, TF, T2s, T1H;
427
 
          E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2k, T24, T2j, TS, T13, T2w, T2x;
428
 
          E T2y, T2z, T1O, T2h, T1T, T2g;
429
 
          {
430
 
               E T1, T2T, T6, T2S;
431
 
               T1 = cr[0];
432
 
               T2T = ci[0];
433
 
               {
434
 
                    E T3, T5, T2, T4;
435
 
                    T3 = cr[WS(rs, 8)];
436
 
                    T5 = ci[WS(rs, 8)];
437
 
                    T2 = W[14];
438
 
                    T4 = W[15];
439
 
                    T6 = FMA(T2, T3, T4 * T5);
440
 
                    T2S = FNMS(T4, T3, T2 * T5);
441
 
               }
442
 
               T7 = T1 + T6;
443
 
               T38 = T2T - T2S;
444
 
               T1t = T1 - T6;
445
 
               T2U = T2S + T2T;
446
 
          }
447
 
          {
448
 
               E Tc, T1u, Th, T1v;
449
 
               {
450
 
                    E T9, Tb, T8, Ta;
451
 
                    T9 = cr[WS(rs, 4)];
452
 
                    Tb = ci[WS(rs, 4)];
453
 
                    T8 = W[6];
454
 
                    Ta = W[7];
455
 
                    Tc = FMA(T8, T9, Ta * Tb);
456
 
                    T1u = FNMS(Ta, T9, T8 * Tb);
457
 
               }
458
 
               {
459
 
                    E Te, Tg, Td, Tf;
460
 
                    Te = cr[WS(rs, 12)];
461
 
                    Tg = ci[WS(rs, 12)];
462
 
                    Td = W[22];
463
 
                    Tf = W[23];
464
 
                    Th = FMA(Td, Te, Tf * Tg);
465
 
                    T1v = FNMS(Tf, Te, Td * Tg);
466
 
               }
467
 
               Ti = Tc + Th;
468
 
               T37 = Tc - Th;
469
 
               T1w = T1u - T1v;
470
 
               T2R = T1u + T1v;
471
 
          }
472
 
          {
473
 
               E To, T1z, Tt, T1A, T1y, T1B;
474
 
               {
475
 
                    E Tl, Tn, Tk, Tm;
476
 
                    Tl = cr[WS(rs, 2)];
477
 
                    Tn = ci[WS(rs, 2)];
478
 
                    Tk = W[2];
479
 
                    Tm = W[3];
480
 
                    To = FMA(Tk, Tl, Tm * Tn);
481
 
                    T1z = FNMS(Tm, Tl, Tk * Tn);
482
 
               }
483
 
               {
484
 
                    E Tq, Ts, Tp, Tr;
485
 
                    Tq = cr[WS(rs, 10)];
486
 
                    Ts = ci[WS(rs, 10)];
487
 
                    Tp = W[18];
488
 
                    Tr = W[19];
489
 
                    Tt = FMA(Tp, Tq, Tr * Ts);
490
 
                    T1A = FNMS(Tr, Tq, Tp * Ts);
491
 
               }
492
 
               Tu = To + Tt;
493
 
               T2t = T1z + T1A;
494
 
               T1y = To - Tt;
495
 
               T1B = T1z - T1A;
496
 
               T1C = T1y - T1B;
497
 
               T2c = T1y + T1B;
498
 
          }
499
 
          {
500
 
               E Tz, T1E, TE, T1F, T1D, T1G;
501
 
               {
502
 
                    E Tw, Ty, Tv, Tx;
503
 
                    Tw = cr[WS(rs, 14)];
504
 
                    Ty = ci[WS(rs, 14)];
505
 
                    Tv = W[26];
506
 
                    Tx = W[27];
507
 
                    Tz = FMA(Tv, Tw, Tx * Ty);
508
 
                    T1E = FNMS(Tx, Tw, Tv * Ty);
509
 
               }
510
 
               {
511
 
                    E TB, TD, TA, TC;
512
 
                    TB = cr[WS(rs, 6)];
513
 
                    TD = ci[WS(rs, 6)];
514
 
                    TA = W[10];
515
 
                    TC = W[11];
516
 
                    TE = FMA(TA, TB, TC * TD);
517
 
                    T1F = FNMS(TC, TB, TA * TD);
518
 
               }
519
 
               TF = Tz + TE;
520
 
               T2s = T1E + T1F;
521
 
               T1D = Tz - TE;
522
 
               T1G = T1E - T1F;
523
 
               T1H = T1D + T1G;
524
 
               T2d = T1D - T1G;
525
 
          }
526
 
          {
527
 
               E T19, T1V, T1p, T22, T1e, T1W, T1k, T21;
528
 
               {
529
 
                    E T16, T18, T15, T17;
530
 
                    T16 = cr[WS(rs, 15)];
531
 
                    T18 = ci[WS(rs, 15)];
532
 
                    T15 = W[28];
533
 
                    T17 = W[29];
534
 
                    T19 = FMA(T15, T16, T17 * T18);
535
 
                    T1V = FNMS(T17, T16, T15 * T18);
536
 
               }
537
 
               {
538
 
                    E T1m, T1o, T1l, T1n;
539
 
                    T1m = cr[WS(rs, 11)];
540
 
                    T1o = ci[WS(rs, 11)];
541
 
                    T1l = W[20];
542
 
                    T1n = W[21];
543
 
                    T1p = FMA(T1l, T1m, T1n * T1o);
544
 
                    T22 = FNMS(T1n, T1m, T1l * T1o);
545
 
               }
546
 
               {
547
 
                    E T1b, T1d, T1a, T1c;
548
 
                    T1b = cr[WS(rs, 7)];
549
 
                    T1d = ci[WS(rs, 7)];
550
 
                    T1a = W[12];
551
 
                    T1c = W[13];
552
 
                    T1e = FMA(T1a, T1b, T1c * T1d);
553
 
                    T1W = FNMS(T1c, T1b, T1a * T1d);
554
 
               }
555
 
               {
556
 
                    E T1h, T1j, T1g, T1i;
557
 
                    T1h = cr[WS(rs, 3)];
558
 
                    T1j = ci[WS(rs, 3)];
559
 
                    T1g = W[4];
560
 
                    T1i = W[5];
561
 
                    T1k = FMA(T1g, T1h, T1i * T1j);
562
 
                    T21 = FNMS(T1i, T1h, T1g * T1j);
563
 
               }
564
 
               T1f = T19 + T1e;
565
 
               T1q = T1k + T1p;
566
 
               T2B = T1f - T1q;
567
 
               T2C = T1V + T1W;
568
 
               T2D = T21 + T22;
569
 
               T2E = T2C - T2D;
570
 
               {
571
 
                    E T1X, T1Y, T20, T23;
572
 
                    T1X = T1V - T1W;
573
 
                    T1Y = T1k - T1p;
574
 
                    T1Z = T1X + T1Y;
575
 
                    T2k = T1X - T1Y;
576
 
                    T20 = T19 - T1e;
577
 
                    T23 = T21 - T22;
578
 
                    T24 = T20 - T23;
579
 
                    T2j = T20 + T23;
580
 
               }
581
 
          }
582
 
          {
583
 
               E TM, T1P, T12, T1M, TR, T1Q, TX, T1L;
584
 
               {
585
 
                    E TJ, TL, TI, TK;
586
 
                    TJ = cr[WS(rs, 1)];
587
 
                    TL = ci[WS(rs, 1)];
588
 
                    TI = W[0];
589
 
                    TK = W[1];
590
 
                    TM = FMA(TI, TJ, TK * TL);
591
 
                    T1P = FNMS(TK, TJ, TI * TL);
592
 
               }
593
 
               {
594
 
                    E TZ, T11, TY, T10;
595
 
                    TZ = cr[WS(rs, 13)];
596
 
                    T11 = ci[WS(rs, 13)];
597
 
                    TY = W[24];
598
 
                    T10 = W[25];
599
 
                    T12 = FMA(TY, TZ, T10 * T11);
600
 
                    T1M = FNMS(T10, TZ, TY * T11);
601
 
               }
602
 
               {
603
 
                    E TO, TQ, TN, TP;
604
 
                    TO = cr[WS(rs, 9)];
605
 
                    TQ = ci[WS(rs, 9)];
606
 
                    TN = W[16];
607
 
                    TP = W[17];
608
 
                    TR = FMA(TN, TO, TP * TQ);
609
 
                    T1Q = FNMS(TP, TO, TN * TQ);
610
 
               }
611
 
               {
612
 
                    E TU, TW, TT, TV;
613
 
                    TU = cr[WS(rs, 5)];
614
 
                    TW = ci[WS(rs, 5)];
615
 
                    TT = W[8];
616
 
                    TV = W[9];
617
 
                    TX = FMA(TT, TU, TV * TW);
618
 
                    T1L = FNMS(TV, TU, TT * TW);
619
 
               }
620
 
               TS = TM + TR;
621
 
               T13 = TX + T12;
622
 
               T2w = TS - T13;
623
 
               T2x = T1P + T1Q;
624
 
               T2y = T1L + T1M;
625
 
               T2z = T2x - T2y;
626
 
               {
627
 
                    E T1K, T1N, T1R, T1S;
628
 
                    T1K = TM - TR;
629
 
                    T1N = T1L - T1M;
630
 
                    T1O = T1K - T1N;
631
 
                    T2h = T1K + T1N;
632
 
                    T1R = T1P - T1Q;
633
 
                    T1S = TX - T12;
634
 
                    T1T = T1R + T1S;
635
 
                    T2g = T1R - T1S;
636
 
               }
637
 
          }
638
 
          {
639
 
               E T1J, T27, T3a, T3c, T26, T3b, T2a, T35;
640
 
               {
641
 
                    E T1x, T1I, T36, T39;
642
 
                    T1x = T1t - T1w;
643
 
                    T1I = KP707106781 * (T1C + T1H);
644
 
                    T1J = T1x + T1I;
645
 
                    T27 = T1x - T1I;
646
 
                    T36 = KP707106781 * (T2c - T2d);
647
 
                    T39 = T37 + T38;
648
 
                    T3a = T36 + T39;
649
 
                    T3c = T39 - T36;
650
 
               }
651
 
               {
652
 
                    E T1U, T25, T28, T29;
653
 
                    T1U = FNMS(KP382683432, T1T, KP923879532 * T1O);
654
 
                    T25 = FMA(KP382683432, T1Z, KP923879532 * T24);
655
 
                    T26 = T1U + T25;
656
 
                    T3b = T25 - T1U;
657
 
                    T28 = FMA(KP923879532, T1T, KP382683432 * T1O);
658
 
                    T29 = FNMS(KP923879532, T1Z, KP382683432 * T24);
659
 
                    T2a = T28 + T29;
660
 
                    T35 = T29 - T28;
661
 
               }
662
 
               cr[WS(rs, 7)] = T1J - T26;
663
 
               cr[WS(rs, 11)] = T3b - T3c;
664
 
               ci[WS(rs, 12)] = T3b + T3c;
665
 
               ci[0] = T1J + T26;
666
 
               ci[WS(rs, 4)] = T27 - T2a;
667
 
               cr[WS(rs, 15)] = T35 - T3a;
668
 
               ci[WS(rs, 8)] = T35 + T3a;
669
 
               cr[WS(rs, 3)] = T27 + T2a;
670
 
          }
671
 
          {
672
 
               E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
673
 
               {
674
 
                    E Tj, TG, T2Q, T2V;
675
 
                    Tj = T7 + Ti;
676
 
                    TG = Tu + TF;
677
 
                    TH = Tj + TG;
678
 
                    T2L = Tj - TG;
679
 
                    T2Q = T2t + T2s;
680
 
                    T2V = T2R + T2U;
681
 
                    T2W = T2Q + T2V;
682
 
                    T2Y = T2V - T2Q;
683
 
               }
684
 
               {
685
 
                    E T14, T1r, T2M, T2N;
686
 
                    T14 = TS + T13;
687
 
                    T1r = T1f + T1q;
688
 
                    T1s = T14 + T1r;
689
 
                    T2X = T1r - T14;
690
 
                    T2M = T2C + T2D;
691
 
                    T2N = T2x + T2y;
692
 
                    T2O = T2M - T2N;
693
 
                    T2P = T2N + T2M;
694
 
               }
695
 
               ci[WS(rs, 7)] = TH - T1s;
696
 
               cr[WS(rs, 12)] = T2X - T2Y;
697
 
               ci[WS(rs, 11)] = T2X + T2Y;
698
 
               cr[0] = TH + T1s;
699
 
               cr[WS(rs, 4)] = T2L - T2O;
700
 
               cr[WS(rs, 8)] = T2P - T2W;
701
 
               ci[WS(rs, 15)] = T2P + T2W;
702
 
               ci[WS(rs, 3)] = T2L + T2O;
703
 
          }
704
 
          {
705
 
               E T2f, T2n, T3g, T3i, T2m, T3h, T2q, T3d;
706
 
               {
707
 
                    E T2b, T2e, T3e, T3f;
708
 
                    T2b = T1t + T1w;
709
 
                    T2e = KP707106781 * (T2c + T2d);
710
 
                    T2f = T2b + T2e;
711
 
                    T2n = T2b - T2e;
712
 
                    T3e = KP707106781 * (T1H - T1C);
713
 
                    T3f = T38 - T37;
714
 
                    T3g = T3e + T3f;
715
 
                    T3i = T3f - T3e;
716
 
               }
717
 
               {
718
 
                    E T2i, T2l, T2o, T2p;
719
 
                    T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
720
 
                    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
721
 
                    T2m = T2i + T2l;
722
 
                    T3h = T2l - T2i;
723
 
                    T2o = FNMS(KP923879532, T2g, KP382683432 * T2h);
724
 
                    T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
725
 
                    T2q = T2o + T2p;
726
 
                    T3d = T2p - T2o;
727
 
               }
728
 
               ci[WS(rs, 6)] = T2f - T2m;
729
 
               cr[WS(rs, 13)] = T3h - T3i;
730
 
               ci[WS(rs, 10)] = T3h + T3i;
731
 
               cr[WS(rs, 1)] = T2f + T2m;
732
 
               cr[WS(rs, 5)] = T2n - T2q;
733
 
               cr[WS(rs, 9)] = T3d - T3g;
734
 
               ci[WS(rs, 14)] = T3d + T3g;
735
 
               ci[WS(rs, 2)] = T2n + T2q;
736
 
          }
737
 
          {
738
 
               E T2v, T2H, T32, T34, T2G, T2Z, T2K, T33;
739
 
               {
740
 
                    E T2r, T2u, T30, T31;
741
 
                    T2r = T7 - Ti;
742
 
                    T2u = T2s - T2t;
743
 
                    T2v = T2r - T2u;
744
 
                    T2H = T2r + T2u;
745
 
                    T30 = Tu - TF;
746
 
                    T31 = T2U - T2R;
747
 
                    T32 = T30 + T31;
748
 
                    T34 = T31 - T30;
749
 
               }
750
 
               {
751
 
                    E T2A, T2F, T2I, T2J;
752
 
                    T2A = T2w + T2z;
753
 
                    T2F = T2B - T2E;
754
 
                    T2G = KP707106781 * (T2A + T2F);
755
 
                    T2Z = KP707106781 * (T2F - T2A);
756
 
                    T2I = T2w - T2z;
757
 
                    T2J = T2B + T2E;
758
 
                    T2K = KP707106781 * (T2I + T2J);
759
 
                    T33 = KP707106781 * (T2J - T2I);
760
 
               }
761
 
               ci[WS(rs, 5)] = T2v - T2G;
762
 
               cr[WS(rs, 10)] = T33 - T34;
763
 
               ci[WS(rs, 13)] = T33 + T34;
764
 
               cr[WS(rs, 2)] = T2v + T2G;
765
 
               cr[WS(rs, 6)] = T2H - T2K;
766
 
               cr[WS(rs, 14)] = T2Z - T32;
767
 
               ci[WS(rs, 9)] = T2Z + T32;
768
 
               ci[WS(rs, 1)] = T2H + T2K;
 
426
     {
 
427
          INT m;
 
428
          for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
 
429
               E T7, T38, T1t, T2U, Ti, T37, T1w, T2R, Tu, T2t, T1C, T2c, TF, T2s, T1H;
 
430
               E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2k, T24, T2j, TS, T13, T2w, T2x;
 
431
               E T2y, T2z, T1O, T2h, T1T, T2g;
 
432
               {
 
433
                    E T1, T2T, T6, T2S;
 
434
                    T1 = cr[0];
 
435
                    T2T = ci[0];
 
436
                    {
 
437
                         E T3, T5, T2, T4;
 
438
                         T3 = cr[WS(rs, 8)];
 
439
                         T5 = ci[WS(rs, 8)];
 
440
                         T2 = W[14];
 
441
                         T4 = W[15];
 
442
                         T6 = FMA(T2, T3, T4 * T5);
 
443
                         T2S = FNMS(T4, T3, T2 * T5);
 
444
                    }
 
445
                    T7 = T1 + T6;
 
446
                    T38 = T2T - T2S;
 
447
                    T1t = T1 - T6;
 
448
                    T2U = T2S + T2T;
 
449
               }
 
450
               {
 
451
                    E Tc, T1u, Th, T1v;
 
452
                    {
 
453
                         E T9, Tb, T8, Ta;
 
454
                         T9 = cr[WS(rs, 4)];
 
455
                         Tb = ci[WS(rs, 4)];
 
456
                         T8 = W[6];
 
457
                         Ta = W[7];
 
458
                         Tc = FMA(T8, T9, Ta * Tb);
 
459
                         T1u = FNMS(Ta, T9, T8 * Tb);
 
460
                    }
 
461
                    {
 
462
                         E Te, Tg, Td, Tf;
 
463
                         Te = cr[WS(rs, 12)];
 
464
                         Tg = ci[WS(rs, 12)];
 
465
                         Td = W[22];
 
466
                         Tf = W[23];
 
467
                         Th = FMA(Td, Te, Tf * Tg);
 
468
                         T1v = FNMS(Tf, Te, Td * Tg);
 
469
                    }
 
470
                    Ti = Tc + Th;
 
471
                    T37 = Tc - Th;
 
472
                    T1w = T1u - T1v;
 
473
                    T2R = T1u + T1v;
 
474
               }
 
475
               {
 
476
                    E To, T1z, Tt, T1A, T1y, T1B;
 
477
                    {
 
478
                         E Tl, Tn, Tk, Tm;
 
479
                         Tl = cr[WS(rs, 2)];
 
480
                         Tn = ci[WS(rs, 2)];
 
481
                         Tk = W[2];
 
482
                         Tm = W[3];
 
483
                         To = FMA(Tk, Tl, Tm * Tn);
 
484
                         T1z = FNMS(Tm, Tl, Tk * Tn);
 
485
                    }
 
486
                    {
 
487
                         E Tq, Ts, Tp, Tr;
 
488
                         Tq = cr[WS(rs, 10)];
 
489
                         Ts = ci[WS(rs, 10)];
 
490
                         Tp = W[18];
 
491
                         Tr = W[19];
 
492
                         Tt = FMA(Tp, Tq, Tr * Ts);
 
493
                         T1A = FNMS(Tr, Tq, Tp * Ts);
 
494
                    }
 
495
                    Tu = To + Tt;
 
496
                    T2t = T1z + T1A;
 
497
                    T1y = To - Tt;
 
498
                    T1B = T1z - T1A;
 
499
                    T1C = T1y - T1B;
 
500
                    T2c = T1y + T1B;
 
501
               }
 
502
               {
 
503
                    E Tz, T1E, TE, T1F, T1D, T1G;
 
504
                    {
 
505
                         E Tw, Ty, Tv, Tx;
 
506
                         Tw = cr[WS(rs, 14)];
 
507
                         Ty = ci[WS(rs, 14)];
 
508
                         Tv = W[26];
 
509
                         Tx = W[27];
 
510
                         Tz = FMA(Tv, Tw, Tx * Ty);
 
511
                         T1E = FNMS(Tx, Tw, Tv * Ty);
 
512
                    }
 
513
                    {
 
514
                         E TB, TD, TA, TC;
 
515
                         TB = cr[WS(rs, 6)];
 
516
                         TD = ci[WS(rs, 6)];
 
517
                         TA = W[10];
 
518
                         TC = W[11];
 
519
                         TE = FMA(TA, TB, TC * TD);
 
520
                         T1F = FNMS(TC, TB, TA * TD);
 
521
                    }
 
522
                    TF = Tz + TE;
 
523
                    T2s = T1E + T1F;
 
524
                    T1D = Tz - TE;
 
525
                    T1G = T1E - T1F;
 
526
                    T1H = T1D + T1G;
 
527
                    T2d = T1D - T1G;
 
528
               }
 
529
               {
 
530
                    E T19, T1V, T1p, T22, T1e, T1W, T1k, T21;
 
531
                    {
 
532
                         E T16, T18, T15, T17;
 
533
                         T16 = cr[WS(rs, 15)];
 
534
                         T18 = ci[WS(rs, 15)];
 
535
                         T15 = W[28];
 
536
                         T17 = W[29];
 
537
                         T19 = FMA(T15, T16, T17 * T18);
 
538
                         T1V = FNMS(T17, T16, T15 * T18);
 
539
                    }
 
540
                    {
 
541
                         E T1m, T1o, T1l, T1n;
 
542
                         T1m = cr[WS(rs, 11)];
 
543
                         T1o = ci[WS(rs, 11)];
 
544
                         T1l = W[20];
 
545
                         T1n = W[21];
 
546
                         T1p = FMA(T1l, T1m, T1n * T1o);
 
547
                         T22 = FNMS(T1n, T1m, T1l * T1o);
 
548
                    }
 
549
                    {
 
550
                         E T1b, T1d, T1a, T1c;
 
551
                         T1b = cr[WS(rs, 7)];
 
552
                         T1d = ci[WS(rs, 7)];
 
553
                         T1a = W[12];
 
554
                         T1c = W[13];
 
555
                         T1e = FMA(T1a, T1b, T1c * T1d);
 
556
                         T1W = FNMS(T1c, T1b, T1a * T1d);
 
557
                    }
 
558
                    {
 
559
                         E T1h, T1j, T1g, T1i;
 
560
                         T1h = cr[WS(rs, 3)];
 
561
                         T1j = ci[WS(rs, 3)];
 
562
                         T1g = W[4];
 
563
                         T1i = W[5];
 
564
                         T1k = FMA(T1g, T1h, T1i * T1j);
 
565
                         T21 = FNMS(T1i, T1h, T1g * T1j);
 
566
                    }
 
567
                    T1f = T19 + T1e;
 
568
                    T1q = T1k + T1p;
 
569
                    T2B = T1f - T1q;
 
570
                    T2C = T1V + T1W;
 
571
                    T2D = T21 + T22;
 
572
                    T2E = T2C - T2D;
 
573
                    {
 
574
                         E T1X, T1Y, T20, T23;
 
575
                         T1X = T1V - T1W;
 
576
                         T1Y = T1k - T1p;
 
577
                         T1Z = T1X + T1Y;
 
578
                         T2k = T1X - T1Y;
 
579
                         T20 = T19 - T1e;
 
580
                         T23 = T21 - T22;
 
581
                         T24 = T20 - T23;
 
582
                         T2j = T20 + T23;
 
583
                    }
 
584
               }
 
585
               {
 
586
                    E TM, T1P, T12, T1M, TR, T1Q, TX, T1L;
 
587
                    {
 
588
                         E TJ, TL, TI, TK;
 
589
                         TJ = cr[WS(rs, 1)];
 
590
                         TL = ci[WS(rs, 1)];
 
591
                         TI = W[0];
 
592
                         TK = W[1];
 
593
                         TM = FMA(TI, TJ, TK * TL);
 
594
                         T1P = FNMS(TK, TJ, TI * TL);
 
595
                    }
 
596
                    {
 
597
                         E TZ, T11, TY, T10;
 
598
                         TZ = cr[WS(rs, 13)];
 
599
                         T11 = ci[WS(rs, 13)];
 
600
                         TY = W[24];
 
601
                         T10 = W[25];
 
602
                         T12 = FMA(TY, TZ, T10 * T11);
 
603
                         T1M = FNMS(T10, TZ, TY * T11);
 
604
                    }
 
605
                    {
 
606
                         E TO, TQ, TN, TP;
 
607
                         TO = cr[WS(rs, 9)];
 
608
                         TQ = ci[WS(rs, 9)];
 
609
                         TN = W[16];
 
610
                         TP = W[17];
 
611
                         TR = FMA(TN, TO, TP * TQ);
 
612
                         T1Q = FNMS(TP, TO, TN * TQ);
 
613
                    }
 
614
                    {
 
615
                         E TU, TW, TT, TV;
 
616
                         TU = cr[WS(rs, 5)];
 
617
                         TW = ci[WS(rs, 5)];
 
618
                         TT = W[8];
 
619
                         TV = W[9];
 
620
                         TX = FMA(TT, TU, TV * TW);
 
621
                         T1L = FNMS(TV, TU, TT * TW);
 
622
                    }
 
623
                    TS = TM + TR;
 
624
                    T13 = TX + T12;
 
625
                    T2w = TS - T13;
 
626
                    T2x = T1P + T1Q;
 
627
                    T2y = T1L + T1M;
 
628
                    T2z = T2x - T2y;
 
629
                    {
 
630
                         E T1K, T1N, T1R, T1S;
 
631
                         T1K = TM - TR;
 
632
                         T1N = T1L - T1M;
 
633
                         T1O = T1K - T1N;
 
634
                         T2h = T1K + T1N;
 
635
                         T1R = T1P - T1Q;
 
636
                         T1S = TX - T12;
 
637
                         T1T = T1R + T1S;
 
638
                         T2g = T1R - T1S;
 
639
                    }
 
640
               }
 
641
               {
 
642
                    E T1J, T27, T3a, T3c, T26, T3b, T2a, T35;
 
643
                    {
 
644
                         E T1x, T1I, T36, T39;
 
645
                         T1x = T1t - T1w;
 
646
                         T1I = KP707106781 * (T1C + T1H);
 
647
                         T1J = T1x + T1I;
 
648
                         T27 = T1x - T1I;
 
649
                         T36 = KP707106781 * (T2c - T2d);
 
650
                         T39 = T37 + T38;
 
651
                         T3a = T36 + T39;
 
652
                         T3c = T39 - T36;
 
653
                    }
 
654
                    {
 
655
                         E T1U, T25, T28, T29;
 
656
                         T1U = FNMS(KP382683432, T1T, KP923879532 * T1O);
 
657
                         T25 = FMA(KP382683432, T1Z, KP923879532 * T24);
 
658
                         T26 = T1U + T25;
 
659
                         T3b = T25 - T1U;
 
660
                         T28 = FMA(KP923879532, T1T, KP382683432 * T1O);
 
661
                         T29 = FNMS(KP923879532, T1Z, KP382683432 * T24);
 
662
                         T2a = T28 + T29;
 
663
                         T35 = T29 - T28;
 
664
                    }
 
665
                    cr[WS(rs, 7)] = T1J - T26;
 
666
                    cr[WS(rs, 11)] = T3b - T3c;
 
667
                    ci[WS(rs, 12)] = T3b + T3c;
 
668
                    ci[0] = T1J + T26;
 
669
                    ci[WS(rs, 4)] = T27 - T2a;
 
670
                    cr[WS(rs, 15)] = T35 - T3a;
 
671
                    ci[WS(rs, 8)] = T35 + T3a;
 
672
                    cr[WS(rs, 3)] = T27 + T2a;
 
673
               }
 
674
               {
 
675
                    E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
 
676
                    {
 
677
                         E Tj, TG, T2Q, T2V;
 
678
                         Tj = T7 + Ti;
 
679
                         TG = Tu + TF;
 
680
                         TH = Tj + TG;
 
681
                         T2L = Tj - TG;
 
682
                         T2Q = T2t + T2s;
 
683
                         T2V = T2R + T2U;
 
684
                         T2W = T2Q + T2V;
 
685
                         T2Y = T2V - T2Q;
 
686
                    }
 
687
                    {
 
688
                         E T14, T1r, T2M, T2N;
 
689
                         T14 = TS + T13;
 
690
                         T1r = T1f + T1q;
 
691
                         T1s = T14 + T1r;
 
692
                         T2X = T1r - T14;
 
693
                         T2M = T2C + T2D;
 
694
                         T2N = T2x + T2y;
 
695
                         T2O = T2M - T2N;
 
696
                         T2P = T2N + T2M;
 
697
                    }
 
698
                    ci[WS(rs, 7)] = TH - T1s;
 
699
                    cr[WS(rs, 12)] = T2X - T2Y;
 
700
                    ci[WS(rs, 11)] = T2X + T2Y;
 
701
                    cr[0] = TH + T1s;
 
702
                    cr[WS(rs, 4)] = T2L - T2O;
 
703
                    cr[WS(rs, 8)] = T2P - T2W;
 
704
                    ci[WS(rs, 15)] = T2P + T2W;
 
705
                    ci[WS(rs, 3)] = T2L + T2O;
 
706
               }
 
707
               {
 
708
                    E T2f, T2n, T3g, T3i, T2m, T3h, T2q, T3d;
 
709
                    {
 
710
                         E T2b, T2e, T3e, T3f;
 
711
                         T2b = T1t + T1w;
 
712
                         T2e = KP707106781 * (T2c + T2d);
 
713
                         T2f = T2b + T2e;
 
714
                         T2n = T2b - T2e;
 
715
                         T3e = KP707106781 * (T1H - T1C);
 
716
                         T3f = T38 - T37;
 
717
                         T3g = T3e + T3f;
 
718
                         T3i = T3f - T3e;
 
719
                    }
 
720
                    {
 
721
                         E T2i, T2l, T2o, T2p;
 
722
                         T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
 
723
                         T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
 
724
                         T2m = T2i + T2l;
 
725
                         T3h = T2l - T2i;
 
726
                         T2o = FNMS(KP923879532, T2g, KP382683432 * T2h);
 
727
                         T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
 
728
                         T2q = T2o + T2p;
 
729
                         T3d = T2p - T2o;
 
730
                    }
 
731
                    ci[WS(rs, 6)] = T2f - T2m;
 
732
                    cr[WS(rs, 13)] = T3h - T3i;
 
733
                    ci[WS(rs, 10)] = T3h + T3i;
 
734
                    cr[WS(rs, 1)] = T2f + T2m;
 
735
                    cr[WS(rs, 5)] = T2n - T2q;
 
736
                    cr[WS(rs, 9)] = T3d - T3g;
 
737
                    ci[WS(rs, 14)] = T3d + T3g;
 
738
                    ci[WS(rs, 2)] = T2n + T2q;
 
739
               }
 
740
               {
 
741
                    E T2v, T2H, T32, T34, T2G, T2Z, T2K, T33;
 
742
                    {
 
743
                         E T2r, T2u, T30, T31;
 
744
                         T2r = T7 - Ti;
 
745
                         T2u = T2s - T2t;
 
746
                         T2v = T2r - T2u;
 
747
                         T2H = T2r + T2u;
 
748
                         T30 = Tu - TF;
 
749
                         T31 = T2U - T2R;
 
750
                         T32 = T30 + T31;
 
751
                         T34 = T31 - T30;
 
752
                    }
 
753
                    {
 
754
                         E T2A, T2F, T2I, T2J;
 
755
                         T2A = T2w + T2z;
 
756
                         T2F = T2B - T2E;
 
757
                         T2G = KP707106781 * (T2A + T2F);
 
758
                         T2Z = KP707106781 * (T2F - T2A);
 
759
                         T2I = T2w - T2z;
 
760
                         T2J = T2B + T2E;
 
761
                         T2K = KP707106781 * (T2I + T2J);
 
762
                         T33 = KP707106781 * (T2J - T2I);
 
763
                    }
 
764
                    ci[WS(rs, 5)] = T2v - T2G;
 
765
                    cr[WS(rs, 10)] = T33 - T34;
 
766
                    ci[WS(rs, 13)] = T33 + T34;
 
767
                    cr[WS(rs, 2)] = T2v + T2G;
 
768
                    cr[WS(rs, 6)] = T2H - T2K;
 
769
                    cr[WS(rs, 14)] = T2Z - T32;
 
770
                    ci[WS(rs, 9)] = T2Z + T32;
 
771
                    ci[WS(rs, 1)] = T2H + T2K;
 
772
               }
769
773
          }
770
774
     }
771
775
}