39
39
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
40
40
DK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
41
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
43
for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
46
E T3A, T3o, T8, T1I, T2w, T35, T2k, T1s, T2p, T36, T2r, T1F, T3k, T1N, T3z;
47
E Tl, T1U, T2W, T1P, Tz, T2g, T30, T25, T11, TB, TE, T2a, T31, T2h, T1e;
48
E TC, T1X, TH, TK, TG, TD, TJ;
50
E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti;
52
E T1h, T1k, T1n, T2s, T1i, T1q, T1m, T1j, T1p;
54
E T1, T3n, T3, T6, T2, T5;
62
E T3l, T4, T1g, T3m, T7;
69
T3m = FNMS(T5, T3, T3l);
84
E T1u, T1x, T1v, T2l, T1A, T1D, T1z, T1w, T1C;
86
E T2t, T1l, T2v, T1r, T2u, T1o, T1t;
90
T2t = FNMS(T1j, T1h, T2s);
91
T1l = FMA(T1j, T1k, T1i);
92
T2v = FNMS(T1p, T1n, T2u);
93
T1r = FMA(T1p, T1q, T1o);
103
T1A = cr[WS(rs, 11)];
104
T1D = ci[WS(rs, 11)];
109
E T2m, T1y, T2o, T1E, T2n, T1B, T9;
113
T2m = FNMS(T1w, T1u, T2l);
114
T1y = FMA(T1w, T1x, T1v);
115
T2o = FNMS(T1C, T1A, T2n);
116
T1E = FMA(T1C, T1D, T1B);
134
E TQ, TT, TR, T2c, TW, TZ, TV, TS, TY;
136
E To, Tr, Tp, T1Q, Tu, Tx, Tt, Tq, Tw;
138
E T1K, Te, T1M, Tk, T1L, Th, Tn;
142
T1K = FNMS(Tc, Ta, T1J);
143
Te = FMA(Tc, Td, Tb);
144
T1M = FNMS(Ti, Tg, T1L);
145
Tk = FMA(Ti, Tj, Th);
161
E T1R, Ts, T1T, Ty, T1S, Tv, TP;
165
T1R = FNMS(Tq, To, T1Q);
166
Ts = FMA(Tq, Tr, Tp);
167
T1T = FNMS(Tw, Tu, T1S);
168
Ty = FMA(Tw, Tx, Tv);
185
E T13, T16, T14, T26, T19, T1c, T18, T15, T1b;
187
E T2d, TU, T2f, T10, T2e, TX, T12;
191
T2d = FNMS(TS, TQ, T2c);
192
TU = FMA(TS, TT, TR);
193
T2f = FNMS(TY, TW, T2e);
194
T10 = FMA(TY, TZ, TX);
204
T19 = cr[WS(rs, 13)];
205
T1c = ci[WS(rs, 13)];
210
E T27, T17, T29, T1d, T28, T1a, TA;
214
T27 = FNMS(T15, T13, T26);
215
T17 = FMA(T15, T16, T14);
216
T29 = FNMS(T1b, T19, T28);
217
T1d = FMA(T1b, T1c, T1a);
236
E T2U, T3u, T2Z, T21, T1W, T34, T2X, T37, T3t, T3q, T3e, T32, T3i, T3h;
238
E T3f, T3r, T1H, T3s, TO, T3g;
240
E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI;
245
T1Y = FNMS(TD, TB, T1X);
246
TF = FMA(TD, TE, TC);
247
T20 = FNMS(TJ, TH, T1Z);
248
TL = FMA(TJ, TK, TI);
252
E T1f, TM, T1G, T3j, T2V, TN;
277
cr[WS(rs, 12)] = T3r - T3s;
278
ci[WS(rs, 11)] = T3r + T3s;
279
ci[WS(rs, 7)] = TO - T1H;
285
E T3a, T2Y, T3x, T3v;
286
ci[WS(rs, 15)] = T3i + T3q;
287
cr[WS(rs, 8)] = T3i - T3q;
288
ci[WS(rs, 3)] = T3e + T3h;
289
cr[WS(rs, 4)] = T3e - T3h;
295
E T2E, T1O, T3B, T3H, T2q, T2x, T3I, T23, T2R, T2O, T2J, T2K, T3C, T2H, T2B;
298
E T2F, T1V, T22, T2G;
300
E T3b, T33, T3c, T38;
310
E T3d, T3y, T3w, T39;
315
ci[WS(rs, 1)] = FMA(KP707106781, T3d, T3a);
316
cr[WS(rs, 6)] = FNMS(KP707106781, T3d, T3a);
317
ci[WS(rs, 13)] = FMA(KP707106781, T3y, T3x);
318
cr[WS(rs, 10)] = FMS(KP707106781, T3y, T3x);
319
ci[WS(rs, 9)] = FMA(KP707106781, T3w, T3v);
320
cr[WS(rs, 14)] = FMS(KP707106781, T3w, T3v);
321
cr[WS(rs, 2)] = FMA(KP707106781, T39, T2Y);
322
ci[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y);
330
E T2M, T2N, T2b, T2i;
337
T2R = FMA(KP414213562, T2M, T2N);
338
T2O = FNMS(KP414213562, T2N, T2M);
345
T2B = FMA(KP414213562, T2b, T2i);
346
T2j = FNMS(KP414213562, T2i, T2b);
350
E T2A, T3G, T2P, T2D, T3E, T3F, T3D, T2I;
352
E T24, T2L, T2C, T2y, T3J, T3L, T3K, T2S, T2z, T3M;
353
T2A = FNMS(KP707106781, T23, T1O);
354
T24 = FMA(KP707106781, T23, T1O);
355
T2S = FNMS(KP414213562, T2J, T2K);
356
T2L = FMA(KP414213562, T2K, T2J);
357
T2C = FMA(KP414213562, T2q, T2x);
358
T2y = FNMS(KP414213562, T2x, T2q);
359
T3J = FMA(KP707106781, T3I, T3H);
360
T3L = FNMS(KP707106781, T3I, T3H);
367
ci[WS(rs, 14)] = FMA(KP923879532, T3K, T3J);
368
cr[WS(rs, 9)] = FMS(KP923879532, T3K, T3J);
369
ci[0] = FMA(KP923879532, T2z, T24);
370
cr[WS(rs, 7)] = FNMS(KP923879532, T2z, T24);
371
cr[WS(rs, 13)] = FMS(KP923879532, T3M, T3L);
372
ci[WS(rs, 10)] = FMA(KP923879532, T3M, T3L);
376
T2Q = FNMS(KP707106781, T2H, T2E);
377
T2I = FMA(KP707106781, T2H, T2E);
378
T3F = FNMS(KP707106781, T3C, T3B);
379
T3D = FMA(KP707106781, T3C, T3B);
380
cr[WS(rs, 3)] = FMA(KP923879532, T2D, T2A);
381
ci[WS(rs, 4)] = FNMS(KP923879532, T2D, T2A);
382
cr[WS(rs, 1)] = FMA(KP923879532, T2P, T2I);
383
ci[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I);
384
ci[WS(rs, 8)] = FMA(KP923879532, T3E, T3D);
385
cr[WS(rs, 15)] = FMS(KP923879532, T3E, T3D);
386
ci[WS(rs, 12)] = FMA(KP923879532, T3G, T3F);
387
cr[WS(rs, 11)] = FMS(KP923879532, T3G, T3F);
44
for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
47
E T3A, T3o, T8, T1I, T2w, T35, T2k, T1s, T2p, T36, T2r, T1F, T3k, T1N, T3z;
48
E Tl, T1U, T2W, T1P, Tz, T2g, T30, T25, T11, TB, TE, T2a, T31, T2h, T1e;
49
E TC, T1X, TH, TK, TG, TD, TJ;
51
E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti;
53
E T1h, T1k, T1n, T2s, T1i, T1q, T1m, T1j, T1p;
55
E T1, T3n, T3, T6, T2, T5;
63
E T3l, T4, T1g, T3m, T7;
70
T3m = FNMS(T5, T3, T3l);
85
E T1u, T1x, T1v, T2l, T1A, T1D, T1z, T1w, T1C;
87
E T2t, T1l, T2v, T1r, T2u, T1o, T1t;
91
T2t = FNMS(T1j, T1h, T2s);
92
T1l = FMA(T1j, T1k, T1i);
93
T2v = FNMS(T1p, T1n, T2u);
94
T1r = FMA(T1p, T1q, T1o);
104
T1A = cr[WS(rs, 11)];
105
T1D = ci[WS(rs, 11)];
110
E T2m, T1y, T2o, T1E, T2n, T1B, T9;
114
T2m = FNMS(T1w, T1u, T2l);
115
T1y = FMA(T1w, T1x, T1v);
116
T2o = FNMS(T1C, T1A, T2n);
117
T1E = FMA(T1C, T1D, T1B);
135
E TQ, TT, TR, T2c, TW, TZ, TV, TS, TY;
137
E To, Tr, Tp, T1Q, Tu, Tx, Tt, Tq, Tw;
139
E T1K, Te, T1M, Tk, T1L, Th, Tn;
143
T1K = FNMS(Tc, Ta, T1J);
144
Te = FMA(Tc, Td, Tb);
145
T1M = FNMS(Ti, Tg, T1L);
146
Tk = FMA(Ti, Tj, Th);
162
E T1R, Ts, T1T, Ty, T1S, Tv, TP;
166
T1R = FNMS(Tq, To, T1Q);
167
Ts = FMA(Tq, Tr, Tp);
168
T1T = FNMS(Tw, Tu, T1S);
169
Ty = FMA(Tw, Tx, Tv);
186
E T13, T16, T14, T26, T19, T1c, T18, T15, T1b;
188
E T2d, TU, T2f, T10, T2e, TX, T12;
192
T2d = FNMS(TS, TQ, T2c);
193
TU = FMA(TS, TT, TR);
194
T2f = FNMS(TY, TW, T2e);
195
T10 = FMA(TY, TZ, TX);
205
T19 = cr[WS(rs, 13)];
206
T1c = ci[WS(rs, 13)];
211
E T27, T17, T29, T1d, T28, T1a, TA;
215
T27 = FNMS(T15, T13, T26);
216
T17 = FMA(T15, T16, T14);
217
T29 = FNMS(T1b, T19, T28);
218
T1d = FMA(T1b, T1c, T1a);
237
E T2U, T3u, T2Z, T21, T1W, T34, T2X, T37, T3t, T3q, T3e, T32, T3i, T3h;
239
E T3f, T3r, T1H, T3s, TO, T3g;
241
E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI;
246
T1Y = FNMS(TD, TB, T1X);
247
TF = FMA(TD, TE, TC);
248
T20 = FNMS(TJ, TH, T1Z);
249
TL = FMA(TJ, TK, TI);
253
E T1f, TM, T1G, T3j, T2V, TN;
278
cr[WS(rs, 12)] = T3r - T3s;
279
ci[WS(rs, 11)] = T3r + T3s;
280
ci[WS(rs, 7)] = TO - T1H;
286
E T3a, T2Y, T3x, T3v;
287
ci[WS(rs, 15)] = T3i + T3q;
288
cr[WS(rs, 8)] = T3i - T3q;
289
ci[WS(rs, 3)] = T3e + T3h;
290
cr[WS(rs, 4)] = T3e - T3h;
296
E T2E, T1O, T3B, T3H, T2q, T2x, T3I, T23, T2R, T2O, T2J, T2K, T3C, T2H, T2B;
299
E T2F, T1V, T22, T2G;
301
E T3b, T33, T3c, T38;
311
E T3d, T3y, T3w, T39;
316
ci[WS(rs, 1)] = FMA(KP707106781, T3d, T3a);
317
cr[WS(rs, 6)] = FNMS(KP707106781, T3d, T3a);
318
ci[WS(rs, 13)] = FMA(KP707106781, T3y, T3x);
319
cr[WS(rs, 10)] = FMS(KP707106781, T3y, T3x);
320
ci[WS(rs, 9)] = FMA(KP707106781, T3w, T3v);
321
cr[WS(rs, 14)] = FMS(KP707106781, T3w, T3v);
322
cr[WS(rs, 2)] = FMA(KP707106781, T39, T2Y);
323
ci[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y);
331
E T2M, T2N, T2b, T2i;
338
T2R = FMA(KP414213562, T2M, T2N);
339
T2O = FNMS(KP414213562, T2N, T2M);
346
T2B = FMA(KP414213562, T2b, T2i);
347
T2j = FNMS(KP414213562, T2i, T2b);
351
E T2A, T3G, T2P, T2D, T3E, T3F, T3D, T2I;
353
E T24, T2L, T2C, T2y, T3J, T3L, T3K, T2S, T2z, T3M;
354
T2A = FNMS(KP707106781, T23, T1O);
355
T24 = FMA(KP707106781, T23, T1O);
356
T2S = FNMS(KP414213562, T2J, T2K);
357
T2L = FMA(KP414213562, T2K, T2J);
358
T2C = FMA(KP414213562, T2q, T2x);
359
T2y = FNMS(KP414213562, T2x, T2q);
360
T3J = FMA(KP707106781, T3I, T3H);
361
T3L = FNMS(KP707106781, T3I, T3H);
368
ci[WS(rs, 14)] = FMA(KP923879532, T3K, T3J);
369
cr[WS(rs, 9)] = FMS(KP923879532, T3K, T3J);
370
ci[0] = FMA(KP923879532, T2z, T24);
371
cr[WS(rs, 7)] = FNMS(KP923879532, T2z, T24);
372
cr[WS(rs, 13)] = FMS(KP923879532, T3M, T3L);
373
ci[WS(rs, 10)] = FMA(KP923879532, T3M, T3L);
377
T2Q = FNMS(KP707106781, T2H, T2E);
378
T2I = FMA(KP707106781, T2H, T2E);
379
T3F = FNMS(KP707106781, T3C, T3B);
380
T3D = FMA(KP707106781, T3C, T3B);
381
cr[WS(rs, 3)] = FMA(KP923879532, T2D, T2A);
382
ci[WS(rs, 4)] = FNMS(KP923879532, T2D, T2A);
383
cr[WS(rs, 1)] = FMA(KP923879532, T2P, T2I);
384
ci[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I);
385
ci[WS(rs, 8)] = FMA(KP923879532, T3E, T3D);
386
cr[WS(rs, 15)] = FMS(KP923879532, T3E, T3D);
387
ci[WS(rs, 12)] = FMA(KP923879532, T3G, T3F);
388
cr[WS(rs, 11)] = FMS(KP923879532, T3G, T3F);
394
ci[WS(rs, 2)] = FMA(KP923879532, T2T, T2Q);
395
cr[WS(rs, 5)] = FNMS(KP923879532, T2T, T2Q);
393
ci[WS(rs, 2)] = FMA(KP923879532, T2T, T2Q);
394
cr[WS(rs, 5)] = FNMS(KP923879532, T2T, T2Q);
421
423
DK(KP382683432, +0.382683432365089771728459984030398866761344562);
422
424
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
423
425
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
425
for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
426
E T7, T38, T1t, T2U, Ti, T37, T1w, T2R, Tu, T2t, T1C, T2c, TF, T2s, T1H;
427
E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2k, T24, T2j, TS, T13, T2w, T2x;
428
E T2y, T2z, T1O, T2h, T1T, T2g;
439
T6 = FMA(T2, T3, T4 * T5);
440
T2S = FNMS(T4, T3, T2 * T5);
455
Tc = FMA(T8, T9, Ta * Tb);
456
T1u = FNMS(Ta, T9, T8 * Tb);
464
Th = FMA(Td, Te, Tf * Tg);
465
T1v = FNMS(Tf, Te, Td * Tg);
473
E To, T1z, Tt, T1A, T1y, T1B;
480
To = FMA(Tk, Tl, Tm * Tn);
481
T1z = FNMS(Tm, Tl, Tk * Tn);
489
Tt = FMA(Tp, Tq, Tr * Ts);
490
T1A = FNMS(Tr, Tq, Tp * Ts);
500
E Tz, T1E, TE, T1F, T1D, T1G;
507
Tz = FMA(Tv, Tw, Tx * Ty);
508
T1E = FNMS(Tx, Tw, Tv * Ty);
516
TE = FMA(TA, TB, TC * TD);
517
T1F = FNMS(TC, TB, TA * TD);
527
E T19, T1V, T1p, T22, T1e, T1W, T1k, T21;
529
E T16, T18, T15, T17;
530
T16 = cr[WS(rs, 15)];
531
T18 = ci[WS(rs, 15)];
534
T19 = FMA(T15, T16, T17 * T18);
535
T1V = FNMS(T17, T16, T15 * T18);
538
E T1m, T1o, T1l, T1n;
539
T1m = cr[WS(rs, 11)];
540
T1o = ci[WS(rs, 11)];
543
T1p = FMA(T1l, T1m, T1n * T1o);
544
T22 = FNMS(T1n, T1m, T1l * T1o);
547
E T1b, T1d, T1a, T1c;
552
T1e = FMA(T1a, T1b, T1c * T1d);
553
T1W = FNMS(T1c, T1b, T1a * T1d);
556
E T1h, T1j, T1g, T1i;
561
T1k = FMA(T1g, T1h, T1i * T1j);
562
T21 = FNMS(T1i, T1h, T1g * T1j);
571
E T1X, T1Y, T20, T23;
583
E TM, T1P, T12, T1M, TR, T1Q, TX, T1L;
590
TM = FMA(TI, TJ, TK * TL);
591
T1P = FNMS(TK, TJ, TI * TL);
596
T11 = ci[WS(rs, 13)];
599
T12 = FMA(TY, TZ, T10 * T11);
600
T1M = FNMS(T10, TZ, TY * T11);
608
TR = FMA(TN, TO, TP * TQ);
609
T1Q = FNMS(TP, TO, TN * TQ);
617
TX = FMA(TT, TU, TV * TW);
618
T1L = FNMS(TV, TU, TT * TW);
627
E T1K, T1N, T1R, T1S;
639
E T1J, T27, T3a, T3c, T26, T3b, T2a, T35;
641
E T1x, T1I, T36, T39;
643
T1I = KP707106781 * (T1C + T1H);
646
T36 = KP707106781 * (T2c - T2d);
652
E T1U, T25, T28, T29;
653
T1U = FNMS(KP382683432, T1T, KP923879532 * T1O);
654
T25 = FMA(KP382683432, T1Z, KP923879532 * T24);
657
T28 = FMA(KP923879532, T1T, KP382683432 * T1O);
658
T29 = FNMS(KP923879532, T1Z, KP382683432 * T24);
662
cr[WS(rs, 7)] = T1J - T26;
663
cr[WS(rs, 11)] = T3b - T3c;
664
ci[WS(rs, 12)] = T3b + T3c;
666
ci[WS(rs, 4)] = T27 - T2a;
667
cr[WS(rs, 15)] = T35 - T3a;
668
ci[WS(rs, 8)] = T35 + T3a;
669
cr[WS(rs, 3)] = T27 + T2a;
672
E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
685
E T14, T1r, T2M, T2N;
695
ci[WS(rs, 7)] = TH - T1s;
696
cr[WS(rs, 12)] = T2X - T2Y;
697
ci[WS(rs, 11)] = T2X + T2Y;
699
cr[WS(rs, 4)] = T2L - T2O;
700
cr[WS(rs, 8)] = T2P - T2W;
701
ci[WS(rs, 15)] = T2P + T2W;
702
ci[WS(rs, 3)] = T2L + T2O;
705
E T2f, T2n, T3g, T3i, T2m, T3h, T2q, T3d;
707
E T2b, T2e, T3e, T3f;
709
T2e = KP707106781 * (T2c + T2d);
712
T3e = KP707106781 * (T1H - T1C);
718
E T2i, T2l, T2o, T2p;
719
T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
720
T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
723
T2o = FNMS(KP923879532, T2g, KP382683432 * T2h);
724
T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
728
ci[WS(rs, 6)] = T2f - T2m;
729
cr[WS(rs, 13)] = T3h - T3i;
730
ci[WS(rs, 10)] = T3h + T3i;
731
cr[WS(rs, 1)] = T2f + T2m;
732
cr[WS(rs, 5)] = T2n - T2q;
733
cr[WS(rs, 9)] = T3d - T3g;
734
ci[WS(rs, 14)] = T3d + T3g;
735
ci[WS(rs, 2)] = T2n + T2q;
738
E T2v, T2H, T32, T34, T2G, T2Z, T2K, T33;
740
E T2r, T2u, T30, T31;
751
E T2A, T2F, T2I, T2J;
754
T2G = KP707106781 * (T2A + T2F);
755
T2Z = KP707106781 * (T2F - T2A);
758
T2K = KP707106781 * (T2I + T2J);
759
T33 = KP707106781 * (T2J - T2I);
761
ci[WS(rs, 5)] = T2v - T2G;
762
cr[WS(rs, 10)] = T33 - T34;
763
ci[WS(rs, 13)] = T33 + T34;
764
cr[WS(rs, 2)] = T2v + T2G;
765
cr[WS(rs, 6)] = T2H - T2K;
766
cr[WS(rs, 14)] = T2Z - T32;
767
ci[WS(rs, 9)] = T2Z + T32;
768
ci[WS(rs, 1)] = T2H + T2K;
428
for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
429
E T7, T38, T1t, T2U, Ti, T37, T1w, T2R, Tu, T2t, T1C, T2c, TF, T2s, T1H;
430
E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2k, T24, T2j, TS, T13, T2w, T2x;
431
E T2y, T2z, T1O, T2h, T1T, T2g;
442
T6 = FMA(T2, T3, T4 * T5);
443
T2S = FNMS(T4, T3, T2 * T5);
458
Tc = FMA(T8, T9, Ta * Tb);
459
T1u = FNMS(Ta, T9, T8 * Tb);
467
Th = FMA(Td, Te, Tf * Tg);
468
T1v = FNMS(Tf, Te, Td * Tg);
476
E To, T1z, Tt, T1A, T1y, T1B;
483
To = FMA(Tk, Tl, Tm * Tn);
484
T1z = FNMS(Tm, Tl, Tk * Tn);
492
Tt = FMA(Tp, Tq, Tr * Ts);
493
T1A = FNMS(Tr, Tq, Tp * Ts);
503
E Tz, T1E, TE, T1F, T1D, T1G;
510
Tz = FMA(Tv, Tw, Tx * Ty);
511
T1E = FNMS(Tx, Tw, Tv * Ty);
519
TE = FMA(TA, TB, TC * TD);
520
T1F = FNMS(TC, TB, TA * TD);
530
E T19, T1V, T1p, T22, T1e, T1W, T1k, T21;
532
E T16, T18, T15, T17;
533
T16 = cr[WS(rs, 15)];
534
T18 = ci[WS(rs, 15)];
537
T19 = FMA(T15, T16, T17 * T18);
538
T1V = FNMS(T17, T16, T15 * T18);
541
E T1m, T1o, T1l, T1n;
542
T1m = cr[WS(rs, 11)];
543
T1o = ci[WS(rs, 11)];
546
T1p = FMA(T1l, T1m, T1n * T1o);
547
T22 = FNMS(T1n, T1m, T1l * T1o);
550
E T1b, T1d, T1a, T1c;
555
T1e = FMA(T1a, T1b, T1c * T1d);
556
T1W = FNMS(T1c, T1b, T1a * T1d);
559
E T1h, T1j, T1g, T1i;
564
T1k = FMA(T1g, T1h, T1i * T1j);
565
T21 = FNMS(T1i, T1h, T1g * T1j);
574
E T1X, T1Y, T20, T23;
586
E TM, T1P, T12, T1M, TR, T1Q, TX, T1L;
593
TM = FMA(TI, TJ, TK * TL);
594
T1P = FNMS(TK, TJ, TI * TL);
599
T11 = ci[WS(rs, 13)];
602
T12 = FMA(TY, TZ, T10 * T11);
603
T1M = FNMS(T10, TZ, TY * T11);
611
TR = FMA(TN, TO, TP * TQ);
612
T1Q = FNMS(TP, TO, TN * TQ);
620
TX = FMA(TT, TU, TV * TW);
621
T1L = FNMS(TV, TU, TT * TW);
630
E T1K, T1N, T1R, T1S;
642
E T1J, T27, T3a, T3c, T26, T3b, T2a, T35;
644
E T1x, T1I, T36, T39;
646
T1I = KP707106781 * (T1C + T1H);
649
T36 = KP707106781 * (T2c - T2d);
655
E T1U, T25, T28, T29;
656
T1U = FNMS(KP382683432, T1T, KP923879532 * T1O);
657
T25 = FMA(KP382683432, T1Z, KP923879532 * T24);
660
T28 = FMA(KP923879532, T1T, KP382683432 * T1O);
661
T29 = FNMS(KP923879532, T1Z, KP382683432 * T24);
665
cr[WS(rs, 7)] = T1J - T26;
666
cr[WS(rs, 11)] = T3b - T3c;
667
ci[WS(rs, 12)] = T3b + T3c;
669
ci[WS(rs, 4)] = T27 - T2a;
670
cr[WS(rs, 15)] = T35 - T3a;
671
ci[WS(rs, 8)] = T35 + T3a;
672
cr[WS(rs, 3)] = T27 + T2a;
675
E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
688
E T14, T1r, T2M, T2N;
698
ci[WS(rs, 7)] = TH - T1s;
699
cr[WS(rs, 12)] = T2X - T2Y;
700
ci[WS(rs, 11)] = T2X + T2Y;
702
cr[WS(rs, 4)] = T2L - T2O;
703
cr[WS(rs, 8)] = T2P - T2W;
704
ci[WS(rs, 15)] = T2P + T2W;
705
ci[WS(rs, 3)] = T2L + T2O;
708
E T2f, T2n, T3g, T3i, T2m, T3h, T2q, T3d;
710
E T2b, T2e, T3e, T3f;
712
T2e = KP707106781 * (T2c + T2d);
715
T3e = KP707106781 * (T1H - T1C);
721
E T2i, T2l, T2o, T2p;
722
T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
723
T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
726
T2o = FNMS(KP923879532, T2g, KP382683432 * T2h);
727
T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
731
ci[WS(rs, 6)] = T2f - T2m;
732
cr[WS(rs, 13)] = T3h - T3i;
733
ci[WS(rs, 10)] = T3h + T3i;
734
cr[WS(rs, 1)] = T2f + T2m;
735
cr[WS(rs, 5)] = T2n - T2q;
736
cr[WS(rs, 9)] = T3d - T3g;
737
ci[WS(rs, 14)] = T3d + T3g;
738
ci[WS(rs, 2)] = T2n + T2q;
741
E T2v, T2H, T32, T34, T2G, T2Z, T2K, T33;
743
E T2r, T2u, T30, T31;
754
E T2A, T2F, T2I, T2J;
757
T2G = KP707106781 * (T2A + T2F);
758
T2Z = KP707106781 * (T2F - T2A);
761
T2K = KP707106781 * (T2I + T2J);
762
T33 = KP707106781 * (T2J - T2I);
764
ci[WS(rs, 5)] = T2v - T2G;
765
cr[WS(rs, 10)] = T33 - T34;
766
ci[WS(rs, 13)] = T33 + T34;
767
cr[WS(rs, 2)] = T2v + T2G;
768
cr[WS(rs, 6)] = T2H - T2K;
769
cr[WS(rs, 14)] = T2Z - T32;
770
ci[WS(rs, 9)] = T2Z + T32;
771
ci[WS(rs, 1)] = T2H + T2K;