43
43
DK(KP198912367, +0.198912367379658006911597622644676228597850501);
44
44
DK(KP414213562, +0.414213562373095048801688724209698078569671875);
45
45
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
47
for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
50
E T8y, T87, T8, T3w, T83, T3B, T8x, Tl, T6G, Tz, T3J, T5T, T6F, TM, T3Q;
51
E T5U, T46, T5X, T7E, T6M, T5Y, T3Z, T6J, T1f, T7D, T6R, T61, T4e, T6O, T1G;
52
E T60, T4l, T54, T6c, T7d, T7N, T32, T76, T6f, T5r, T4v, T65, T72, T7I, T29;
53
E T6V, T68, T4S, T5t, T5b, T7O, T79, T7e, T3t, T5s, T5i, T4H, T2y, T4B, T6X;
56
E T44, T1d, T3X, T6K, T11, T40, T42, T17, T5h, T5c;
58
E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti;
60
E T1, T86, T3, T6, T2, T5;
68
E T84, T4, T9, T85, T7;
75
T85 = FNMS(T5, T3, T84);
90
E Tu, Tx, T3F, Ts, Tw, T3G, Tv;
92
E To, Tr, Tp, T3E, Tq, Tt;
94
E T3y, Te, T3A, Tk, T3z, Th, Tn;
98
T3y = FNMS(Tc, Ta, T3x);
100
T3A = FNMS(Ti, Tg, T3z);
101
Tk = FMA(Ti, Tj, Th);
115
T3F = FNMS(Tq, To, T3E);
116
Ts = FMA(Tq, Tr, Tp);
122
E T3M, TF, TH, TK, TG, TJ, TE, TD, TC;
124
E TB, T3H, Ty, TA, T3I, T3D, T3L;
127
T3H = FNMS(Tw, Tu, T3G);
128
Ty = FMA(Tw, Tx, Tv);
139
T3M = FNMS(TD, TB, T3L);
141
TF = FMA(TD, TE, TC);
147
E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14;
149
E T19, T1c, T18, T1b, T3P, T3K;
151
E TQ, TT, T3N, TI, TP, TS;
160
T3O = FNMS(TJ, TH, T3N);
161
TL = FMA(TJ, TK, TI);
168
TU = FMA(TS, TT, TR);
169
T3U = FNMS(TS, TQ, T3T);
174
T19 = cr[WS(rs, 26)];
175
T1c = ci[WS(rs, 26)];
179
E TW, TZ, TY, T3V, TX, T43, T1a, TV;
186
T44 = FNMS(T1b, T19, T43);
187
T1d = FMA(T1b, T1c, T1a);
190
T13 = cr[WS(rs, 10)];
191
T16 = ci[WS(rs, 10)];
192
T3W = FNMS(TY, TW, T3V);
193
T10 = FMA(TY, TZ, TX);
204
T42 = FNMS(T15, T13, T41);
205
T17 = FMA(T15, T16, T14);
211
E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v;
213
E T1A, T1D, T1C, T4i, T1B;
215
E T1h, T1k, T1g, T1j, T48, T1i, T1z;
216
T1h = cr[WS(rs, 30)];
217
T1k = ci[WS(rs, 30)];
219
E T6L, T45, T1e, T3Y;
235
T1A = cr[WS(rs, 22)];
236
T1D = ci[WS(rs, 22)];
241
T49 = FNMS(T1j, T1h, T48);
242
T1l = FMA(T1j, T1k, T1i);
247
E T1n, T1q, T1m, T1p, T4a, T1o, T1t;
248
T1n = cr[WS(rs, 14)];
249
T1q = ci[WS(rs, 14)];
250
T4j = FNMS(T1C, T1A, T4i);
251
T1E = FMA(T1C, T1D, T1B);
260
T4b = FNMS(T1p, T1n, T4a);
261
T1r = FMA(T1p, T1q, T1o);
267
E T4c, T6P, T1s, T4f, T4h, T1y;
272
T4h = FNMS(T1w, T1u, T4g);
273
T1y = FMA(T1w, T1x, T1v);
275
E T4k, T6Q, T4d, T1F;
292
E T5n, T2H, T52, T30, T2Q, T2T, T2S, T5p, T2N, T4Z, T2R;
294
E T2W, T2Z, T2Y, T51, T2X;
296
E T2D, T2G, T2C, T2F, T5m, T2E, T2V;
297
T2D = cr[WS(rs, 31)];
298
T2G = ci[WS(rs, 31)];
301
T2W = cr[WS(rs, 23)];
302
T2Z = ci[WS(rs, 23)];
307
T5n = FNMS(T2F, T2D, T5m);
308
T2H = FMA(T2F, T2G, T2E);
313
E T2J, T2M, T2I, T2L, T5o, T2K, T2P;
314
T2J = cr[WS(rs, 15)];
315
T2M = ci[WS(rs, 15)];
316
T52 = FNMS(T2Y, T2W, T51);
317
T30 = FMA(T2Y, T2Z, T2X);
326
T5p = FNMS(T2L, T2J, T5o);
327
T2N = FMA(T2L, T2M, T2K);
333
E T5q, T7b, T2O, T4Y, T50, T2U;
338
T50 = FNMS(T2S, T2Q, T4Z);
339
T2U = FMA(T2S, T2T, T2R);
341
E T7c, T53, T31, T5l;
358
E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y;
360
E T23, T26, T25, T4s, T24;
362
E T1K, T1N, T1J, T1M, T4M, T1L, T22;
367
T23 = cr[WS(rs, 25)];
368
T26 = ci[WS(rs, 25)];
373
T4N = FNMS(T1M, T1K, T4M);
374
T1O = FMA(T1M, T1N, T1L);
379
E T1Q, T1T, T1P, T1S, T4O, T1R, T1W;
380
T1Q = cr[WS(rs, 17)];
381
T1T = ci[WS(rs, 17)];
382
T4t = FNMS(T25, T23, T4s);
383
T27 = FMA(T25, T26, T24);
392
T4P = FNMS(T1S, T1Q, T4O);
393
T1U = FMA(T1S, T1T, T1R);
399
E T4Q, T70, T1V, T4p, T4r, T21;
404
T4r = FNMS(T1Z, T1X, T4q);
405
T21 = FMA(T1Z, T20, T1Y);
407
E T71, T4u, T4R, T28;
424
E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i;
426
E T3n, T3q, T3p, T5f, T3o;
428
E T34, T37, T33, T36, T56, T35, T3m;
433
T3n = cr[WS(rs, 11)];
434
T3q = ci[WS(rs, 11)];
439
T57 = FNMS(T36, T34, T56);
440
T38 = FMA(T36, T37, T35);
445
E T3a, T3d, T39, T3c, T58, T3b, T3g;
446
T3a = cr[WS(rs, 19)];
447
T3d = ci[WS(rs, 19)];
448
T5g = FNMS(T3p, T3n, T5f);
449
T3r = FMA(T3p, T3q, T3o);
452
T3h = cr[WS(rs, 27)];
453
T3k = ci[WS(rs, 27)];
458
T59 = FNMS(T3c, T3a, T58);
459
T3e = FMA(T3c, T3d, T3b);
465
E T5a, T78, T3f, T55, T5e, T3l, T77, T3s;
470
T5e = FNMS(T3j, T3h, T5d);
471
T3l = FMA(T3j, T3k, T3i);
485
E T4y, T2f, T2o, T2r, T4A, T2l, T2n, T2q, T4E, T2p;
487
E T2u, T2x, T2t, T2w;
489
E T2b, T2e, T2d, T4x, T2c, T2a;
498
T2u = cr[WS(rs, 13)];
499
T2x = ci[WS(rs, 13)];
500
T4y = FNMS(T2d, T2b, T4x);
501
T2f = FMA(T2d, T2e, T2c);
506
E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g;
507
T2h = cr[WS(rs, 21)];
508
T2k = ci[WS(rs, 21)];
513
T4H = FNMS(T2w, T2u, T4G);
514
T2y = FMA(T2w, T2x, T2v);
517
T2o = cr[WS(rs, 29)];
518
T2r = ci[WS(rs, 29)];
519
T4A = FNMS(T2j, T2h, T4z);
520
T2l = FMA(T2j, T2k, T2i);
531
T4F = FNMS(T2q, T2o, T4E);
532
T2s = FMA(T2q, T2r, T2p);
536
E T6E, T8j, T6Y, T73, T6H, T8k, T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T5O;
539
E T4T, T4C, T4J, T4U, T7S, T7V;
541
E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A;
542
E T7P, T7K, T7W, T1I;
544
E T7X, T7Y, T7J, T82, T88;
546
E Tm, T4I, T6W, T4D, T2z, TN;
580
E T7H, T7M, T2B, T3u;
597
E T8g, T8h, T8f, T8i;
599
E T7R, T8c, T8a, T7G, T81, T7F;
605
ci[WS(rs, 15)] = T1I - T3v;
606
ci[WS(rs, 7)] = T7W + T7Z;
607
cr[WS(rs, 8)] = T7W - T7Z;
614
ci[WS(rs, 23)] = T8b + T8c;
615
cr[WS(rs, 24)] = T8b - T8c;
616
ci[WS(rs, 31)] = T80 + T8a;
617
cr[WS(rs, 16)] = T80 - T8a;
618
cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
619
ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
623
ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f);
624
cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f);
625
ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h);
626
cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h);
630
E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T4L, T5H, T5M, T5Q, T5A, T5w, T4V;
632
E T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u;
637
T5D = FNMS(KP414213562, T3Z, T46);
638
T47 = FMA(KP414213562, T46, T3Z);
639
ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S);
640
cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S);
641
T5C = FMA(KP707106781, T3R, T3C);
642
T3S = FNMS(KP707106781, T3R, T3C);
643
T4m = FNMS(KP414213562, T4l, T4e);
644
T5E = FMA(KP414213562, T4e, T4l);
651
T8H = FNMS(KP707106781, T8A, T8z);
652
T8B = FMA(KP707106781, T8A, T8z);
658
E T5K, T5k, T5L, T5v, T4K;
663
T5K = FMA(KP707106781, T5j, T54);
664
T5k = FNMS(KP707106781, T5j, T54);
665
T5L = FMA(KP707106781, T5u, T5r);
666
T5v = FNMS(KP707106781, T5u, T5r);
667
T4L = FNMS(KP707106781, T4K, T4v);
668
T5H = FMA(KP707106781, T4K, T4v);
669
T5M = FNMS(KP198912367, T5L, T5K);
670
T5Q = FMA(KP198912367, T5K, T5L);
671
T5A = FNMS(KP668178637, T5k, T5v);
672
T5w = FMA(KP668178637, T5v, T5k);
678
E T5y, T4o, T8J, T8L, T5I, T4W;
679
T5y = FNMS(KP923879532, T4n, T3S);
680
T4o = FMA(KP923879532, T4n, T3S);
681
T8J = FMA(KP923879532, T8I, T8H);
682
T8L = FNMS(KP923879532, T8I, T8H);
683
T5I = FMA(KP707106781, T4V, T4S);
684
T4W = FNMS(KP707106781, T4V, T4S);
686
E T8G, T8F, T8D, T8E;
688
E T5G, T5P, T5z, T4X, T5N, T5J;
689
T5O = FNMS(KP923879532, T5F, T5C);
690
T5G = FMA(KP923879532, T5F, T5C);
691
T5J = FNMS(KP198912367, T5I, T5H);
692
T5P = FMA(KP198912367, T5H, T5I);
693
T5z = FNMS(KP668178637, T4L, T4W);
694
T4X = FMA(KP668178637, T4W, T4L);
697
T8F = FNMS(KP923879532, T8C, T8B);
698
T8D = FMA(KP923879532, T8C, T8B);
700
E T5B, T8K, T8M, T5x;
705
ci[0] = FMA(KP980785280, T5N, T5G);
706
cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G);
707
ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y);
708
cr[WS(rs, 11)] = FMA(KP831469612, T5B, T5y);
709
ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J);
710
cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J);
711
ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L);
712
cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L);
713
cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o);
714
ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o);
719
ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D);
720
cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D);
721
ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F);
722
cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F);
728
E T7y, T8q, T8p, T7B;
730
E T7a, T7m, T6I, T7f, T7A, T7w, T8r, T8l, T8m, T6T, T7k, T75, T8s, T7p, T7z;
733
E T7n, T6N, T6S, T7o, T7u, T7v;
736
cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O);
737
ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O);
744
T7A = FMA(KP414213562, T7u, T7v);
745
T7w = FNMS(KP414213562, T7v, T7u);
751
E T7r, T7s, T6Z, T74;
758
T7k = FNMS(KP414213562, T6Z, T74);
759
T75 = FMA(KP414213562, T74, T6Z);
762
T7z = FMA(KP414213562, T7r, T7s);
763
T7t = FNMS(KP414213562, T7s, T7r);
767
E T7i, T6U, T8t, T8v, T7j, T7g;
768
T7i = FNMS(KP707106781, T6T, T6I);
769
T6U = FMA(KP707106781, T6T, T6I);
770
T8t = FMA(KP707106781, T8s, T8r);
771
T8v = FNMS(KP707106781, T8s, T8r);
772
T7j = FMA(KP414213562, T7a, T7f);
773
T7g = FNMS(KP414213562, T7f, T7a);
775
E T7q, T7x, T8n, T8o;
776
T7y = FNMS(KP707106781, T7p, T7m);
777
T7q = FMA(KP707106781, T7p, T7m);
779
E T7l, T8u, T8w, T7h;
784
ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i);
785
cr[WS(rs, 10)] = FNMS(KP923879532, T7l, T7i);
786
ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t);
787
cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t);
788
ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v);
789
cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v);
790
cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U);
791
ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U);
795
T8p = FNMS(KP707106781, T8m, T8l);
796
T8n = FMA(KP707106781, T8m, T8l);
799
ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q);
800
cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q);
801
ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n);
802
cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n);
807
E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T6e, T6w;
809
E T6q, T6p, T5Z, T62;
810
ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p);
811
cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p);
812
cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y);
813
ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y);
814
T6q = FNMS(KP414213562, T5X, T5Y);
815
T5Z = FMA(KP414213562, T5Y, T5X);
816
T62 = FNMS(KP414213562, T61, T60);
817
T6p = FMA(KP414213562, T60, T61);
818
T6o = FNMS(KP707106781, T5V, T5S);
819
T5W = FMA(KP707106781, T5V, T5S);
822
T8V = FNMS(KP707106781, T8O, T8N);
823
T8P = FMA(KP707106781, T8O, T8N);
826
T6e = FMA(KP707106781, T6d, T6c);
827
T6w = FNMS(KP707106781, T6d, T6c);
830
E T6k, T8U, T6z, T6n, T8S, T8T, T8R, T6s;
832
E T64, T6y, T6l, T6i, T6v, T6m, T6b, T8X, T8Z, T8Y, T6j, T90;
834
E T6C, T6B, T6x, T6h;
835
T6k = FNMS(KP923879532, T63, T5W);
836
T64 = FMA(KP923879532, T63, T5W);
837
T6x = FNMS(KP707106781, T6g, T6f);
838
T6h = FMA(KP707106781, T6g, T6f);
840
E T6t, T67, T6u, T6a;
841
T6t = FNMS(KP707106781, T66, T65);
842
T67 = FMA(KP707106781, T66, T65);
843
T6u = FNMS(KP707106781, T69, T68);
844
T6a = FMA(KP707106781, T69, T68);
845
T6y = FMA(KP668178637, T6x, T6w);
846
T6C = FNMS(KP668178637, T6w, T6x);
847
T6l = FMA(KP198912367, T6e, T6h);
848
T6i = FNMS(KP198912367, T6h, T6e);
849
T6v = FNMS(KP668178637, T6u, T6t);
850
T6B = FMA(KP668178637, T6t, T6u);
851
T6m = FNMS(KP198912367, T67, T6a);
852
T6b = FMA(KP198912367, T6a, T67);
854
T8X = FMA(KP923879532, T8W, T8V);
855
T8Z = FNMS(KP923879532, T8W, T8V);
863
ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X);
864
cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X));
865
cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64);
866
ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64);
867
cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z);
868
ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z);
872
T6A = FNMS(KP923879532, T6r, T6o);
873
T6s = FMA(KP923879532, T6r, T6o);
874
T8T = FNMS(KP923879532, T8Q, T8P);
875
T8R = FMA(KP923879532, T8Q, T8P);
876
ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k);
877
cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k);
878
ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s);
879
cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s);
880
ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R);
881
cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R);
882
ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T);
883
cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T);
48
for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
51
E T8y, T87, T8, T3w, T83, T3B, T8x, Tl, T6G, Tz, T3J, T5T, T6F, TM, T3Q;
52
E T5U, T46, T5X, T7E, T6M, T5Y, T3Z, T6J, T1f, T7D, T6R, T61, T4e, T6O, T1G;
53
E T60, T4l, T54, T6c, T7d, T7N, T32, T76, T6f, T5r, T4v, T65, T72, T7I, T29;
54
E T6V, T68, T4S, T5t, T5b, T7O, T79, T7e, T3t, T5s, T5i, T4H, T2y, T4B, T6X;
57
E T44, T1d, T3X, T6K, T11, T40, T42, T17, T5h, T5c;
59
E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti;
61
E T1, T86, T3, T6, T2, T5;
69
E T84, T4, T9, T85, T7;
76
T85 = FNMS(T5, T3, T84);
91
E Tu, Tx, T3F, Ts, Tw, T3G, Tv;
93
E To, Tr, Tp, T3E, Tq, Tt;
95
E T3y, Te, T3A, Tk, T3z, Th, Tn;
99
T3y = FNMS(Tc, Ta, T3x);
100
Te = FMA(Tc, Td, Tb);
101
T3A = FNMS(Ti, Tg, T3z);
102
Tk = FMA(Ti, Tj, Th);
116
T3F = FNMS(Tq, To, T3E);
117
Ts = FMA(Tq, Tr, Tp);
123
E T3M, TF, TH, TK, TG, TJ, TE, TD, TC;
125
E TB, T3H, Ty, TA, T3I, T3D, T3L;
128
T3H = FNMS(Tw, Tu, T3G);
129
Ty = FMA(Tw, Tx, Tv);
140
T3M = FNMS(TD, TB, T3L);
142
TF = FMA(TD, TE, TC);
148
E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14;
150
E T19, T1c, T18, T1b, T3P, T3K;
152
E TQ, TT, T3N, TI, TP, TS;
161
T3O = FNMS(TJ, TH, T3N);
162
TL = FMA(TJ, TK, TI);
169
TU = FMA(TS, TT, TR);
170
T3U = FNMS(TS, TQ, T3T);
175
T19 = cr[WS(rs, 26)];
176
T1c = ci[WS(rs, 26)];
180
E TW, TZ, TY, T3V, TX, T43, T1a, TV;
187
T44 = FNMS(T1b, T19, T43);
188
T1d = FMA(T1b, T1c, T1a);
191
T13 = cr[WS(rs, 10)];
192
T16 = ci[WS(rs, 10)];
193
T3W = FNMS(TY, TW, T3V);
194
T10 = FMA(TY, TZ, TX);
205
T42 = FNMS(T15, T13, T41);
206
T17 = FMA(T15, T16, T14);
212
E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v;
214
E T1A, T1D, T1C, T4i, T1B;
216
E T1h, T1k, T1g, T1j, T48, T1i, T1z;
217
T1h = cr[WS(rs, 30)];
218
T1k = ci[WS(rs, 30)];
220
E T6L, T45, T1e, T3Y;
236
T1A = cr[WS(rs, 22)];
237
T1D = ci[WS(rs, 22)];
242
T49 = FNMS(T1j, T1h, T48);
243
T1l = FMA(T1j, T1k, T1i);
248
E T1n, T1q, T1m, T1p, T4a, T1o, T1t;
249
T1n = cr[WS(rs, 14)];
250
T1q = ci[WS(rs, 14)];
251
T4j = FNMS(T1C, T1A, T4i);
252
T1E = FMA(T1C, T1D, T1B);
261
T4b = FNMS(T1p, T1n, T4a);
262
T1r = FMA(T1p, T1q, T1o);
268
E T4c, T6P, T1s, T4f, T4h, T1y;
273
T4h = FNMS(T1w, T1u, T4g);
274
T1y = FMA(T1w, T1x, T1v);
276
E T4k, T6Q, T4d, T1F;
293
E T5n, T2H, T52, T30, T2Q, T2T, T2S, T5p, T2N, T4Z, T2R;
295
E T2W, T2Z, T2Y, T51, T2X;
297
E T2D, T2G, T2C, T2F, T5m, T2E, T2V;
298
T2D = cr[WS(rs, 31)];
299
T2G = ci[WS(rs, 31)];
302
T2W = cr[WS(rs, 23)];
303
T2Z = ci[WS(rs, 23)];
308
T5n = FNMS(T2F, T2D, T5m);
309
T2H = FMA(T2F, T2G, T2E);
314
E T2J, T2M, T2I, T2L, T5o, T2K, T2P;
315
T2J = cr[WS(rs, 15)];
316
T2M = ci[WS(rs, 15)];
317
T52 = FNMS(T2Y, T2W, T51);
318
T30 = FMA(T2Y, T2Z, T2X);
327
T5p = FNMS(T2L, T2J, T5o);
328
T2N = FMA(T2L, T2M, T2K);
334
E T5q, T7b, T2O, T4Y, T50, T2U;
339
T50 = FNMS(T2S, T2Q, T4Z);
340
T2U = FMA(T2S, T2T, T2R);
342
E T7c, T53, T31, T5l;
359
E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y;
361
E T23, T26, T25, T4s, T24;
363
E T1K, T1N, T1J, T1M, T4M, T1L, T22;
368
T23 = cr[WS(rs, 25)];
369
T26 = ci[WS(rs, 25)];
374
T4N = FNMS(T1M, T1K, T4M);
375
T1O = FMA(T1M, T1N, T1L);
380
E T1Q, T1T, T1P, T1S, T4O, T1R, T1W;
381
T1Q = cr[WS(rs, 17)];
382
T1T = ci[WS(rs, 17)];
383
T4t = FNMS(T25, T23, T4s);
384
T27 = FMA(T25, T26, T24);
393
T4P = FNMS(T1S, T1Q, T4O);
394
T1U = FMA(T1S, T1T, T1R);
400
E T4Q, T70, T1V, T4p, T4r, T21;
405
T4r = FNMS(T1Z, T1X, T4q);
406
T21 = FMA(T1Z, T20, T1Y);
408
E T71, T4u, T4R, T28;
425
E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i;
427
E T3n, T3q, T3p, T5f, T3o;
429
E T34, T37, T33, T36, T56, T35, T3m;
434
T3n = cr[WS(rs, 11)];
435
T3q = ci[WS(rs, 11)];
440
T57 = FNMS(T36, T34, T56);
441
T38 = FMA(T36, T37, T35);
446
E T3a, T3d, T39, T3c, T58, T3b, T3g;
447
T3a = cr[WS(rs, 19)];
448
T3d = ci[WS(rs, 19)];
449
T5g = FNMS(T3p, T3n, T5f);
450
T3r = FMA(T3p, T3q, T3o);
453
T3h = cr[WS(rs, 27)];
454
T3k = ci[WS(rs, 27)];
459
T59 = FNMS(T3c, T3a, T58);
460
T3e = FMA(T3c, T3d, T3b);
466
E T5a, T78, T3f, T55, T5e, T3l, T77, T3s;
471
T5e = FNMS(T3j, T3h, T5d);
472
T3l = FMA(T3j, T3k, T3i);
486
E T4y, T2f, T2o, T2r, T4A, T2l, T2n, T2q, T4E, T2p;
488
E T2u, T2x, T2t, T2w;
490
E T2b, T2e, T2d, T4x, T2c, T2a;
499
T2u = cr[WS(rs, 13)];
500
T2x = ci[WS(rs, 13)];
501
T4y = FNMS(T2d, T2b, T4x);
502
T2f = FMA(T2d, T2e, T2c);
507
E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g;
508
T2h = cr[WS(rs, 21)];
509
T2k = ci[WS(rs, 21)];
514
T4H = FNMS(T2w, T2u, T4G);
515
T2y = FMA(T2w, T2x, T2v);
518
T2o = cr[WS(rs, 29)];
519
T2r = ci[WS(rs, 29)];
520
T4A = FNMS(T2j, T2h, T4z);
521
T2l = FMA(T2j, T2k, T2i);
532
T4F = FNMS(T2q, T2o, T4E);
533
T2s = FMA(T2q, T2r, T2p);
537
E T6E, T8j, T6Y, T73, T6H, T8k, T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T5O;
540
E T4T, T4C, T4J, T4U, T7S, T7V;
542
E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A;
543
E T7P, T7K, T7W, T1I;
545
E T7X, T7Y, T7J, T82, T88;
547
E Tm, T4I, T6W, T4D, T2z, TN;
581
E T7H, T7M, T2B, T3u;
598
E T8g, T8h, T8f, T8i;
600
E T7R, T8c, T8a, T7G, T81, T7F;
606
ci[WS(rs, 15)] = T1I - T3v;
607
ci[WS(rs, 7)] = T7W + T7Z;
608
cr[WS(rs, 8)] = T7W - T7Z;
615
ci[WS(rs, 23)] = T8b + T8c;
616
cr[WS(rs, 24)] = T8b - T8c;
617
ci[WS(rs, 31)] = T80 + T8a;
618
cr[WS(rs, 16)] = T80 - T8a;
619
cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
620
ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
624
ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f);
625
cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f);
626
ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h);
627
cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h);
631
E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T4L, T5H, T5M, T5Q, T5A, T5w, T4V;
633
E T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u;
638
T5D = FNMS(KP414213562, T3Z, T46);
639
T47 = FMA(KP414213562, T46, T3Z);
640
ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S);
641
cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S);
642
T5C = FMA(KP707106781, T3R, T3C);
643
T3S = FNMS(KP707106781, T3R, T3C);
644
T4m = FNMS(KP414213562, T4l, T4e);
645
T5E = FMA(KP414213562, T4e, T4l);
652
T8H = FNMS(KP707106781, T8A, T8z);
653
T8B = FMA(KP707106781, T8A, T8z);
659
E T5K, T5k, T5L, T5v, T4K;
664
T5K = FMA(KP707106781, T5j, T54);
665
T5k = FNMS(KP707106781, T5j, T54);
666
T5L = FMA(KP707106781, T5u, T5r);
667
T5v = FNMS(KP707106781, T5u, T5r);
668
T4L = FNMS(KP707106781, T4K, T4v);
669
T5H = FMA(KP707106781, T4K, T4v);
670
T5M = FNMS(KP198912367, T5L, T5K);
671
T5Q = FMA(KP198912367, T5K, T5L);
672
T5A = FNMS(KP668178637, T5k, T5v);
673
T5w = FMA(KP668178637, T5v, T5k);
679
E T5y, T4o, T8J, T8L, T5I, T4W;
680
T5y = FNMS(KP923879532, T4n, T3S);
681
T4o = FMA(KP923879532, T4n, T3S);
682
T8J = FMA(KP923879532, T8I, T8H);
683
T8L = FNMS(KP923879532, T8I, T8H);
684
T5I = FMA(KP707106781, T4V, T4S);
685
T4W = FNMS(KP707106781, T4V, T4S);
687
E T8G, T8F, T8D, T8E;
689
E T5G, T5P, T5z, T4X, T5N, T5J;
690
T5O = FNMS(KP923879532, T5F, T5C);
691
T5G = FMA(KP923879532, T5F, T5C);
692
T5J = FNMS(KP198912367, T5I, T5H);
693
T5P = FMA(KP198912367, T5H, T5I);
694
T5z = FNMS(KP668178637, T4L, T4W);
695
T4X = FMA(KP668178637, T4W, T4L);
698
T8F = FNMS(KP923879532, T8C, T8B);
699
T8D = FMA(KP923879532, T8C, T8B);
701
E T5B, T8K, T8M, T5x;
706
ci[0] = FMA(KP980785280, T5N, T5G);
707
cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G);
708
ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y);
709
cr[WS(rs, 11)] = FMA(KP831469612, T5B, T5y);
710
ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J);
711
cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J);
712
ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L);
713
cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L);
714
cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o);
715
ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o);
720
ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D);
721
cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D);
722
ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F);
723
cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F);
729
E T7y, T8q, T8p, T7B;
731
E T7a, T7m, T6I, T7f, T7A, T7w, T8r, T8l, T8m, T6T, T7k, T75, T8s, T7p, T7z;
734
E T7n, T6N, T6S, T7o, T7u, T7v;
737
cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O);
738
ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O);
745
T7A = FMA(KP414213562, T7u, T7v);
746
T7w = FNMS(KP414213562, T7v, T7u);
752
E T7r, T7s, T6Z, T74;
759
T7k = FNMS(KP414213562, T6Z, T74);
760
T75 = FMA(KP414213562, T74, T6Z);
763
T7z = FMA(KP414213562, T7r, T7s);
764
T7t = FNMS(KP414213562, T7s, T7r);
768
E T7i, T6U, T8t, T8v, T7j, T7g;
769
T7i = FNMS(KP707106781, T6T, T6I);
770
T6U = FMA(KP707106781, T6T, T6I);
771
T8t = FMA(KP707106781, T8s, T8r);
772
T8v = FNMS(KP707106781, T8s, T8r);
773
T7j = FMA(KP414213562, T7a, T7f);
774
T7g = FNMS(KP414213562, T7f, T7a);
776
E T7q, T7x, T8n, T8o;
777
T7y = FNMS(KP707106781, T7p, T7m);
778
T7q = FMA(KP707106781, T7p, T7m);
780
E T7l, T8u, T8w, T7h;
785
ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i);
786
cr[WS(rs, 10)] = FNMS(KP923879532, T7l, T7i);
787
ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t);
788
cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t);
789
ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v);
790
cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v);
791
cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U);
792
ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U);
796
T8p = FNMS(KP707106781, T8m, T8l);
797
T8n = FMA(KP707106781, T8m, T8l);
800
ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q);
801
cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q);
802
ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n);
803
cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n);
808
E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T6e, T6w;
810
E T6q, T6p, T5Z, T62;
811
ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p);
812
cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p);
813
cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y);
814
ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y);
815
T6q = FNMS(KP414213562, T5X, T5Y);
816
T5Z = FMA(KP414213562, T5Y, T5X);
817
T62 = FNMS(KP414213562, T61, T60);
818
T6p = FMA(KP414213562, T60, T61);
819
T6o = FNMS(KP707106781, T5V, T5S);
820
T5W = FMA(KP707106781, T5V, T5S);
823
T8V = FNMS(KP707106781, T8O, T8N);
824
T8P = FMA(KP707106781, T8O, T8N);
827
T6e = FMA(KP707106781, T6d, T6c);
828
T6w = FNMS(KP707106781, T6d, T6c);
831
E T6k, T8U, T6z, T6n, T8S, T8T, T8R, T6s;
833
E T64, T6y, T6l, T6i, T6v, T6m, T6b, T8X, T8Z, T8Y, T6j, T90;
835
E T6C, T6B, T6x, T6h;
836
T6k = FNMS(KP923879532, T63, T5W);
837
T64 = FMA(KP923879532, T63, T5W);
838
T6x = FNMS(KP707106781, T6g, T6f);
839
T6h = FMA(KP707106781, T6g, T6f);
841
E T6t, T67, T6u, T6a;
842
T6t = FNMS(KP707106781, T66, T65);
843
T67 = FMA(KP707106781, T66, T65);
844
T6u = FNMS(KP707106781, T69, T68);
845
T6a = FMA(KP707106781, T69, T68);
846
T6y = FMA(KP668178637, T6x, T6w);
847
T6C = FNMS(KP668178637, T6w, T6x);
848
T6l = FMA(KP198912367, T6e, T6h);
849
T6i = FNMS(KP198912367, T6h, T6e);
850
T6v = FNMS(KP668178637, T6u, T6t);
851
T6B = FMA(KP668178637, T6t, T6u);
852
T6m = FNMS(KP198912367, T67, T6a);
853
T6b = FMA(KP198912367, T6a, T67);
855
T8X = FMA(KP923879532, T8W, T8V);
856
T8Z = FNMS(KP923879532, T8W, T8V);
864
ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X);
865
cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X));
866
cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64);
867
ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64);
868
cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z);
869
ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z);
873
T6A = FNMS(KP923879532, T6r, T6o);
874
T6s = FMA(KP923879532, T6r, T6o);
875
T8T = FNMS(KP923879532, T8Q, T8P);
876
T8R = FMA(KP923879532, T8Q, T8P);
877
ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k);
878
cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k);
879
ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s);
880
cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s);
881
ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R);
882
cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R);
883
ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T);
884
cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T);
890
cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);
891
ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);
889
cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);
890
ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);
921
923
DK(KP382683432, +0.382683432365089771728459984030398866761344562);
922
924
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
923
925
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
925
for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
926
E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41;
927
E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;
928
E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;
929
E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;
930
E T4m, T5e, T4v, T5h;
932
E T1, T76, T6, T75, Tc, T32, Th, T33;
941
T6 = FMA(T2, T3, T4 * T5);
942
T75 = FNMS(T4, T3, T2 * T5);
950
Tc = FMA(T8, T9, Ta * Tb);
951
T32 = FNMS(Ta, T9, T8 * Tb);
959
Th = FMA(Td, Te, Tf * Tg);
960
T33 = FNMS(Tf, Te, Td * Tg);
974
E T31, T34, T74, T77;
986
E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;
988
E T1v, T1x, T1u, T1w;
993
T1y = FMA(T1u, T1v, T1w * T1x);
994
T3X = FNMS(T1w, T1v, T1u * T1x);
997
E T1L, T1N, T1K, T1M;
998
T1L = cr[WS(rs, 25)];
999
T1N = ci[WS(rs, 25)];
1002
T1O = FMA(T1K, T1L, T1M * T1N);
1003
T3I = FNMS(T1M, T1L, T1K * T1N);
1006
E T1A, T1C, T1z, T1B;
1007
T1A = cr[WS(rs, 17)];
1008
T1C = ci[WS(rs, 17)];
1011
T1D = FMA(T1z, T1A, T1B * T1C);
1012
T3Y = FNMS(T1B, T1A, T1z * T1C);
1015
E T1G, T1I, T1F, T1H;
1016
T1G = cr[WS(rs, 9)];
1017
T1I = ci[WS(rs, 9)];
1020
T1J = FMA(T1F, T1G, T1H * T1I);
1021
T3H = FNMS(T1H, T1G, T1F * T1I);
1024
E T1E, T1P, T5W, T5X;
1035
E T3G, T3J, T3Z, T40;
1047
E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;
1049
E T2g, T2i, T2f, T2h;
1050
T2g = cr[WS(rs, 31)];
1051
T2i = ci[WS(rs, 31)];
1054
T2j = FMA(T2f, T2g, T2h * T2i);
1055
T47 = FNMS(T2h, T2g, T2f * T2i);
1058
E T2w, T2y, T2v, T2x;
1059
T2w = cr[WS(rs, 23)];
1060
T2y = ci[WS(rs, 23)];
1063
T2z = FMA(T2v, T2w, T2x * T2y);
1064
T4q = FNMS(T2x, T2w, T2v * T2y);
1067
E T2l, T2n, T2k, T2m;
1068
T2l = cr[WS(rs, 15)];
1069
T2n = ci[WS(rs, 15)];
1072
T2o = FMA(T2k, T2l, T2m * T2n);
1073
T48 = FNMS(T2m, T2l, T2k * T2n);
1076
E T2r, T2t, T2q, T2s;
1077
T2r = cr[WS(rs, 7)];
1078
T2t = ci[WS(rs, 7)];
1081
T2u = FMA(T2q, T2r, T2s * T2t);
1082
T4p = FNMS(T2s, T2r, T2q * T2t);
1085
E T2p, T2A, T6c, T6d;
1096
E T49, T4a, T4o, T4r;
1108
E To, T37, TE, T3d, Tt, T38, Tz, T3c;
1115
To = FMA(Tk, Tl, Tm * Tn);
1116
T37 = FNMS(Tm, Tl, Tk * Tn);
1120
TB = cr[WS(rs, 12)];
1121
TD = ci[WS(rs, 12)];
1124
TE = FMA(TA, TB, TC * TD);
1125
T3d = FNMS(TC, TB, TA * TD);
1129
Tq = cr[WS(rs, 20)];
1130
Ts = ci[WS(rs, 20)];
1133
Tt = FMA(Tp, Tq, Tr * Ts);
1134
T38 = FNMS(Tr, Tq, Tp * Ts);
1138
Tw = cr[WS(rs, 28)];
1139
Ty = ci[WS(rs, 28)];
1142
Tz = FMA(Tv, Tw, Tx * Ty);
1143
T3c = FNMS(Tx, Tw, Tv * Ty);
1157
E T36, T39, T3b, T3e;
1169
E TM, T3n, T12, T3k, TR, T3o, TX, T3j;
1176
TM = FMA(TI, TJ, TK * TL);
1177
T3n = FNMS(TK, TJ, TI * TL);
1181
TZ = cr[WS(rs, 26)];
1182
T11 = ci[WS(rs, 26)];
1185
T12 = FMA(TY, TZ, T10 * T11);
1186
T3k = FNMS(T10, TZ, TY * T11);
1190
TO = cr[WS(rs, 18)];
1191
TQ = ci[WS(rs, 18)];
1194
TR = FMA(TN, TO, TP * TQ);
1195
T3o = FNMS(TP, TO, TN * TQ);
1199
TU = cr[WS(rs, 10)];
1200
TW = ci[WS(rs, 10)];
1203
TX = FMA(TT, TU, TV * TW);
1204
T3j = FNMS(TV, TU, TT * TW);
1207
E TS, T13, T5L, T5M;
1218
E T3i, T3l, T3p, T3q;
1230
E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
1232
E T16, T18, T15, T17;
1233
T16 = cr[WS(rs, 30)];
1234
T18 = ci[WS(rs, 30)];
1237
T19 = FMA(T15, T16, T17 * T18);
1238
T3t = FNMS(T17, T16, T15 * T18);
1241
E T1m, T1o, T1l, T1n;
1242
T1m = cr[WS(rs, 22)];
1243
T1o = ci[WS(rs, 22)];
1246
T1p = FMA(T1l, T1m, T1n * T1o);
1247
T3A = FNMS(T1n, T1m, T1l * T1o);
1250
E T1b, T1d, T1a, T1c;
1251
T1b = cr[WS(rs, 14)];
1252
T1d = ci[WS(rs, 14)];
1255
T1e = FMA(T1a, T1b, T1c * T1d);
1256
T3u = FNMS(T1c, T1b, T1a * T1d);
1259
E T1h, T1j, T1g, T1i;
1260
T1h = cr[WS(rs, 6)];
1261
T1j = ci[WS(rs, 6)];
1264
T1k = FMA(T1g, T1h, T1i * T1j);
1265
T3z = FNMS(T1i, T1h, T1g * T1j);
1268
E T1f, T1q, T5Q, T5R;
1279
E T3v, T3w, T3y, T3B;
1291
E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;
1293
E T1S, T1U, T1R, T1T;
1294
T1S = cr[WS(rs, 5)];
1295
T1U = ci[WS(rs, 5)];
1298
T1V = FMA(T1R, T1S, T1T * T1U);
1299
T3M = FNMS(T1T, T1S, T1R * T1U);
1302
E T1X, T1Z, T1W, T1Y;
1303
T1X = cr[WS(rs, 21)];
1304
T1Z = ci[WS(rs, 21)];
1307
T20 = FMA(T1W, T1X, T1Y * T1Z);
1308
T3N = FNMS(T1Y, T1X, T1W * T1Z);
1313
E T23, T25, T22, T24;
1314
T23 = cr[WS(rs, 29)];
1315
T25 = ci[WS(rs, 29)];
1318
T26 = FMA(T22, T23, T24 * T25);
1319
T3Q = FNMS(T24, T23, T22 * T25);
1322
E T28, T2a, T27, T29;
1323
T28 = cr[WS(rs, 13)];
1324
T2a = ci[WS(rs, 13)];
1327
T2b = FMA(T27, T28, T29 * T2a);
1328
T3R = FNMS(T29, T28, T27 * T2a);
1333
E T21, T2c, T62, T63;
1344
E T3P, T3U, T42, T43;
1347
T3V = KP707106781 * (T3P - T3U);
1348
T5a = KP707106781 * (T3P + T3U);
1351
T44 = KP707106781 * (T42 - T43);
1352
T57 = KP707106781 * (T43 + T42);
1356
E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f;
1358
E T2D, T2F, T2C, T2E;
1359
T2D = cr[WS(rs, 3)];
1360
T2F = ci[WS(rs, 3)];
1363
T2G = FMA(T2C, T2D, T2E * T2F);
1364
T4i = FNMS(T2E, T2D, T2C * T2F);
1367
E T2I, T2K, T2H, T2J;
1368
T2I = cr[WS(rs, 19)];
1369
T2K = ci[WS(rs, 19)];
1372
T2L = FMA(T2H, T2I, T2J * T2K);
1373
T4j = FNMS(T2J, T2I, T2H * T2K);
1378
E T2O, T2Q, T2N, T2P;
1379
T2O = cr[WS(rs, 27)];
1380
T2Q = ci[WS(rs, 27)];
1383
T2R = FMA(T2N, T2O, T2P * T2Q);
1384
T4d = FNMS(T2P, T2O, T2N * T2Q);
1387
E T2T, T2V, T2S, T2U;
1388
T2T = cr[WS(rs, 11)];
1389
T2V = ci[WS(rs, 11)];
1392
T2W = FMA(T2S, T2T, T2U * T2V);
1393
T4e = FNMS(T2U, T2T, T2S * T2V);
1398
E T2M, T2X, T68, T69;
1409
E T4g, T4l, T4t, T4u;
1412
T4m = KP707106781 * (T4g - T4l);
1413
T5e = KP707106781 * (T4l + T4g);
1416
T4v = KP707106781 * (T4t - T4u);
1417
T5h = KP707106781 * (T4t + T4u);
1421
E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
1423
E TH, T1s, T72, T79;
1434
E T2e, T2Z, T6Y, T6Z;
1444
ci[WS(rs, 15)] = T1t - T30;
1445
cr[WS(rs, 24)] = T7b - T7c;
1446
ci[WS(rs, 23)] = T7b + T7c;
1448
cr[WS(rs, 8)] = T6X - T70;
1449
cr[WS(rs, 16)] = T71 - T7a;
1450
ci[WS(rs, 31)] = T71 + T7a;
1451
ci[WS(rs, 7)] = T6X + T70;
1454
E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
1456
T4W = KP707106781 * (T4U + T4V);
1459
T7z = KP707106781 * (T3a - T3f);
1463
E T50, T53, T5x, T5y;
1464
T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z);
1465
T53 = FNMS(KP923879532, T52, KP382683432 * T51);
1470
T5z = FNMS(KP980785280, T5y, KP195090322 * T5x);
1471
T5D = FMA(KP980785280, T5x, KP195090322 * T5y);
1474
E T58, T5b, T5q, T5r;
1477
T5c = FMA(KP831469612, T58, KP555570233 * T5b);
1478
T5m = FNMS(KP831469612, T5b, KP555570233 * T58);
1479
T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z);
1480
T5r = FMA(KP382683432, T52, KP923879532 * T51);
1485
E T5u, T5v, T5f, T5i;
1488
T5w = FMA(KP195090322, T5u, KP980785280 * T5v);
1489
T5C = FNMS(KP195090322, T5v, KP980785280 * T5u);
1492
T5j = FNMS(KP555570233, T5i, KP831469612 * T5f);
1493
T5n = FMA(KP555570233, T5f, KP831469612 * T5i);
1496
E T55, T5k, T7H, T7K;
1499
ci[WS(rs, 12)] = T55 - T5k;
1500
cr[WS(rs, 3)] = T55 + T5k;
1503
cr[WS(rs, 19)] = T7H - T7K;
1504
ci[WS(rs, 28)] = T7H + T7K;
1507
E T7L, T7M, T5l, T5o;
1510
cr[WS(rs, 27)] = T7L - T7M;
1511
ci[WS(rs, 20)] = T7L + T7M;
1514
cr[WS(rs, 11)] = T5l - T5o;
1515
ci[WS(rs, 4)] = T5l + T5o;
1518
E T5t, T5A, T7x, T7E;
1521
ci[WS(rs, 8)] = T5t - T5A;
1522
cr[WS(rs, 7)] = T5t + T5A;
1525
cr[WS(rs, 31)] = T7x - T7E;
1526
ci[WS(rs, 16)] = T7x + T7E;
1529
E T7F, T7G, T5B, T5E;
1532
cr[WS(rs, 23)] = T7F - T7G;
1533
ci[WS(rs, 24)] = T7F + T7G;
1536
cr[WS(rs, 15)] = T5B - T5E;
1541
E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
1543
E T6D, T6G, T7e, T7f;
1554
E T6I, T6L, T6N, T6Q;
1565
E T6S, T7h, T6W, T7d;
1566
T6S = KP707106781 * (T6M + T6R);
1567
ci[WS(rs, 11)] = T6H - T6S;
1568
cr[WS(rs, 4)] = T6H + T6S;
1569
T7h = KP707106781 * (T6V - T6U);
1570
cr[WS(rs, 20)] = T7h - T7i;
1571
ci[WS(rs, 27)] = T7h + T7i;
1572
T6W = KP707106781 * (T6U + T6V);
1573
cr[WS(rs, 12)] = T6T - T6W;
1574
ci[WS(rs, 3)] = T6T + T6W;
1575
T7d = KP707106781 * (T6R - T6M);
1576
cr[WS(rs, 28)] = T7d - T7g;
1577
ci[WS(rs, 19)] = T7d + T7g;
1581
E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
1584
E T5O, T5T, T60, T65;
1591
T5U = KP707106781 * (T5O + T5T);
1592
T7k = KP707106781 * (T5O - T5T);
1594
E T6v, T6w, T6o, T6p;
1597
T6x = FMA(KP382683432, T6v, KP923879532 * T6w);
1598
T6B = FNMS(KP923879532, T6v, KP382683432 * T6w);
1601
T6q = KP707106781 * (T6o + T6p);
1602
T7s = KP707106781 * (T6p - T6o);
1606
T66 = FMA(KP382683432, T60, KP923879532 * T65);
1607
T6k = FNMS(KP923879532, T60, KP382683432 * T65);
1609
E T6s, T6t, T6b, T6g;
1612
T6u = FNMS(KP382683432, T6t, KP923879532 * T6s);
1613
T6A = FMA(KP923879532, T6t, KP382683432 * T6s);
1616
T6h = FNMS(KP382683432, T6g, KP923879532 * T6b);
1617
T6l = FMA(KP923879532, T6g, KP382683432 * T6b);
1621
E T5V, T6i, T7r, T7u;
1624
ci[WS(rs, 13)] = T5V - T6i;
1625
cr[WS(rs, 2)] = T5V + T6i;
1628
cr[WS(rs, 18)] = T7r - T7u;
1629
ci[WS(rs, 29)] = T7r + T7u;
1632
E T7v, T7w, T6j, T6m;
1635
cr[WS(rs, 26)] = T7v - T7w;
1636
ci[WS(rs, 21)] = T7v + T7w;
1639
cr[WS(rs, 10)] = T6j - T6m;
1640
ci[WS(rs, 5)] = T6j + T6m;
1643
E T6r, T6y, T7j, T7o;
1646
cr[WS(rs, 14)] = T6r - T6y;
1647
ci[WS(rs, 1)] = T6r + T6y;
1650
cr[WS(rs, 30)] = T7j - T7o;
1651
ci[WS(rs, 17)] = T7j + T7o;
1654
E T7p, T7q, T6z, T6C;
1657
cr[WS(rs, 22)] = T7p - T7q;
1658
ci[WS(rs, 25)] = T7p + T7q;
1661
ci[WS(rs, 9)] = T6z - T6C;
1662
cr[WS(rs, 6)] = T6z + T6C;
1666
E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
1668
T3g = KP707106781 * (T3a + T3f);
1671
T7P = KP707106781 * (T4V - T4U);
1675
E T3s, T3D, T4L, T4M;
1676
T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
1677
T3D = FMA(KP923879532, T3x, KP382683432 * T3C);
1682
T4N = FNMS(KP195090322, T4M, KP980785280 * T4L);
1683
T4R = FMA(KP980785280, T4M, KP195090322 * T4L);
1686
E T3W, T45, T4E, T4F;
1689
T46 = FNMS(KP555570233, T45, KP831469612 * T3W);
1690
T4A = FMA(KP831469612, T45, KP555570233 * T3W);
1691
T4E = FMA(KP382683432, T3r, KP923879532 * T3m);
1692
T4F = FNMS(KP382683432, T3x, KP923879532 * T3C);
1697
E T4I, T4J, T4n, T4w;
1700
T4K = FMA(KP195090322, T4I, KP980785280 * T4J);
1701
T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J);
1704
T4x = FMA(KP555570233, T4n, KP831469612 * T4w);
1705
T4B = FNMS(KP831469612, T4n, KP555570233 * T4w);
1708
E T3F, T4y, T7V, T7Y;
1711
cr[WS(rs, 13)] = T3F - T4y;
1712
ci[WS(rs, 2)] = T3F + T4y;
1715
cr[WS(rs, 29)] = T7V - T7Y;
1716
ci[WS(rs, 18)] = T7V + T7Y;
1719
E T7Z, T80, T4z, T4C;
1722
cr[WS(rs, 21)] = T7Z - T80;
1723
ci[WS(rs, 26)] = T7Z + T80;
1726
ci[WS(rs, 10)] = T4z - T4C;
1727
cr[WS(rs, 5)] = T4z + T4C;
1730
E T4H, T4O, T7N, T7S;
1733
ci[WS(rs, 14)] = T4H - T4O;
1734
cr[WS(rs, 1)] = T4H + T4O;
1737
cr[WS(rs, 17)] = T7N - T7S;
1738
ci[WS(rs, 30)] = T7N + T7S;
1741
E T7T, T7U, T4P, T4S;
1744
cr[WS(rs, 25)] = T7T - T7U;
1745
ci[WS(rs, 22)] = T7T + T7U;
1748
cr[WS(rs, 9)] = T4P - T4S;
1749
ci[WS(rs, 6)] = T4P + T4S;
928
for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {
929
E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41;
930
E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;
931
E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;
932
E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;
933
E T4m, T5e, T4v, T5h;
935
E T1, T76, T6, T75, Tc, T32, Th, T33;
944
T6 = FMA(T2, T3, T4 * T5);
945
T75 = FNMS(T4, T3, T2 * T5);
953
Tc = FMA(T8, T9, Ta * Tb);
954
T32 = FNMS(Ta, T9, T8 * Tb);
962
Th = FMA(Td, Te, Tf * Tg);
963
T33 = FNMS(Tf, Te, Td * Tg);
977
E T31, T34, T74, T77;
989
E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;
991
E T1v, T1x, T1u, T1w;
996
T1y = FMA(T1u, T1v, T1w * T1x);
997
T3X = FNMS(T1w, T1v, T1u * T1x);
1000
E T1L, T1N, T1K, T1M;
1001
T1L = cr[WS(rs, 25)];
1002
T1N = ci[WS(rs, 25)];
1005
T1O = FMA(T1K, T1L, T1M * T1N);
1006
T3I = FNMS(T1M, T1L, T1K * T1N);
1009
E T1A, T1C, T1z, T1B;
1010
T1A = cr[WS(rs, 17)];
1011
T1C = ci[WS(rs, 17)];
1014
T1D = FMA(T1z, T1A, T1B * T1C);
1015
T3Y = FNMS(T1B, T1A, T1z * T1C);
1018
E T1G, T1I, T1F, T1H;
1019
T1G = cr[WS(rs, 9)];
1020
T1I = ci[WS(rs, 9)];
1023
T1J = FMA(T1F, T1G, T1H * T1I);
1024
T3H = FNMS(T1H, T1G, T1F * T1I);
1027
E T1E, T1P, T5W, T5X;
1038
E T3G, T3J, T3Z, T40;
1050
E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;
1052
E T2g, T2i, T2f, T2h;
1053
T2g = cr[WS(rs, 31)];
1054
T2i = ci[WS(rs, 31)];
1057
T2j = FMA(T2f, T2g, T2h * T2i);
1058
T47 = FNMS(T2h, T2g, T2f * T2i);
1061
E T2w, T2y, T2v, T2x;
1062
T2w = cr[WS(rs, 23)];
1063
T2y = ci[WS(rs, 23)];
1066
T2z = FMA(T2v, T2w, T2x * T2y);
1067
T4q = FNMS(T2x, T2w, T2v * T2y);
1070
E T2l, T2n, T2k, T2m;
1071
T2l = cr[WS(rs, 15)];
1072
T2n = ci[WS(rs, 15)];
1075
T2o = FMA(T2k, T2l, T2m * T2n);
1076
T48 = FNMS(T2m, T2l, T2k * T2n);
1079
E T2r, T2t, T2q, T2s;
1080
T2r = cr[WS(rs, 7)];
1081
T2t = ci[WS(rs, 7)];
1084
T2u = FMA(T2q, T2r, T2s * T2t);
1085
T4p = FNMS(T2s, T2r, T2q * T2t);
1088
E T2p, T2A, T6c, T6d;
1099
E T49, T4a, T4o, T4r;
1111
E To, T37, TE, T3d, Tt, T38, Tz, T3c;
1118
To = FMA(Tk, Tl, Tm * Tn);
1119
T37 = FNMS(Tm, Tl, Tk * Tn);
1123
TB = cr[WS(rs, 12)];
1124
TD = ci[WS(rs, 12)];
1127
TE = FMA(TA, TB, TC * TD);
1128
T3d = FNMS(TC, TB, TA * TD);
1132
Tq = cr[WS(rs, 20)];
1133
Ts = ci[WS(rs, 20)];
1136
Tt = FMA(Tp, Tq, Tr * Ts);
1137
T38 = FNMS(Tr, Tq, Tp * Ts);
1141
Tw = cr[WS(rs, 28)];
1142
Ty = ci[WS(rs, 28)];
1145
Tz = FMA(Tv, Tw, Tx * Ty);
1146
T3c = FNMS(Tx, Tw, Tv * Ty);
1160
E T36, T39, T3b, T3e;
1172
E TM, T3n, T12, T3k, TR, T3o, TX, T3j;
1179
TM = FMA(TI, TJ, TK * TL);
1180
T3n = FNMS(TK, TJ, TI * TL);
1184
TZ = cr[WS(rs, 26)];
1185
T11 = ci[WS(rs, 26)];
1188
T12 = FMA(TY, TZ, T10 * T11);
1189
T3k = FNMS(T10, TZ, TY * T11);
1193
TO = cr[WS(rs, 18)];
1194
TQ = ci[WS(rs, 18)];
1197
TR = FMA(TN, TO, TP * TQ);
1198
T3o = FNMS(TP, TO, TN * TQ);
1202
TU = cr[WS(rs, 10)];
1203
TW = ci[WS(rs, 10)];
1206
TX = FMA(TT, TU, TV * TW);
1207
T3j = FNMS(TV, TU, TT * TW);
1210
E TS, T13, T5L, T5M;
1221
E T3i, T3l, T3p, T3q;
1233
E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
1235
E T16, T18, T15, T17;
1236
T16 = cr[WS(rs, 30)];
1237
T18 = ci[WS(rs, 30)];
1240
T19 = FMA(T15, T16, T17 * T18);
1241
T3t = FNMS(T17, T16, T15 * T18);
1244
E T1m, T1o, T1l, T1n;
1245
T1m = cr[WS(rs, 22)];
1246
T1o = ci[WS(rs, 22)];
1249
T1p = FMA(T1l, T1m, T1n * T1o);
1250
T3A = FNMS(T1n, T1m, T1l * T1o);
1253
E T1b, T1d, T1a, T1c;
1254
T1b = cr[WS(rs, 14)];
1255
T1d = ci[WS(rs, 14)];
1258
T1e = FMA(T1a, T1b, T1c * T1d);
1259
T3u = FNMS(T1c, T1b, T1a * T1d);
1262
E T1h, T1j, T1g, T1i;
1263
T1h = cr[WS(rs, 6)];
1264
T1j = ci[WS(rs, 6)];
1267
T1k = FMA(T1g, T1h, T1i * T1j);
1268
T3z = FNMS(T1i, T1h, T1g * T1j);
1271
E T1f, T1q, T5Q, T5R;
1282
E T3v, T3w, T3y, T3B;
1294
E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;
1296
E T1S, T1U, T1R, T1T;
1297
T1S = cr[WS(rs, 5)];
1298
T1U = ci[WS(rs, 5)];
1301
T1V = FMA(T1R, T1S, T1T * T1U);
1302
T3M = FNMS(T1T, T1S, T1R * T1U);
1305
E T1X, T1Z, T1W, T1Y;
1306
T1X = cr[WS(rs, 21)];
1307
T1Z = ci[WS(rs, 21)];
1310
T20 = FMA(T1W, T1X, T1Y * T1Z);
1311
T3N = FNMS(T1Y, T1X, T1W * T1Z);
1316
E T23, T25, T22, T24;
1317
T23 = cr[WS(rs, 29)];
1318
T25 = ci[WS(rs, 29)];
1321
T26 = FMA(T22, T23, T24 * T25);
1322
T3Q = FNMS(T24, T23, T22 * T25);
1325
E T28, T2a, T27, T29;
1326
T28 = cr[WS(rs, 13)];
1327
T2a = ci[WS(rs, 13)];
1330
T2b = FMA(T27, T28, T29 * T2a);
1331
T3R = FNMS(T29, T28, T27 * T2a);
1336
E T21, T2c, T62, T63;
1347
E T3P, T3U, T42, T43;
1350
T3V = KP707106781 * (T3P - T3U);
1351
T5a = KP707106781 * (T3P + T3U);
1354
T44 = KP707106781 * (T42 - T43);
1355
T57 = KP707106781 * (T43 + T42);
1359
E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f;
1361
E T2D, T2F, T2C, T2E;
1362
T2D = cr[WS(rs, 3)];
1363
T2F = ci[WS(rs, 3)];
1366
T2G = FMA(T2C, T2D, T2E * T2F);
1367
T4i = FNMS(T2E, T2D, T2C * T2F);
1370
E T2I, T2K, T2H, T2J;
1371
T2I = cr[WS(rs, 19)];
1372
T2K = ci[WS(rs, 19)];
1375
T2L = FMA(T2H, T2I, T2J * T2K);
1376
T4j = FNMS(T2J, T2I, T2H * T2K);
1381
E T2O, T2Q, T2N, T2P;
1382
T2O = cr[WS(rs, 27)];
1383
T2Q = ci[WS(rs, 27)];
1386
T2R = FMA(T2N, T2O, T2P * T2Q);
1387
T4d = FNMS(T2P, T2O, T2N * T2Q);
1390
E T2T, T2V, T2S, T2U;
1391
T2T = cr[WS(rs, 11)];
1392
T2V = ci[WS(rs, 11)];
1395
T2W = FMA(T2S, T2T, T2U * T2V);
1396
T4e = FNMS(T2U, T2T, T2S * T2V);
1401
E T2M, T2X, T68, T69;
1412
E T4g, T4l, T4t, T4u;
1415
T4m = KP707106781 * (T4g - T4l);
1416
T5e = KP707106781 * (T4l + T4g);
1419
T4v = KP707106781 * (T4t - T4u);
1420
T5h = KP707106781 * (T4t + T4u);
1424
E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
1426
E TH, T1s, T72, T79;
1437
E T2e, T2Z, T6Y, T6Z;
1447
ci[WS(rs, 15)] = T1t - T30;
1448
cr[WS(rs, 24)] = T7b - T7c;
1449
ci[WS(rs, 23)] = T7b + T7c;
1451
cr[WS(rs, 8)] = T6X - T70;
1452
cr[WS(rs, 16)] = T71 - T7a;
1453
ci[WS(rs, 31)] = T71 + T7a;
1454
ci[WS(rs, 7)] = T6X + T70;
1457
E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
1459
T4W = KP707106781 * (T4U + T4V);
1462
T7z = KP707106781 * (T3a - T3f);
1466
E T50, T53, T5x, T5y;
1467
T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z);
1468
T53 = FNMS(KP923879532, T52, KP382683432 * T51);
1473
T5z = FNMS(KP980785280, T5y, KP195090322 * T5x);
1474
T5D = FMA(KP980785280, T5x, KP195090322 * T5y);
1477
E T58, T5b, T5q, T5r;
1480
T5c = FMA(KP831469612, T58, KP555570233 * T5b);
1481
T5m = FNMS(KP831469612, T5b, KP555570233 * T58);
1482
T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z);
1483
T5r = FMA(KP382683432, T52, KP923879532 * T51);
1488
E T5u, T5v, T5f, T5i;
1491
T5w = FMA(KP195090322, T5u, KP980785280 * T5v);
1492
T5C = FNMS(KP195090322, T5v, KP980785280 * T5u);
1495
T5j = FNMS(KP555570233, T5i, KP831469612 * T5f);
1496
T5n = FMA(KP555570233, T5f, KP831469612 * T5i);
1499
E T55, T5k, T7H, T7K;
1502
ci[WS(rs, 12)] = T55 - T5k;
1503
cr[WS(rs, 3)] = T55 + T5k;
1506
cr[WS(rs, 19)] = T7H - T7K;
1507
ci[WS(rs, 28)] = T7H + T7K;
1510
E T7L, T7M, T5l, T5o;
1513
cr[WS(rs, 27)] = T7L - T7M;
1514
ci[WS(rs, 20)] = T7L + T7M;
1517
cr[WS(rs, 11)] = T5l - T5o;
1518
ci[WS(rs, 4)] = T5l + T5o;
1521
E T5t, T5A, T7x, T7E;
1524
ci[WS(rs, 8)] = T5t - T5A;
1525
cr[WS(rs, 7)] = T5t + T5A;
1528
cr[WS(rs, 31)] = T7x - T7E;
1529
ci[WS(rs, 16)] = T7x + T7E;
1532
E T7F, T7G, T5B, T5E;
1535
cr[WS(rs, 23)] = T7F - T7G;
1536
ci[WS(rs, 24)] = T7F + T7G;
1539
cr[WS(rs, 15)] = T5B - T5E;
1544
E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
1546
E T6D, T6G, T7e, T7f;
1557
E T6I, T6L, T6N, T6Q;
1568
E T6S, T7h, T6W, T7d;
1569
T6S = KP707106781 * (T6M + T6R);
1570
ci[WS(rs, 11)] = T6H - T6S;
1571
cr[WS(rs, 4)] = T6H + T6S;
1572
T7h = KP707106781 * (T6V - T6U);
1573
cr[WS(rs, 20)] = T7h - T7i;
1574
ci[WS(rs, 27)] = T7h + T7i;
1575
T6W = KP707106781 * (T6U + T6V);
1576
cr[WS(rs, 12)] = T6T - T6W;
1577
ci[WS(rs, 3)] = T6T + T6W;
1578
T7d = KP707106781 * (T6R - T6M);
1579
cr[WS(rs, 28)] = T7d - T7g;
1580
ci[WS(rs, 19)] = T7d + T7g;
1584
E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
1587
E T5O, T5T, T60, T65;
1594
T5U = KP707106781 * (T5O + T5T);
1595
T7k = KP707106781 * (T5O - T5T);
1597
E T6v, T6w, T6o, T6p;
1600
T6x = FMA(KP382683432, T6v, KP923879532 * T6w);
1601
T6B = FNMS(KP923879532, T6v, KP382683432 * T6w);
1604
T6q = KP707106781 * (T6o + T6p);
1605
T7s = KP707106781 * (T6p - T6o);
1609
T66 = FMA(KP382683432, T60, KP923879532 * T65);
1610
T6k = FNMS(KP923879532, T60, KP382683432 * T65);
1612
E T6s, T6t, T6b, T6g;
1615
T6u = FNMS(KP382683432, T6t, KP923879532 * T6s);
1616
T6A = FMA(KP923879532, T6t, KP382683432 * T6s);
1619
T6h = FNMS(KP382683432, T6g, KP923879532 * T6b);
1620
T6l = FMA(KP923879532, T6g, KP382683432 * T6b);
1624
E T5V, T6i, T7r, T7u;
1627
ci[WS(rs, 13)] = T5V - T6i;
1628
cr[WS(rs, 2)] = T5V + T6i;
1631
cr[WS(rs, 18)] = T7r - T7u;
1632
ci[WS(rs, 29)] = T7r + T7u;
1635
E T7v, T7w, T6j, T6m;
1638
cr[WS(rs, 26)] = T7v - T7w;
1639
ci[WS(rs, 21)] = T7v + T7w;
1642
cr[WS(rs, 10)] = T6j - T6m;
1643
ci[WS(rs, 5)] = T6j + T6m;
1646
E T6r, T6y, T7j, T7o;
1649
cr[WS(rs, 14)] = T6r - T6y;
1650
ci[WS(rs, 1)] = T6r + T6y;
1653
cr[WS(rs, 30)] = T7j - T7o;
1654
ci[WS(rs, 17)] = T7j + T7o;
1657
E T7p, T7q, T6z, T6C;
1660
cr[WS(rs, 22)] = T7p - T7q;
1661
ci[WS(rs, 25)] = T7p + T7q;
1664
ci[WS(rs, 9)] = T6z - T6C;
1665
cr[WS(rs, 6)] = T6z + T6C;
1669
E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
1671
T3g = KP707106781 * (T3a + T3f);
1674
T7P = KP707106781 * (T4V - T4U);
1678
E T3s, T3D, T4L, T4M;
1679
T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
1680
T3D = FMA(KP923879532, T3x, KP382683432 * T3C);
1685
T4N = FNMS(KP195090322, T4M, KP980785280 * T4L);
1686
T4R = FMA(KP980785280, T4M, KP195090322 * T4L);
1689
E T3W, T45, T4E, T4F;
1692
T46 = FNMS(KP555570233, T45, KP831469612 * T3W);
1693
T4A = FMA(KP831469612, T45, KP555570233 * T3W);
1694
T4E = FMA(KP382683432, T3r, KP923879532 * T3m);
1695
T4F = FNMS(KP382683432, T3x, KP923879532 * T3C);
1700
E T4I, T4J, T4n, T4w;
1703
T4K = FMA(KP195090322, T4I, KP980785280 * T4J);
1704
T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J);
1707
T4x = FMA(KP555570233, T4n, KP831469612 * T4w);
1708
T4B = FNMS(KP831469612, T4n, KP555570233 * T4w);
1711
E T3F, T4y, T7V, T7Y;
1714
cr[WS(rs, 13)] = T3F - T4y;
1715
ci[WS(rs, 2)] = T3F + T4y;
1718
cr[WS(rs, 29)] = T7V - T7Y;
1719
ci[WS(rs, 18)] = T7V + T7Y;
1722
E T7Z, T80, T4z, T4C;
1725
cr[WS(rs, 21)] = T7Z - T80;
1726
ci[WS(rs, 26)] = T7Z + T80;
1729
ci[WS(rs, 10)] = T4z - T4C;
1730
cr[WS(rs, 5)] = T4z + T4C;
1733
E T4H, T4O, T7N, T7S;
1736
ci[WS(rs, 14)] = T4H - T4O;
1737
cr[WS(rs, 1)] = T4H + T4O;
1740
cr[WS(rs, 17)] = T7N - T7S;
1741
ci[WS(rs, 30)] = T7N + T7S;
1744
E T7T, T7U, T4P, T4S;
1747
cr[WS(rs, 25)] = T7T - T7U;
1748
ci[WS(rs, 22)] = T7T + T7U;
1751
cr[WS(rs, 9)] = T4P - T4S;
1752
ci[WS(rs, 6)] = T4P + T4S;