/*
 * Copyright (c) 2003, 2006 Matteo Frigo
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jul 2 16:32:54 EDT 2006 */
24
#include "codelet-rdft.h"
28
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */
/*
 * This function contains 1038 FP additions, 644 FP multiplications,
 * (or, 520 additions, 126 multiplications, 518 fused multiply/add),
 * 234 stack variables, and 256 memory accesses
 */
/*
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
 */
44
static const R *hb_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
46
DK(KP773010453, +0.773010453362736960810906609758469800971041293);
47
DK(KP820678790, +0.820678790828660330972281985331011598767386482);
48
DK(KP956940335, +0.956940335732208864935797886980269969482849206);
49
DK(KP303346683, +0.303346683607342391675883946941299872384187453);
50
DK(KP995184726, +0.995184726672196886244836953109479921575474869);
51
DK(KP980785280, +0.980785280403230449126182236134239036973933731);
52
DK(KP098491403, +0.098491403357164253077197521291327432293052451);
53
DK(KP881921264, +0.881921264348355029712756863660388349508442621);
54
DK(KP831469612, +0.831469612302545237078788377617905756738560812);
55
DK(KP534511135, +0.534511135950791641089685961295362908582039528);
56
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
57
DK(KP198912367, +0.198912367379658006911597622644676228597850501);
58
DK(KP668178637, +0.668178637919298919997757686523080761552472251);
59
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
60
DK(KP414213562, +0.414213562373095048801688724209698078569671875);
62
for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126, MAKE_VOLATILE_STRIDE(ios)) {
65
E T8v, Ta9, Tv, T9O, Tj7, TgN, TjA, ThQ, Tj6, ThN, T6y, T2v, T64, T2c, T6x;
66
E T4L, TeV, Tbv, Tfn, Tbg, Tjz, TgG, T65, T4O, T96, T7S, Ta8, T8y, Tfo, TdE;
67
E TeU, TdB, T10, Ta7, T9R, TaN, T2P, T4Q, T8A, T7X, T4R, T38, T8B, T82, T6A;
68
E T69, T6B, T6c, TeZ, Tfq, TdG, TbP, Tjb, TjC, ThS, TgV, Tf2, Tfr, TdH, Tc8;
69
E Tje, TjD, ThT, Th2, T9T, T1w, T9a, T8e, TaF, T9W, T99, T89, T6k, T7a, T5p;
70
E T3D, T6h, T79, T5q, T3Q, Tf7, Tg0, Teg, TcB, Tfa, TfZ, Tef, TcM, Tjj, Tkc;
71
E Tis, Thf, Tjm, Tkb, Tir, Thm, T21, T9Y, T9d, T8p, TaG, Ta1, T9c, T8k, T6r;
72
E T7d, T5s, T4k, T6o, T7c, T5t, T4x, Tfe, Tg3, Tej, Tde, Thq, Tjr, Tfh, Tg2;
73
E Tei, Tdp, ThC, ThD, Tjo, ThB, Tjp, Thx;
75
E Tc3, TgW, TgZ, TgX, Tc0, Tf1, Tc6, Th0, TdA, Tdx;
77
E Tdw, T4G, Tb7, T4J, Tb6, Tdv, T24, T7, T27, T2a, Tbe, Tdy, Tbb, Tdz, Te;
78
E T4D, Tbt, TgK, T2m, Tt, Tbq, TgL, T7Q, T2t, Tj, Tbh, Ti, Tbl, T2g, Tk;
85
T4F = rio[WS(ios, 32)];
86
T4H = iio[-WS(ios, 16)];
87
T4I = rio[WS(ios, 48)];
93
T2 = iio[-WS(ios, 32)];
94
T4 = rio[WS(ios, 16)];
95
T5 = iio[-WS(ios, 48)];
100
E Tbc, T28, Tbd, T29, Tb, Tc, T8, T9;
103
T25 = iio[-WS(ios, 8)];
108
T26 = rio[WS(ios, 40)];
109
T8 = rio[WS(ios, 8)];
110
T9 = iio[-WS(ios, 40)];
116
T28 = iio[-WS(ios, 24)];
119
T29 = rio[WS(ios, 56)];
120
Tb = iio[-WS(ios, 56)];
121
Tc = rio[WS(ios, 24)];
134
E Tq, Tbo, Tp, Tbs, T2p, Tr, T2q, T2r;
137
Tn = iio[-WS(ios, 60)];
138
To = rio[WS(ios, 28)];
139
T2n = iio[-WS(ios, 28)];
140
T2o = rio[WS(ios, 60)];
141
Tq = rio[WS(ios, 12)];
146
Tr = iio[-WS(ios, 44)];
147
T2q = iio[-WS(ios, 12)];
148
T2r = rio[WS(ios, 44)];
152
Tg = rio[WS(ios, 4)];
167
Th = iio[-WS(ios, 36)];
169
T2e = iio[-WS(ios, 4)];
170
T2f = rio[WS(ios, 36)];
171
Tj = rio[WS(ios, 20)];
176
Tk = iio[-WS(ios, 52)];
177
T2h = iio[-WS(ios, 20)];
178
T2i = rio[WS(ios, 52)];
184
E T7O, Tbm, Tbj, T7R, Tb8, T4M, T2l, T2u, T4N, T2b, T8x;
186
E T2d, T2k, ThO, TgJ, TgM, ThP, ThL, ThM;
188
E Tf, TgH, TgI, Tu, T7P;
192
E Tbk, Tl, Tbi, T2j, Tm;
210
ThO = FMA(KP414213562, TgH, TgI);
211
TgJ = FNMS(KP414213562, TgI, TgH);
212
TgM = FNMS(KP414213562, TgL, TgK);
213
ThP = FMA(KP414213562, TgK, TgL);
227
Tj6 = FNMS(KP707106781, ThM, ThL);
228
ThN = FMA(KP707106781, ThM, ThL);
235
E T8w, TdC, TdD, Tbf, TgF, TgE, T4K, Tbn, Tbu;
242
TdC = FMA(KP414213562, Tbj, Tbm);
243
Tbn = FNMS(KP414213562, Tbm, Tbj);
244
Tbu = FMA(KP414213562, Tbt, Tbq);
245
TdD = FNMS(KP414213562, Tbq, Tbt);
254
Tfn = FNMS(KP707106781, Tbf, Tb8);
255
Tbg = FMA(KP707106781, Tbf, Tb8);
256
Tjz = FNMS(KP707106781, TgF, TgE);
257
TgG = FMA(KP707106781, TgF, TgE);
270
E TbK, TgP, T2x, TC, Tbz, TgS, T7U, T2N, TbV, Tc4, TY, T2Z, Tc5, TbY, T2X;
271
E T80, TbF, TbL, TJ, T2G, TbM, TbC, T2E, T7V, TbQ, TN, T33, Tc2, T32, Tc1;
274
E T2T, TbW, T2W, TbX;
276
E Tbx, Ty, T2K, TbJ, T2J, TbI, TB, T2L;
278
E T2H, T2I, Tw, Tx, Tz, TA;
279
Tw = rio[WS(ios, 2)];
280
Tx = iio[-WS(ios, 34)];
281
T2H = iio[-WS(ios, 2)];
282
TeU = FNMS(KP707106781, TdA, Tdx);
283
TdB = FMA(KP707106781, TdA, Tdx);
286
T2I = rio[WS(ios, 34)];
287
Tz = rio[WS(ios, 18)];
288
TA = iio[-WS(ios, 50)];
289
T2K = iio[-WS(ios, 18)];
294
T2L = rio[WS(ios, 50)];
297
E TbT, TU, T2U, TbU, TX, T2V;
302
TS = rio[WS(ios, 6)];
309
TT = iio[-WS(ios, 38)];
310
T2R = iio[-WS(ios, 6)];
317
T2S = rio[WS(ios, 38)];
319
TV = iio[-WS(ios, 54)];
320
TW = rio[WS(ios, 22)];
321
T2U = iio[-WS(ios, 22)];
326
T2V = rio[WS(ios, 54)];
337
E T2A, TbA, T2D, TbB;
339
E TbE, TF, T2B, TbD, TI, T2C;
341
E T2y, T2z, TD, TE, TG, TH;
342
TD = rio[WS(ios, 10)];
343
TE = iio[-WS(ios, 42)];
350
T2y = iio[-WS(ios, 10)];
351
T2z = rio[WS(ios, 42)];
352
TG = iio[-WS(ios, 58)];
353
TH = rio[WS(ios, 26)];
354
T2B = iio[-WS(ios, 26)];
359
T2C = rio[WS(ios, 58)];
369
E T30, T31, TL, TM, TO, TP;
370
TL = iio[-WS(ios, 62)];
371
TM = rio[WS(ios, 30)];
378
T30 = iio[-WS(ios, 30)];
379
T31 = rio[WS(ios, 62)];
380
TO = rio[WS(ios, 14)];
381
TP = iio[-WS(ios, 46)];
382
T33 = iio[-WS(ios, 14)];
387
T34 = rio[WS(ios, 46)];
392
E T7Y, TbS, T67, T81, T68, T6a, T2Y, T37, T6b;
394
E T7T, T2Q, T9P, T7W, T36, T2F, T9Q, T2O;
396
E TK, TR, TbR, T35, T7Z, TZ;
424
T2P = FMA(KP414213562, T2O, T2F);
425
T4Q = FNMS(KP414213562, T2F, T2O);
434
E TgQ, TbH, TeY, TbN, TgT, TbG, TbZ, TeX, TbO;
435
T4R = FMA(KP414213562, T2Y, T37);
436
T38 = FNMS(KP414213562, T37, T2Y);
441
T6A = FMA(KP414213562, T67, T68);
442
T69 = FNMS(KP414213562, T68, T67);
443
T6B = FNMS(KP414213562, T6a, T6b);
444
T6c = FMA(KP414213562, T6b, T6a);
445
TbH = FMA(KP707106781, TbG, Tbz);
446
TeY = FNMS(KP707106781, TbG, Tbz);
451
TeX = FNMS(KP707106781, TbN, TbK);
452
TbO = FMA(KP707106781, TbN, TbK);
454
E Tja, TgR, Tj9, TgU;
455
Tja = FNMS(KP707106781, TgQ, TgP);
456
TgR = FMA(KP707106781, TgQ, TgP);
457
Tj9 = FNMS(KP707106781, TgT, TgS);
458
TgU = FMA(KP707106781, TgT, TgS);
459
TeZ = FNMS(KP668178637, TeY, TeX);
460
Tfq = FMA(KP668178637, TeX, TeY);
461
TdG = FMA(KP198912367, TbH, TbO);
462
TbP = FNMS(KP198912367, TbO, TbH);
463
Tjb = FNMS(KP668178637, Tja, Tj9);
464
TjC = FMA(KP668178637, Tj9, Tja);
465
ThS = FMA(KP198912367, TgR, TgU);
466
TgV = FNMS(KP198912367, TgU, TgR);
467
Tc0 = FMA(KP707106781, TbZ, TbS);
468
Tf1 = FNMS(KP707106781, TbZ, TbS);
476
E Th7, Tjk, Thj, Thk, Tjh, Thi, Tji, The;
478
E TcE, Th5, T3b, T18, Tcd, Thg, T8b, T3L, Tcy, Thb, T3t, T1u, Tcv, Thc, T87;
479
E T3A, Tcj, TcF, T1f, T3E, TcG, Tcg, T8c, T3i, T1k, Tcm, T1j, Tcq, T3n, T1l;
482
E T1r, Tct, T1q, Tcx, T3w, T1s, T3x, T3y;
484
E T15, Tcb, T14, TcD, T3H, T16, T3I, T3J;
486
E T12, T13, T3F, T3G, Tf0, Tc7;
487
T12 = rio[WS(ios, 1)];
488
Tf0 = FNMS(KP707106781, Tc6, Tc3);
489
Tc7 = FMA(KP707106781, Tc6, Tc3);
491
E Tjd, TgY, Tjc, Th1;
492
Tjd = FNMS(KP707106781, TgX, TgW);
493
TgY = FMA(KP707106781, TgX, TgW);
494
Tjc = FNMS(KP707106781, Th0, TgZ);
495
Th1 = FMA(KP707106781, Th0, TgZ);
496
Tf2 = FMA(KP668178637, Tf1, Tf0);
497
Tfr = FNMS(KP668178637, Tf0, Tf1);
498
TdH = FNMS(KP198912367, Tc0, Tc7);
499
Tc8 = FMA(KP198912367, Tc7, Tc0);
500
Tje = FNMS(KP668178637, Tjd, Tjc);
501
TjD = FMA(KP668178637, Tjc, Tjd);
502
ThT = FMA(KP198912367, TgY, Th1);
503
Th2 = FNMS(KP198912367, Th1, TgY);
504
T13 = iio[-WS(ios, 33)];
506
T3F = iio[-WS(ios, 1)];
507
T3G = rio[WS(ios, 33)];
508
T15 = rio[WS(ios, 17)];
513
T16 = iio[-WS(ios, 49)];
514
T3I = iio[-WS(ios, 17)];
515
T3J = rio[WS(ios, 49)];
518
E T1o, T1p, T3u, T3v;
519
T1o = iio[-WS(ios, 61)];
521
E TcC, T17, Tcc, T3K;
534
T1p = rio[WS(ios, 29)];
536
T3u = iio[-WS(ios, 29)];
537
T3v = rio[WS(ios, 61)];
538
T1r = rio[WS(ios, 13)];
543
T1s = iio[-WS(ios, 45)];
544
T3x = iio[-WS(ios, 13)];
545
T3y = rio[WS(ios, 45)];
549
E T1c, Tci, T1b, Tch, T3e, T1d, T3f, T3g;
551
E T19, T1a, T3c, T3d;
552
T19 = rio[WS(ios, 9)];
554
E Tcw, T1t, Tcu, T3z;
567
T1a = iio[-WS(ios, 41)];
569
T3c = iio[-WS(ios, 9)];
570
T3d = rio[WS(ios, 41)];
571
T1c = iio[-WS(ios, 57)];
576
T1d = rio[WS(ios, 25)];
577
T3f = iio[-WS(ios, 25)];
578
T3g = rio[WS(ios, 57)];
581
E Tce, T1e, Tcf, T3h;
597
E T1h, T1i, T3l, T3m;
598
T1h = rio[WS(ios, 5)];
599
T1i = iio[-WS(ios, 37)];
600
T3l = iio[-WS(ios, 5)];
601
T3m = rio[WS(ios, 37)];
602
T1k = rio[WS(ios, 21)];
607
T1l = iio[-WS(ios, 53)];
608
T3o = iio[-WS(ios, 21)];
609
T3p = rio[WS(ios, 53)];
613
E T85, Tcr, Th8, T3k, Tco, Th9, T3r, T9U, T9V, T88;
615
E T1g, T86, T1v, T8a, T8d;
619
E Tcp, T1m, Tcn, T3q, T1n;
645
E T6f, T3j, T6i, T3M, T3N, T3O, T6j, T3C, T3s, T3B;
661
E Th6, Tcl, Tf8, Thh, TcJ, TcK, Tf5, TcI, Tf6, TcA;
663
E TcH, T6g, T3P, Tck, Tcs, Tcz;
666
T6k = FMA(KP707106781, T6j, T6i);
667
T7a = FNMS(KP707106781, T6j, T6i);
668
T5p = FNMS(KP707106781, T3C, T3j);
669
T3D = FMA(KP707106781, T3C, T3j);
672
Tcl = FMA(KP707106781, Tck, Tcd);
673
Tf8 = FNMS(KP707106781, Tck, Tcd);
674
T6h = FMA(KP707106781, T6g, T6f);
675
T79 = FNMS(KP707106781, T6g, T6f);
676
T5q = FNMS(KP707106781, T3P, T3M);
677
T3Q = FMA(KP707106781, T3P, T3M);
680
TcJ = FMA(KP414213562, Tco, Tcr);
681
Tcs = FNMS(KP414213562, Tcr, Tco);
682
Tcz = FMA(KP414213562, Tcy, Tcv);
683
TcK = FNMS(KP414213562, Tcv, Tcy);
684
Tf5 = FNMS(KP707106781, TcH, TcE);
685
TcI = FMA(KP707106781, TcH, TcE);
690
E Tf9, TcL, Tha, Thd;
691
Tf7 = FMA(KP923879532, Tf6, Tf5);
692
Tg0 = FNMS(KP923879532, Tf6, Tf5);
693
Teg = FNMS(KP923879532, TcA, Tcl);
694
TcB = FMA(KP923879532, TcA, Tcl);
697
Th7 = FMA(KP707106781, Th6, Th5);
698
Tjk = FNMS(KP707106781, Th6, Th5);
699
Tfa = FMA(KP923879532, Tf9, Tf8);
700
TfZ = FNMS(KP923879532, Tf9, Tf8);
701
Tef = FNMS(KP923879532, TcL, TcI);
702
TcM = FMA(KP923879532, TcL, TcI);
703
Thj = FMA(KP414213562, Th8, Th9);
704
Tha = FNMS(KP414213562, Th9, Th8);
705
Thd = FNMS(KP414213562, Thc, Thb);
706
Thk = FMA(KP414213562, Thb, Thc);
707
Tjh = FNMS(KP707106781, Thh, Thg);
708
Thi = FMA(KP707106781, Thh, Thg);
717
E Tdh, Tho, T3S, T1D, TcQ, Thz, T8m, T4s, Tdb, Thu, T4a, T1Z, Td8, Thv, T8i;
718
E T4h, TcT, Tdi, T1K, T4l, Tdj, TcW, T3Z, T8n, TcZ, T1O, T45, Td3, T44, Td2;
721
E Td6, T1V, T4e, Tda, T4d, Td9, T1Y, T4f;
723
E TcO, T1z, T4p, Tdg, T4o, Tdf, T1C, T4q;
725
E T4m, T4n, T1A, T1B;
727
E T1x, Tjl, Thl, T1y;
728
T1x = iio[-WS(ios, 63)];
729
Tjj = FNMS(KP923879532, Tji, Tjh);
730
Tkc = FMA(KP923879532, Tji, Tjh);
731
Tis = FNMS(KP923879532, The, Th7);
732
Thf = FMA(KP923879532, The, Th7);
735
T1y = rio[WS(ios, 31)];
736
T4m = iio[-WS(ios, 31)];
737
Tjm = FMA(KP923879532, Tjl, Tjk);
738
Tkb = FNMS(KP923879532, Tjl, Tjk);
739
Tir = FNMS(KP923879532, Thl, Thi);
740
Thm = FMA(KP923879532, Thl, Thi);
743
T4n = rio[WS(ios, 63)];
745
T1A = rio[WS(ios, 15)];
746
T1B = iio[-WS(ios, 47)];
747
T4p = iio[-WS(ios, 15)];
752
T4q = rio[WS(ios, 47)];
755
E T4b, T4c, T1W, T1X;
757
E T1T, TcP, T4r, T1U;
758
T1T = iio[-WS(ios, 59)];
765
T1U = rio[WS(ios, 27)];
766
T4b = iio[-WS(ios, 27)];
773
T4c = rio[WS(ios, 59)];
775
T1W = rio[WS(ios, 11)];
776
T1X = iio[-WS(ios, 43)];
777
T4e = iio[-WS(ios, 11)];
782
T4f = rio[WS(ios, 43)];
786
E T3V, TcU, T3Y, TcV;
788
E TcR, T1G, T3W, TcS, T1J, T3X;
790
E T3T, T3U, T1H, T1I;
792
E T1E, Td7, T4g, T1F;
793
T1E = rio[WS(ios, 7)];
800
T1F = iio[-WS(ios, 39)];
801
T3T = iio[-WS(ios, 7)];
808
T3U = rio[WS(ios, 39)];
810
T1H = iio[-WS(ios, 55)];
811
T1I = rio[WS(ios, 23)];
812
T3W = iio[-WS(ios, 23)];
817
T3X = rio[WS(ios, 55)];
827
E T42, T43, T1M, T1N, T1P, T1Q;
828
T1M = rio[WS(ios, 3)];
829
T1N = iio[-WS(ios, 35)];
836
T42 = iio[-WS(ios, 3)];
837
T43 = rio[WS(ios, 35)];
838
T1P = rio[WS(ios, 19)];
839
T1Q = iio[-WS(ios, 51)];
840
T45 = iio[-WS(ios, 19)];
845
T46 = rio[WS(ios, 51)];
850
E T8g, Td4, Thr, T41, T9Z, Ths, Td1, T48, Ta0, T8j, T8h;
852
E T1L, T1S, Td0, T47, T8o, T20, T8l;
877
E T6m, T40, T6p, T4t, T4u, T4v, T6q, T4j, T49, T4i;
893
E Thp, TcY, Tff, ThA, Tdm, Tdn, Tfc, Tdl, Tfd, Tdd;
895
E Tdk, T6n, T4w, TcX, Td5, Tdc;
898
T6r = FMA(KP707106781, T6q, T6p);
899
T7d = FNMS(KP707106781, T6q, T6p);
900
T5s = FNMS(KP707106781, T4j, T40);
901
T4k = FMA(KP707106781, T4j, T40);
904
TcY = FMA(KP707106781, TcX, TcQ);
905
Tff = FNMS(KP707106781, TcX, TcQ);
906
T6o = FMA(KP707106781, T6n, T6m);
907
T7c = FNMS(KP707106781, T6n, T6m);
908
T5t = FNMS(KP707106781, T4w, T4t);
909
T4x = FMA(KP707106781, T4w, T4t);
912
Tdm = FMA(KP414213562, Td1, Td4);
913
Td5 = FNMS(KP414213562, Td4, Td1);
914
Tdc = FMA(KP414213562, Tdb, Td8);
915
Tdn = FNMS(KP414213562, Td8, Tdb);
916
Tfc = FNMS(KP707106781, Tdk, Tdh);
917
Tdl = FMA(KP707106781, Tdk, Tdh);
922
E Tfg, Tdo, Tht, Thw;
923
Tfe = FMA(KP923879532, Tfd, Tfc);
924
Tg3 = FNMS(KP923879532, Tfd, Tfc);
925
Tej = FNMS(KP923879532, Tdd, TcY);
926
Tde = FMA(KP923879532, Tdd, TcY);
929
Thq = FMA(KP707106781, Thp, Tho);
930
Tjr = FNMS(KP707106781, Thp, Tho);
931
Tfh = FMA(KP923879532, Tfg, Tff);
932
Tg2 = FNMS(KP923879532, Tfg, Tff);
933
Tei = FNMS(KP923879532, Tdo, Tdl);
934
Tdp = FMA(KP923879532, Tdo, Tdl);
935
ThC = FNMS(KP414213562, Thr, Ths);
936
Tht = FMA(KP414213562, Ths, Thr);
937
Thw = FNMS(KP414213562, Thv, Thu);
938
ThD = FMA(KP414213562, Thu, Thv);
939
Tjo = FNMS(KP707106781, ThA, Thz);
940
ThB = FMA(KP707106781, ThA, Thz);
951
E TiX, TiU, TiT, TeL, TeI, TeH;
953
E Tjq, Tkf, Tiv, Thy, Tjt, Tke, Tiu, ThF, T9F, T9C, T9x, T9G, T9B;
955
E Tav, Tas, Tan, Taw, Tar;
957
E Taa, TaY, Tb3, Tb0, TaX, Tb4;
959
E T22, TaL, Tb2, T11, TaM, TaS, TaI, TaO, Tb1;
961
E Tjs, ThE, TaH, TaE;
964
Tjq = FMA(KP923879532, Tjp, Tjo);
965
Tkf = FNMS(KP923879532, Tjp, Tjo);
966
Tiv = FNMS(KP923879532, Thx, Thq);
967
Thy = FMA(KP923879532, Thx, Thq);
974
Tjt = FMA(KP923879532, Tjs, Tjr);
975
Tke = FNMS(KP923879532, Tjs, Tjr);
976
Tiu = FNMS(KP923879532, ThE, ThB);
977
ThF = FMA(KP923879532, ThE, ThB);
988
E TaK, TaD, TaP, TaV, TaQ, TaJ;
991
iio[-WS(ios, 63)] = Tb2 + Tb1;
997
E TaU, TaR, TaW, TaT;
999
iio[-WS(ios, 15)] = FMA(TaD, TaP, TaQ);
1000
rio[WS(ios, 48)] = FNMS(TaK, TaP, TaJ);
1006
iio[-WS(ios, 47)] = FMA(TaR, TaV, TaW);
1009
rio[WS(ios, 16)] = FNMS(TaU, TaV, TaT);
1014
E Tao, T9S, T9X, Tab, Tat, Ta2, Tap, Tae, TaZ, Tac, Tad;
1016
iio[-WS(ios, 31)] = FMA(TaX, Tb3, Tb4);
1019
rio[WS(ios, 32)] = FNMS(Tb0, Tb3, TaZ);
1029
E Tal, Taq, Tai, TaB, TaA, Taz;
1031
E Ta6, Tau, Taf, Tay, Ta4, T9N, Ta3;
1035
Taf = FMA(KP707106781, Tae, Tab);
1036
Tal = FNMS(KP707106781, Tae, Tab);
1037
Tay = FNMS(KP707106781, Tap, Tao);
1038
Taq = FMA(KP707106781, Tap, Tao);
1039
Ta4 = FMA(KP707106781, Ta3, T9S);
1040
Tai = FNMS(KP707106781, Ta3, T9S);
1043
E Tax, TaC, Tag, Ta5;
1044
TaB = FNMS(KP707106781, Tau, Tat);
1045
Tav = FMA(KP707106781, Tau, Tat);
1051
iio[-WS(ios, 7)] = FMA(T9N, Taf, Tag);
1052
rio[WS(ios, 56)] = FNMS(Ta6, Taf, Ta5);
1054
iio[-WS(ios, 23)] = FMA(Tax, TaB, TaC);
1058
E Tak, Tah, Tam, Taj;
1060
rio[WS(ios, 40)] = FNMS(TaA, TaB, Taz);
1066
iio[-WS(ios, 39)] = FMA(Tah, Tal, Tam);
1068
rio[WS(ios, 24)] = FNMS(Tak, Tal, Taj);
1075
E T9j, T97, T9k, T8X, T8U, T8P, T8Y, T8T;
1077
E T8E, T8f, T8D, T8V, T84, T8Q, T8q, T8F, T8z, T8C, T83;
1080
iio[-WS(ios, 55)] = FMA(Tan, Tav, Taw);
1083
rio[WS(ios, 8)] = FNMS(Tas, Tav, Tar);
1086
T8E = FNMS(KP414213562, T89, T8e);
1087
T8f = FMA(KP414213562, T8e, T89);
1088
T8D = FMA(KP707106781, T8C, T8z);
1089
T8V = FNMS(KP707106781, T8C, T8z);
1090
T84 = FMA(KP707106781, T83, T7S);
1091
T8Q = FNMS(KP707106781, T83, T7S);
1092
T8q = FNMS(KP414213562, T8p, T8k);
1093
T8F = FMA(KP414213562, T8k, T8p);
1095
E T8N, T8S, T8K, T93, T92, T91;
1097
E T8u, T8W, T8H, T90, T8s, T7N, T8R, T8G, T8r;
1103
T8N = FNMS(KP923879532, T8G, T8D);
1104
T8H = FMA(KP923879532, T8G, T8D);
1105
T8S = FMA(KP923879532, T8R, T8Q);
1106
T90 = FNMS(KP923879532, T8R, T8Q);
1107
T8K = FNMS(KP923879532, T8r, T84);
1108
T8s = FMA(KP923879532, T8r, T84);
1111
E T8Z, T94, T8I, T8t;
1112
T8X = FMA(KP923879532, T8W, T8V);
1113
T93 = FNMS(KP923879532, T8W, T8V);
1119
iio[-WS(ios, 3)] = FMA(T7N, T8H, T8I);
1120
rio[WS(ios, 60)] = FNMS(T8u, T8H, T8t);
1122
iio[-WS(ios, 19)] = FMA(T8Z, T93, T94);
1126
E T8M, T8J, T8O, T8L;
1128
rio[WS(ios, 44)] = FNMS(T92, T93, T91);
1134
iio[-WS(ios, 35)] = FMA(T8J, T8N, T8O);
1136
rio[WS(ios, 28)] = FNMS(T8M, T8N, T8L);
1142
E T9b, T9m, T9D, T9l, T9y, T98, T9n, T9e;
1143
iio[-WS(ios, 51)] = FMA(T8P, T8X, T8Y);
1144
rio[WS(ios, 12)] = FNMS(T8U, T8X, T8T);
1145
T9b = FNMS(KP414213562, T9a, T99);
1146
T9m = FMA(KP414213562, T99, T9a);
1147
T9D = FNMS(KP707106781, T9k, T9j);
1148
T9l = FMA(KP707106781, T9k, T9j);
1149
T9y = FNMS(KP707106781, T97, T96);
1150
T98 = FMA(KP707106781, T97, T96);
1151
T9n = FNMS(KP414213562, T9c, T9d);
1152
T9e = FMA(KP414213562, T9d, T9c);
1154
E T9v, T9A, T9s, T9L, T9K, T9J;
1156
E T9i, T9E, T9p, T9I, T9g, T95, T9o, T9z, T9f;
1162
T9p = FMA(KP923879532, T9o, T9l);
1163
T9v = FNMS(KP923879532, T9o, T9l);
1164
T9I = FNMS(KP923879532, T9z, T9y);
1165
T9A = FMA(KP923879532, T9z, T9y);
1166
T9g = FMA(KP923879532, T9f, T98);
1167
T9s = FNMS(KP923879532, T9f, T98);
1170
E T9H, T9M, T9q, T9h;
1171
T9L = FNMS(KP923879532, T9E, T9D);
1172
T9F = FMA(KP923879532, T9E, T9D);
1178
iio[-WS(ios, 59)] = FMA(T95, T9p, T9q);
1179
rio[WS(ios, 4)] = FNMS(T9i, T9p, T9h);
1181
iio[-WS(ios, 43)] = FMA(T9H, T9L, T9M);
1185
E T9u, T9r, T9w, T9t;
1187
rio[WS(ios, 20)] = FNMS(T9K, T9L, T9J);
1193
iio[-WS(ios, 27)] = FMA(T9r, T9v, T9w);
1195
rio[WS(ios, 36)] = FNMS(T9u, T9v, T9t);
1203
E TjB, Tjf, Tj8, TjE, TiB, Tip, Tio, TiC, Tif, Tic, Tib, TkH, TkE, TkD, TjZ;
1206
E TkK, Tkx, Tki, TkC, TkN, Tku, Tkr;
1208
E Tkd, Tkg, Tka, TkF, TkG, Tkq, Tkl, Tkm;
1210
E Tko, Tkp, Tk8, Tk9;
1211
TjB = FMA(KP923879532, TjA, Tjz);
1212
Tk8 = FNMS(KP923879532, TjA, Tjz);
1213
iio[-WS(ios, 11)] = FMA(T9x, T9F, T9G);
1216
rio[WS(ios, 52)] = FNMS(T9C, T9F, T9B);
1217
Tkd = FNMS(KP534511135, Tkc, Tkb);
1218
Tko = FMA(KP534511135, Tkb, Tkc);
1219
Tkp = FMA(KP534511135, Tke, Tkf);
1220
Tkg = FNMS(KP534511135, Tkf, Tke);
1221
Tka = FMA(KP831469612, Tk9, Tk8);
1222
TkF = FNMS(KP831469612, Tk9, Tk8);
1225
Tj8 = FNMS(KP923879532, Tj7, Tj6);
1226
Tkl = FMA(KP923879532, Tj7, Tj6);
1231
E TkB, Tkh, TkA, Tkn;
1232
TkH = FMA(KP881921264, TkG, TkF);
1233
TkK = FNMS(KP881921264, TkG, TkF);
1236
TkA = FNMS(KP831469612, Tkm, Tkl);
1237
Tkn = FMA(KP831469612, Tkm, Tkl);
1238
Tkx = FNMS(KP881921264, Tkh, Tka);
1239
Tki = FMA(KP881921264, Tkh, Tka);
1240
TkC = FNMS(KP881921264, TkB, TkA);
1241
TkN = FMA(KP881921264, TkB, TkA);
1242
Tku = FNMS(KP881921264, Tkq, Tkn);
1243
Tkr = FMA(KP881921264, Tkq, Tkn);
1247
E TkM, TkL, Tk7, Tkk, Tkt, Tkw;
1251
E TkJ, Tks, Tkj, TkO;
1258
rio[WS(ios, 59)] = FNMS(Tkk, Tki, Tks);
1259
iio[-WS(ios, 4)] = FMA(Tkk, Tkr, Tkj);
1260
rio[WS(ios, 43)] = FNMS(TkM, TkK, TkO);
1262
iio[-WS(ios, 20)] = FMA(TkM, TkN, TkL);
1266
E Tkz, Tky, Tkv, TkI;
1273
iio[-WS(ios, 36)] = FMA(Tkw, Tku, Tky);
1274
rio[WS(ios, 27)] = FNMS(Tkw, Tkx, Tkv);
1275
iio[-WS(ios, 52)] = FMA(TkE, TkC, TkI);
1279
rio[WS(ios, 11)] = FNMS(TkE, TkH, TkD);
1281
E Tii, Ti5, ThI, Tia, Til, Ti2, ThZ;
1283
E Thn, ThG, Tid, Th4, Tie, ThY, ThR, ThU;
1285
E TgO, Th3, ThW, ThX;
1286
TiB = FNMS(KP923879532, TgN, TgG);
1287
TgO = FMA(KP923879532, TgN, TgG);
1290
Thn = FNMS(KP098491403, Thm, Thf);
1291
ThW = FMA(KP098491403, Thf, Thm);
1292
ThX = FMA(KP098491403, Thy, ThF);
1293
ThG = FNMS(KP098491403, ThF, Thy);
1294
Tid = FNMS(KP980785280, Th3, TgO);
1295
Th4 = FMA(KP980785280, Th3, TgO);
1298
Tio = FNMS(KP923879532, ThQ, ThN);
1299
ThR = FMA(KP923879532, ThQ, ThN);
1304
E Ti9, ThH, Ti8, ThV;
1305
Tif = FMA(KP995184726, Tie, Tid);
1306
Tii = FNMS(KP995184726, Tie, Tid);
1309
Ti8 = FNMS(KP980785280, ThU, ThR);
1310
ThV = FMA(KP980785280, ThU, ThR);
1311
Ti5 = FNMS(KP995184726, ThH, Th4);
1312
ThI = FMA(KP995184726, ThH, Th4);
1313
Tia = FNMS(KP995184726, Ti9, Ti8);
1314
Til = FMA(KP995184726, Ti9, Ti8);
1315
Ti2 = FNMS(KP995184726, ThY, ThV);
1316
ThZ = FMA(KP995184726, ThY, ThV);
1320
E Tik, Tij, TgD, ThK, Ti1, Ti4;
1324
E Tih, Ti0, ThJ, Tim;
1331
rio[WS(ios, 63)] = FNMS(ThK, ThI, Ti0);
1332
iio[0] = FMA(ThK, ThZ, ThJ);
1333
rio[WS(ios, 47)] = FNMS(Tik, Tii, Tim);
1335
iio[-WS(ios, 16)] = FMA(Tik, Til, Tij);
1339
E Ti7, Ti6, Ti3, Tig;
1346
iio[-WS(ios, 32)] = FMA(Ti4, Ti2, Ti6);
1347
rio[WS(ios, 31)] = FNMS(Ti4, Ti5, Ti3);
1348
iio[-WS(ios, 48)] = FMA(Tic, Tia, Tig);
1352
rio[WS(ios, 15)] = FNMS(Tic, Tif, Tib);
1354
E Tk2, TjP, Tjw, TjU, Tk5, TjM, TjJ;
1356
E Tjn, Tju, TjX, Tjg, TjY, TjI, TjG, TjH;
1357
Tjn = FNMS(KP303346683, Tjm, Tjj);
1358
TjG = FMA(KP303346683, Tjj, Tjm);
1359
TjH = FMA(KP303346683, Tjq, Tjt);
1360
Tju = FNMS(KP303346683, Tjt, Tjq);
1361
TjX = FNMS(KP831469612, Tjf, Tj8);
1362
Tjg = FMA(KP831469612, Tjf, Tj8);
1366
E TjT, Tjv, TjS, TjF;
1367
TjZ = FMA(KP956940335, TjY, TjX);
1368
Tk2 = FNMS(KP956940335, TjY, TjX);
1371
TjS = FNMS(KP831469612, TjE, TjB);
1372
TjF = FMA(KP831469612, TjE, TjB);
1373
TjP = FNMS(KP956940335, Tjv, Tjg);
1374
Tjw = FMA(KP956940335, Tjv, Tjg);
1375
TjU = FMA(KP956940335, TjT, TjS);
1376
Tk5 = FNMS(KP956940335, TjT, TjS);
1377
TjM = FNMS(KP956940335, TjI, TjF);
1378
TjJ = FMA(KP956940335, TjI, TjF);
1382
E Tk4, Tk3, Tj5, Tjy, TjL, TjO;
1386
E Tk1, TjK, Tjx, Tk6;
1393
iio[-WS(ios, 60)] = FMA(Tjy, Tjw, TjK);
1394
rio[WS(ios, 3)] = FNMS(Tjy, TjJ, Tjx);
1395
iio[-WS(ios, 44)] = FMA(Tk4, Tk2, Tk6);
1397
rio[WS(ios, 19)] = FNMS(Tk4, Tk5, Tk3);
1401
E TjR, TjQ, TjN, Tk0;
1408
rio[WS(ios, 35)] = FNMS(TjO, TjM, TjQ);
1409
iio[-WS(ios, 28)] = FMA(TjO, TjP, TjN);
1410
rio[WS(ios, 51)] = FNMS(TjW, TjU, Tk0);
1414
iio[-WS(ios, 12)] = FMA(TjW, TjZ, TjV);
1416
E Tj0, TiN, Tiy, TiS, Tj3, TiK, TiH;
1418
E Tit, Tiw, TiV, Tiq, TiW, TiG, TiE, TiF;
1419
Tit = FNMS(KP820678790, Tis, Tir);
1420
TiE = FMA(KP820678790, Tir, Tis);
1421
TiF = FMA(KP820678790, Tiu, Tiv);
1422
Tiw = FNMS(KP820678790, Tiv, Tiu);
1423
TiV = FMA(KP980785280, Tip, Tio);
1424
Tiq = FNMS(KP980785280, Tip, Tio);
1428
E TiR, Tix, TiQ, TiD;
1429
TiX = FMA(KP773010453, TiW, TiV);
1430
Tj0 = FNMS(KP773010453, TiW, TiV);
1433
TiQ = FNMS(KP980785280, TiC, TiB);
1434
TiD = FMA(KP980785280, TiC, TiB);
1435
TiN = FNMS(KP773010453, Tix, Tiq);
1436
Tiy = FMA(KP773010453, Tix, Tiq);
1437
TiS = FMA(KP773010453, TiR, TiQ);
1438
Tj3 = FNMS(KP773010453, TiR, TiQ);
1439
TiK = FNMS(KP773010453, TiG, TiD);
1440
TiH = FMA(KP773010453, TiG, TiD);
1444
E Tj2, Tj1, Tin, TiA, TiJ, TiM;
1448
E TiZ, TiI, Tiz, Tj4;
1455
iio[-WS(ios, 56)] = FMA(TiA, Tiy, TiI);
1456
rio[WS(ios, 7)] = FNMS(TiA, TiH, Tiz);
1457
iio[-WS(ios, 40)] = FMA(Tj2, Tj0, Tj4);
1459
rio[WS(ios, 23)] = FNMS(Tj2, Tj3, Tj1);
1463
E TiP, TiO, TiL, TiY;
1470
rio[WS(ios, 39)] = FNMS(TiM, TiK, TiO);
1471
iio[-WS(ios, 24)] = FMA(TiM, TiN, TiL);
1472
rio[WS(ios, 55)] = FNMS(TiU, TiS, TiY);
1478
iio[-WS(ios, 8)] = FMA(TiU, TiX, TiT);
1480
E T5z, T5n, T5m, T5A, Tep, Ted, Tec, Teq, Te3, Te0, TdZ;
1482
E T5d, T5a, T55, T5e, T59;
1484
E T5b, T4T, T4z, T5c, T57, T4W, T3a, T56, T4P, T4S;
1485
T5z = FNMS(KP707106781, T4O, T4L);
1486
T4P = FMA(KP707106781, T4O, T4L);
1490
E T4U, T4V, T2w, T39, T3R, T4y;
1491
T4U = FNMS(KP198912367, T3D, T3Q);
1492
T3R = FMA(KP198912367, T3Q, T3D);
1493
T4y = FNMS(KP198912367, T4x, T4k);
1494
T4V = FMA(KP198912367, T4k, T4x);
1495
T5b = FNMS(KP923879532, T4S, T4P);
1496
T4T = FMA(KP923879532, T4S, T4P);
1499
T2w = FMA(KP707106781, T2v, T2c);
1500
T5m = FNMS(KP707106781, T2v, T2c);
1505
T3a = FMA(KP923879532, T39, T2w);
1506
T56 = FNMS(KP923879532, T39, T2w);
1509
E T53, T58, T50, T5j, T5i, T5h;
1511
E T4X, T4C, T5g, T4A, T23;
1512
T53 = FNMS(KP980785280, T4W, T4T);
1513
T4X = FMA(KP980785280, T4W, T4T);
1515
T5g = FNMS(KP980785280, T57, T56);
1516
T58 = FMA(KP980785280, T57, T56);
1517
T4A = FMA(KP980785280, T4z, T3a);
1518
T50 = FNMS(KP980785280, T4z, T3a);
1521
E T5f, T5k, T4Y, T4B;
1522
T5j = FNMS(KP980785280, T5c, T5b);
1523
T5d = FMA(KP980785280, T5c, T5b);
1529
iio[-WS(ios, 1)] = FMA(T23, T4X, T4Y);
1530
rio[WS(ios, 62)] = FNMS(T4C, T4X, T4B);
1532
iio[-WS(ios, 17)] = FMA(T5f, T5j, T5k);
1536
E T52, T4Z, T54, T51;
1538
rio[WS(ios, 46)] = FNMS(T5i, T5j, T5h);
1544
iio[-WS(ios, 33)] = FMA(T4Z, T53, T54);
1546
rio[WS(ios, 30)] = FNMS(T52, T53, T51);
1552
E Te6, TdT, Tds, TdY, Te9, TdQ, TdN;
1554
E TcN, Tdq, Tca, Te1, Te2, TdM, TdF, TdI;
1556
E TdK, TdL, Tbw, Tc9;
1557
Tep = FNMS(KP923879532, Tbv, Tbg);
1558
Tbw = FMA(KP923879532, Tbv, Tbg);
1559
iio[-WS(ios, 49)] = FMA(T55, T5d, T5e);
1562
rio[WS(ios, 14)] = FNMS(T5a, T5d, T59);
1563
TcN = FNMS(KP098491403, TcM, TcB);
1564
TdK = FMA(KP098491403, TcB, TcM);
1565
TdL = FNMS(KP098491403, Tde, Tdp);
1566
Tdq = FMA(KP098491403, Tdp, Tde);
1567
Tca = FMA(KP980785280, Tc9, Tbw);
1568
Te1 = FNMS(KP980785280, Tc9, Tbw);
1571
Tec = FNMS(KP923879532, TdE, TdB);
1572
TdF = FMA(KP923879532, TdE, TdB);
1577
E TdX, Tdr, TdW, TdJ;
1578
Te3 = FMA(KP995184726, Te2, Te1);
1579
Te6 = FNMS(KP995184726, Te2, Te1);
1582
TdW = FNMS(KP980785280, TdI, TdF);
1583
TdJ = FMA(KP980785280, TdI, TdF);
1584
TdT = FNMS(KP995184726, Tdr, Tca);
1585
Tds = FMA(KP995184726, Tdr, Tca);
1586
TdY = FMA(KP995184726, TdX, TdW);
1587
Te9 = FNMS(KP995184726, TdX, TdW);
1588
TdQ = FNMS(KP995184726, TdM, TdJ);
1589
TdN = FMA(KP995184726, TdM, TdJ);
1593
E Te8, Te7, Tb5, Tdu, TdP, TdS;
1597
E Te5, TdO, Tdt, Tea;
1604
iio[-WS(ios, 62)] = FMA(Tdu, Tds, TdO);
1605
rio[WS(ios, 1)] = FNMS(Tdu, TdN, Tdt);
1606
iio[-WS(ios, 46)] = FMA(Te8, Te6, Tea);
1608
rio[WS(ios, 17)] = FNMS(Te8, Te9, Te7);
1612
E TdV, TdU, TdR, Te4;
1619
rio[WS(ios, 33)] = FNMS(TdS, TdQ, TdU);
1620
iio[-WS(ios, 30)] = FMA(TdS, TdT, TdR);
1621
rio[WS(ios, 49)] = FNMS(Te0, TdY, Te4);
1626
iio[-WS(ios, 14)] = FMA(Te0, Te3, TdZ);
1628
E T5V, T5S, T5N, T5W, T5R;
1630
E T5T, T5B, T5v, T5U, T5P, T5E, T5o, T5O;
1632
E T5C, T5D, T5r, T5u;
1633
T5C = FMA(KP668178637, T5p, T5q);
1634
T5r = FNMS(KP668178637, T5q, T5p);
1635
T5u = FMA(KP668178637, T5t, T5s);
1636
T5D = FNMS(KP668178637, T5s, T5t);
1637
T5T = FNMS(KP923879532, T5A, T5z);
1638
T5B = FMA(KP923879532, T5A, T5z);
1643
T5o = FMA(KP923879532, T5n, T5m);
1644
T5O = FNMS(KP923879532, T5n, T5m);
1647
E T5L, T5Q, T5I, T61, T60, T5Z;
1649
E T5F, T5y, T5Y, T5w, T5l;
1650
T5L = FNMS(KP831469612, T5E, T5B);
1651
T5F = FMA(KP831469612, T5E, T5B);
1653
T5Y = FNMS(KP831469612, T5P, T5O);
1654
T5Q = FMA(KP831469612, T5P, T5O);
1655
T5w = FMA(KP831469612, T5v, T5o);
1656
T5I = FNMS(KP831469612, T5v, T5o);
1659
E T5X, T62, T5G, T5x;
1660
T61 = FNMS(KP831469612, T5U, T5T);
1661
T5V = FMA(KP831469612, T5U, T5T);
1667
iio[-WS(ios, 57)] = FMA(T5l, T5F, T5G);
1668
rio[WS(ios, 6)] = FNMS(T5y, T5F, T5x);
1670
iio[-WS(ios, 41)] = FMA(T5X, T61, T62);
1674
E T5K, T5H, T5M, T5J;
1676
rio[WS(ios, 22)] = FNMS(T60, T61, T5Z);
1682
iio[-WS(ios, 25)] = FMA(T5H, T5L, T5M);
1684
rio[WS(ios, 38)] = FNMS(T5K, T5L, T5J);
1690
E TeO, TeB, Tem, TeG, TeR, Tey, Tev;
1692
E Teh, Tek, Tee, TeJ, TeK, Teu, Tes, Tet;
1693
iio[-WS(ios, 9)] = FMA(T5N, T5V, T5W);
1694
rio[WS(ios, 54)] = FNMS(T5S, T5V, T5R);
1695
Teh = FNMS(KP820678790, Teg, Tef);
1696
Tes = FMA(KP820678790, Tef, Teg);
1697
Tet = FNMS(KP820678790, Tei, Tej);
1698
Tek = FMA(KP820678790, Tej, Tei);
1699
Tee = FMA(KP980785280, Ted, Tec);
1700
TeJ = FNMS(KP980785280, Ted, Tec);
1704
E TeF, Tel, TeE, Ter;
1705
TeL = FMA(KP773010453, TeK, TeJ);
1706
TeO = FNMS(KP773010453, TeK, TeJ);
1709
TeE = FNMS(KP980785280, Teq, Tep);
1710
Ter = FMA(KP980785280, Teq, Tep);
1711
TeB = FNMS(KP773010453, Tel, Tee);
1712
Tem = FMA(KP773010453, Tel, Tee);
1713
TeG = FMA(KP773010453, TeF, TeE);
1714
TeR = FNMS(KP773010453, TeF, TeE);
1715
Tey = FNMS(KP773010453, Teu, Ter);
1716
Tev = FMA(KP773010453, Teu, Ter);
1720
E TeQ, TeP, Teb, Teo, Tex, TeA;
1724
E TeN, Tew, Ten, TeS;
1731
rio[WS(ios, 57)] = FNMS(Teo, Tem, Tew);
1732
iio[-WS(ios, 6)] = FMA(Teo, Tev, Ten);
1733
rio[WS(ios, 41)] = FNMS(TeQ, TeO, TeS);
1735
iio[-WS(ios, 22)] = FMA(TeQ, TeR, TeP);
1739
E TeD, TeC, Tez, TeM;
1746
iio[-WS(ios, 38)] = FMA(TeA, Tey, TeC);
1747
rio[WS(ios, 25)] = FNMS(TeA, TeB, Tez);
1748
iio[-WS(ios, 54)] = FMA(TeI, TeG, TeM);
1754
rio[WS(ios, 9)] = FNMS(TeI, TeL, TeH);
1756
E T7j, T77, T76, T7k, Tg9, TfX, TfW, Tga, TfN, TfK, TfJ;
1758
E T6X, T6U, T6P, T6Y, T6T;
1760
E T6V, T6D, T6t, T6W, T6R, T6G, T6e, T6Q, T6z, T6C;
1761
T7j = FNMS(KP707106781, T6y, T6x);
1762
T6z = FMA(KP707106781, T6y, T6x);
1766
E T6E, T6F, T66, T6d, T6l, T6s;
1767
T6E = FMA(KP198912367, T6h, T6k);
1768
T6l = FNMS(KP198912367, T6k, T6h);
1769
T6s = FMA(KP198912367, T6r, T6o);
1770
T6F = FNMS(KP198912367, T6o, T6r);
1771
T6V = FNMS(KP923879532, T6C, T6z);
1772
T6D = FMA(KP923879532, T6C, T6z);
1775
T66 = FMA(KP707106781, T65, T64);
1776
T76 = FNMS(KP707106781, T65, T64);
1781
T6e = FMA(KP923879532, T6d, T66);
1782
T6Q = FNMS(KP923879532, T6d, T66);
1785
E T6N, T6S, T6K, T73, T72, T71;
1787
E T6H, T6w, T70, T6u, T63;
1788
T6N = FNMS(KP980785280, T6G, T6D);
1789
T6H = FMA(KP980785280, T6G, T6D);
1791
T70 = FNMS(KP980785280, T6R, T6Q);
1792
T6S = FMA(KP980785280, T6R, T6Q);
1793
T6u = FMA(KP980785280, T6t, T6e);
1794
T6K = FNMS(KP980785280, T6t, T6e);
1797
E T6Z, T74, T6I, T6v;
1798
T73 = FNMS(KP980785280, T6W, T6V);
1799
T6X = FMA(KP980785280, T6W, T6V);
1805
iio[-WS(ios, 61)] = FMA(T63, T6H, T6I);
1806
rio[WS(ios, 2)] = FNMS(T6w, T6H, T6v);
1808
iio[-WS(ios, 45)] = FMA(T6Z, T73, T74);
1812
E T6M, T6J, T6O, T6L;
1814
rio[WS(ios, 18)] = FNMS(T72, T73, T71);
1820
iio[-WS(ios, 29)] = FMA(T6J, T6N, T6O);
1822
rio[WS(ios, 34)] = FNMS(T6M, T6N, T6L);
1828
E TfQ, TfD, Tfk, TfI, TfT, TfA, Tfx;
1830
E Tfb, Tfi, Tf4, TfL, TfM, Tfw, Tfp, Tfs;
1832
E Tfu, Tfv, TeW, Tf3;
1833
Tg9 = FNMS(KP923879532, TeV, TeU);
1834
TeW = FMA(KP923879532, TeV, TeU);
1835
iio[-WS(ios, 13)] = FMA(T6P, T6X, T6Y);
1838
rio[WS(ios, 50)] = FNMS(T6U, T6X, T6T);
1839
Tfb = FNMS(KP303346683, Tfa, Tf7);
1840
Tfu = FMA(KP303346683, Tf7, Tfa);
1841
Tfv = FNMS(KP303346683, Tfe, Tfh);
1842
Tfi = FMA(KP303346683, Tfh, Tfe);
1843
Tf4 = FMA(KP831469612, Tf3, TeW);
1844
TfL = FNMS(KP831469612, Tf3, TeW);
1847
TfW = FNMS(KP923879532, Tfo, Tfn);
1848
Tfp = FMA(KP923879532, Tfo, Tfn);
1853
E TfH, Tfj, TfG, Tft;
1854
TfN = FMA(KP956940335, TfM, TfL);
1855
TfQ = FNMS(KP956940335, TfM, TfL);
1858
TfG = FNMS(KP831469612, Tfs, Tfp);
1859
Tft = FMA(KP831469612, Tfs, Tfp);
1860
TfD = FNMS(KP956940335, Tfj, Tf4);
1861
Tfk = FMA(KP956940335, Tfj, Tf4);
1862
TfI = FMA(KP956940335, TfH, TfG);
1863
TfT = FNMS(KP956940335, TfH, TfG);
1864
TfA = FNMS(KP956940335, Tfw, Tft);
1865
Tfx = FMA(KP956940335, Tfw, Tft);
1869
E TfS, TfR, TeT, Tfm, Tfz, TfC;
1873
E TfP, Tfy, Tfl, TfU;
1880
rio[WS(ios, 61)] = FNMS(Tfm, Tfk, Tfy);
1881
iio[-WS(ios, 2)] = FMA(Tfm, Tfx, Tfl);
1882
rio[WS(ios, 45)] = FNMS(TfS, TfQ, TfU);
1884
iio[-WS(ios, 18)] = FMA(TfS, TfT, TfR);
1888
E TfF, TfE, TfB, TfO;
1895
iio[-WS(ios, 34)] = FMA(TfC, TfA, TfE);
1896
rio[WS(ios, 29)] = FNMS(TfC, TfD, TfB);
1897
iio[-WS(ios, 50)] = FMA(TfK, TfI, TfO);
1902
rio[WS(ios, 13)] = FNMS(TfK, TfN, TfJ);
1904
E T7F, T7C, T7x, T7G, T7B;
1906
E T7D, T7l, T7f, T7E, T7z, T7o, T78, T7y;
1908
E T7m, T7n, T7b, T7e;
1909
T7m = FNMS(KP668178637, T79, T7a);
1910
T7b = FMA(KP668178637, T7a, T79);
1911
T7e = FNMS(KP668178637, T7d, T7c);
1912
T7n = FMA(KP668178637, T7c, T7d);
1913
T7D = FNMS(KP923879532, T7k, T7j);
1914
T7l = FMA(KP923879532, T7k, T7j);
1919
T78 = FMA(KP923879532, T77, T76);
1920
T7y = FNMS(KP923879532, T77, T76);
1923
E T7v, T7A, T7s, T7L, T7K, T7J;
1925
E T7p, T7i, T7I, T7g, T75;
1926
T7v = FNMS(KP831469612, T7o, T7l);
1927
T7p = FMA(KP831469612, T7o, T7l);
1929
T7I = FNMS(KP831469612, T7z, T7y);
1930
T7A = FMA(KP831469612, T7z, T7y);
1931
T7g = FMA(KP831469612, T7f, T78);
1932
T7s = FNMS(KP831469612, T7f, T78);
1935
E T7H, T7M, T7q, T7h;
1936
T7L = FNMS(KP831469612, T7E, T7D);
1937
T7F = FMA(KP831469612, T7E, T7D);
1943
iio[-WS(ios, 5)] = FMA(T75, T7p, T7q);
1944
rio[WS(ios, 58)] = FNMS(T7i, T7p, T7h);
1946
iio[-WS(ios, 21)] = FMA(T7H, T7L, T7M);
1950
E T7u, T7r, T7w, T7t;
1952
rio[WS(ios, 42)] = FNMS(T7K, T7L, T7J);
1958
iio[-WS(ios, 37)] = FMA(T7r, T7v, T7w);
1960
rio[WS(ios, 26)] = FNMS(T7u, T7v, T7t);
1966
E Tgy, Tgl, Tg6, Tgq, TgB, Tgi, Tgf;
1968
E Tg1, Tg4, TfY, Tgt, Tgu, Tge, Tgc, Tgd;
1969
iio[-WS(ios, 53)] = FMA(T7x, T7F, T7G);
1970
rio[WS(ios, 10)] = FNMS(T7C, T7F, T7B);
1971
Tg1 = FNMS(KP534511135, Tg0, TfZ);
1972
Tgc = FMA(KP534511135, TfZ, Tg0);
1973
Tgd = FNMS(KP534511135, Tg2, Tg3);
1974
Tg4 = FMA(KP534511135, Tg3, Tg2);
1975
TfY = FMA(KP831469612, TfX, TfW);
1976
Tgt = FNMS(KP831469612, TfX, TfW);
1980
E Tgp, Tg5, Tgo, Tgb;
1981
Tgv = FMA(KP881921264, Tgu, Tgt);
1982
Tgy = FNMS(KP881921264, Tgu, Tgt);
1985
Tgo = FNMS(KP831469612, Tga, Tg9);
1986
Tgb = FMA(KP831469612, Tga, Tg9);
1987
Tgl = FNMS(KP881921264, Tg5, TfY);
1988
Tg6 = FMA(KP881921264, Tg5, TfY);
1989
Tgq = FMA(KP881921264, Tgp, Tgo);
1990
TgB = FNMS(KP881921264, Tgp, Tgo);
1991
Tgi = FNMS(KP881921264, Tge, Tgb);
1992
Tgf = FMA(KP881921264, Tge, Tgb);
1996
E TgA, Tgz, TfV, Tg8, Tgh, Tgk;
2000
E Tgx, Tgg, Tg7, TgC;
2007
iio[-WS(ios, 58)] = FMA(Tg8, Tg6, Tgg);
2008
rio[WS(ios, 5)] = FNMS(Tg8, Tgf, Tg7);
2009
iio[-WS(ios, 42)] = FMA(TgA, Tgy, TgC);
2011
rio[WS(ios, 21)] = FNMS(TgA, TgB, Tgz);
2015
E Tgn, Tgm, Tgj, Tgw;
2022
rio[WS(ios, 37)] = FNMS(Tgk, Tgi, Tgm);
2023
iio[-WS(ios, 26)] = FMA(Tgk, Tgl, Tgj);
2024
rio[WS(ios, 53)] = FNMS(Tgs, Tgq, Tgw);
2032
iio[-WS(ios, 10)] = FMA(Tgs, Tgv, Tgr);
2037
/* Table of tw_instr entries for the HAVE_FMA build; it is passed to the
 * planner through desc. NOTE(review): the entries themselves are not
 * visible in this excerpt, so their contents are not described here. */
static const tw_instr twinstr[] = {
2042
/*
 * Descriptor registered together with the HAVE_FMA build of hb_64.
 * Fields, in order: 64 (matches "-n 64" on the generator command line),
 * the codelet name string, the twinstr table, &GENUS, and an operation
 * count block whose first three entries {520, 126, 518} equal the
 * additions / multiplications / fused multiply-adds quoted in the
 * generator's header comment for this build.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is defined by hc2hc_desc, which is declared outside this file.
 */
static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {520, 126, 518, 0}, 0, 0, 0 };
2044
/*
 * Registration entry point for the HAVE_FMA build: hands the hb_64
 * function and its descriptor to planner p via X(khc2hc_register).
 * The #else branch of HAVE_FMA defines the same entry point for the
 * non-FMA build, so only one of the two is compiled.
 */
void X(codelet_hb_64) (planner *p) {
2045
X(khc2hc_register) (p, hb_64, &desc);
2047
#else /* HAVE_FMA */
2049
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */
2052
* This function contains 1038 FP additions, 500 FP multiplications,
2053
* (or, 808 additions, 270 multiplications, 230 fused multiply/add),
2054
* 196 stack variables, and 256 memory accesses
2058
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
2059
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
2060
* $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
2065
/*
 * hb_64 -- non-FMA build of the codelet (compiled in the #else branch of
 * HAVE_FMA).  Per the generator command line above it was produced by
 * gen_hc2hc with -n 64, -sign 1 and -dif.
 *
 *   rio, iio  data arrays, updated in place: the body reads and writes
 *             rio[WS(ios, k)] and iio[-WS(ios, k)] for k in 0..63.
 *             NOTE(review): presumably the real and imaginary halves of
 *             halfcomplex data -- confirm against hb.h / the caller.
 *   W         table advanced by 126 entries per loop pass.
 *             NOTE(review): presumably twiddle factors (126 = 2 * 63);
 *             the reads from W are not visible in this excerpt.
 *   ios       stride handed to WS() to form every array index.
 *   m         loop bound: the loop runs for i = m - 2, m - 4, ... while
 *             i > 0.
 *   dist      per-pass pointer step: rio moves forward by dist and iio
 *             moves backward by dist.
 *
 * Returns a const R pointer; the return statement is not visible in this
 * excerpt, so the returned value is not documented here.
 */
static const R *hb_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
2067
/* Constants declared through DK(name, value).  Each KPnnnnnnnnn name spells
 * the first nine decimals of its value; numerically the values are the
 * cosines and sines of k*pi/32 noted beside each line. */
DK(KP634393284, +0.634393284163645498215171613225493370675687095); /* sin(7*pi/32) */
2068
DK(KP773010453, +0.773010453362736960810906609758469800971041293); /* cos(7*pi/32) */
2069
DK(KP098017140, +0.098017140329560601994195563888641845861136673); /* sin(pi/32) */
2070
DK(KP995184726, +0.995184726672196886244836953109479921575474869); /* cos(pi/32) */
2071
DK(KP471396736, +0.471396736825997648556387625905254377657460319); /* sin(5*pi/32) */
2072
DK(KP881921264, +0.881921264348355029712756863660388349508442621); /* cos(5*pi/32) */
2073
DK(KP290284677, +0.290284677254462367636192375817395274691476278); /* sin(3*pi/32) */
2074
DK(KP956940335, +0.956940335732208864935797886980269969482849206); /* cos(3*pi/32) */
2075
DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
2076
DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
2077
DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
2078
DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
2079
DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
2080
DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
2081
DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* cos(pi/4) = sqrt(1/2) */
2083
/* Main loop: i counts down from m - 2 in steps of 2.  After each pass rio
 * advances by dist, iio retreats by dist, W advances by 126 entries, and
 * MAKE_VOLATILE_STRIDE(ios) (a macro defined outside this file) is invoked. */
for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126, MAKE_VOLATILE_STRIDE(ios)) {
2084
E Tf, T7i, Tfa, ThM, Tgp, ThH, T2c, T5O, T4T, T6n, Tcp, Ted, TcA, TdE, T87;
2085
E T9o, TK, T93, T2P, T4F, Tfo, Thz, T5T, T6j, Tbx, TdI, Tfl, ThA, T7r, T81;
2086
E TbE, TdH, TZ, T94, T38, T4G, Tfv, ThC, T5W, T6k, TbQ, TdK, Tfs, ThD, T7w;
2087
E T82, TbX, TdL, Tu, T84, Tfh, ThG, Tgm, ThN, T2v, T6m, T4K, T5P, Tce, TdF;
2088
E TcD, Tec, T7l, T9p, T1L, T20, T9c, T9d, T9e, T9f, T40, T66, Tg1, Thu, Tg8;
2089
E Thv, Tg5, Thr, T4n, T67, T4j, T69, T4w, T6a, TaT, TdW, Tb8, TdZ, TfU, Ths;
2090
E T7O, T8y, T7T, T8z, Tbc, TdX, Tbj, Te0, T1g, T1v, T97, T98, T99, T9a, T3j;
2091
E T5Z, TfI, Thk, TfP, Thl, TfM, Tho, T3G, T60, T3C, T62, T3P, T63, Tak, TdQ;
2092
E Tav, TdT, TfB, Thn, T7D, T8v, T7I, T8w, TaD, TdP, TaG, TdS;
2094
E T3, Tcm, T4O, Tcv, T6, Tcu, T4R, Tcn, Td, Tcy, T2a, Tch, Ta, Tcx, T27;
2099
T2 = iio[-WS(ios, 32)];
2105
T4N = rio[WS(ios, 32)];
2108
T4 = rio[WS(ios, 16)];
2109
T5 = iio[-WS(ios, 48)];
2113
T4P = iio[-WS(ios, 16)];
2114
T4Q = rio[WS(ios, 48)];
2118
E Tb, Tc, Tcf, T28, T29, Tcg;
2119
Tb = iio[-WS(ios, 56)];
2120
Tc = rio[WS(ios, 24)];
2122
T28 = iio[-WS(ios, 24)];
2123
T29 = rio[WS(ios, 56)];
2131
E T8, T9, Tcj, T25, T26, Tci;
2132
T8 = rio[WS(ios, 8)];
2133
T9 = iio[-WS(ios, 40)];
2135
T25 = iio[-WS(ios, 8)];
2136
T26 = rio[WS(ios, 40)];
2151
Tf9 = KP707106781 * (Tck + Tch);
2156
E Tgn, Tgo, T24, T2b;
2157
Tgn = KP707106781 * (Tcx + Tcy);
2167
E T4L, T4S, Tcl, Tco;
2172
Tcl = KP707106781 * (Tch - Tck);
2178
E Tcw, Tcz, T85, T86;
2180
Tcz = KP707106781 * (Tcx - Tcy);
2190
E TC, Tby, T2x, Tbu, T2N, Tbz, T7o, Tbv, TJ, TbB, TbC, T2E, T2G, Tbp, Tbs;
2193
E Tw, Tx, Ty, Tz, TA, TB;
2194
Tw = rio[WS(ios, 2)];
2195
Tx = iio[-WS(ios, 34)];
2197
Tz = rio[WS(ios, 18)];
2198
TA = iio[-WS(ios, 50)];
2206
E T2H, T2I, T2J, T2K, T2L, T2M;
2207
T2H = iio[-WS(ios, 2)];
2208
T2I = rio[WS(ios, 34)];
2210
T2K = iio[-WS(ios, 18)];
2211
T2L = rio[WS(ios, 50)];
2219
E TF, Tbr, T2A, Tbq, TI, Tbn, T2D, Tbo;
2222
TD = rio[WS(ios, 10)];
2223
TE = iio[-WS(ios, 42)];
2226
T2y = iio[-WS(ios, 10)];
2227
T2z = rio[WS(ios, 42)];
2233
TG = iio[-WS(ios, 58)];
2234
TH = rio[WS(ios, 26)];
2237
T2B = iio[-WS(ios, 26)];
2238
T2C = rio[WS(ios, 58)];
2254
E T2F, T2O, Tfm, Tfn;
2257
T2P = FMA(KP923879532, T2F, KP382683432 * T2O);
2258
T4F = FNMS(KP382683432, T2F, KP923879532 * T2O);
2259
Tfm = KP707106781 * (TbB + TbC);
2265
E T5R, T5S, Tbt, Tbw;
2268
T5T = FNMS(KP382683432, T5S, KP923879532 * T5R);
2269
T6j = FMA(KP382683432, T5R, KP923879532 * T5S);
2270
Tbt = KP707106781 * (Tbp - Tbs);
2276
Tfk = KP707106781 * (Tbs + Tbp);
2280
E T7n, T7q, TbA, TbD;
2286
TbD = KP707106781 * (TbB - TbC);
2292
E TR, TbU, T2Q, TbN, T36, TbV, T7t, TbO, TY, TbR, TbS, T2X, T2Z, TbI, TbL;
2295
E TL, TM, TN, TO, TP, TQ;
2296
TL = iio[-WS(ios, 62)];
2297
TM = rio[WS(ios, 30)];
2299
TO = rio[WS(ios, 14)];
2300
TP = iio[-WS(ios, 46)];
2308
E T30, T31, T32, T33, T34, T35;
2309
T30 = iio[-WS(ios, 30)];
2310
T31 = rio[WS(ios, 62)];
2312
T33 = iio[-WS(ios, 14)];
2313
T34 = rio[WS(ios, 46)];
2321
E TU, TbG, T2T, TbH, TX, TbJ, T2W, TbK;
2324
TS = rio[WS(ios, 6)];
2325
TT = iio[-WS(ios, 38)];
2328
T2R = iio[-WS(ios, 6)];
2329
T2S = rio[WS(ios, 38)];
2335
TV = iio[-WS(ios, 54)];
2336
TW = rio[WS(ios, 22)];
2339
T2U = iio[-WS(ios, 22)];
2340
T2V = rio[WS(ios, 54)];
2356
E T2Y, T37, Tft, Tfu;
2359
T38 = FNMS(KP382683432, T37, KP923879532 * T2Y);
2360
T4G = FMA(KP382683432, T2Y, KP923879532 * T37);
2361
Tft = KP707106781 * (TbI + TbL);
2367
E T5U, T5V, TbM, TbP;
2370
T5W = FMA(KP923879532, T5U, KP382683432 * T5V);
2371
T6k = FNMS(KP382683432, T5U, KP923879532 * T5V);
2372
TbM = KP707106781 * (TbI - TbL);
2377
Tfq = KP707106781 * (TbS + TbR);
2382
E T7s, T7v, TbT, TbW;
2387
TbT = KP707106781 * (TbR - TbS);
2394
E Ti, T2g, Tl, T2j, T2d, T2k, Tfc, Tfb, Tc5, Tc2, Tp, T2p, Ts, T2s, T2m;
2395
E T2t, Tff, Tfe, Tcc, Tc9;
2397
E Tc0, Tc4, Tc3, Tc1;
2400
Tg = rio[WS(ios, 4)];
2401
Th = iio[-WS(ios, 36)];
2404
T2e = iio[-WS(ios, 4)];
2405
T2f = rio[WS(ios, 36)];
2411
Tj = rio[WS(ios, 20)];
2412
Tk = iio[-WS(ios, 52)];
2415
T2h = iio[-WS(ios, 20)];
2416
T2i = rio[WS(ios, 52)];
2428
E Tc7, Tcb, Tca, Tc8;
2431
Tn = iio[-WS(ios, 60)];
2432
To = rio[WS(ios, 28)];
2435
T2n = iio[-WS(ios, 28)];
2436
T2o = rio[WS(ios, 60)];
2442
Tq = rio[WS(ios, 12)];
2443
Tr = iio[-WS(ios, 44)];
2446
T2q = iio[-WS(ios, 12)];
2447
T2r = rio[WS(ios, 44)];
2464
Tfd = FNMS(KP382683432, Tfc, KP923879532 * Tfb);
2465
Tfg = FNMS(KP923879532, Tff, KP382683432 * Tfe);
2470
E Tgk, Tgl, T2l, T2u;
2471
Tgk = FMA(KP382683432, Tfb, KP923879532 * Tfc);
2472
Tgl = FMA(KP923879532, Tfe, KP382683432 * Tff);
2477
T2v = KP707106781 * (T2l + T2u);
2478
T6m = KP707106781 * (T2l - T2u);
2481
E T4I, T4J, Tc6, Tcd;
2484
T4K = KP707106781 * (T4I + T4J);
2485
T5P = KP707106781 * (T4J - T4I);
2486
Tc6 = FNMS(KP382683432, Tc5, KP923879532 * Tc2);
2487
Tcd = FMA(KP923879532, Tc9, KP382683432 * Tcc);
2492
E TcB, TcC, T7j, T7k;
2493
TcB = FMA(KP923879532, Tc5, KP382683432 * Tc2);
2494
TcC = FNMS(KP382683432, Tc9, KP923879532 * Tcc);
2504
E T1z, T1C, T1D, Tbg, TaQ, T4r, T4u, T7Q, Tbh, TaR, T1G, T3V, T1J, T3Y, T1K;
2505
E T7R, Tbe, Tbd, TaO, TaL, T1S, TfV, TfW, T41, T48, TaW, TaZ, T7L, T1Z, TfY;
2506
E TfZ, T4a, T4h, Tb3, Tb6, T7M;
2508
E T1x, T1y, T1A, T1B;
2509
T1x = iio[-WS(ios, 63)];
2510
T1y = rio[WS(ios, 31)];
2512
T1A = rio[WS(ios, 15)];
2513
T1B = iio[-WS(ios, 47)];
2520
E T4p, T4q, T4s, T4t;
2521
T4p = iio[-WS(ios, 31)];
2522
T4q = rio[WS(ios, 63)];
2524
T4s = iio[-WS(ios, 15)];
2525
T4t = rio[WS(ios, 47)];
2532
E TaJ, TaK, TaM, TaN;
2534
E T1E, T1F, T3T, T3U;
2535
T1E = rio[WS(ios, 7)];
2536
T1F = iio[-WS(ios, 39)];
2539
T3T = iio[-WS(ios, 7)];
2540
T3U = rio[WS(ios, 39)];
2545
E T1H, T1I, T3W, T3X;
2546
T1H = iio[-WS(ios, 55)];
2547
T1I = rio[WS(ios, 23)];
2550
T3W = iio[-WS(ios, 23)];
2551
T3X = rio[WS(ios, 55)];
2563
E T1O, TaX, T44, TaV, T1R, TaU, T47, TaY;
2565
E T1M, T1N, T42, T43;
2566
T1M = rio[WS(ios, 3)];
2567
T1N = iio[-WS(ios, 35)];
2570
T42 = iio[-WS(ios, 3)];
2571
T43 = rio[WS(ios, 35)];
2576
E T1P, T1Q, T45, T46;
2577
T1P = rio[WS(ios, 19)];
2578
T1Q = iio[-WS(ios, 51)];
2581
T45 = iio[-WS(ios, 19)];
2582
T46 = rio[WS(ios, 51)];
2596
E T1V, Tb4, T4d, Tb2, T1Y, Tb1, T4g, Tb5;
2598
E T1T, T1U, T4b, T4c;
2599
T1T = iio[-WS(ios, 59)];
2600
T1U = rio[WS(ios, 27)];
2603
T4b = iio[-WS(ios, 27)];
2604
T4c = rio[WS(ios, 59)];
2609
E T1W, T1X, T4e, T4f;
2610
T1W = rio[WS(ios, 11)];
2611
T1X = iio[-WS(ios, 43)];
2614
T4e = iio[-WS(ios, 11)];
2615
T4f = rio[WS(ios, 43)];
2635
E T3S, T3Z, TfX, Tg0;
2640
TfX = FNMS(KP382683432, TfW, KP923879532 * TfV);
2641
Tg0 = FNMS(KP923879532, TfZ, KP382683432 * TfY);
2646
E Tg6, Tg7, Tg3, Tg4;
2647
Tg6 = KP707106781 * (TaL + TaO);
2651
Tg3 = FMA(KP382683432, TfV, KP923879532 * TfW);
2652
Tg4 = FMA(KP923879532, TfY, KP382683432 * TfZ);
2657
E T4l, T4m, T49, T4i;
2660
T4n = KP707106781 * (T4l + T4m);
2661
T67 = KP707106781 * (T4m - T4l);
2664
T4j = KP707106781 * (T49 + T4i);
2665
T69 = KP707106781 * (T49 - T4i);
2668
E T4o, T4v, TaP, TaS;
2673
TaP = KP707106781 * (TaL - TaO);
2679
E Tb0, Tb7, TfS, TfT;
2680
Tb0 = FMA(KP923879532, TaW, KP382683432 * TaZ);
2681
Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb3);
2684
TfS = KP707106781 * (Tbe + Tbd);
2690
E T7K, T7N, T7P, T7S;
2701
E Tba, Tbb, Tbf, Tbi;
2702
Tba = FNMS(KP382683432, TaW, KP923879532 * TaZ);
2703
Tbb = FMA(KP923879532, Tb6, KP382683432 * Tb3);
2706
Tbf = KP707106781 * (Tbd - Tbe);
2713
E T14, T17, T18, Tax, Tas, T3K, T3N, T7F, Tay, Tat, T1b, T3e, T1e, T3h, T1f;
2714
E T7G, TaB, TaA, Taq, Tan, T1n, TfC, TfD, T3k, T3r, Ta8, Tab, T7A, T1u, TfF;
2715
E TfG, T3t, T3A, Taf, Tai, T7B;
2717
E T12, T13, T15, T16;
2718
T12 = rio[WS(ios, 1)];
2719
T13 = iio[-WS(ios, 33)];
2721
T15 = rio[WS(ios, 17)];
2722
T16 = iio[-WS(ios, 49)];
2729
E T3I, T3J, T3L, T3M;
2730
T3I = iio[-WS(ios, 1)];
2731
T3J = rio[WS(ios, 33)];
2733
T3L = iio[-WS(ios, 17)];
2734
T3M = rio[WS(ios, 49)];
2741
E Tap, Tao, Tal, Tam;
2743
E T19, T1a, T3c, T3d;
2744
T19 = rio[WS(ios, 9)];
2745
T1a = iio[-WS(ios, 41)];
2748
T3c = iio[-WS(ios, 9)];
2749
T3d = rio[WS(ios, 41)];
2754
E T1c, T1d, T3f, T3g;
2755
T1c = iio[-WS(ios, 57)];
2756
T1d = rio[WS(ios, 25)];
2759
T3f = iio[-WS(ios, 25)];
2760
T3g = rio[WS(ios, 57)];
2772
E T1j, Ta6, T3n, Taa, T1m, Ta9, T3q, Ta7;
2774
E T1h, T1i, T3l, T3m;
2775
T1h = rio[WS(ios, 5)];
2776
T1i = iio[-WS(ios, 37)];
2779
T3l = iio[-WS(ios, 5)];
2780
T3m = rio[WS(ios, 37)];
2785
E T1k, T1l, T3o, T3p;
2786
T1k = rio[WS(ios, 21)];
2787
T1l = iio[-WS(ios, 53)];
2790
T3o = iio[-WS(ios, 21)];
2791
T3p = rio[WS(ios, 53)];
2805
E T1q, Tad, T3w, Tah, T1t, Tag, T3z, Tae;
2807
E T1o, T1p, T3u, T3v;
2808
T1o = iio[-WS(ios, 61)];
2809
T1p = rio[WS(ios, 29)];
2812
T3u = iio[-WS(ios, 29)];
2813
T3v = rio[WS(ios, 61)];
2818
E T1r, T1s, T3x, T3y;
2819
T1r = rio[WS(ios, 13)];
2820
T1s = iio[-WS(ios, 45)];
2823
T3x = iio[-WS(ios, 13)];
2824
T3y = rio[WS(ios, 45)];
2844
E T3b, T3i, TfE, TfH;
2849
TfE = FNMS(KP382683432, TfD, KP923879532 * TfC);
2850
TfH = FNMS(KP923879532, TfG, KP382683432 * TfF);
2855
E TfN, TfO, TfK, TfL;
2856
TfN = KP707106781 * (TaA + TaB);
2860
TfK = FMA(KP382683432, TfC, KP923879532 * TfD);
2861
TfL = FMA(KP923879532, TfF, KP382683432 * TfG);
2866
E T3E, T3F, T3s, T3B;
2869
T3G = KP707106781 * (T3E + T3F);
2870
T60 = KP707106781 * (T3F - T3E);
2873
T3C = KP707106781 * (T3s + T3B);
2874
T62 = KP707106781 * (T3s - T3B);
2877
E T3H, T3O, Tac, Taj;
2882
Tac = FNMS(KP382683432, Tab, KP923879532 * Ta8);
2883
Taj = FMA(KP923879532, Taf, KP382683432 * Tai);
2888
E Tar, Tau, Tfz, TfA;
2889
Tar = KP707106781 * (Tan - Taq);
2894
TfA = KP707106781 * (Taq + Tan);
2899
E T7z, T7C, T7E, T7H;
2910
E Taz, TaC, TaE, TaF;
2912
TaC = KP707106781 * (TaA - TaB);
2915
TaE = FMA(KP923879532, Tab, KP382683432 * Ta8);
2916
TaF = FNMS(KP382683432, Taf, KP923879532 * Tai);
2922
E T11, T9K, T9T, Ta2, T22, T9Q, T9N, Ta3;
2924
E Tv, T10, T9R, T9S;
2935
E T1w, T21, T9L, T9M;
2946
iio[-WS(ios, 63)] = Ta3 + Ta2;
2948
E T9O, T9U, T9J, T9P;
2953
rio[WS(ios, 48)] = FNMS(T9P, T9U, T9J * T9O);
2954
iio[-WS(ios, 15)] = FMA(T9P, T9O, T9J * T9U);
2957
E T9W, T9Y, T9V, T9X;
2962
rio[WS(ios, 16)] = FNMS(T9X, T9Y, T9V * T9W);
2963
iio[-WS(ios, 47)] = FMA(T9X, T9W, T9V * T9Y);
2966
E Ta0, Ta4, T9Z, Ta1;
2971
rio[WS(ios, 32)] = FNMS(Ta1, Ta4, T9Z * Ta0);
2972
iio[-WS(ios, 31)] = FMA(Ta1, Ta0, T9Z * Ta4);
2976
E T96, T9y, T9r, T9D, T9h, T9C, T9m, T9z;
2978
E T92, T95, T9n, T9q;
2989
E T9b, T9g, T9k, T9l;
2992
T9h = KP707106781 * (T9b + T9g);
2993
T9C = KP707106781 * (T9b - T9g);
2996
T9m = KP707106781 * (T9k + T9l);
2997
T9z = KP707106781 * (T9l - T9k);
3000
E T9i, T9s, T91, T9j;
3005
rio[WS(ios, 56)] = FNMS(T9j, T9s, T91 * T9i);
3006
iio[-WS(ios, 7)] = FMA(T9j, T9i, T91 * T9s);
3009
E T9G, T9I, T9F, T9H;
3014
rio[WS(ios, 40)] = FNMS(T9H, T9I, T9F * T9G);
3015
iio[-WS(ios, 23)] = FMA(T9H, T9G, T9F * T9I);
3018
E T9u, T9w, T9t, T9v;
3023
rio[WS(ios, 24)] = FNMS(T9v, T9w, T9t * T9u);
3024
iio[-WS(ios, 39)] = FMA(T9v, T9u, T9t * T9w);
3027
E T9A, T9E, T9x, T9B;
3032
rio[WS(ios, 8)] = FNMS(T9B, T9E, T9x * T9A);
3033
iio[-WS(ios, 55)] = FMA(T9B, T9A, T9x * T9E);
3037
E T8u, T8Q, T8J, T8V, T8B, T8U, T8G, T8R;
3039
E T8s, T8t, T8H, T8I;
3041
T8t = KP707106781 * (T82 - T81);
3044
T8H = KP707106781 * (T7r - T7w);
3050
E T8x, T8A, T8E, T8F;
3051
T8x = FNMS(KP382683432, T8w, KP923879532 * T8v);
3052
T8A = FMA(KP923879532, T8y, KP382683432 * T8z);
3055
T8E = FMA(KP382683432, T8v, KP923879532 * T8w);
3056
T8F = FNMS(KP382683432, T8y, KP923879532 * T8z);
3061
E T8C, T8K, T8r, T8D;
3066
rio[WS(ios, 4)] = FNMS(T8D, T8K, T8r * T8C);
3067
iio[-WS(ios, 59)] = FMA(T8D, T8C, T8r * T8K);
3070
E T8Y, T90, T8X, T8Z;
3075
rio[WS(ios, 20)] = FNMS(T8Z, T90, T8X * T8Y);
3076
iio[-WS(ios, 43)] = FMA(T8Z, T8Y, T8X * T90);
3079
E T8M, T8O, T8L, T8N;
3084
rio[WS(ios, 36)] = FNMS(T8N, T8O, T8L * T8M);
3085
iio[-WS(ios, 27)] = FMA(T8N, T8M, T8L * T8O);
3088
E T8S, T8W, T8P, T8T;
3093
rio[WS(ios, 52)] = FNMS(T8T, T8W, T8P * T8S);
3094
iio[-WS(ios, 11)] = FMA(T8T, T8S, T8P * T8W);
3098
E T7y, T8g, T89, T8l, T7V, T8k, T80, T8h;
3100
E T7m, T7x, T83, T88;
3102
T7x = KP707106781 * (T7r + T7w);
3105
T83 = KP707106781 * (T81 + T82);
3111
E T7J, T7U, T7Y, T7Z;
3112
T7J = FMA(KP923879532, T7D, KP382683432 * T7I);
3113
T7U = FNMS(KP382683432, T7T, KP923879532 * T7O);
3116
T7Y = FNMS(KP382683432, T7D, KP923879532 * T7I);
3117
T7Z = FMA(KP382683432, T7O, KP923879532 * T7T);
3122
E T7W, T8a, T7h, T7X;
3127
rio[WS(ios, 60)] = FNMS(T7X, T8a, T7h * T7W);
3128
iio[-WS(ios, 3)] = FMA(T7X, T7W, T7h * T8a);
3131
E T8o, T8q, T8n, T8p;
3136
rio[WS(ios, 44)] = FNMS(T8p, T8q, T8n * T8o);
3137
iio[-WS(ios, 19)] = FMA(T8p, T8o, T8n * T8q);
3140
E T8c, T8e, T8b, T8d;
3145
rio[WS(ios, 28)] = FNMS(T8d, T8e, T8b * T8c);
3146
iio[-WS(ios, 35)] = FMA(T8d, T8c, T8b * T8e);
3149
E T8i, T8m, T8f, T8j;
3154
rio[WS(ios, 12)] = FNMS(T8j, T8m, T8f * T8i);
3155
iio[-WS(ios, 51)] = FMA(T8j, T8i, T8f * T8m);
3159
E T6K, T76, T6Z, T7b, T6R, T7a, T6W, T77;
3161
E T6I, T6J, T6X, T6Y;
3171
E T6N, T6U, T6Q, T6V;
3173
E T6L, T6M, T6O, T6P;
3176
T6N = FMA(KP831469612, T6L, KP555570233 * T6M);
3177
T6U = FNMS(KP555570233, T6L, KP831469612 * T6M);
3180
T6Q = FNMS(KP555570233, T6P, KP831469612 * T6O);
3181
T6V = FMA(KP555570233, T6O, KP831469612 * T6P);
3190
E T6S, T70, T6H, T6T;
3195
rio[WS(ios, 58)] = FNMS(T6T, T70, T6H * T6S);
3196
iio[-WS(ios, 5)] = FMA(T6T, T6S, T6H * T70);
3199
E T7e, T7g, T7d, T7f;
3204
rio[WS(ios, 42)] = FNMS(T7f, T7g, T7d * T7e);
3205
iio[-WS(ios, 21)] = FMA(T7f, T7e, T7d * T7g);
3208
E T72, T74, T71, T73;
3213
rio[WS(ios, 26)] = FNMS(T73, T74, T71 * T72);
3214
iio[-WS(ios, 37)] = FMA(T73, T72, T71 * T74);
3217
E T78, T7c, T75, T79;
3222
rio[WS(ios, 10)] = FNMS(T79, T7c, T75 * T78);
3223
iio[-WS(ios, 53)] = FMA(T79, T78, T75 * T7c);
3227
E T3a, T52, T4V, T57, T4z, T56, T4E, T53;
3229
E T2w, T39, T4H, T4U;
3239
E T3R, T4C, T4y, T4D;
3241
E T3D, T3Q, T4k, T4x;
3244
T3R = FMA(KP980785280, T3D, KP195090322 * T3Q);
3245
T4C = FNMS(KP195090322, T3D, KP980785280 * T3Q);
3248
T4y = FNMS(KP195090322, T4x, KP980785280 * T4k);
3249
T4D = FMA(KP195090322, T4k, KP980785280 * T4x);
3258
E T4A, T4W, T23, T4B;
3263
rio[WS(ios, 62)] = FNMS(T4B, T4W, T23 * T4A);
3264
iio[-WS(ios, 1)] = FMA(T4B, T4A, T23 * T4W);
3267
E T5a, T5c, T59, T5b;
3272
rio[WS(ios, 46)] = FNMS(T5b, T5c, T59 * T5a);
3273
iio[-WS(ios, 17)] = FMA(T5b, T5a, T59 * T5c);
3276
E T4Y, T50, T4X, T4Z;
3281
rio[WS(ios, 30)] = FNMS(T4Z, T50, T4X * T4Y);
3282
iio[-WS(ios, 33)] = FMA(T4Z, T4Y, T4X * T50);
3285
E T54, T58, T51, T55;
3290
rio[WS(ios, 14)] = FNMS(T55, T58, T51 * T54);
3291
iio[-WS(ios, 49)] = FMA(T55, T54, T51 * T58);
3295
E T5g, T5C, T5v, T5H, T5n, T5G, T5s, T5D;
3297
E T5e, T5f, T5t, T5u;
3307
E T5j, T5q, T5m, T5r;
3309
E T5h, T5i, T5k, T5l;
3312
T5j = FNMS(KP555570233, T5i, KP831469612 * T5h);
3313
T5q = FMA(KP555570233, T5h, KP831469612 * T5i);
3316
T5m = FMA(KP831469612, T5k, KP555570233 * T5l);
3317
T5r = FNMS(KP555570233, T5k, KP831469612 * T5l);
3326
E T5o, T5w, T5d, T5p;
3331
rio[WS(ios, 6)] = FNMS(T5p, T5w, T5d * T5o);
3332
iio[-WS(ios, 57)] = FMA(T5p, T5o, T5d * T5w);
3335
E T5K, T5M, T5J, T5L;
3340
rio[WS(ios, 22)] = FNMS(T5L, T5M, T5J * T5K);
3341
iio[-WS(ios, 41)] = FMA(T5L, T5K, T5J * T5M);
3344
E T5y, T5A, T5x, T5z;
3349
rio[WS(ios, 38)] = FNMS(T5z, T5A, T5x * T5y);
3350
iio[-WS(ios, 25)] = FMA(T5z, T5y, T5x * T5A);
3353
E T5E, T5I, T5B, T5F;
3358
rio[WS(ios, 54)] = FNMS(T5F, T5I, T5B * T5E);
3359
iio[-WS(ios, 9)] = FMA(T5F, T5E, T5B * T5I);
3363
E T5Y, T6w, T6p, T6B, T6d, T6A, T6i, T6x;
3365
E T5Q, T5X, T6l, T6o;
3375
E T65, T6g, T6c, T6h;
3377
E T61, T64, T68, T6b;
3380
T65 = FNMS(KP195090322, T64, KP980785280 * T61);
3381
T6g = FMA(KP195090322, T61, KP980785280 * T64);
3384
T6c = FMA(KP980785280, T68, KP195090322 * T6b);
3385
T6h = FNMS(KP195090322, T68, KP980785280 * T6b);
3394
E T6e, T6q, T5N, T6f;
3399
rio[WS(ios, 2)] = FNMS(T6f, T6q, T5N * T6e);
3400
iio[-WS(ios, 61)] = FMA(T6f, T6e, T5N * T6q);
3403
E T6E, T6G, T6D, T6F;
3408
rio[WS(ios, 18)] = FNMS(T6F, T6G, T6D * T6E);
3409
iio[-WS(ios, 45)] = FMA(T6F, T6E, T6D * T6G);
3412
E T6s, T6u, T6r, T6t;
3417
rio[WS(ios, 34)] = FNMS(T6t, T6u, T6r * T6s);
3418
iio[-WS(ios, 29)] = FMA(T6t, T6s, T6r * T6u);
3421
E T6y, T6C, T6v, T6z;
3426
rio[WS(ios, 50)] = FNMS(T6z, T6C, T6v * T6y);
3427
iio[-WS(ios, 13)] = FMA(T6z, T6y, T6v * T6C);
3431
E TdO, Tf1, Teq, TeH, Tef, TeW, Ten, TeM, Te3, Ter, Te8, Tem, TeE, Tf0, TeP;
3434
E TdG, TeG, TdN, TeF, TdJ, TdM;
3437
TdJ = FNMS(KP555570233, TdI, KP831469612 * TdH);
3438
TdM = FMA(KP831469612, TdK, KP555570233 * TdL);
3447
E Tee, TeK, Teb, TeL, Te9, Tea;
3450
Te9 = FMA(KP555570233, TdH, KP831469612 * TdI);
3451
Tea = FNMS(KP555570233, TdK, KP831469612 * TdL);
3460
E TdV, Te6, Te2, Te7;
3462
E TdR, TdU, TdY, Te1;
3465
TdV = FNMS(KP290284677, TdU, KP956940335 * TdR);
3466
Te6 = FMA(KP290284677, TdR, KP956940335 * TdU);
3469
Te2 = FMA(KP956940335, TdY, KP290284677 * Te1);
3470
Te7 = FNMS(KP290284677, TdY, KP956940335 * Te1);
3478
E TeA, TeN, TeD, TeO;
3480
E Tey, Tez, TeB, TeC;
3483
TeA = FNMS(KP471396736, Tez, KP881921264 * Tey);
3484
TeN = FMA(KP881921264, Tez, KP471396736 * Tey);
3487
TeD = FMA(KP471396736, TeB, KP881921264 * TeC);
3488
TeO = FNMS(KP471396736, TeC, KP881921264 * TeB);
3496
E Te4, Teg, TdD, Te5;
3501
iio[-WS(ios, 2)] = FMA(TdD, Te4, Te5 * Teg);
3502
rio[WS(ios, 61)] = FNMS(Te5, Te4, TdD * Teg);
3505
E TeY, Tf2, TeV, TeZ;
3510
iio[-WS(ios, 10)] = FMA(TeV, TeY, TeZ * Tf2);
3511
rio[WS(ios, 53)] = FNMS(TeZ, TeY, TeV * Tf2);
3514
E Tf4, Tf6, Tf3, Tf5;
3519
rio[WS(ios, 21)] = FNMS(Tf5, Tf6, Tf3 * Tf4);
3520
iio[-WS(ios, 42)] = FMA(Tf3, Tf6, Tf5 * Tf4);
3523
E Tei, Tek, Teh, Tej;
3528
rio[WS(ios, 29)] = FNMS(Tej, Tek, Teh * Tei);
3529
iio[-WS(ios, 34)] = FMA(Teh, Tek, Tej * Tei);
3532
E Teo, Tes, Tel, Tep;
3537
rio[WS(ios, 13)] = FNMS(Tep, Tes, Tel * Teo);
3538
iio[-WS(ios, 50)] = FMA(Tel, Tes, Tep * Teo);
3541
E TeI, TeQ, Tex, TeJ;
3546
rio[WS(ios, 5)] = FNMS(TeJ, TeQ, Tex * TeI);
3547
iio[-WS(ios, 58)] = FMA(Tex, TeQ, TeJ * TeI);
3550
E TeS, TeU, TeR, TeT;
3555
iio[-WS(ios, 26)] = FMA(TeR, TeS, TeT * TeU);
3556
rio[WS(ios, 37)] = FNMS(TeT, TeS, TeR * TeU);
3559
E Teu, Tew, Tet, Tev;
3564
iio[-WS(ios, 18)] = FMA(Tet, Teu, Tev * Tew);
3565
rio[WS(ios, 45)] = FNMS(Tev, Teu, Tet * Tew);
3569
E Tcr, Tdw, TcX, Td6, TcI, Tdt, TcS, Tdl, Tbm, TcW, TcL, TcT, Tdd, Tdx, Tdi;
3572
E Tcq, Td4, TbZ, Td5, TbF, TbY;
3575
TbF = FNMS(KP195090322, TbE, KP980785280 * Tbx);
3576
TbY = FMA(KP195090322, TbQ, KP980785280 * TbX);
3585
E TcE, Tdk, TcH, Tdj, TcF, TcG;
3588
TcF = FMA(KP980785280, TbE, KP195090322 * Tbx);
3589
TcG = FNMS(KP195090322, TbX, KP980785280 * TbQ);
3598
E TaI, TcJ, Tbl, TcK;
3600
E Taw, TaH, Tb9, Tbk;
3603
TaI = FNMS(KP098017140, TaH, KP995184726 * Taw);
3604
TcJ = FMA(KP995184726, TaH, KP098017140 * Taw);
3607
Tbl = FMA(KP098017140, Tb9, KP995184726 * Tbk);
3608
TcK = FNMS(KP098017140, Tbk, KP995184726 * Tb9);
3616
E Td9, Tdg, Tdc, Tdh;
3618
E Td7, Td8, Tda, Tdb;
3621
Td9 = FNMS(KP634393284, Td8, KP773010453 * Td7);
3622
Tdg = FMA(KP634393284, Td7, KP773010453 * Td8);
3625
Tdc = FMA(KP773010453, Tda, KP634393284 * Tdb);
3626
Tdh = FNMS(KP634393284, Tda, KP773010453 * Tdb);
3634
E Tcs, TcM, Ta5, Tct;
3639
rio[WS(ios, 1)] = FNMS(Tct, TcM, Ta5 * Tcs);
3640
iio[-WS(ios, 62)] = FMA(Ta5, TcM, Tct * Tcs);
3643
E Tdu, Tdy, Tdr, Tdv;
3648
rio[WS(ios, 9)] = FNMS(Tdv, Tdy, Tdr * Tdu);
3649
iio[-WS(ios, 54)] = FMA(Tdr, Tdy, Tdv * Tdu);
3652
E TdA, TdC, Tdz, TdB;
3657
iio[-WS(ios, 22)] = FMA(Tdz, TdA, TdB * TdC);
3658
rio[WS(ios, 41)] = FNMS(TdB, TdA, Tdz * TdC);
3661
E TcO, TcQ, TcN, TcP;
3666
iio[-WS(ios, 30)] = FMA(TcN, TcO, TcP * TcQ);
3667
rio[WS(ios, 33)] = FNMS(TcP, TcO, TcN * TcQ);
3670
E TcU, TcY, TcR, TcV;
3675
iio[-WS(ios, 14)] = FMA(TcR, TcU, TcV * TcY);
3676
rio[WS(ios, 49)] = FNMS(TcV, TcU, TcR * TcY);
3679
E Tde, Tdm, Td3, Tdf;
3684
iio[-WS(ios, 6)] = FMA(Td3, Tde, Tdf * Tdm);
3685
rio[WS(ios, 57)] = FNMS(Tdf, Tde, Td3 * Tdm);
3688
E Tdo, Tdq, Tdn, Tdp;
3693
rio[WS(ios, 25)] = FNMS(Tdp, Tdq, Tdn * Tdo);
3694
iio[-WS(ios, 38)] = FMA(Tdn, Tdq, Tdp * Tdo);
3697
E Td0, Td2, TcZ, Td1;
3702
rio[WS(ios, 17)] = FNMS(Td1, Td2, TcZ * Td0);
3703
iio[-WS(ios, 46)] = FMA(TcZ, Td2, Td1 * Td0);
3707
E Tfy, Thd, TgC, TgT, Tgr, Th8, Tgz, TgY, Tgb, TgD, Tgg, Tgy, TgQ, Thc, Th1;
3710
E Tfi, TgS, Tfx, TgR, Tfp, Tfw;
3713
Tfp = FNMS(KP195090322, Tfo, KP980785280 * Tfl);
3714
Tfw = FMA(KP980785280, Tfs, KP195090322 * Tfv);
3723
E Tgq, TgW, Tgj, TgX, Tgh, Tgi;
3726
Tgh = FMA(KP195090322, Tfl, KP980785280 * Tfo);
3727
Tgi = FNMS(KP195090322, Tfs, KP980785280 * Tfv);
3736
E TfR, Tge, Tga, Tgf;
3738
E TfJ, TfQ, Tg2, Tg9;
3741
TfR = FNMS(KP098017140, TfQ, KP995184726 * TfJ);
3742
Tge = FMA(KP098017140, TfJ, KP995184726 * TfQ);
3745
Tga = FMA(KP995184726, Tg2, KP098017140 * Tg9);
3746
Tgf = FNMS(KP098017140, Tg2, KP995184726 * Tg9);
3754
E TgM, TgZ, TgP, Th0;
3756
E TgK, TgL, TgN, TgO;
3759
TgM = FNMS(KP634393284, TgL, KP773010453 * TgK);
3760
TgZ = FMA(KP773010453, TgL, KP634393284 * TgK);
3763
TgP = FMA(KP634393284, TgN, KP773010453 * TgO);
3764
Th0 = FNMS(KP634393284, TgO, KP773010453 * TgN);
3772
E Tgc, Tgs, Tf7, Tgd;
3777
iio[0] = FMA(Tf7, Tgc, Tgd * Tgs);
3778
rio[WS(ios, 63)] = FNMS(Tgd, Tgc, Tf7 * Tgs);
3781
E Tha, The, Th7, Thb;
3786
iio[-WS(ios, 8)] = FMA(Th7, Tha, Thb * The);
3787
rio[WS(ios, 55)] = FNMS(Thb, Tha, Th7 * The);
3790
E Thg, Thi, Thf, Thh;
3795
rio[WS(ios, 23)] = FNMS(Thh, Thi, Thf * Thg);
3796
iio[-WS(ios, 40)] = FMA(Thf, Thi, Thh * Thg);
3799
E Tgu, Tgw, Tgt, Tgv;
3804
rio[WS(ios, 31)] = FNMS(Tgv, Tgw, Tgt * Tgu);
3805
iio[-WS(ios, 32)] = FMA(Tgt, Tgw, Tgv * Tgu);
3808
E TgA, TgE, Tgx, TgB;
3813
rio[WS(ios, 15)] = FNMS(TgB, TgE, Tgx * TgA);
3814
iio[-WS(ios, 48)] = FMA(Tgx, TgE, TgB * TgA);
3817
E TgU, Th2, TgJ, TgV;
3822
rio[WS(ios, 7)] = FNMS(TgV, Th2, TgJ * TgU);
3823
iio[-WS(ios, 56)] = FMA(TgJ, Th2, TgV * TgU);
3826
E Th4, Th6, Th3, Th5;
3831
iio[-WS(ios, 24)] = FMA(Th3, Th4, Th5 * Th6);
3832
rio[WS(ios, 39)] = FNMS(Th5, Th4, Th3 * Th6);
3835
E TgG, TgI, TgF, TgH;
3840
iio[-WS(ios, 16)] = FMA(TgF, TgG, TgH * TgI);
3841
rio[WS(ios, 47)] = FNMS(TgH, TgG, TgF * TgI);
3845
E ThJ, TiG, Ti7, Tig, ThS, TiD, Ti2, Tiv, Thy, Ti6, ThV, Ti3, Tin, TiH, Tis;
3848
E ThI, Tie, ThF, Tif, ThB, ThE;
3851
ThB = FNMS(KP555570233, ThA, KP831469612 * Thz);
3852
ThE = FNMS(KP555570233, ThD, KP831469612 * ThC);
3861
E ThO, Tiu, ThR, Tit, ThP, ThQ;
3864
ThP = FMA(KP831469612, ThA, KP555570233 * Thz);
3865
ThQ = FMA(KP831469612, ThD, KP555570233 * ThC);
3874
E Thq, ThT, Thx, ThU;
3876
E Thm, Thp, Tht, Thw;
3879
Thq = FNMS(KP290284677, Thp, KP956940335 * Thm);
3880
ThT = FMA(KP956940335, Thp, KP290284677 * Thm);
3883
Thx = FMA(KP290284677, Tht, KP956940335 * Thw);
3884
ThU = FNMS(KP290284677, Thw, KP956940335 * Tht);
3892
E Tij, Tiq, Tim, Tir;
3894
E Tih, Tii, Tik, Til;
3897
Tij = FNMS(KP471396736, Tii, KP881921264 * Tih);
3898
Tiq = FMA(KP471396736, Tih, KP881921264 * Tii);
3901
Tim = FNMS(KP881921264, Til, KP471396736 * Tik);
3902
Tir = FMA(KP471396736, Til, KP881921264 * Tik);
3910
E ThK, ThW, Thj, ThL;
3915
rio[WS(ios, 3)] = FNMS(ThL, ThW, Thj * ThK);
3916
iio[-WS(ios, 60)] = FMA(Thj, ThW, ThL * ThK);
3919
E TiE, TiI, TiB, TiF;
3924
rio[WS(ios, 11)] = FNMS(TiF, TiI, TiB * TiE);
3925
iio[-WS(ios, 52)] = FMA(TiB, TiI, TiF * TiE);
3928
E TiK, TiM, TiJ, TiL;
3933
iio[-WS(ios, 20)] = FMA(TiJ, TiK, TiL * TiM);
3934
rio[WS(ios, 43)] = FNMS(TiL, TiK, TiJ * TiM);
3937
E ThY, Ti0, ThX, ThZ;
3942
iio[-WS(ios, 28)] = FMA(ThX, ThY, ThZ * Ti0);
3943
rio[WS(ios, 35)] = FNMS(ThZ, ThY, ThX * Ti0);
3946
E Ti4, Ti8, Ti1, Ti5;
3951
iio[-WS(ios, 12)] = FMA(Ti1, Ti4, Ti5 * Ti8);
3952
rio[WS(ios, 51)] = FNMS(Ti5, Ti4, Ti1 * Ti8);
3955
E Tio, Tiw, Tid, Tip;
3960
iio[-WS(ios, 4)] = FMA(Tid, Tio, Tip * Tiw);
3961
rio[WS(ios, 59)] = FNMS(Tip, Tio, Tid * Tiw);
3964
E Tiy, TiA, Tix, Tiz;
3969
rio[WS(ios, 27)] = FNMS(Tiz, TiA, Tix * Tiy);
3970
iio[-WS(ios, 36)] = FMA(Tix, TiA, Tiz * Tiy);
3973
E Tia, Tic, Ti9, Tib;
3978
rio[WS(ios, 19)] = FNMS(Tib, Tic, Ti9 * Tia);
3979
iio[-WS(ios, 44)] = FMA(Ti9, Tic, Tib * Tia);
3986
/* Table of tw_instr entries for the non-FMA build; it is passed to the
 * planner through desc. NOTE(review): the entries themselves are not
 * visible in this excerpt, so their contents are not described here. */
static const tw_instr twinstr[] = {
3991
/*
 * Descriptor registered together with the non-FMA build of hb_64.
 * Fields, in order: 64 (matches "-n 64" on the generator command line),
 * the codelet name string, the twinstr table, &GENUS, and an operation
 * count block whose first three entries {808, 270, 230} equal the
 * additions / multiplications / fused multiply-adds quoted in the
 * generator comment for this build.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is defined by hc2hc_desc, which is declared outside this file.
 */
static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {808, 270, 230, 0}, 0, 0, 0 };
3993
/*
 * Registration entry point for the non-FMA build: hands the hb_64
 * function and its descriptor to planner p via X(khc2hc_register).
 * It has the same name and signature as the one in the HAVE_FMA branch;
 * the preprocessor keeps exactly one of them.
 */
void X(codelet_hb_64) (planner *p) {
3994
X(khc2hc_register) (p, hb_64, &desc);
3996
#endif /* HAVE_FMA */