~siretart/ubuntu/utopic/blender/libav10

« back to all changes in this revision

Viewing changes to extern/fftw/dft/simd/codelets/t2bv_64.c

  • Committer: Bazaar Package Importer
  • Author(s): Kevin Roy
  • Date: 2011-02-08 22:20:54 UTC
  • mfrom: (1.4.2 upstream)
  • mto: (14.2.6 sid) (1.5.1)
  • mto: This revision was merged to the branch mainline in revision 27.
  • Revision ID: james.westby@ubuntu.com-20110208222054-kk0gwa4bu8h5lyq4
Tags: upstream-2.56.1-beta-svn34076
Import upstream version 2.56.1-beta-svn34076

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright (c) 2003, 2006 Matteo Frigo
3
 
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
4
 
 *
5
 
 * This program is free software; you can redistribute it and/or modify
6
 
 * it under the terms of the GNU General Public License as published by
7
 
 * the Free Software Foundation; either version 2 of the License, or
8
 
 * (at your option) any later version.
9
 
 *
10
 
 * This program is distributed in the hope that it will be useful,
11
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 
 * GNU General Public License for more details.
14
 
 *
15
 
 * You should have received a copy of the GNU General Public License
16
 
 * along with this program; if not, write to the Free Software
17
 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 
 *
19
 
 */
20
 
 
21
 
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sat Jul  1 22:26:27 EDT 2006 */
23
 
 
24
 
#include "codelet-dft.h"
25
 
 
26
 
#ifdef HAVE_FMA
27
 
 
28
 
/* Generated by: ../../../genfft/gen_twiddle_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2bv_64 -include t2b.h -sign 1 */
29
 
 
30
 
/*
31
 
 * This function contains 519 FP additions, 384 FP multiplications,
32
 
 * (or, 261 additions, 126 multiplications, 258 fused multiply/add),
33
 
 * 187 stack variables, and 128 memory accesses
34
 
 */
35
 
/*
36
 
 * Generator Id's : 
37
 
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
 
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
 
 * $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
40
 
 */
41
 
 
42
 
#include "t2b.h"
43
 
 
44
 
static const R *t2bv_64(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
45
 
{
46
 
     DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
47
 
     DVK(KP995184726, +0.995184726672196886244836953109479921575474869);
48
 
     DVK(KP820678790, +0.820678790828660330972281985331011598767386482);
49
 
     DVK(KP098491403, +0.098491403357164253077197521291327432293052451);
50
 
     DVK(KP956940335, +0.956940335732208864935797886980269969482849206);
51
 
     DVK(KP881921264, +0.881921264348355029712756863660388349508442621);
52
 
     DVK(KP303346683, +0.303346683607342391675883946941299872384187453);
53
 
     DVK(KP534511135, +0.534511135950791641089685961295362908582039528);
54
 
     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
55
 
     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
56
 
     DVK(KP668178637, +0.668178637919298919997757686523080761552472251);
57
 
     DVK(KP198912367, +0.198912367379658006911597622644676228597850501);
58
 
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
59
 
     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);
60
 
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
61
 
     INT i;
62
 
     R *x;
63
 
     x = ii;
64
 
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(ios)) {
65
 
          V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h;
66
 
          V T6E;
67
 
          {
68
 
               V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43;
69
 
               V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3b;
70
 
               V T4m, T4C, T7e, T5l, T7d, T5o, T3a, TV, T4B, T4j, T3X, T3Y, T6o, T7b, T5f;
71
 
               V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25;
72
 
               V T1S, T2q, T2u, T2w;
73
 
               {
74
 
                    V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e;
75
 
                    {
76
 
                         V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21;
77
 
                         {
78
 
                              V T1, T2, T7, T5, T32, T34, T2X, T2Z;
79
 
                              T1 = LD(&(x[0]), dist, &(x[0]));
80
 
                              T2 = LD(&(x[WS(ios, 32)]), dist, &(x[0]));
81
 
                              T7 = LD(&(x[WS(ios, 48)]), dist, &(x[0]));
82
 
                              T5 = LD(&(x[WS(ios, 16)]), dist, &(x[0]));
83
 
                              T32 = LD(&(x[WS(ios, 56)]), dist, &(x[0]));
84
 
                              T34 = LD(&(x[WS(ios, 24)]), dist, &(x[0]));
85
 
                              T2X = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
86
 
                              T2Z = LD(&(x[WS(ios, 40)]), dist, &(x[0]));
87
 
                              {
88
 
                                   V T1m, T54, T6j, T36, T56, T31, T55, T1n, T1q, T1s, T4, T9;
89
 
                                   {
90
 
                                        V T3, T8, T6, T33, T35, T2Y, T30, T1l;
91
 
                                        T1l = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
92
 
                                        T3 = BYTW(&(W[TWVL * 62]), T2);
93
 
                                        T8 = BYTW(&(W[TWVL * 94]), T7);
94
 
                                        T6 = BYTW(&(W[TWVL * 30]), T5);
95
 
                                        T33 = BYTW(&(W[TWVL * 110]), T32);
96
 
                                        T35 = BYTW(&(W[TWVL * 46]), T34);
97
 
                                        T2Y = BYTW(&(W[TWVL * 14]), T2X);
98
 
                                        T30 = BYTW(&(W[TWVL * 78]), T2Z);
99
 
                                        T1m = BYTW(&(W[0]), T1l);
100
 
                                        T54 = VSUB(T1, T3);
101
 
                                        T4 = VADD(T1, T3);
102
 
                                        T6j = VSUB(T6, T8);
103
 
                                        T9 = VADD(T6, T8);
104
 
                                        T36 = VADD(T33, T35);
105
 
                                        T56 = VSUB(T33, T35);
106
 
                                        T31 = VADD(T2Y, T30);
107
 
                                        T55 = VSUB(T2Y, T30);
108
 
                                        T1n = LD(&(x[WS(ios, 33)]), dist, &(x[WS(ios, 1)]));
109
 
                                   }
110
 
                                   T1q = LD(&(x[WS(ios, 17)]), dist, &(x[WS(ios, 1)]));
111
 
                                   T1s = LD(&(x[WS(ios, 49)]), dist, &(x[WS(ios, 1)]));
112
 
                                   Ta = VSUB(T4, T9);
113
 
                                   T3U = VADD(T4, T9);
114
 
                                   {
115
 
                                        V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z;
116
 
                                        T1U = LD(&(x[WS(ios, 9)]), dist, &(x[WS(ios, 1)]));
117
 
                                        T3V = VADD(T31, T36);
118
 
                                        T37 = VSUB(T31, T36);
119
 
                                        T57 = VADD(T55, T56);
120
 
                                        T6k = VSUB(T55, T56);
121
 
                                        T1o = BYTW(&(W[TWVL * 64]), T1n);
122
 
                                        T1r = BYTW(&(W[TWVL * 32]), T1q);
123
 
                                        T1t = BYTW(&(W[TWVL * 96]), T1s);
124
 
                                        T1V = BYTW(&(W[TWVL * 16]), T1U);
125
 
                                        T1W = LD(&(x[WS(ios, 41)]), dist, &(x[WS(ios, 1)]));
126
 
                                        T1Z = LD(&(x[WS(ios, 57)]), dist, &(x[WS(ios, 1)]));
127
 
                                        T7a = VFNMS(LDK(KP707106781), T57, T54);
128
 
                                        T58 = VFMA(LDK(KP707106781), T57, T54);
129
 
                                        T7B = VFNMS(LDK(KP707106781), T6k, T6j);
130
 
                                        T6l = VFMA(LDK(KP707106781), T6k, T6j);
131
 
                                        T1p = VADD(T1m, T1o);
132
 
                                        T5B = VSUB(T1m, T1o);
133
 
                                        T5O = VSUB(T1r, T1t);
134
 
                                        T1u = VADD(T1r, T1t);
135
 
                                        T1X = BYTW(&(W[TWVL * 80]), T1W);
136
 
                                        T20 = BYTW(&(W[TWVL * 112]), T1Z);
137
 
                                        T21 = LD(&(x[WS(ios, 25)]), dist, &(x[WS(ios, 1)]));
138
 
                                   }
139
 
                              }
140
 
                         }
141
 
                         {
142
 
                              V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h;
143
 
                              {
144
 
                                   V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J;
145
 
                                   T28 = LD(&(x[WS(ios, 63)]), dist, &(x[WS(ios, 1)]));
146
 
                                   T2a = LD(&(x[WS(ios, 31)]), dist, &(x[WS(ios, 1)]));
147
 
                                   T1v = VSUB(T1p, T1u);
148
 
                                   T41 = VADD(T1p, T1u);
149
 
                                   T1Y = VADD(T1V, T1X);
150
 
                                   T5C = VSUB(T1V, T1X);
151
 
                                   T22 = BYTW(&(W[TWVL * 48]), T21);
152
 
                                   T2d = LD(&(x[WS(ios, 15)]), dist, &(x[WS(ios, 1)]));
153
 
                                   T29 = BYTW(&(W[TWVL * 124]), T28);
154
 
                                   T2b = BYTW(&(W[TWVL * 60]), T2a);
155
 
                                   T2f = LD(&(x[WS(ios, 47)]), dist, &(x[WS(ios, 1)]));
156
 
                                   T2H = LD(&(x[WS(ios, 55)]), dist, &(x[WS(ios, 1)]));
157
 
                                   T2J = LD(&(x[WS(ios, 23)]), dist, &(x[WS(ios, 1)]));
158
 
                                   {
159
 
                                        V T23, T5D, T2e, T2g, T2I, T2K, T2M;
160
 
                                        T2M = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
161
 
                                        T23 = VADD(T20, T22);
162
 
                                        T5D = VSUB(T20, T22);
163
 
                                        T2e = BYTW(&(W[TWVL * 28]), T2d);
164
 
                                        T2c = VADD(T29, T2b);
165
 
                                        T5W = VSUB(T29, T2b);
166
 
                                        T2g = BYTW(&(W[TWVL * 92]), T2f);
167
 
                                        T2I = BYTW(&(W[TWVL * 108]), T2H);
168
 
                                        T2K = BYTW(&(W[TWVL * 44]), T2J);
169
 
                                        T2N = BYTW(&(W[TWVL * 12]), T2M);
170
 
                                        {
171
 
                                             V T5E, T5P, T42, T2O;
172
 
                                             T5E = VADD(T5C, T5D);
173
 
                                             T5P = VSUB(T5C, T5D);
174
 
                                             T24 = VSUB(T1Y, T23);
175
 
                                             T42 = VADD(T1Y, T23);
176
 
                                             T69 = VSUB(T2g, T2e);
177
 
                                             T2h = VADD(T2e, T2g);
178
 
                                             T2O = LD(&(x[WS(ios, 39)]), dist, &(x[WS(ios, 1)]));
179
 
                                             T2L = VADD(T2I, T2K);
180
 
                                             T5Y = VSUB(T2I, T2K);
181
 
                                             T5Q = VFMA(LDK(KP707106781), T5P, T5O);
182
 
                                             T7o = VFNMS(LDK(KP707106781), T5P, T5O);
183
 
                                             T5F = VFMA(LDK(KP707106781), T5E, T5B);
184
 
                                             T7l = VFNMS(LDK(KP707106781), T5E, T5B);
185
 
                                             T43 = VADD(T41, T42);
186
 
                                             T4F = VSUB(T41, T42);
187
 
                                             T2P = BYTW(&(W[TWVL * 76]), T2O);
188
 
                                        }
189
 
                                   }
190
 
                              }
191
 
                              T2i = VSUB(T2c, T2h);
192
 
                              T48 = VADD(T2c, T2h);
193
 
                              {
194
 
                                   V TW, TY, T11, T2Q, T5X, T13;
195
 
                                   TW = LD(&(x[WS(ios, 62)]), dist, &(x[0]));
196
 
                                   TY = LD(&(x[WS(ios, 30)]), dist, &(x[0]));
197
 
                                   T11 = LD(&(x[WS(ios, 14)]), dist, &(x[0]));
198
 
                                   T2Q = VADD(T2N, T2P);
199
 
                                   T5X = VSUB(T2N, T2P);
200
 
                                   T13 = LD(&(x[WS(ios, 46)]), dist, &(x[0]));
201
 
                                   {
202
 
                                        V T12, T5Z, T6a, T49, T14, T18, T1a;
203
 
                                        {
204
 
                                             V T17, T19, TX, TZ;
205
 
                                             T17 = LD(&(x[WS(ios, 54)]), dist, &(x[0]));
206
 
                                             T19 = LD(&(x[WS(ios, 22)]), dist, &(x[0]));
207
 
                                             TX = BYTW(&(W[TWVL * 122]), TW);
208
 
                                             TZ = BYTW(&(W[TWVL * 58]), TY);
209
 
                                             T12 = BYTW(&(W[TWVL * 26]), T11);
210
 
                                             T5Z = VADD(T5X, T5Y);
211
 
                                             T6a = VSUB(T5Y, T5X);
212
 
                                             T2R = VSUB(T2L, T2Q);
213
 
                                             T49 = VADD(T2Q, T2L);
214
 
                                             T14 = BYTW(&(W[TWVL * 90]), T13);
215
 
                                             T18 = BYTW(&(W[TWVL * 106]), T17);
216
 
                                             T5q = VSUB(TX, TZ);
217
 
                                             T10 = VADD(TX, TZ);
218
 
                                             T1a = BYTW(&(W[TWVL * 42]), T19);
219
 
                                        }
220
 
                                        T6b = VFMA(LDK(KP707106781), T6a, T69);
221
 
                                        T7v = VFNMS(LDK(KP707106781), T6a, T69);
222
 
                                        T60 = VFMA(LDK(KP707106781), T5Z, T5W);
223
 
                                        T7s = VFNMS(LDK(KP707106781), T5Z, T5W);
224
 
                                        T4a = VADD(T48, T49);
225
 
                                        T4I = VSUB(T48, T49);
226
 
                                        T5v = VSUB(T14, T12);
227
 
                                        T15 = VADD(T12, T14);
228
 
                                        T1b = VADD(T18, T1a);
229
 
                                        T5s = VSUB(T18, T1a);
230
 
                                   }
231
 
                                   T1c = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
232
 
                                   T1e = LD(&(x[WS(ios, 38)]), dist, &(x[0]));
233
 
                              }
234
 
                         }
235
 
                    }
236
 
                    {
237
 
                         V Th, T59, Tf, Tv, T5d, Tj, Tm, To;
238
 
                         {
239
 
                              V T5h, TQ, T5m, T5i, TO, TS, TJ, T4h, TD, TI;
240
 
                              {
241
 
                                   V T4k, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC;
242
 
                                   Tz = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
243
 
                                   T4k = VADD(T10, T15);
244
 
                                   T16 = VSUB(T10, T15);
245
 
                                   TB = LD(&(x[WS(ios, 34)]), dist, &(x[0]));
246
 
                                   T1d = BYTW(&(W[TWVL * 10]), T1c);
247
 
                                   T1f = BYTW(&(W[TWVL * 74]), T1e);
248
 
                                   TE = LD(&(x[WS(ios, 18)]), dist, &(x[0]));
249
 
                                   TG = LD(&(x[WS(ios, 50)]), dist, &(x[0]));
250
 
                                   TA = BYTW(&(W[TWVL * 2]), Tz);
251
 
                                   TK = LD(&(x[WS(ios, 10)]), dist, &(x[0]));
252
 
                                   TM = LD(&(x[WS(ios, 42)]), dist, &(x[0]));
253
 
                                   TC = BYTW(&(W[TWVL * 66]), TB);
254
 
                                   {
255
 
                                        V T1g, T5r, TF, TH, TL, TN, TP;
256
 
                                        TP = LD(&(x[WS(ios, 58)]), dist, &(x[0]));
257
 
                                        T1g = VADD(T1d, T1f);
258
 
                                        T5r = VSUB(T1d, T1f);
259
 
                                        TF = BYTW(&(W[TWVL * 34]), TE);
260
 
                                        TH = BYTW(&(W[TWVL * 98]), TG);
261
 
                                        TL = BYTW(&(W[TWVL * 18]), TK);
262
 
                                        TN = BYTW(&(W[TWVL * 82]), TM);
263
 
                                        T5h = VSUB(TA, TC);
264
 
                                        TD = VADD(TA, TC);
265
 
                                        TQ = BYTW(&(W[TWVL * 114]), TP);
266
 
                                        {
267
 
                                             V T5w, T5t, T4l, T1h, TR;
268
 
                                             T5w = VSUB(T5s, T5r);
269
 
                                             T5t = VADD(T5r, T5s);
270
 
                                             T4l = VADD(T1g, T1b);
271
 
                                             T1h = VSUB(T1b, T1g);
272
 
                                             T5m = VSUB(TF, TH);
273
 
                                             TI = VADD(TF, TH);
274
 
                                             T5i = VSUB(TL, TN);
275
 
                                             TO = VADD(TL, TN);
276
 
                                             TR = LD(&(x[WS(ios, 26)]), dist, &(x[0]));
277
 
                                             T5u = VFMA(LDK(KP707106781), T5t, T5q);
278
 
                                             T7h = VFNMS(LDK(KP707106781), T5t, T5q);
279
 
                                             T5x = VFMA(LDK(KP707106781), T5w, T5v);
280
 
                                             T7g = VFNMS(LDK(KP707106781), T5w, T5v);
281
 
                                             T1i = VFNMS(LDK(KP414213562), T1h, T16);
282
 
                                             T3b = VFMA(LDK(KP414213562), T16, T1h);
283
 
                                             T4m = VADD(T4k, T4l);
284
 
                                             T4C = VSUB(T4k, T4l);
285
 
                                             TS = BYTW(&(W[TWVL * 50]), TR);
286
 
                                        }
287
 
                                   }
288
 
                              }
289
 
                              TJ = VSUB(TD, TI);
290
 
                              T4h = VADD(TD, TI);
291
 
                              {
292
 
                                   V Tb, Td, Tr, T5j, TT, Tt, Tg;
293
 
                                   Tb = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
294
 
                                   Td = LD(&(x[WS(ios, 36)]), dist, &(x[0]));
295
 
                                   Tr = LD(&(x[WS(ios, 12)]), dist, &(x[0]));
296
 
                                   T5j = VSUB(TQ, TS);
297
 
                                   TT = VADD(TQ, TS);
298
 
                                   Tt = LD(&(x[WS(ios, 44)]), dist, &(x[0]));
299
 
                                   Tg = LD(&(x[WS(ios, 20)]), dist, &(x[0]));
300
 
                                   {
301
 
                                        V Ti, Tc, Te, Ts;
302
 
                                        Ti = LD(&(x[WS(ios, 52)]), dist, &(x[0]));
303
 
                                        Tc = BYTW(&(W[TWVL * 6]), Tb);
304
 
                                        Te = BYTW(&(W[TWVL * 70]), Td);
305
 
                                        Ts = BYTW(&(W[TWVL * 22]), Tr);
306
 
                                        {
307
 
                                             V T5k, T5n, TU, T4i, Tu;
308
 
                                             T5k = VADD(T5i, T5j);
309
 
                                             T5n = VSUB(T5i, T5j);
310
 
                                             TU = VSUB(TO, TT);
311
 
                                             T4i = VADD(TO, TT);
312
 
                                             Tu = BYTW(&(W[TWVL * 86]), Tt);
313
 
                                             Th = BYTW(&(W[TWVL * 38]), Tg);
314
 
                                             T59 = VSUB(Tc, Te);
315
 
                                             Tf = VADD(Tc, Te);
316
 
                                             T7e = VFNMS(LDK(KP707106781), T5k, T5h);
317
 
                                             T5l = VFMA(LDK(KP707106781), T5k, T5h);
318
 
                                             T7d = VFNMS(LDK(KP707106781), T5n, T5m);
319
 
                                             T5o = VFMA(LDK(KP707106781), T5n, T5m);
320
 
                                             T3a = VFMA(LDK(KP414213562), TJ, TU);
321
 
                                             TV = VFNMS(LDK(KP414213562), TU, TJ);
322
 
                                             T4B = VSUB(T4h, T4i);
323
 
                                             T4j = VADD(T4h, T4i);
324
 
                                             Tv = VADD(Ts, Tu);
325
 
                                             T5d = VSUB(Tu, Ts);
326
 
                                             Tj = BYTW(&(W[TWVL * 102]), Ti);
327
 
                                        }
328
 
                                   }
329
 
                                   Tm = LD(&(x[WS(ios, 60)]), dist, &(x[0]));
330
 
                                   To = LD(&(x[WS(ios, 28)]), dist, &(x[0]));
331
 
                              }
332
 
                         }
333
 
                         {
334
 
                              V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J;
335
 
                              {
336
 
                                   V T1w, T1y, T1M, T1O, Tq, T5c, T1B;
337
 
                                   T1w = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
338
 
                                   T1y = LD(&(x[WS(ios, 37)]), dist, &(x[WS(ios, 1)]));
339
 
                                   T1M = LD(&(x[WS(ios, 13)]), dist, &(x[WS(ios, 1)]));
340
 
                                   T1O = LD(&(x[WS(ios, 45)]), dist, &(x[WS(ios, 1)]));
341
 
                                   T1B = LD(&(x[WS(ios, 21)]), dist, &(x[WS(ios, 1)]));
342
 
                                   {
343
 
                                        V Tk, T5a, Tn, Tp;
344
 
                                        Tk = VADD(Th, Tj);
345
 
                                        T5a = VSUB(Th, Tj);
346
 
                                        Tn = BYTW(&(W[TWVL * 118]), Tm);
347
 
                                        Tp = BYTW(&(W[TWVL * 54]), To);
348
 
                                        {
349
 
                                             V T1x, T1z, T1N, T1P;
350
 
                                             T1x = BYTW(&(W[TWVL * 8]), T1w);
351
 
                                             T1z = BYTW(&(W[TWVL * 72]), T1y);
352
 
                                             T1N = BYTW(&(W[TWVL * 24]), T1M);
353
 
                                             T1P = BYTW(&(W[TWVL * 88]), T1O);
354
 
                                             T5b = VFNMS(LDK(KP414213562), T5a, T59);
355
 
                                             T6m = VFMA(LDK(KP414213562), T59, T5a);
356
 
                                             T3X = VADD(Tf, Tk);
357
 
                                             Tl = VSUB(Tf, Tk);
358
 
                                             Tq = VADD(Tn, Tp);
359
 
                                             T5c = VSUB(Tn, Tp);
360
 
                                             T1A = VADD(T1x, T1z);
361
 
                                             T5G = VSUB(T1x, T1z);
362
 
                                             T1Q = VADD(T1N, T1P);
363
 
                                             T5K = VSUB(T1N, T1P);
364
 
                                             T1C = BYTW(&(W[TWVL * 40]), T1B);
365
 
                                        }
366
 
                                   }
367
 
                                   T1D = LD(&(x[WS(ios, 53)]), dist, &(x[WS(ios, 1)]));
368
 
                                   T5e = VFNMS(LDK(KP414213562), T5d, T5c);
369
 
                                   T6n = VFMA(LDK(KP414213562), T5c, T5d);
370
 
                                   T3Y = VADD(Tq, Tv);
371
 
                                   Tw = VSUB(Tq, Tv);
372
 
                                   T1H = LD(&(x[WS(ios, 61)]), dist, &(x[WS(ios, 1)]));
373
 
                                   T1J = LD(&(x[WS(ios, 29)]), dist, &(x[WS(ios, 1)]));
374
 
                              }
375
 
                              {
376
 
                                   V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E;
377
 
                                   T2j = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
378
 
                                   T1E = BYTW(&(W[TWVL * 104]), T1D);
379
 
                                   T6o = VSUB(T6m, T6n);
380
 
                                   T7b = VADD(T6m, T6n);
381
 
                                   T5f = VADD(T5b, T5e);
382
 
                                   T7C = VSUB(T5b, T5e);
383
 
                                   Tx = VADD(Tl, Tw);
384
 
                                   T38 = VSUB(Tl, Tw);
385
 
                                   T1I = BYTW(&(W[TWVL * 120]), T1H);
386
 
                                   T1K = BYTW(&(W[TWVL * 56]), T1J);
387
 
                                   T1F = VADD(T1C, T1E);
388
 
                                   T5H = VSUB(T1C, T1E);
389
 
                                   T2k = BYTW(&(W[TWVL * 4]), T2j);
390
 
                                   T2l = LD(&(x[WS(ios, 35)]), dist, &(x[WS(ios, 1)]));
391
 
                                   T2z = LD(&(x[WS(ios, 11)]), dist, &(x[WS(ios, 1)]));
392
 
                                   T2B = LD(&(x[WS(ios, 43)]), dist, &(x[WS(ios, 1)]));
393
 
                                   {
394
 
                                        V T5I, T5R, T44, T1G, T2m, T2A, T2C, T5S, T5L, T1R, T45, T2o, T5J, T1L;
395
 
                                        T2o = LD(&(x[WS(ios, 19)]), dist, &(x[WS(ios, 1)]));
396
 
                                        T5J = VSUB(T1I, T1K);
397
 
                                        T1L = VADD(T1I, T1K);
398
 
                                        T5I = VFNMS(LDK(KP414213562), T5H, T5G);
399
 
                                        T5R = VFMA(LDK(KP414213562), T5G, T5H);
400
 
                                        T44 = VADD(T1A, T1F);
401
 
                                        T1G = VSUB(T1A, T1F);
402
 
                                        T2m = BYTW(&(W[TWVL * 68]), T2l);
403
 
                                        T2A = BYTW(&(W[TWVL * 20]), T2z);
404
 
                                        T2C = BYTW(&(W[TWVL * 84]), T2B);
405
 
                                        T5S = VFNMS(LDK(KP414213562), T5J, T5K);
406
 
                                        T5L = VFMA(LDK(KP414213562), T5K, T5J);
407
 
                                        T1R = VSUB(T1L, T1Q);
408
 
                                        T45 = VADD(T1L, T1Q);
409
 
                                        T2p = BYTW(&(W[TWVL * 36]), T2o);
410
 
                                        T61 = VSUB(T2k, T2m);
411
 
                                        T2n = VADD(T2k, T2m);
412
 
                                        T65 = VSUB(T2C, T2A);
413
 
                                        T2D = VADD(T2A, T2C);
414
 
                                        T7p = VSUB(T5I, T5L);
415
 
                                        T5M = VADD(T5I, T5L);
416
 
                                        T7m = VSUB(T5R, T5S);
417
 
                                        T5T = VADD(T5R, T5S);
418
 
                                        T4G = VSUB(T44, T45);
419
 
                                        T46 = VADD(T44, T45);
420
 
                                        T25 = VSUB(T1G, T1R);
421
 
                                        T1S = VADD(T1G, T1R);
422
 
                                        T2q = LD(&(x[WS(ios, 51)]), dist, &(x[WS(ios, 1)]));
423
 
                                   }
424
 
                                   T2u = LD(&(x[WS(ios, 59)]), dist, &(x[WS(ios, 1)]));
425
 
                                   T2w = LD(&(x[WS(ios, 27)]), dist, &(x[WS(ios, 1)]));
426
 
                              }
427
 
                         }
428
 
                    }
429
 
               }
430
 
               {
431
 
                    V T67, T7w, T6e, T7t, T3s, T3E, T39, T3D, T1k, T3k, T3t, T3c, T1T, T3v, T3w;
432
 
                    V T26, T2G, T3y, T3z, T2T;
433
 
                    {
434
 
                         V T4A, T4N, T47, T4v, T2r, T2v, T2x, T4s, T40, T3W, T3Z;
435
 
                         T4A = VSUB(T3U, T3V);
436
 
                         T3W = VADD(T3U, T3V);
437
 
                         T3Z = VADD(T3X, T3Y);
438
 
                         T4N = VSUB(T3X, T3Y);
439
 
                         T47 = VSUB(T43, T46);
440
 
                         T4v = VADD(T43, T46);
441
 
                         T2r = BYTW(&(W[TWVL * 100]), T2q);
442
 
                         T2v = BYTW(&(W[TWVL * 116]), T2u);
443
 
                         T2x = BYTW(&(W[TWVL * 52]), T2w);
444
 
                         T4s = VADD(T3W, T3Z);
445
 
                         T40 = VSUB(T3W, T3Z);
446
 
                         {
447
 
                              V T4O, T4n, T4Q, T4H, T4E, T4W, T4u, T4y, T4d, T4J, T2F, T2S;
448
 
                              {
449
 
                                   V T6c, T63, T2t, T4b, T6d, T66, T2E, T4c;
450
 
                                   {
451
 
                                        V T4D, T62, T2s, T64, T2y, T4t;
452
 
                                        T4O = VSUB(T4B, T4C);
453
 
                                        T4D = VADD(T4B, T4C);
454
 
                                        T62 = VSUB(T2r, T2p);
455
 
                                        T2s = VADD(T2p, T2r);
456
 
                                        T64 = VSUB(T2v, T2x);
457
 
                                        T2y = VADD(T2v, T2x);
458
 
                                        T4t = VADD(T4j, T4m);
459
 
                                        T4n = VSUB(T4j, T4m);
460
 
                                        T4Q = VFMA(LDK(KP414213562), T4F, T4G);
461
 
                                        T4H = VFNMS(LDK(KP414213562), T4G, T4F);
462
 
                                        T4E = VFMA(LDK(KP707106781), T4D, T4A);
463
 
                                        T4W = VFNMS(LDK(KP707106781), T4D, T4A);
464
 
                                        T6c = VFNMS(LDK(KP414213562), T61, T62);
465
 
                                        T63 = VFMA(LDK(KP414213562), T62, T61);
466
 
                                        T2t = VSUB(T2n, T2s);
467
 
                                        T4b = VADD(T2n, T2s);
468
 
                                        T6d = VFMA(LDK(KP414213562), T64, T65);
469
 
                                        T66 = VFNMS(LDK(KP414213562), T65, T64);
470
 
                                        T2E = VSUB(T2y, T2D);
471
 
                                        T4c = VADD(T2y, T2D);
472
 
                                        T4u = VSUB(T4s, T4t);
473
 
                                        T4y = VADD(T4s, T4t);
474
 
                                   }
475
 
                                   T67 = VADD(T63, T66);
476
 
                                   T7w = VSUB(T66, T63);
477
 
                                   T6e = VADD(T6c, T6d);
478
 
                                   T7t = VSUB(T6d, T6c);
479
 
                                   T4d = VADD(T4b, T4c);
480
 
                                   T4J = VSUB(T4c, T4b);
481
 
                                   T2F = VADD(T2t, T2E);
482
 
                                   T2S = VSUB(T2E, T2t);
483
 
                              }
484
 
                              {
485
 
                                   V Ty, T1j, T4R, T4K;
486
 
                                   Ty = VFMA(LDK(KP707106781), Tx, Ta);
487
 
                                   T3s = VFNMS(LDK(KP707106781), Tx, Ta);
488
 
                                   T3E = VSUB(TV, T1i);
489
 
                                   T1j = VADD(TV, T1i);
490
 
                                   T39 = VFMA(LDK(KP707106781), T38, T37);
491
 
                                   T3D = VFNMS(LDK(KP707106781), T38, T37);
492
 
                                   T4R = VFMA(LDK(KP414213562), T4I, T4J);
493
 
                                   T4K = VFNMS(LDK(KP414213562), T4J, T4I);
494
 
                                   {
495
 
                                        V T4w, T4e, T4P, T4Z;
496
 
                                        T4w = VADD(T4a, T4d);
497
 
                                        T4e = VSUB(T4a, T4d);
498
 
                                        T4P = VFMA(LDK(KP707106781), T4O, T4N);
499
 
                                        T4Z = VFNMS(LDK(KP707106781), T4O, T4N);
500
 
                                        T1k = VFMA(LDK(KP923879532), T1j, Ty);
501
 
                                        T3k = VFNMS(LDK(KP923879532), T1j, Ty);
502
 
                                        {
503
 
                                             V T4L, T50, T4S, T4X;
504
 
                                             T4L = VADD(T4H, T4K);
505
 
                                             T50 = VSUB(T4H, T4K);
506
 
                                             T4S = VSUB(T4Q, T4R);
507
 
                                             T4X = VADD(T4Q, T4R);
508
 
                                             {
509
 
                                                  V T4f, T4o, T4x, T4z;
510
 
                                                  T4f = VADD(T47, T4e);
511
 
                                                  T4o = VSUB(T47, T4e);
512
 
                                                  T4x = VSUB(T4v, T4w);
513
 
                                                  T4z = VADD(T4v, T4w);
514
 
                                                  {
515
 
                                                       V T53, T51, T4M, T4U;
516
 
                                                       T53 = VFNMS(LDK(KP923879532), T50, T4Z);
517
 
                                                       T51 = VFMA(LDK(KP923879532), T50, T4Z);
518
 
                                                       T4M = VFNMS(LDK(KP923879532), T4L, T4E);
519
 
                                                       T4U = VFMA(LDK(KP923879532), T4L, T4E);
520
 
                                                       {
521
 
                                                            V T52, T4Y, T4T, T4V;
522
 
                                                            T52 = VFMA(LDK(KP923879532), T4X, T4W);
523
 
                                                            T4Y = VFNMS(LDK(KP923879532), T4X, T4W);
524
 
                                                            T4T = VFNMS(LDK(KP923879532), T4S, T4P);
525
 
                                                            T4V = VFMA(LDK(KP923879532), T4S, T4P);
526
 
                                                            {
527
 
                                                                 V T4p, T4r, T4g, T4q;
528
 
                                                                 T4p = VFNMS(LDK(KP707106781), T4o, T4n);
529
 
                                                                 T4r = VFMA(LDK(KP707106781), T4o, T4n);
530
 
                                                                 T4g = VFNMS(LDK(KP707106781), T4f, T40);
531
 
                                                                 T4q = VFMA(LDK(KP707106781), T4f, T40);
532
 
                                                                 ST(&(x[0]), VADD(T4y, T4z), dist, &(x[0]));
533
 
                                                                 ST(&(x[WS(ios, 32)]), VSUB(T4y, T4z), dist, &(x[0]));
534
 
                                                                 ST(&(x[WS(ios, 16)]), VFMAI(T4x, T4u), dist, &(x[0]));
535
 
                                                                 ST(&(x[WS(ios, 48)]), VFNMSI(T4x, T4u), dist, &(x[0]));
536
 
                                                                 ST(&(x[WS(ios, 44)]), VFNMSI(T51, T4Y), dist, &(x[0]));
537
 
                                                                 ST(&(x[WS(ios, 20)]), VFMAI(T51, T4Y), dist, &(x[0]));
538
 
                                                                 ST(&(x[WS(ios, 52)]), VFMAI(T53, T52), dist, &(x[0]));
539
 
                                                                 ST(&(x[WS(ios, 12)]), VFNMSI(T53, T52), dist, &(x[0]));
540
 
                                                                 ST(&(x[WS(ios, 4)]), VFMAI(T4V, T4U), dist, &(x[0]));
541
 
                                                                 ST(&(x[WS(ios, 60)]), VFNMSI(T4V, T4U), dist, &(x[0]));
542
 
                                                                 ST(&(x[WS(ios, 36)]), VFMAI(T4T, T4M), dist, &(x[0]));
543
 
                                                                 ST(&(x[WS(ios, 28)]), VFNMSI(T4T, T4M), dist, &(x[0]));
544
 
                                                                 ST(&(x[WS(ios, 56)]), VFNMSI(T4r, T4q), dist, &(x[0]));
545
 
                                                                 ST(&(x[WS(ios, 8)]), VFMAI(T4r, T4q), dist, &(x[0]));
546
 
                                                                 ST(&(x[WS(ios, 40)]), VFMAI(T4p, T4g), dist, &(x[0]));
547
 
                                                                 ST(&(x[WS(ios, 24)]), VFNMSI(T4p, T4g), dist, &(x[0]));
548
 
                                                                 T3t = VADD(T3a, T3b);
549
 
                                                                 T3c = VSUB(T3a, T3b);
550
 
                                                            }
551
 
                                                       }
552
 
                                                  }
553
 
                                             }
554
 
                                        }
555
 
                                   }
556
 
                                   T1T = VFMA(LDK(KP707106781), T1S, T1v);
557
 
                                   T3v = VFNMS(LDK(KP707106781), T1S, T1v);
558
 
                                   T3w = VFNMS(LDK(KP707106781), T25, T24);
559
 
                                   T26 = VFMA(LDK(KP707106781), T25, T24);
560
 
                                   T2G = VFMA(LDK(KP707106781), T2F, T2i);
561
 
                                   T3y = VFNMS(LDK(KP707106781), T2F, T2i);
562
 
                                   T3z = VFNMS(LDK(KP707106781), T2S, T2R);
563
 
                                   T2T = VFMA(LDK(KP707106781), T2S, T2R);
564
 
                              }
565
 
                         }
566
 
                    }
567
 
                    {
568
 
                         V T3u, T3M, T3F, T3P, T3x, T3G, T3q, T3m, T3h, T3j, T3r, T3p, T2W, T3i;
569
 
                         {
570
 
                              V T3d, T3n, T27, T3e, T2U, T3f;
571
 
                              T3d = VFMA(LDK(KP923879532), T3c, T39);
572
 
                              T3n = VFNMS(LDK(KP923879532), T3c, T39);
573
 
                              T27 = VFNMS(LDK(KP198912367), T26, T1T);
574
 
                              T3e = VFMA(LDK(KP198912367), T1T, T26);
575
 
                              T2U = VFNMS(LDK(KP198912367), T2T, T2G);
576
 
                              T3f = VFMA(LDK(KP198912367), T2G, T2T);
577
 
                              T3u = VFMA(LDK(KP923879532), T3t, T3s);
578
 
                              T3M = VFNMS(LDK(KP923879532), T3t, T3s);
579
 
                              {
580
 
                                   V T3g, T3l, T2V, T3o;
581
 
                                   T3g = VSUB(T3e, T3f);
582
 
                                   T3l = VADD(T3e, T3f);
583
 
                                   T2V = VADD(T27, T2U);
584
 
                                   T3o = VSUB(T27, T2U);
585
 
                                   T3F = VFNMS(LDK(KP923879532), T3E, T3D);
586
 
                                   T3P = VFMA(LDK(KP923879532), T3E, T3D);
587
 
                                   T3x = VFMA(LDK(KP668178637), T3w, T3v);
588
 
                                   T3G = VFNMS(LDK(KP668178637), T3v, T3w);
589
 
                                   T3q = VFMA(LDK(KP980785280), T3l, T3k);
590
 
                                   T3m = VFNMS(LDK(KP980785280), T3l, T3k);
591
 
                                   T3h = VFNMS(LDK(KP980785280), T3g, T3d);
592
 
                                   T3j = VFMA(LDK(KP980785280), T3g, T3d);
593
 
                                   T3r = VFNMS(LDK(KP980785280), T3o, T3n);
594
 
                                   T3p = VFMA(LDK(KP980785280), T3o, T3n);
595
 
                                   T2W = VFNMS(LDK(KP980785280), T2V, T1k);
596
 
                                   T3i = VFMA(LDK(KP980785280), T2V, T1k);
597
 
                              }
598
 
                         }
599
 
                         {
600
 
                              V T7n, T7Z, T8j, T89, T7k, T7O, T8g, T7Y, T7H, T7R, T80, T7q, T7u, T82, T83;
601
 
                              V T7x;
602
 
                              {
603
 
                                   V T7c, T7W, T7D, T87, T7f, T7E, T3A, T3H, T7F, T7i;
604
 
                                   T7c = VFNMS(LDK(KP923879532), T7b, T7a);
605
 
                                   T7W = VFMA(LDK(KP923879532), T7b, T7a);
606
 
                                   T7D = VFMA(LDK(KP923879532), T7C, T7B);
607
 
                                   T87 = VFNMS(LDK(KP923879532), T7C, T7B);
608
 
                                   T7f = VFNMS(LDK(KP668178637), T7e, T7d);
609
 
                                   T7E = VFMA(LDK(KP668178637), T7d, T7e);
610
 
                                   ST(&(x[WS(ios, 46)]), VFNMSI(T3p, T3m), dist, &(x[0]));
611
 
                                   ST(&(x[WS(ios, 18)]), VFMAI(T3p, T3m), dist, &(x[0]));
612
 
                                   ST(&(x[WS(ios, 50)]), VFMAI(T3r, T3q), dist, &(x[0]));
613
 
                                   ST(&(x[WS(ios, 14)]), VFNMSI(T3r, T3q), dist, &(x[0]));
614
 
                                   ST(&(x[WS(ios, 2)]), VFMAI(T3j, T3i), dist, &(x[0]));
615
 
                                   ST(&(x[WS(ios, 62)]), VFNMSI(T3j, T3i), dist, &(x[0]));
616
 
                                   ST(&(x[WS(ios, 34)]), VFMAI(T3h, T2W), dist, &(x[0]));
617
 
                                   ST(&(x[WS(ios, 30)]), VFNMSI(T3h, T2W), dist, &(x[0]));
618
 
                                   T3A = VFMA(LDK(KP668178637), T3z, T3y);
619
 
                                   T3H = VFNMS(LDK(KP668178637), T3y, T3z);
620
 
                                   T7F = VFMA(LDK(KP668178637), T7g, T7h);
621
 
                                   T7i = VFNMS(LDK(KP668178637), T7h, T7g);
622
 
                                   T7n = VFNMS(LDK(KP923879532), T7m, T7l);
623
 
                                   T7Z = VFMA(LDK(KP923879532), T7m, T7l);
624
 
                                   {
625
 
                                        V T3I, T3N, T3B, T3Q;
626
 
                                        T3I = VSUB(T3G, T3H);
627
 
                                        T3N = VADD(T3G, T3H);
628
 
                                        T3B = VADD(T3x, T3A);
629
 
                                        T3Q = VSUB(T3x, T3A);
630
 
                                        {
631
 
                                             V T7j, T88, T7G, T7X;
632
 
                                             T7j = VADD(T7f, T7i);
633
 
                                             T88 = VSUB(T7f, T7i);
634
 
                                             T7G = VSUB(T7E, T7F);
635
 
                                             T7X = VADD(T7E, T7F);
636
 
                                             {
637
 
                                                  V T3S, T3O, T3J, T3L;
638
 
                                                  T3S = VFNMS(LDK(KP831469612), T3N, T3M);
639
 
                                                  T3O = VFMA(LDK(KP831469612), T3N, T3M);
640
 
                                                  T3J = VFNMS(LDK(KP831469612), T3I, T3F);
641
 
                                                  T3L = VFMA(LDK(KP831469612), T3I, T3F);
642
 
                                                  {
643
 
                                                       V T3T, T3R, T3C, T3K;
644
 
                                                       T3T = VFMA(LDK(KP831469612), T3Q, T3P);
645
 
                                                       T3R = VFNMS(LDK(KP831469612), T3Q, T3P);
646
 
                                                       T3C = VFNMS(LDK(KP831469612), T3B, T3u);
647
 
                                                       T3K = VFMA(LDK(KP831469612), T3B, T3u);
648
 
                                                       T8j = VFNMS(LDK(KP831469612), T88, T87);
649
 
                                                       T89 = VFMA(LDK(KP831469612), T88, T87);
650
 
                                                       T7k = VFNMS(LDK(KP831469612), T7j, T7c);
651
 
                                                       T7O = VFMA(LDK(KP831469612), T7j, T7c);
652
 
                                                       T8g = VFNMS(LDK(KP831469612), T7X, T7W);
653
 
                                                       T7Y = VFMA(LDK(KP831469612), T7X, T7W);
654
 
                                                       T7H = VFMA(LDK(KP831469612), T7G, T7D);
655
 
                                                       T7R = VFNMS(LDK(KP831469612), T7G, T7D);
656
 
                                                       ST(&(x[WS(ios, 42)]), VFMAI(T3R, T3O), dist, &(x[0]));
657
 
                                                       ST(&(x[WS(ios, 22)]), VFNMSI(T3R, T3O), dist, &(x[0]));
658
 
                                                       ST(&(x[WS(ios, 54)]), VFNMSI(T3T, T3S), dist, &(x[0]));
659
 
                                                       ST(&(x[WS(ios, 10)]), VFMAI(T3T, T3S), dist, &(x[0]));
660
 
                                                       ST(&(x[WS(ios, 58)]), VFMAI(T3L, T3K), dist, &(x[0]));
661
 
                                                       ST(&(x[WS(ios, 6)]), VFNMSI(T3L, T3K), dist, &(x[0]));
662
 
                                                       ST(&(x[WS(ios, 26)]), VFMAI(T3J, T3C), dist, &(x[0]));
663
 
                                                       ST(&(x[WS(ios, 38)]), VFNMSI(T3J, T3C), dist, &(x[0]));
664
 
                                                       T80 = VFNMS(LDK(KP923879532), T7p, T7o);
665
 
                                                       T7q = VFMA(LDK(KP923879532), T7p, T7o);
666
 
                                                  }
667
 
                                             }
668
 
                                        }
669
 
                                   }
670
 
                                   T7u = VFNMS(LDK(KP923879532), T7t, T7s);
671
 
                                   T82 = VFMA(LDK(KP923879532), T7t, T7s);
672
 
                                   T83 = VFNMS(LDK(KP923879532), T7w, T7v);
673
 
                                   T7x = VFMA(LDK(KP923879532), T7w, T7v);
674
 
                              }
675
 
                              {
676
 
                                   V T5g, T6I, T6p, T6T, T5p, T6q, T6r, T5y;
677
 
                                   T5g = VFMA(LDK(KP923879532), T5f, T58);
678
 
                                   T6I = VFNMS(LDK(KP923879532), T5f, T58);
679
 
                                   {
680
 
                                        V T7r, T7I, T7y, T7J;
681
 
                                        T7r = VFNMS(LDK(KP534511135), T7q, T7n);
682
 
                                        T7I = VFMA(LDK(KP534511135), T7n, T7q);
683
 
                                        T7y = VFNMS(LDK(KP534511135), T7x, T7u);
684
 
                                        T7J = VFMA(LDK(KP534511135), T7u, T7x);
685
 
                                        {
686
 
                                             V T81, T8a, T84, T8b;
687
 
                                             T81 = VFMA(LDK(KP303346683), T80, T7Z);
688
 
                                             T8a = VFNMS(LDK(KP303346683), T7Z, T80);
689
 
                                             T84 = VFMA(LDK(KP303346683), T83, T82);
690
 
                                             T8b = VFNMS(LDK(KP303346683), T82, T83);
691
 
                                             T6p = VFMA(LDK(KP923879532), T6o, T6l);
692
 
                                             T6T = VFNMS(LDK(KP923879532), T6o, T6l);
693
 
                                             T5p = VFNMS(LDK(KP198912367), T5o, T5l);
694
 
                                             T6q = VFMA(LDK(KP198912367), T5l, T5o);
695
 
                                             {
696
 
                                                  V T7K, T7P, T7z, T7S;
697
 
                                                  T7K = VSUB(T7I, T7J);
698
 
                                                  T7P = VADD(T7I, T7J);
699
 
                                                  T7z = VADD(T7r, T7y);
700
 
                                                  T7S = VSUB(T7r, T7y);
701
 
                                                  {
702
 
                                                       V T8c, T8h, T85, T8k;
703
 
                                                       T8c = VSUB(T8a, T8b);
704
 
                                                       T8h = VADD(T8a, T8b);
705
 
                                                       T85 = VADD(T81, T84);
706
 
                                                       T8k = VSUB(T81, T84);
707
 
                                                       {
708
 
                                                            V T7Q, T7U, T7L, T7N;
709
 
                                                            T7Q = VFNMS(LDK(KP881921264), T7P, T7O);
710
 
                                                            T7U = VFMA(LDK(KP881921264), T7P, T7O);
711
 
                                                            T7L = VFNMS(LDK(KP881921264), T7K, T7H);
712
 
                                                            T7N = VFMA(LDK(KP881921264), T7K, T7H);
713
 
                                                            {
714
 
                                                                 V T7T, T7V, T7A, T7M;
715
 
                                                                 T7T = VFMA(LDK(KP881921264), T7S, T7R);
716
 
                                                                 T7V = VFNMS(LDK(KP881921264), T7S, T7R);
717
 
                                                                 T7A = VFNMS(LDK(KP881921264), T7z, T7k);
718
 
                                                                 T7M = VFMA(LDK(KP881921264), T7z, T7k);
719
 
                                                                 {
720
 
                                                                      V T8i, T8m, T8d, T8f;
721
 
                                                                      T8i = VFMA(LDK(KP956940335), T8h, T8g);
722
 
                                                                      T8m = VFNMS(LDK(KP956940335), T8h, T8g);
723
 
                                                                      T8d = VFNMS(LDK(KP956940335), T8c, T89);
724
 
                                                                      T8f = VFMA(LDK(KP956940335), T8c, T89);
725
 
                                                                      {
726
 
                                                                           V T8l, T8n, T86, T8e;
727
 
                                                                           T8l = VFNMS(LDK(KP956940335), T8k, T8j);
728
 
                                                                           T8n = VFMA(LDK(KP956940335), T8k, T8j);
729
 
                                                                           T86 = VFNMS(LDK(KP956940335), T85, T7Y);
730
 
                                                                           T8e = VFMA(LDK(KP956940335), T85, T7Y);
731
 
                                                                           ST(&(x[WS(ios, 53)]), VFMAI(T7V, T7U), dist, &(x[WS(ios, 1)]));
732
 
                                                                           ST(&(x[WS(ios, 11)]), VFNMSI(T7V, T7U), dist, &(x[WS(ios, 1)]));
733
 
                                                                           ST(&(x[WS(ios, 43)]), VFNMSI(T7T, T7Q), dist, &(x[WS(ios, 1)]));
734
 
                                                                           ST(&(x[WS(ios, 21)]), VFMAI(T7T, T7Q), dist, &(x[WS(ios, 1)]));
735
 
                                                                           ST(&(x[WS(ios, 5)]), VFMAI(T7N, T7M), dist, &(x[WS(ios, 1)]));
736
 
                                                                           ST(&(x[WS(ios, 59)]), VFNMSI(T7N, T7M), dist, &(x[WS(ios, 1)]));
737
 
                                                                           ST(&(x[WS(ios, 37)]), VFMAI(T7L, T7A), dist, &(x[WS(ios, 1)]));
738
 
                                                                           ST(&(x[WS(ios, 27)]), VFNMSI(T7L, T7A), dist, &(x[WS(ios, 1)]));
739
 
                                                                           ST(&(x[WS(ios, 51)]), VFNMSI(T8n, T8m), dist, &(x[WS(ios, 1)]));
740
 
                                                                           ST(&(x[WS(ios, 13)]), VFMAI(T8n, T8m), dist, &(x[WS(ios, 1)]));
741
 
                                                                           ST(&(x[WS(ios, 45)]), VFMAI(T8l, T8i), dist, &(x[WS(ios, 1)]));
742
 
                                                                           ST(&(x[WS(ios, 19)]), VFNMSI(T8l, T8i), dist, &(x[WS(ios, 1)]));
743
 
                                                                           ST(&(x[WS(ios, 61)]), VFMAI(T8f, T8e), dist, &(x[WS(ios, 1)]));
744
 
                                                                           ST(&(x[WS(ios, 3)]), VFNMSI(T8f, T8e), dist, &(x[WS(ios, 1)]));
745
 
                                                                           ST(&(x[WS(ios, 29)]), VFMAI(T8d, T86), dist, &(x[WS(ios, 1)]));
746
 
                                                                           ST(&(x[WS(ios, 35)]), VFNMSI(T8d, T86), dist, &(x[WS(ios, 1)]));
747
 
                                                                           T6r = VFMA(LDK(KP198912367), T5u, T5x);
748
 
                                                                           T5y = VFNMS(LDK(KP198912367), T5x, T5u);
749
 
                                                                      }
750
 
                                                                 }
751
 
                                                            }
752
 
                                                       }
753
 
                                                  }
754
 
                                             }
755
 
                                        }
756
 
                                   }
757
 
                                   {
758
 
                                        V T5N, T5U, T68, T5z, T6U, T6f;
759
 
                                        T5N = VFMA(LDK(KP923879532), T5M, T5F);
760
 
                                        T6L = VFNMS(LDK(KP923879532), T5M, T5F);
761
 
                                        T6M = VFNMS(LDK(KP923879532), T5T, T5Q);
762
 
                                        T5U = VFMA(LDK(KP923879532), T5T, T5Q);
763
 
                                        T68 = VFMA(LDK(KP923879532), T67, T60);
764
 
                                        T6O = VFNMS(LDK(KP923879532), T67, T60);
765
 
                                        T5z = VADD(T5p, T5y);
766
 
                                        T6U = VSUB(T5p, T5y);
767
 
                                        T6P = VFNMS(LDK(KP923879532), T6e, T6b);
768
 
                                        T6f = VFMA(LDK(KP923879532), T6e, T6b);
769
 
                                        {
770
 
                                             V T5V, T6u, T6g, T6v, T6s, T6J;
771
 
                                             T6s = VSUB(T6q, T6r);
772
 
                                             T6J = VADD(T6q, T6r);
773
 
                                             T5V = VFNMS(LDK(KP098491403), T5U, T5N);
774
 
                                             T6u = VFMA(LDK(KP098491403), T5N, T5U);
775
 
                                             T75 = VFMA(LDK(KP980785280), T6U, T6T);
776
 
                                             T6V = VFNMS(LDK(KP980785280), T6U, T6T);
777
 
                                             T5A = VFMA(LDK(KP980785280), T5z, T5g);
778
 
                                             T6A = VFNMS(LDK(KP980785280), T5z, T5g);
779
 
                                             T6g = VFNMS(LDK(KP098491403), T6f, T68);
780
 
                                             T6v = VFMA(LDK(KP098491403), T68, T6f);
781
 
                                             T72 = VFNMS(LDK(KP980785280), T6J, T6I);
782
 
                                             T6K = VFMA(LDK(KP980785280), T6J, T6I);
783
 
                                             T6t = VFMA(LDK(KP980785280), T6s, T6p);
784
 
                                             T6D = VFNMS(LDK(KP980785280), T6s, T6p);
785
 
                                             T6w = VSUB(T6u, T6v);
786
 
                                             T6B = VADD(T6u, T6v);
787
 
                                             T6h = VADD(T5V, T6g);
788
 
                                             T6E = VSUB(T5V, T6g);
789
 
                                        }
790
 
                                   }
791
 
                              }
792
 
                         }
793
 
                    }
794
 
               }
795
 
          }
796
 
          {
797
 
               V T6W, T6N, T6G, T6C, T6z, T6x, T6H, T6F, T6y, T6i, T6X, T6Q;
798
 
               T6W = VFNMS(LDK(KP820678790), T6L, T6M);
799
 
               T6N = VFMA(LDK(KP820678790), T6M, T6L);
800
 
               T6G = VFMA(LDK(KP995184726), T6B, T6A);
801
 
               T6C = VFNMS(LDK(KP995184726), T6B, T6A);
802
 
               T6z = VFMA(LDK(KP995184726), T6w, T6t);
803
 
               T6x = VFNMS(LDK(KP995184726), T6w, T6t);
804
 
               T6H = VFNMS(LDK(KP995184726), T6E, T6D);
805
 
               T6F = VFMA(LDK(KP995184726), T6E, T6D);
806
 
               T6y = VFMA(LDK(KP995184726), T6h, T5A);
807
 
               T6i = VFNMS(LDK(KP995184726), T6h, T5A);
808
 
               T6X = VFNMS(LDK(KP820678790), T6O, T6P);
809
 
               T6Q = VFMA(LDK(KP820678790), T6P, T6O);
810
 
               {
811
 
                    V T73, T6Y, T76, T6R;
812
 
                    ST(&(x[WS(ios, 49)]), VFMAI(T6H, T6G), dist, &(x[WS(ios, 1)]));
813
 
                    ST(&(x[WS(ios, 15)]), VFNMSI(T6H, T6G), dist, &(x[WS(ios, 1)]));
814
 
                    ST(&(x[WS(ios, 47)]), VFNMSI(T6F, T6C), dist, &(x[WS(ios, 1)]));
815
 
                    ST(&(x[WS(ios, 17)]), VFMAI(T6F, T6C), dist, &(x[WS(ios, 1)]));
816
 
                    ST(&(x[WS(ios, 1)]), VFMAI(T6z, T6y), dist, &(x[WS(ios, 1)]));
817
 
                    ST(&(x[WS(ios, 63)]), VFNMSI(T6z, T6y), dist, &(x[WS(ios, 1)]));
818
 
                    ST(&(x[WS(ios, 33)]), VFMAI(T6x, T6i), dist, &(x[WS(ios, 1)]));
819
 
                    ST(&(x[WS(ios, 31)]), VFNMSI(T6x, T6i), dist, &(x[WS(ios, 1)]));
820
 
                    T73 = VADD(T6W, T6X);
821
 
                    T6Y = VSUB(T6W, T6X);
822
 
                    T76 = VSUB(T6N, T6Q);
823
 
                    T6R = VADD(T6N, T6Q);
824
 
                    {
825
 
                         V T78, T74, T71, T6Z, T79, T77, T70, T6S;
826
 
                         T78 = VFNMS(LDK(KP773010453), T73, T72);
827
 
                         T74 = VFMA(LDK(KP773010453), T73, T72);
828
 
                         T71 = VFMA(LDK(KP773010453), T6Y, T6V);
829
 
                         T6Z = VFNMS(LDK(KP773010453), T6Y, T6V);
830
 
                         T79 = VFMA(LDK(KP773010453), T76, T75);
831
 
                         T77 = VFNMS(LDK(KP773010453), T76, T75);
832
 
                         T70 = VFMA(LDK(KP773010453), T6R, T6K);
833
 
                         T6S = VFNMS(LDK(KP773010453), T6R, T6K);
834
 
                         ST(&(x[WS(ios, 55)]), VFNMSI(T79, T78), dist, &(x[WS(ios, 1)]));
835
 
                         ST(&(x[WS(ios, 9)]), VFMAI(T79, T78), dist, &(x[WS(ios, 1)]));
836
 
                         ST(&(x[WS(ios, 41)]), VFMAI(T77, T74), dist, &(x[WS(ios, 1)]));
837
 
                         ST(&(x[WS(ios, 23)]), VFNMSI(T77, T74), dist, &(x[WS(ios, 1)]));
838
 
                         ST(&(x[WS(ios, 57)]), VFMAI(T71, T70), dist, &(x[WS(ios, 1)]));
839
 
                         ST(&(x[WS(ios, 7)]), VFNMSI(T71, T70), dist, &(x[WS(ios, 1)]));
840
 
                         ST(&(x[WS(ios, 25)]), VFMAI(T6Z, T6S), dist, &(x[WS(ios, 1)]));
841
 
                         ST(&(x[WS(ios, 39)]), VFNMSI(T6Z, T6S), dist, &(x[WS(ios, 1)]));
842
 
                    }
843
 
               }
844
 
          }
845
 
     }
846
 
     return W;
847
 
}
848
 
 
849
 
static const tw_instr twinstr[] = {
850
 
     VTW(1),
851
 
     VTW(2),
852
 
     VTW(3),
853
 
     VTW(4),
854
 
     VTW(5),
855
 
     VTW(6),
856
 
     VTW(7),
857
 
     VTW(8),
858
 
     VTW(9),
859
 
     VTW(10),
860
 
     VTW(11),
861
 
     VTW(12),
862
 
     VTW(13),
863
 
     VTW(14),
864
 
     VTW(15),
865
 
     VTW(16),
866
 
     VTW(17),
867
 
     VTW(18),
868
 
     VTW(19),
869
 
     VTW(20),
870
 
     VTW(21),
871
 
     VTW(22),
872
 
     VTW(23),
873
 
     VTW(24),
874
 
     VTW(25),
875
 
     VTW(26),
876
 
     VTW(27),
877
 
     VTW(28),
878
 
     VTW(29),
879
 
     VTW(30),
880
 
     VTW(31),
881
 
     VTW(32),
882
 
     VTW(33),
883
 
     VTW(34),
884
 
     VTW(35),
885
 
     VTW(36),
886
 
     VTW(37),
887
 
     VTW(38),
888
 
     VTW(39),
889
 
     VTW(40),
890
 
     VTW(41),
891
 
     VTW(42),
892
 
     VTW(43),
893
 
     VTW(44),
894
 
     VTW(45),
895
 
     VTW(46),
896
 
     VTW(47),
897
 
     VTW(48),
898
 
     VTW(49),
899
 
     VTW(50),
900
 
     VTW(51),
901
 
     VTW(52),
902
 
     VTW(53),
903
 
     VTW(54),
904
 
     VTW(55),
905
 
     VTW(56),
906
 
     VTW(57),
907
 
     VTW(58),
908
 
     VTW(59),
909
 
     VTW(60),
910
 
     VTW(61),
911
 
     VTW(62),
912
 
     VTW(63),
913
 
     {TW_NEXT, VL, 0}
914
 
};
915
 
 
916
 
/*
 * Descriptor for the t2bv_64 codelet; its address is passed to the
 * planner when the codelet is registered.
 *
 * NOTE(review): field meanings below are read off the values only; the
 * ct_desc layout is declared elsewhere -- confirm against its definition.
 */
static const ct_desc desc = {
     64,                    /* transform size, matching "-n 64" in the generator command */
     "t2bv_64",             /* codelet name */
     twinstr,               /* twiddle-instruction table defined in this file */
     &GENUS,                /* GENUS comes from an included header */
     {261, 126, 258, 0},    /* first three match the header's additions, multiplications, fused multiply/add counts */
     0,                     /* remaining three fields are zero; meaning not visible here */
     0,
     0
};
917
 
 
918
 
/*
 * Registration entry point for this codelet.
 *
 * Passes the t2bv_64 kernel together with its descriptor `desc` to
 * kdft_dit_register for the given planner `p`. Returns nothing.
 *
 * NOTE(review): X(...) is a macro defined outside this file, presumably
 * the library's symbol-name decoration -- confirm against its definition.
 */
void X(codelet_t2bv_64) (planner *p) {
     X(kdft_dit_register) (p, t2bv_64, &desc);
}
921
 
#else                           /* HAVE_FMA */
922
 
 
923
 
/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2bv_64 -include t2b.h -sign 1 */
924
 
 
925
 
/*
 * This function contains 519 FP additions, 250 FP multiplications,
 * (or, 467 additions, 198 multiplications, 52 fused multiply/add),
 * 107 stack variables, and 128 memory accesses
 */
930
 
/*
 * Generator Id's : 
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 * $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
 */
936
 
 
937
 
#include "t2b.h"
938
 
 
939
 
static const R *t2bv_64(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
940
 
{
941
 
     DVK(KP290284677, +0.290284677254462367636192375817395274691476278);
942
 
     DVK(KP956940335, +0.956940335732208864935797886980269969482849206);
943
 
     DVK(KP471396736, +0.471396736825997648556387625905254377657460319);
944
 
     DVK(KP881921264, +0.881921264348355029712756863660388349508442621);
945
 
     DVK(KP634393284, +0.634393284163645498215171613225493370675687095);
946
 
     DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
947
 
     DVK(KP098017140, +0.098017140329560601994195563888641845861136673);
948
 
     DVK(KP995184726, +0.995184726672196886244836953109479921575474869);
949
 
     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);
950
 
     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
951
 
     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);
952
 
     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
953
 
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
954
 
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
955
 
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
956
 
     INT i;
957
 
     R *x;
958
 
     x = ii;
959
 
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(ios)) {
960
 
          V Tg, T4B, T6v, T7G, T3r, T4w, T5q, T7F, T5Y, T62, T28, T4d, T2g, T4a, T7g;
961
 
          V T7Y, T6f, T6j, T2Z, T4k, T37, T4h, T7n, T81, T7w, T7x, T7y, T5M, T6q, T1k;
962
 
          V T4s, T1r, T4t, T7t, T7u, T7v, T5F, T6p, TV, T4p, T12, T4q, T7A, T7B, TD;
963
 
          V T4x, T3k, T4C, T5x, T6s, T1R, T4b, T7j, T7Z, T2j, T4e, T5V, T63, T2I, T4i;
964
 
          V T7q, T82, T3a, T4l, T6c, T6k;
965
 
          {
966
 
               V T1, T3, T3p, T3n, Tb, Td, Te, T6, T8, T9, T2, T3o, T3m;
967
 
               T1 = LD(&(x[0]), dist, &(x[0]));
968
 
               T2 = LD(&(x[WS(ios, 32)]), dist, &(x[0]));
969
 
               T3 = BYTW(&(W[TWVL * 62]), T2);
970
 
               T3o = LD(&(x[WS(ios, 48)]), dist, &(x[0]));
971
 
               T3p = BYTW(&(W[TWVL * 94]), T3o);
972
 
               T3m = LD(&(x[WS(ios, 16)]), dist, &(x[0]));
973
 
               T3n = BYTW(&(W[TWVL * 30]), T3m);
974
 
               {
975
 
                    V Ta, Tc, T5, T7;
976
 
                    Ta = LD(&(x[WS(ios, 56)]), dist, &(x[0]));
977
 
                    Tb = BYTW(&(W[TWVL * 110]), Ta);
978
 
                    Tc = LD(&(x[WS(ios, 24)]), dist, &(x[0]));
979
 
                    Td = BYTW(&(W[TWVL * 46]), Tc);
980
 
                    Te = VSUB(Tb, Td);
981
 
                    T5 = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
982
 
                    T6 = BYTW(&(W[TWVL * 14]), T5);
983
 
                    T7 = LD(&(x[WS(ios, 40)]), dist, &(x[0]));
984
 
                    T8 = BYTW(&(W[TWVL * 78]), T7);
985
 
                    T9 = VSUB(T6, T8);
986
 
               }
987
 
               {
988
 
                    V T4, Tf, T6t, T6u;
989
 
                    T4 = VSUB(T1, T3);
990
 
                    Tf = VMUL(LDK(KP707106781), VADD(T9, Te));
991
 
                    Tg = VSUB(T4, Tf);
992
 
                    T4B = VADD(T4, Tf);
993
 
                    T6t = VADD(T6, T8);
994
 
                    T6u = VADD(Tb, Td);
995
 
                    T6v = VSUB(T6t, T6u);
996
 
                    T7G = VADD(T6t, T6u);
997
 
               }
998
 
               {
999
 
                    V T3l, T3q, T5o, T5p;
1000
 
                    T3l = VMUL(LDK(KP707106781), VSUB(T9, Te));
1001
 
                    T3q = VSUB(T3n, T3p);
1002
 
                    T3r = VSUB(T3l, T3q);
1003
 
                    T4w = VADD(T3q, T3l);
1004
 
                    T5o = VADD(T1, T3);
1005
 
                    T5p = VADD(T3n, T3p);
1006
 
                    T5q = VSUB(T5o, T5p);
1007
 
                    T7F = VADD(T5o, T5p);
1008
 
               }
1009
 
          }
1010
 
          {
1011
 
               V T24, T26, T61, T2b, T2d, T60, T1W, T5W, T21, T5X, T22, T27;
1012
 
               {
1013
 
                    V T23, T25, T2a, T2c;
1014
 
                    T23 = LD(&(x[WS(ios, 17)]), dist, &(x[WS(ios, 1)]));
1015
 
                    T24 = BYTW(&(W[TWVL * 32]), T23);
1016
 
                    T25 = LD(&(x[WS(ios, 49)]), dist, &(x[WS(ios, 1)]));
1017
 
                    T26 = BYTW(&(W[TWVL * 96]), T25);
1018
 
                    T61 = VADD(T24, T26);
1019
 
                    T2a = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
1020
 
                    T2b = BYTW(&(W[0]), T2a);
1021
 
                    T2c = LD(&(x[WS(ios, 33)]), dist, &(x[WS(ios, 1)]));
1022
 
                    T2d = BYTW(&(W[TWVL * 64]), T2c);
1023
 
                    T60 = VADD(T2b, T2d);
1024
 
               }
1025
 
               {
1026
 
                    V T1T, T1V, T1S, T1U;
1027
 
                    T1S = LD(&(x[WS(ios, 9)]), dist, &(x[WS(ios, 1)]));
1028
 
                    T1T = BYTW(&(W[TWVL * 16]), T1S);
1029
 
                    T1U = LD(&(x[WS(ios, 41)]), dist, &(x[WS(ios, 1)]));
1030
 
                    T1V = BYTW(&(W[TWVL * 80]), T1U);
1031
 
                    T1W = VSUB(T1T, T1V);
1032
 
                    T5W = VADD(T1T, T1V);
1033
 
               }
1034
 
               {
1035
 
                    V T1Y, T20, T1X, T1Z;
1036
 
                    T1X = LD(&(x[WS(ios, 57)]), dist, &(x[WS(ios, 1)]));
1037
 
                    T1Y = BYTW(&(W[TWVL * 112]), T1X);
1038
 
                    T1Z = LD(&(x[WS(ios, 25)]), dist, &(x[WS(ios, 1)]));
1039
 
                    T20 = BYTW(&(W[TWVL * 48]), T1Z);
1040
 
                    T21 = VSUB(T1Y, T20);
1041
 
                    T5X = VADD(T1Y, T20);
1042
 
               }
1043
 
               T5Y = VSUB(T5W, T5X);
1044
 
               T62 = VSUB(T60, T61);
1045
 
               T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21));
1046
 
               T27 = VSUB(T24, T26);
1047
 
               T28 = VSUB(T22, T27);
1048
 
               T4d = VADD(T27, T22);
1049
 
               {
1050
 
                    V T2e, T2f, T7e, T7f;
1051
 
                    T2e = VSUB(T2b, T2d);
1052
 
                    T2f = VMUL(LDK(KP707106781), VADD(T1W, T21));
1053
 
                    T2g = VSUB(T2e, T2f);
1054
 
                    T4a = VADD(T2e, T2f);
1055
 
                    T7e = VADD(T60, T61);
1056
 
                    T7f = VADD(T5W, T5X);
1057
 
                    T7g = VSUB(T7e, T7f);
1058
 
                    T7Y = VADD(T7e, T7f);
1059
 
               }
1060
 
          }
1061
 
          {
1062
 
               V T2V, T2X, T6i, T32, T34, T6h, T2N, T6d, T2S, T6e, T2T, T2Y;
1063
 
               {
1064
 
                    V T2U, T2W, T31, T33;
1065
 
                    T2U = LD(&(x[WS(ios, 15)]), dist, &(x[WS(ios, 1)]));
1066
 
                    T2V = BYTW(&(W[TWVL * 28]), T2U);
1067
 
                    T2W = LD(&(x[WS(ios, 47)]), dist, &(x[WS(ios, 1)]));
1068
 
                    T2X = BYTW(&(W[TWVL * 92]), T2W);
1069
 
                    T6i = VADD(T2V, T2X);
1070
 
                    T31 = LD(&(x[WS(ios, 63)]), dist, &(x[WS(ios, 1)]));
1071
 
                    T32 = BYTW(&(W[TWVL * 124]), T31);
1072
 
                    T33 = LD(&(x[WS(ios, 31)]), dist, &(x[WS(ios, 1)]));
1073
 
                    T34 = BYTW(&(W[TWVL * 60]), T33);
1074
 
                    T6h = VADD(T32, T34);
1075
 
               }
1076
 
               {
1077
 
                    V T2K, T2M, T2J, T2L;
1078
 
                    T2J = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
1079
 
                    T2K = BYTW(&(W[TWVL * 12]), T2J);
1080
 
                    T2L = LD(&(x[WS(ios, 39)]), dist, &(x[WS(ios, 1)]));
1081
 
                    T2M = BYTW(&(W[TWVL * 76]), T2L);
1082
 
                    T2N = VSUB(T2K, T2M);
1083
 
                    T6d = VADD(T2K, T2M);
1084
 
               }
1085
 
               {
1086
 
                    V T2P, T2R, T2O, T2Q;
1087
 
                    T2O = LD(&(x[WS(ios, 55)]), dist, &(x[WS(ios, 1)]));
1088
 
                    T2P = BYTW(&(W[TWVL * 108]), T2O);
1089
 
                    T2Q = LD(&(x[WS(ios, 23)]), dist, &(x[WS(ios, 1)]));
1090
 
                    T2R = BYTW(&(W[TWVL * 44]), T2Q);
1091
 
                    T2S = VSUB(T2P, T2R);
1092
 
                    T6e = VADD(T2P, T2R);
1093
 
               }
1094
 
               T6f = VSUB(T6d, T6e);
1095
 
               T6j = VSUB(T6h, T6i);
1096
 
               T2T = VMUL(LDK(KP707106781), VSUB(T2N, T2S));
1097
 
               T2Y = VSUB(T2V, T2X);
1098
 
               T2Z = VSUB(T2T, T2Y);
1099
 
               T4k = VADD(T2Y, T2T);
1100
 
               {
1101
 
                    V T35, T36, T7l, T7m;
1102
 
                    T35 = VSUB(T32, T34);
1103
 
                    T36 = VMUL(LDK(KP707106781), VADD(T2N, T2S));
1104
 
                    T37 = VSUB(T35, T36);
1105
 
                    T4h = VADD(T35, T36);
1106
 
                    T7l = VADD(T6h, T6i);
1107
 
                    T7m = VADD(T6d, T6e);
1108
 
                    T7n = VSUB(T7l, T7m);
1109
 
                    T81 = VADD(T7l, T7m);
1110
 
               }
1111
 
          }
1112
 
          {
1113
 
               V T1g, T1i, T5K, T1m, T1o, T5J, T18, T5G, T1d, T5H, T5I, T5L;
1114
 
               {
1115
 
                    V T1f, T1h, T1l, T1n;
1116
 
                    T1f = LD(&(x[WS(ios, 14)]), dist, &(x[0]));
1117
 
                    T1g = BYTW(&(W[TWVL * 26]), T1f);
1118
 
                    T1h = LD(&(x[WS(ios, 46)]), dist, &(x[0]));
1119
 
                    T1i = BYTW(&(W[TWVL * 90]), T1h);
1120
 
                    T5K = VADD(T1g, T1i);
1121
 
                    T1l = LD(&(x[WS(ios, 62)]), dist, &(x[0]));
1122
 
                    T1m = BYTW(&(W[TWVL * 122]), T1l);
1123
 
                    T1n = LD(&(x[WS(ios, 30)]), dist, &(x[0]));
1124
 
                    T1o = BYTW(&(W[TWVL * 58]), T1n);
1125
 
                    T5J = VADD(T1m, T1o);
1126
 
               }
1127
 
               {
1128
 
                    V T15, T17, T14, T16;
1129
 
                    T14 = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
1130
 
                    T15 = BYTW(&(W[TWVL * 10]), T14);
1131
 
                    T16 = LD(&(x[WS(ios, 38)]), dist, &(x[0]));
1132
 
                    T17 = BYTW(&(W[TWVL * 74]), T16);
1133
 
                    T18 = VSUB(T15, T17);
1134
 
                    T5G = VADD(T15, T17);
1135
 
               }
1136
 
               {
1137
 
                    V T1a, T1c, T19, T1b;
1138
 
                    T19 = LD(&(x[WS(ios, 54)]), dist, &(x[0]));
1139
 
                    T1a = BYTW(&(W[TWVL * 106]), T19);
1140
 
                    T1b = LD(&(x[WS(ios, 22)]), dist, &(x[0]));
1141
 
                    T1c = BYTW(&(W[TWVL * 42]), T1b);
1142
 
                    T1d = VSUB(T1a, T1c);
1143
 
                    T5H = VADD(T1a, T1c);
1144
 
               }
1145
 
               T7w = VADD(T5J, T5K);
1146
 
               T7x = VADD(T5G, T5H);
1147
 
               T7y = VSUB(T7w, T7x);
1148
 
               T5I = VSUB(T5G, T5H);
1149
 
               T5L = VSUB(T5J, T5K);
1150
 
               T5M = VFNMS(LDK(KP382683432), T5L, VMUL(LDK(KP923879532), T5I));
1151
 
               T6q = VFMA(LDK(KP923879532), T5L, VMUL(LDK(KP382683432), T5I));
1152
 
               {
1153
 
                    V T1e, T1j, T1p, T1q;
1154
 
                    T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d));
1155
 
                    T1j = VSUB(T1g, T1i);
1156
 
                    T1k = VSUB(T1e, T1j);
1157
 
                    T4s = VADD(T1j, T1e);
1158
 
                    T1p = VSUB(T1m, T1o);
1159
 
                    T1q = VMUL(LDK(KP707106781), VADD(T18, T1d));
1160
 
                    T1r = VSUB(T1p, T1q);
1161
 
                    T4t = VADD(T1p, T1q);
1162
 
               }
1163
 
          }
1164
 
          {
1165
 
               V TR, TT, T5A, TX, TZ, T5z, TJ, T5C, TO, T5D, T5B, T5E;
1166
 
               {
1167
 
                    V TQ, TS, TW, TY;
1168
 
                    TQ = LD(&(x[WS(ios, 18)]), dist, &(x[0]));
1169
 
                    TR = BYTW(&(W[TWVL * 34]), TQ);
1170
 
                    TS = LD(&(x[WS(ios, 50)]), dist, &(x[0]));
1171
 
                    TT = BYTW(&(W[TWVL * 98]), TS);
1172
 
                    T5A = VADD(TR, TT);
1173
 
                    TW = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
1174
 
                    TX = BYTW(&(W[TWVL * 2]), TW);
1175
 
                    TY = LD(&(x[WS(ios, 34)]), dist, &(x[0]));
1176
 
                    TZ = BYTW(&(W[TWVL * 66]), TY);
1177
 
                    T5z = VADD(TX, TZ);
1178
 
               }
1179
 
               {
1180
 
                    V TG, TI, TF, TH;
1181
 
                    TF = LD(&(x[WS(ios, 10)]), dist, &(x[0]));
1182
 
                    TG = BYTW(&(W[TWVL * 18]), TF);
1183
 
                    TH = LD(&(x[WS(ios, 42)]), dist, &(x[0]));
1184
 
                    TI = BYTW(&(W[TWVL * 82]), TH);
1185
 
                    TJ = VSUB(TG, TI);
1186
 
                    T5C = VADD(TG, TI);
1187
 
               }
1188
 
               {
1189
 
                    V TL, TN, TK, TM;
1190
 
                    TK = LD(&(x[WS(ios, 58)]), dist, &(x[0]));
1191
 
                    TL = BYTW(&(W[TWVL * 114]), TK);
1192
 
                    TM = LD(&(x[WS(ios, 26)]), dist, &(x[0]));
1193
 
                    TN = BYTW(&(W[TWVL * 50]), TM);
1194
 
                    TO = VSUB(TL, TN);
1195
 
                    T5D = VADD(TL, TN);
1196
 
               }
1197
 
               T7t = VADD(T5z, T5A);
1198
 
               T7u = VADD(T5C, T5D);
1199
 
               T7v = VSUB(T7t, T7u);
1200
 
               T5B = VSUB(T5z, T5A);
1201
 
               T5E = VSUB(T5C, T5D);
1202
 
               T5F = VFMA(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5E));
1203
 
               T6p = VFNMS(LDK(KP382683432), T5E, VMUL(LDK(KP923879532), T5B));
1204
 
               {
1205
 
                    V TP, TU, T10, T11;
1206
 
                    TP = VMUL(LDK(KP707106781), VSUB(TJ, TO));
1207
 
                    TU = VSUB(TR, TT);
1208
 
                    TV = VSUB(TP, TU);
1209
 
                    T4p = VADD(TU, TP);
1210
 
                    T10 = VSUB(TX, TZ);
1211
 
                    T11 = VMUL(LDK(KP707106781), VADD(TJ, TO));
1212
 
                    T12 = VSUB(T10, T11);
1213
 
                    T4q = VADD(T10, T11);
1214
 
               }
1215
 
          }
1216
 
          {
1217
 
               V Tl, T5r, TB, T5u, Tq, T5s, Tw, T5v, Tr, TC;
1218
 
               {
1219
 
                    V Ti, Tk, Th, Tj;
1220
 
                    Th = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
1221
 
                    Ti = BYTW(&(W[TWVL * 6]), Th);
1222
 
                    Tj = LD(&(x[WS(ios, 36)]), dist, &(x[0]));
1223
 
                    Tk = BYTW(&(W[TWVL * 70]), Tj);
1224
 
                    Tl = VSUB(Ti, Tk);
1225
 
                    T5r = VADD(Ti, Tk);
1226
 
               }
1227
 
               {
1228
 
                    V Ty, TA, Tx, Tz;
1229
 
                    Tx = LD(&(x[WS(ios, 60)]), dist, &(x[0]));
1230
 
                    Ty = BYTW(&(W[TWVL * 118]), Tx);
1231
 
                    Tz = LD(&(x[WS(ios, 28)]), dist, &(x[0]));
1232
 
                    TA = BYTW(&(W[TWVL * 54]), Tz);
1233
 
                    TB = VSUB(Ty, TA);
1234
 
                    T5u = VADD(Ty, TA);
1235
 
               }
1236
 
               {
1237
 
                    V Tn, Tp, Tm, To;
1238
 
                    Tm = LD(&(x[WS(ios, 20)]), dist, &(x[0]));
1239
 
                    Tn = BYTW(&(W[TWVL * 38]), Tm);
1240
 
                    To = LD(&(x[WS(ios, 52)]), dist, &(x[0]));
1241
 
                    Tp = BYTW(&(W[TWVL * 102]), To);
1242
 
                    Tq = VSUB(Tn, Tp);
1243
 
                    T5s = VADD(Tn, Tp);
1244
 
               }
1245
 
               {
1246
 
                    V Tt, Tv, Ts, Tu;
1247
 
                    Ts = LD(&(x[WS(ios, 12)]), dist, &(x[0]));
1248
 
                    Tt = BYTW(&(W[TWVL * 22]), Ts);
1249
 
                    Tu = LD(&(x[WS(ios, 44)]), dist, &(x[0]));
1250
 
                    Tv = BYTW(&(W[TWVL * 86]), Tu);
1251
 
                    Tw = VSUB(Tt, Tv);
1252
 
                    T5v = VADD(Tt, Tv);
1253
 
               }
1254
 
               T7A = VADD(T5r, T5s);
1255
 
               T7B = VADD(T5u, T5v);
1256
 
               Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq));
1257
 
               TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw));
1258
 
               TD = VSUB(Tr, TC);
1259
 
               T4x = VADD(Tr, TC);
1260
 
               {
1261
 
                    V T3i, T3j, T5t, T5w;
1262
 
                    T3i = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl));
1263
 
                    T3j = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw));
1264
 
                    T3k = VSUB(T3i, T3j);
1265
 
                    T4C = VADD(T3i, T3j);
1266
 
                    T5t = VSUB(T5r, T5s);
1267
 
                    T5w = VSUB(T5u, T5v);
1268
 
                    T5x = VMUL(LDK(KP707106781), VADD(T5t, T5w));
1269
 
                    T6s = VMUL(LDK(KP707106781), VSUB(T5t, T5w));
1270
 
               }
1271
 
          }
1272
 
          {
1273
 
               V T1z, T5P, T1P, T5T, T1E, T5Q, T1K, T5S;
1274
 
               {
1275
 
                    V T1w, T1y, T1v, T1x;
1276
 
                    T1v = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
1277
 
                    T1w = BYTW(&(W[TWVL * 8]), T1v);
1278
 
                    T1x = LD(&(x[WS(ios, 37)]), dist, &(x[WS(ios, 1)]));
1279
 
                    T1y = BYTW(&(W[TWVL * 72]), T1x);
1280
 
                    T1z = VSUB(T1w, T1y);
1281
 
                    T5P = VADD(T1w, T1y);
1282
 
               }
1283
 
               {
1284
 
                    V T1M, T1O, T1L, T1N;
1285
 
                    T1L = LD(&(x[WS(ios, 13)]), dist, &(x[WS(ios, 1)]));
1286
 
                    T1M = BYTW(&(W[TWVL * 24]), T1L);
1287
 
                    T1N = LD(&(x[WS(ios, 45)]), dist, &(x[WS(ios, 1)]));
1288
 
                    T1O = BYTW(&(W[TWVL * 88]), T1N);
1289
 
                    T1P = VSUB(T1M, T1O);
1290
 
                    T5T = VADD(T1M, T1O);
1291
 
               }
1292
 
               {
1293
 
                    V T1B, T1D, T1A, T1C;
1294
 
                    T1A = LD(&(x[WS(ios, 21)]), dist, &(x[WS(ios, 1)]));
1295
 
                    T1B = BYTW(&(W[TWVL * 40]), T1A);
1296
 
                    T1C = LD(&(x[WS(ios, 53)]), dist, &(x[WS(ios, 1)]));
1297
 
                    T1D = BYTW(&(W[TWVL * 104]), T1C);
1298
 
                    T1E = VSUB(T1B, T1D);
1299
 
                    T5Q = VADD(T1B, T1D);
1300
 
               }
1301
 
               {
1302
 
                    V T1H, T1J, T1G, T1I;
1303
 
                    T1G = LD(&(x[WS(ios, 61)]), dist, &(x[WS(ios, 1)]));
1304
 
                    T1H = BYTW(&(W[TWVL * 120]), T1G);
1305
 
                    T1I = LD(&(x[WS(ios, 29)]), dist, &(x[WS(ios, 1)]));
1306
 
                    T1J = BYTW(&(W[TWVL * 56]), T1I);
1307
 
                    T1K = VSUB(T1H, T1J);
1308
 
                    T5S = VADD(T1H, T1J);
1309
 
               }
1310
 
               {
1311
 
                    V T1F, T1Q, T7h, T7i;
1312
 
                    T1F = VFNMS(LDK(KP382683432), T1E, VMUL(LDK(KP923879532), T1z));
1313
 
                    T1Q = VFMA(LDK(KP923879532), T1K, VMUL(LDK(KP382683432), T1P));
1314
 
                    T1R = VSUB(T1F, T1Q);
1315
 
                    T4b = VADD(T1F, T1Q);
1316
 
                    T7h = VADD(T5P, T5Q);
1317
 
                    T7i = VADD(T5S, T5T);
1318
 
                    T7j = VSUB(T7h, T7i);
1319
 
                    T7Z = VADD(T7h, T7i);
1320
 
               }
1321
 
               {
1322
 
                    V T2h, T2i, T5R, T5U;
1323
 
                    T2h = VFMA(LDK(KP382683432), T1z, VMUL(LDK(KP923879532), T1E));
1324
 
                    T2i = VFNMS(LDK(KP382683432), T1K, VMUL(LDK(KP923879532), T1P));
1325
 
                    T2j = VSUB(T2h, T2i);
1326
 
                    T4e = VADD(T2h, T2i);
1327
 
                    T5R = VSUB(T5P, T5Q);
1328
 
                    T5U = VSUB(T5S, T5T);
1329
 
                    T5V = VMUL(LDK(KP707106781), VSUB(T5R, T5U));
1330
 
                    T63 = VMUL(LDK(KP707106781), VADD(T5R, T5U));
1331
 
               }
1332
 
          }
1333
 
          {
1334
 
               V T2q, T66, T2G, T6a, T2v, T67, T2B, T69;
1335
 
               {
1336
 
                    V T2n, T2p, T2m, T2o;
1337
 
                    T2m = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
1338
 
                    T2n = BYTW(&(W[TWVL * 4]), T2m);
1339
 
                    T2o = LD(&(x[WS(ios, 35)]), dist, &(x[WS(ios, 1)]));
1340
 
                    T2p = BYTW(&(W[TWVL * 68]), T2o);
1341
 
                    T2q = VSUB(T2n, T2p);
1342
 
                    T66 = VADD(T2n, T2p);
1343
 
               }
1344
 
               {
1345
 
                    V T2D, T2F, T2C, T2E;
1346
 
                    T2C = LD(&(x[WS(ios, 11)]), dist, &(x[WS(ios, 1)]));
1347
 
                    T2D = BYTW(&(W[TWVL * 20]), T2C);
1348
 
                    T2E = LD(&(x[WS(ios, 43)]), dist, &(x[WS(ios, 1)]));
1349
 
                    T2F = BYTW(&(W[TWVL * 84]), T2E);
1350
 
                    T2G = VSUB(T2D, T2F);
1351
 
                    T6a = VADD(T2D, T2F);
1352
 
               }
1353
 
               {
1354
 
                    V T2s, T2u, T2r, T2t;
1355
 
                    T2r = LD(&(x[WS(ios, 19)]), dist, &(x[WS(ios, 1)]));
1356
 
                    T2s = BYTW(&(W[TWVL * 36]), T2r);
1357
 
                    T2t = LD(&(x[WS(ios, 51)]), dist, &(x[WS(ios, 1)]));
1358
 
                    T2u = BYTW(&(W[TWVL * 100]), T2t);
1359
 
                    T2v = VSUB(T2s, T2u);
1360
 
                    T67 = VADD(T2s, T2u);
1361
 
               }
1362
 
               {
1363
 
                    V T2y, T2A, T2x, T2z;
1364
 
                    T2x = LD(&(x[WS(ios, 59)]), dist, &(x[WS(ios, 1)]));
1365
 
                    T2y = BYTW(&(W[TWVL * 116]), T2x);
1366
 
                    T2z = LD(&(x[WS(ios, 27)]), dist, &(x[WS(ios, 1)]));
1367
 
                    T2A = BYTW(&(W[TWVL * 52]), T2z);
1368
 
                    T2B = VSUB(T2y, T2A);
1369
 
                    T69 = VADD(T2y, T2A);
1370
 
               }
1371
 
               {
1372
 
                    V T2w, T2H, T7o, T7p;
1373
 
                    T2w = VFNMS(LDK(KP382683432), T2v, VMUL(LDK(KP923879532), T2q));
1374
 
                    T2H = VFMA(LDK(KP923879532), T2B, VMUL(LDK(KP382683432), T2G));
1375
 
                    T2I = VSUB(T2w, T2H);
1376
 
                    T4i = VADD(T2w, T2H);
1377
 
                    T7o = VADD(T66, T67);
1378
 
                    T7p = VADD(T69, T6a);
1379
 
                    T7q = VSUB(T7o, T7p);
1380
 
                    T82 = VADD(T7o, T7p);
1381
 
               }
1382
 
               {
1383
 
                    V T38, T39, T68, T6b;
1384
 
                    T38 = VFMA(LDK(KP382683432), T2q, VMUL(LDK(KP923879532), T2v));
1385
 
                    T39 = VFNMS(LDK(KP382683432), T2B, VMUL(LDK(KP923879532), T2G));
1386
 
                    T3a = VSUB(T38, T39);
1387
 
                    T4l = VADD(T38, T39);
1388
 
                    T68 = VSUB(T66, T67);
1389
 
                    T6b = VSUB(T69, T6a);
1390
 
                    T6c = VMUL(LDK(KP707106781), VSUB(T68, T6b));
1391
 
                    T6k = VMUL(LDK(KP707106781), VADD(T68, T6b));
1392
 
               }
1393
 
          }
1394
 
          {
1395
 
               V T7s, T7R, T7M, T7U, T7D, T7T, T7J, T7Q;
1396
 
               {
1397
 
                    V T7k, T7r, T7K, T7L;
1398
 
                    T7k = VFNMS(LDK(KP382683432), T7j, VMUL(LDK(KP923879532), T7g));
1399
 
                    T7r = VFMA(LDK(KP923879532), T7n, VMUL(LDK(KP382683432), T7q));
1400
 
                    T7s = VSUB(T7k, T7r);
1401
 
                    T7R = VADD(T7k, T7r);
1402
 
                    T7K = VFMA(LDK(KP382683432), T7g, VMUL(LDK(KP923879532), T7j));
1403
 
                    T7L = VFNMS(LDK(KP382683432), T7n, VMUL(LDK(KP923879532), T7q));
1404
 
                    T7M = VSUB(T7K, T7L);
1405
 
                    T7U = VADD(T7K, T7L);
1406
 
               }
1407
 
               {
1408
 
                    V T7z, T7C, T7H, T7I;
1409
 
                    T7z = VMUL(LDK(KP707106781), VSUB(T7v, T7y));
1410
 
                    T7C = VSUB(T7A, T7B);
1411
 
                    T7D = VSUB(T7z, T7C);
1412
 
                    T7T = VADD(T7C, T7z);
1413
 
                    T7H = VSUB(T7F, T7G);
1414
 
                    T7I = VMUL(LDK(KP707106781), VADD(T7v, T7y));
1415
 
                    T7J = VSUB(T7H, T7I);
1416
 
                    T7Q = VADD(T7H, T7I);
1417
 
               }
1418
 
               {
1419
 
                    V T7E, T7N, T7W, T7X;
1420
 
                    T7E = VBYI(VSUB(T7s, T7D));
1421
 
                    T7N = VSUB(T7J, T7M);
1422
 
                    ST(&(x[WS(ios, 20)]), VADD(T7E, T7N), dist, &(x[0]));
1423
 
                    ST(&(x[WS(ios, 44)]), VSUB(T7N, T7E), dist, &(x[0]));
1424
 
                    T7W = VSUB(T7Q, T7R);
1425
 
                    T7X = VBYI(VSUB(T7U, T7T));
1426
 
                    ST(&(x[WS(ios, 36)]), VSUB(T7W, T7X), dist, &(x[0]));
1427
 
                    ST(&(x[WS(ios, 28)]), VADD(T7W, T7X), dist, &(x[0]));
1428
 
               }
1429
 
               {
1430
 
                    V T7O, T7P, T7S, T7V;
1431
 
                    T7O = VBYI(VADD(T7D, T7s));
1432
 
                    T7P = VADD(T7J, T7M);
1433
 
                    ST(&(x[WS(ios, 12)]), VADD(T7O, T7P), dist, &(x[0]));
1434
 
                    ST(&(x[WS(ios, 52)]), VSUB(T7P, T7O), dist, &(x[0]));
1435
 
                    T7S = VADD(T7Q, T7R);
1436
 
                    T7V = VBYI(VADD(T7T, T7U));
1437
 
                    ST(&(x[WS(ios, 60)]), VSUB(T7S, T7V), dist, &(x[0]));
1438
 
                    ST(&(x[WS(ios, 4)]), VADD(T7S, T7V), dist, &(x[0]));
1439
 
               }
1440
 
          }
1441
 
          {
1442
 
               V T84, T8c, T8l, T8n, T87, T8h, T8b, T8g, T8i, T8m;
1443
 
               {
1444
 
                    V T80, T83, T8j, T8k;
1445
 
                    T80 = VSUB(T7Y, T7Z);
1446
 
                    T83 = VSUB(T81, T82);
1447
 
                    T84 = VMUL(LDK(KP707106781), VSUB(T80, T83));
1448
 
                    T8c = VMUL(LDK(KP707106781), VADD(T80, T83));
1449
 
                    T8j = VADD(T7Y, T7Z);
1450
 
                    T8k = VADD(T81, T82);
1451
 
                    T8l = VBYI(VSUB(T8j, T8k));
1452
 
                    T8n = VADD(T8j, T8k);
1453
 
               }
1454
 
               {
1455
 
                    V T85, T86, T89, T8a;
1456
 
                    T85 = VADD(T7t, T7u);
1457
 
                    T86 = VADD(T7w, T7x);
1458
 
                    T87 = VSUB(T85, T86);
1459
 
                    T8h = VADD(T85, T86);
1460
 
                    T89 = VADD(T7F, T7G);
1461
 
                    T8a = VADD(T7A, T7B);
1462
 
                    T8b = VSUB(T89, T8a);
1463
 
                    T8g = VADD(T89, T8a);
1464
 
               }
1465
 
               T8i = VSUB(T8g, T8h);
1466
 
               ST(&(x[WS(ios, 48)]), VSUB(T8i, T8l), dist, &(x[0]));
1467
 
               ST(&(x[WS(ios, 16)]), VADD(T8i, T8l), dist, &(x[0]));
1468
 
               T8m = VADD(T8g, T8h);
1469
 
               ST(&(x[WS(ios, 32)]), VSUB(T8m, T8n), dist, &(x[0]));
1470
 
               ST(&(x[0]), VADD(T8m, T8n), dist, &(x[0]));
1471
 
               {
1472
 
                    V T88, T8d, T8e, T8f;
1473
 
                    T88 = VBYI(VSUB(T84, T87));
1474
 
                    T8d = VSUB(T8b, T8c);
1475
 
                    ST(&(x[WS(ios, 24)]), VADD(T88, T8d), dist, &(x[0]));
1476
 
                    ST(&(x[WS(ios, 40)]), VSUB(T8d, T88), dist, &(x[0]));
1477
 
                    T8e = VBYI(VADD(T87, T84));
1478
 
                    T8f = VADD(T8b, T8c);
1479
 
                    ST(&(x[WS(ios, 8)]), VADD(T8e, T8f), dist, &(x[0]));
1480
 
                    ST(&(x[WS(ios, 56)]), VSUB(T8f, T8e), dist, &(x[0]));
1481
 
               }
1482
 
          }
1483
 
          {
1484
 
               V T5O, T6H, T6x, T6F, T6n, T6I, T6A, T6E;
1485
 
               {
1486
 
                    V T5y, T5N, T6r, T6w;
1487
 
                    T5y = VSUB(T5q, T5x);
1488
 
                    T5N = VSUB(T5F, T5M);
1489
 
                    T5O = VSUB(T5y, T5N);
1490
 
                    T6H = VADD(T5y, T5N);
1491
 
                    T6r = VSUB(T6p, T6q);
1492
 
                    T6w = VSUB(T6s, T6v);
1493
 
                    T6x = VSUB(T6r, T6w);
1494
 
                    T6F = VADD(T6w, T6r);
1495
 
                    {
1496
 
                         V T65, T6y, T6m, T6z;
1497
 
                         {
1498
 
                              V T5Z, T64, T6g, T6l;
1499
 
                              T5Z = VSUB(T5V, T5Y);
1500
 
                              T64 = VSUB(T62, T63);
1501
 
                              T65 = VFMA(LDK(KP831469612), T5Z, VMUL(LDK(KP555570233), T64));
1502
 
                              T6y = VFNMS(LDK(KP555570233), T5Z, VMUL(LDK(KP831469612), T64));
1503
 
                              T6g = VSUB(T6c, T6f);
1504
 
                              T6l = VSUB(T6j, T6k);
1505
 
                              T6m = VFNMS(LDK(KP555570233), T6l, VMUL(LDK(KP831469612), T6g));
1506
 
                              T6z = VFMA(LDK(KP555570233), T6g, VMUL(LDK(KP831469612), T6l));
1507
 
                         }
1508
 
                         T6n = VSUB(T65, T6m);
1509
 
                         T6I = VADD(T6y, T6z);
1510
 
                         T6A = VSUB(T6y, T6z);
1511
 
                         T6E = VADD(T65, T6m);
1512
 
                    }
1513
 
               }
1514
 
               {
1515
 
                    V T6o, T6B, T6K, T6L;
1516
 
                    T6o = VADD(T5O, T6n);
1517
 
                    T6B = VBYI(VADD(T6x, T6A));
1518
 
                    ST(&(x[WS(ios, 54)]), VSUB(T6o, T6B), dist, &(x[0]));
1519
 
                    ST(&(x[WS(ios, 10)]), VADD(T6o, T6B), dist, &(x[0]));
1520
 
                    T6K = VBYI(VADD(T6F, T6E));
1521
 
                    T6L = VADD(T6H, T6I);
1522
 
                    ST(&(x[WS(ios, 6)]), VADD(T6K, T6L), dist, &(x[0]));
1523
 
                    ST(&(x[WS(ios, 58)]), VSUB(T6L, T6K), dist, &(x[0]));
1524
 
               }
1525
 
               {
1526
 
                    V T6C, T6D, T6G, T6J;
1527
 
                    T6C = VSUB(T5O, T6n);
1528
 
                    T6D = VBYI(VSUB(T6A, T6x));
1529
 
                    ST(&(x[WS(ios, 42)]), VSUB(T6C, T6D), dist, &(x[0]));
1530
 
                    ST(&(x[WS(ios, 22)]), VADD(T6C, T6D), dist, &(x[0]));
1531
 
                    T6G = VBYI(VSUB(T6E, T6F));
1532
 
                    T6J = VSUB(T6H, T6I);
1533
 
                    ST(&(x[WS(ios, 26)]), VADD(T6G, T6J), dist, &(x[0]));
1534
 
                    ST(&(x[WS(ios, 38)]), VSUB(T6J, T6G), dist, &(x[0]));
1535
 
               }
1536
 
          }
1537
 
          {
1538
 
               V T6O, T79, T6Z, T77, T6V, T7a, T72, T76;
1539
 
               {
1540
 
                    V T6M, T6N, T6X, T6Y;
1541
 
                    T6M = VADD(T5q, T5x);
1542
 
                    T6N = VADD(T6p, T6q);
1543
 
                    T6O = VSUB(T6M, T6N);
1544
 
                    T79 = VADD(T6M, T6N);
1545
 
                    T6X = VADD(T5F, T5M);
1546
 
                    T6Y = VADD(T6v, T6s);
1547
 
                    T6Z = VSUB(T6X, T6Y);
1548
 
                    T77 = VADD(T6Y, T6X);
1549
 
                    {
1550
 
                         V T6R, T70, T6U, T71;
1551
 
                         {
1552
 
                              V T6P, T6Q, T6S, T6T;
1553
 
                              T6P = VADD(T5Y, T5V);
1554
 
                              T6Q = VADD(T62, T63);
1555
 
                              T6R = VFMA(LDK(KP980785280), T6P, VMUL(LDK(KP195090322), T6Q));
1556
 
                              T70 = VFNMS(LDK(KP195090322), T6P, VMUL(LDK(KP980785280), T6Q));
1557
 
                              T6S = VADD(T6f, T6c);
1558
 
                              T6T = VADD(T6j, T6k);
1559
 
                              T6U = VFNMS(LDK(KP195090322), T6T, VMUL(LDK(KP980785280), T6S));
1560
 
                              T71 = VFMA(LDK(KP195090322), T6S, VMUL(LDK(KP980785280), T6T));
1561
 
                         }
1562
 
                         T6V = VSUB(T6R, T6U);
1563
 
                         T7a = VADD(T70, T71);
1564
 
                         T72 = VSUB(T70, T71);
1565
 
                         T76 = VADD(T6R, T6U);
1566
 
                    }
1567
 
               }
1568
 
               {
1569
 
                    V T6W, T73, T7c, T7d;
1570
 
                    T6W = VADD(T6O, T6V);
1571
 
                    T73 = VBYI(VADD(T6Z, T72));
1572
 
                    ST(&(x[WS(ios, 50)]), VSUB(T6W, T73), dist, &(x[0]));
1573
 
                    ST(&(x[WS(ios, 14)]), VADD(T6W, T73), dist, &(x[0]));
1574
 
                    T7c = VBYI(VADD(T77, T76));
1575
 
                    T7d = VADD(T79, T7a);
1576
 
                    ST(&(x[WS(ios, 2)]), VADD(T7c, T7d), dist, &(x[0]));
1577
 
                    ST(&(x[WS(ios, 62)]), VSUB(T7d, T7c), dist, &(x[0]));
1578
 
               }
1579
 
               {
1580
 
                    V T74, T75, T78, T7b;
1581
 
                    T74 = VSUB(T6O, T6V);
1582
 
                    T75 = VBYI(VSUB(T72, T6Z));
1583
 
                    ST(&(x[WS(ios, 46)]), VSUB(T74, T75), dist, &(x[0]));
1584
 
                    ST(&(x[WS(ios, 18)]), VADD(T74, T75), dist, &(x[0]));
1585
 
                    T78 = VBYI(VSUB(T76, T77));
1586
 
                    T7b = VSUB(T79, T7a);
1587
 
                    ST(&(x[WS(ios, 30)]), VADD(T78, T7b), dist, &(x[0]));
1588
 
                    ST(&(x[WS(ios, 34)]), VSUB(T7b, T78), dist, &(x[0]));
1589
 
               }
1590
 
          }
1591
 
          {
1592
 
               V T4z, T5g, T4R, T59, T4H, T5j, T4O, T55, T4o, T4S, T4K, T4P, T52, T5k, T5c;
1593
 
               V T5h;
1594
 
               {
1595
 
                    V T4y, T57, T4v, T58, T4r, T4u;
1596
 
                    T4y = VADD(T4w, T4x);
1597
 
                    T57 = VSUB(T4B, T4C);
1598
 
                    T4r = VFMA(LDK(KP980785280), T4p, VMUL(LDK(KP195090322), T4q));
1599
 
                    T4u = VFNMS(LDK(KP195090322), T4t, VMUL(LDK(KP980785280), T4s));
1600
 
                    T4v = VADD(T4r, T4u);
1601
 
                    T58 = VSUB(T4r, T4u);
1602
 
                    T4z = VSUB(T4v, T4y);
1603
 
                    T5g = VADD(T57, T58);
1604
 
                    T4R = VADD(T4y, T4v);
1605
 
                    T59 = VSUB(T57, T58);
1606
 
               }
1607
 
               {
1608
 
                    V T4D, T54, T4G, T53, T4E, T4F;
1609
 
                    T4D = VADD(T4B, T4C);
1610
 
                    T54 = VSUB(T4x, T4w);
1611
 
                    T4E = VFNMS(LDK(KP195090322), T4p, VMUL(LDK(KP980785280), T4q));
1612
 
                    T4F = VFMA(LDK(KP195090322), T4s, VMUL(LDK(KP980785280), T4t));
1613
 
                    T4G = VADD(T4E, T4F);
1614
 
                    T53 = VSUB(T4E, T4F);
1615
 
                    T4H = VSUB(T4D, T4G);
1616
 
                    T5j = VADD(T54, T53);
1617
 
                    T4O = VADD(T4D, T4G);
1618
 
                    T55 = VSUB(T53, T54);
1619
 
               }
1620
 
               {
1621
 
                    V T4g, T4I, T4n, T4J;
1622
 
                    {
1623
 
                         V T4c, T4f, T4j, T4m;
1624
 
                         T4c = VADD(T4a, T4b);
1625
 
                         T4f = VADD(T4d, T4e);
1626
 
                         T4g = VFNMS(LDK(KP098017140), T4f, VMUL(LDK(KP995184726), T4c));
1627
 
                         T4I = VFMA(LDK(KP098017140), T4c, VMUL(LDK(KP995184726), T4f));
1628
 
                         T4j = VADD(T4h, T4i);
1629
 
                         T4m = VADD(T4k, T4l);
1630
 
                         T4n = VFMA(LDK(KP995184726), T4j, VMUL(LDK(KP098017140), T4m));
1631
 
                         T4J = VFNMS(LDK(KP098017140), T4j, VMUL(LDK(KP995184726), T4m));
1632
 
                    }
1633
 
                    T4o = VSUB(T4g, T4n);
1634
 
                    T4S = VADD(T4I, T4J);
1635
 
                    T4K = VSUB(T4I, T4J);
1636
 
                    T4P = VADD(T4g, T4n);
1637
 
               }
1638
 
               {
1639
 
                    V T4Y, T5a, T51, T5b;
1640
 
                    {
1641
 
                         V T4W, T4X, T4Z, T50;
1642
 
                         T4W = VSUB(T4a, T4b);
1643
 
                         T4X = VSUB(T4e, T4d);
1644
 
                         T4Y = VFNMS(LDK(KP634393284), T4X, VMUL(LDK(KP773010453), T4W));
1645
 
                         T5a = VFMA(LDK(KP634393284), T4W, VMUL(LDK(KP773010453), T4X));
1646
 
                         T4Z = VSUB(T4h, T4i);
1647
 
                         T50 = VSUB(T4l, T4k);
1648
 
                         T51 = VFMA(LDK(KP773010453), T4Z, VMUL(LDK(KP634393284), T50));
1649
 
                         T5b = VFNMS(LDK(KP634393284), T4Z, VMUL(LDK(KP773010453), T50));
1650
 
                    }
1651
 
                    T52 = VSUB(T4Y, T51);
1652
 
                    T5k = VADD(T5a, T5b);
1653
 
                    T5c = VSUB(T5a, T5b);
1654
 
                    T5h = VADD(T4Y, T51);
1655
 
               }
1656
 
               {
1657
 
                    V T4A, T4L, T5i, T5l;
1658
 
                    T4A = VBYI(VSUB(T4o, T4z));
1659
 
                    T4L = VSUB(T4H, T4K);
1660
 
                    ST(&(x[WS(ios, 17)]), VADD(T4A, T4L), dist, &(x[WS(ios, 1)]));
1661
 
                    ST(&(x[WS(ios, 47)]), VSUB(T4L, T4A), dist, &(x[WS(ios, 1)]));
1662
 
                    T5i = VADD(T5g, T5h);
1663
 
                    T5l = VBYI(VADD(T5j, T5k));
1664
 
                    ST(&(x[WS(ios, 57)]), VSUB(T5i, T5l), dist, &(x[WS(ios, 1)]));
1665
 
                    ST(&(x[WS(ios, 7)]), VADD(T5i, T5l), dist, &(x[WS(ios, 1)]));
1666
 
               }
1667
 
               {
1668
 
                    V T5m, T5n, T4M, T4N;
1669
 
                    T5m = VSUB(T5g, T5h);
1670
 
                    T5n = VBYI(VSUB(T5k, T5j));
1671
 
                    ST(&(x[WS(ios, 39)]), VSUB(T5m, T5n), dist, &(x[WS(ios, 1)]));
1672
 
                    ST(&(x[WS(ios, 25)]), VADD(T5m, T5n), dist, &(x[WS(ios, 1)]));
1673
 
                    T4M = VBYI(VADD(T4z, T4o));
1674
 
                    T4N = VADD(T4H, T4K);
1675
 
                    ST(&(x[WS(ios, 15)]), VADD(T4M, T4N), dist, &(x[WS(ios, 1)]));
1676
 
                    ST(&(x[WS(ios, 49)]), VSUB(T4N, T4M), dist, &(x[WS(ios, 1)]));
1677
 
               }
1678
 
               {
1679
 
                    V T4Q, T4T, T56, T5d;
1680
 
                    T4Q = VADD(T4O, T4P);
1681
 
                    T4T = VBYI(VADD(T4R, T4S));
1682
 
                    ST(&(x[WS(ios, 63)]), VSUB(T4Q, T4T), dist, &(x[WS(ios, 1)]));
1683
 
                    ST(&(x[WS(ios, 1)]), VADD(T4Q, T4T), dist, &(x[WS(ios, 1)]));
1684
 
                    T56 = VBYI(VSUB(T52, T55));
1685
 
                    T5d = VSUB(T59, T5c);
1686
 
                    ST(&(x[WS(ios, 23)]), VADD(T56, T5d), dist, &(x[WS(ios, 1)]));
1687
 
                    ST(&(x[WS(ios, 41)]), VSUB(T5d, T56), dist, &(x[WS(ios, 1)]));
1688
 
               }
1689
 
               {
1690
 
                    V T5e, T5f, T4U, T4V;
1691
 
                    T5e = VBYI(VADD(T55, T52));
1692
 
                    T5f = VADD(T59, T5c);
1693
 
                    ST(&(x[WS(ios, 9)]), VADD(T5e, T5f), dist, &(x[WS(ios, 1)]));
1694
 
                    ST(&(x[WS(ios, 55)]), VSUB(T5f, T5e), dist, &(x[WS(ios, 1)]));
1695
 
                    T4U = VSUB(T4O, T4P);
1696
 
                    T4V = VBYI(VSUB(T4S, T4R));
1697
 
                    ST(&(x[WS(ios, 33)]), VSUB(T4U, T4V), dist, &(x[WS(ios, 1)]));
1698
 
                    ST(&(x[WS(ios, 31)]), VADD(T4U, T4V), dist, &(x[WS(ios, 1)]));
1699
 
               }
1700
 
          }
1701
 
          {
1702
 
               V T1u, T43, T3D, T3V, T3t, T45, T3B, T3K, T3d, T3E, T3w, T3A, T3R, T46, T3Y;
1703
 
               V T42;
1704
 
               {
1705
 
                    V TE, T3U, T1t, T3T, T13, T1s;
1706
 
                    TE = VSUB(Tg, TD);
1707
 
                    T3U = VADD(T3r, T3k);
1708
 
                    T13 = VFMA(LDK(KP831469612), TV, VMUL(LDK(KP555570233), T12));
1709
 
                    T1s = VFNMS(LDK(KP555570233), T1r, VMUL(LDK(KP831469612), T1k));
1710
 
                    T1t = VSUB(T13, T1s);
1711
 
                    T3T = VADD(T13, T1s);
1712
 
                    T1u = VSUB(TE, T1t);
1713
 
                    T43 = VADD(T3U, T3T);
1714
 
                    T3D = VADD(TE, T1t);
1715
 
                    T3V = VSUB(T3T, T3U);
1716
 
               }
1717
 
               {
1718
 
                    V T3s, T3I, T3h, T3J, T3f, T3g;
1719
 
                    T3s = VSUB(T3k, T3r);
1720
 
                    T3I = VADD(Tg, TD);
1721
 
                    T3f = VFNMS(LDK(KP555570233), TV, VMUL(LDK(KP831469612), T12));
1722
 
                    T3g = VFMA(LDK(KP555570233), T1k, VMUL(LDK(KP831469612), T1r));
1723
 
                    T3h = VSUB(T3f, T3g);
1724
 
                    T3J = VADD(T3f, T3g);
1725
 
                    T3t = VSUB(T3h, T3s);
1726
 
                    T45 = VADD(T3I, T3J);
1727
 
                    T3B = VADD(T3s, T3h);
1728
 
                    T3K = VSUB(T3I, T3J);
1729
 
               }
1730
 
               {
1731
 
                    V T2l, T3u, T3c, T3v;
1732
 
                    {
1733
 
                         V T29, T2k, T30, T3b;
1734
 
                         T29 = VSUB(T1R, T28);
1735
 
                         T2k = VSUB(T2g, T2j);
1736
 
                         T2l = VFMA(LDK(KP881921264), T29, VMUL(LDK(KP471396736), T2k));
1737
 
                         T3u = VFNMS(LDK(KP471396736), T29, VMUL(LDK(KP881921264), T2k));
1738
 
                         T30 = VSUB(T2I, T2Z);
1739
 
                         T3b = VSUB(T37, T3a);
1740
 
                         T3c = VFNMS(LDK(KP471396736), T3b, VMUL(LDK(KP881921264), T30));
1741
 
                         T3v = VFMA(LDK(KP471396736), T30, VMUL(LDK(KP881921264), T3b));
1742
 
                    }
1743
 
                    T3d = VSUB(T2l, T3c);
1744
 
                    T3E = VADD(T3u, T3v);
1745
 
                    T3w = VSUB(T3u, T3v);
1746
 
                    T3A = VADD(T2l, T3c);
1747
 
               }
1748
 
               {
1749
 
                    V T3N, T3W, T3Q, T3X;
1750
 
                    {
1751
 
                         V T3L, T3M, T3O, T3P;
1752
 
                         T3L = VADD(T28, T1R);
1753
 
                         T3M = VADD(T2g, T2j);
1754
 
                         T3N = VFMA(LDK(KP956940335), T3L, VMUL(LDK(KP290284677), T3M));
1755
 
                         T3W = VFNMS(LDK(KP290284677), T3L, VMUL(LDK(KP956940335), T3M));
1756
 
                         T3O = VADD(T2Z, T2I);
1757
 
                         T3P = VADD(T37, T3a);
1758
 
                         T3Q = VFNMS(LDK(KP290284677), T3P, VMUL(LDK(KP956940335), T3O));
1759
 
                         T3X = VFMA(LDK(KP290284677), T3O, VMUL(LDK(KP956940335), T3P));
1760
 
                    }
1761
 
                    T3R = VSUB(T3N, T3Q);
1762
 
                    T46 = VADD(T3W, T3X);
1763
 
                    T3Y = VSUB(T3W, T3X);
1764
 
                    T42 = VADD(T3N, T3Q);
1765
 
               }
1766
 
               {
1767
 
                    V T3e, T3x, T44, T47;
1768
 
                    T3e = VADD(T1u, T3d);
1769
 
                    T3x = VBYI(VADD(T3t, T3w));
1770
 
                    ST(&(x[WS(ios, 53)]), VSUB(T3e, T3x), dist, &(x[WS(ios, 1)]));
1771
 
                    ST(&(x[WS(ios, 11)]), VADD(T3e, T3x), dist, &(x[WS(ios, 1)]));
1772
 
                    T44 = VBYI(VSUB(T42, T43));
1773
 
                    T47 = VSUB(T45, T46);
1774
 
                    ST(&(x[WS(ios, 29)]), VADD(T44, T47), dist, &(x[WS(ios, 1)]));
1775
 
                    ST(&(x[WS(ios, 35)]), VSUB(T47, T44), dist, &(x[WS(ios, 1)]));
1776
 
               }
1777
 
               {
1778
 
                    V T48, T49, T3y, T3z;
1779
 
                    T48 = VBYI(VADD(T43, T42));
1780
 
                    T49 = VADD(T45, T46);
1781
 
                    ST(&(x[WS(ios, 3)]), VADD(T48, T49), dist, &(x[WS(ios, 1)]));
1782
 
                    ST(&(x[WS(ios, 61)]), VSUB(T49, T48), dist, &(x[WS(ios, 1)]));
1783
 
                    T3y = VSUB(T1u, T3d);
1784
 
                    T3z = VBYI(VSUB(T3w, T3t));
1785
 
                    ST(&(x[WS(ios, 43)]), VSUB(T3y, T3z), dist, &(x[WS(ios, 1)]));
1786
 
                    ST(&(x[WS(ios, 21)]), VADD(T3y, T3z), dist, &(x[WS(ios, 1)]));
1787
 
               }
1788
 
               {
1789
 
                    V T3C, T3F, T3S, T3Z;
1790
 
                    T3C = VBYI(VSUB(T3A, T3B));
1791
 
                    T3F = VSUB(T3D, T3E);
1792
 
                    ST(&(x[WS(ios, 27)]), VADD(T3C, T3F), dist, &(x[WS(ios, 1)]));
1793
 
                    ST(&(x[WS(ios, 37)]), VSUB(T3F, T3C), dist, &(x[WS(ios, 1)]));
1794
 
                    T3S = VADD(T3K, T3R);
1795
 
                    T3Z = VBYI(VADD(T3V, T3Y));
1796
 
                    ST(&(x[WS(ios, 51)]), VSUB(T3S, T3Z), dist, &(x[WS(ios, 1)]));
1797
 
                    ST(&(x[WS(ios, 13)]), VADD(T3S, T3Z), dist, &(x[WS(ios, 1)]));
1798
 
               }
1799
 
               {
1800
 
                    V T40, T41, T3G, T3H;
1801
 
                    T40 = VSUB(T3K, T3R);
1802
 
                    T41 = VBYI(VSUB(T3Y, T3V));
1803
 
                    ST(&(x[WS(ios, 45)]), VSUB(T40, T41), dist, &(x[WS(ios, 1)]));
1804
 
                    ST(&(x[WS(ios, 19)]), VADD(T40, T41), dist, &(x[WS(ios, 1)]));
1805
 
                    T3G = VBYI(VADD(T3B, T3A));
1806
 
                    T3H = VADD(T3D, T3E);
1807
 
                    ST(&(x[WS(ios, 5)]), VADD(T3G, T3H), dist, &(x[WS(ios, 1)]));
1808
 
                    ST(&(x[WS(ios, 59)]), VSUB(T3H, T3G), dist, &(x[WS(ios, 1)]));
1809
 
               }
1810
 
          }
1811
 
     }
1812
 
     return W;
1813
 
}
1814
 
 
1815
 
static const tw_instr twinstr[] = {
1816
 
     VTW(1),
1817
 
     VTW(2),
1818
 
     VTW(3),
1819
 
     VTW(4),
1820
 
     VTW(5),
1821
 
     VTW(6),
1822
 
     VTW(7),
1823
 
     VTW(8),
1824
 
     VTW(9),
1825
 
     VTW(10),
1826
 
     VTW(11),
1827
 
     VTW(12),
1828
 
     VTW(13),
1829
 
     VTW(14),
1830
 
     VTW(15),
1831
 
     VTW(16),
1832
 
     VTW(17),
1833
 
     VTW(18),
1834
 
     VTW(19),
1835
 
     VTW(20),
1836
 
     VTW(21),
1837
 
     VTW(22),
1838
 
     VTW(23),
1839
 
     VTW(24),
1840
 
     VTW(25),
1841
 
     VTW(26),
1842
 
     VTW(27),
1843
 
     VTW(28),
1844
 
     VTW(29),
1845
 
     VTW(30),
1846
 
     VTW(31),
1847
 
     VTW(32),
1848
 
     VTW(33),
1849
 
     VTW(34),
1850
 
     VTW(35),
1851
 
     VTW(36),
1852
 
     VTW(37),
1853
 
     VTW(38),
1854
 
     VTW(39),
1855
 
     VTW(40),
1856
 
     VTW(41),
1857
 
     VTW(42),
1858
 
     VTW(43),
1859
 
     VTW(44),
1860
 
     VTW(45),
1861
 
     VTW(46),
1862
 
     VTW(47),
1863
 
     VTW(48),
1864
 
     VTW(49),
1865
 
     VTW(50),
1866
 
     VTW(51),
1867
 
     VTW(52),
1868
 
     VTW(53),
1869
 
     VTW(54),
1870
 
     VTW(55),
1871
 
     VTW(56),
1872
 
     VTW(57),
1873
 
     VTW(58),
1874
 
     VTW(59),
1875
 
     VTW(60),
1876
 
     VTW(61),
1877
 
     VTW(62),
1878
 
     VTW(63),
1879
 
     {TW_NEXT, VL, 0}
1880
 
};
1881
 
 
1882
 
/*
 * Descriptor for the t2bv_64 codelet; its address is passed to the
 * kdft_dit registration call in this file.
 * NOTE(review): ct_desc is declared elsewhere, so the per-field notes
 * below are assumptions to confirm against that declaration.
 */
static const ct_desc desc = {
     64,                      /* presumably the radix of this codelet */
     "t2bv_64",               /* name string, same as the codelet function */
     twinstr,                 /* twiddle instruction table from this file */
     &GENUS,
     {467, 198, 52, 0},       /* presumably operation counts -- TODO confirm */
     0,
     0,
     0
};
1883
 
 
1884
 
/*
 * Registration entry point for this codelet: hands the t2bv_64 function
 * together with its descriptor to kdft_dit_register on planner `p`.
 */
void X(codelet_t2bv_64)(planner *p)
{
     X(kdft_dit_register)(p, t2bv_64, &desc);
}
1887
 
#endif                          /* HAVE_FMA */