~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to dft/simd/common/t2fv_25.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
18
 *
 
19
 */
 
20
 
 
21
/* This file was automatically generated --- DO NOT EDIT */
 
22
/* Generated on Wed Jul 27 06:15:18 EDT 2011 */
 
23
 
 
24
#include "codelet-dft.h"
 
25
 
 
26
#ifdef HAVE_FMA
 
27
 
 
28
/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */
 
29
 
 
30
/*
 
31
 * This function contains 248 FP additions, 241 FP multiplications,
 
32
 * (or, 67 additions, 60 multiplications, 181 fused multiply/add),
 
33
 * 208 stack variables, 67 constants, and 50 memory accesses
 
34
 */
 
35
#include "t2f.h"
 
36
 
 
37
static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
 
38
{
 
39
     DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
 
40
     DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
 
41
     DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
 
42
     DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
 
43
     DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
 
44
     DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
 
45
     DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
 
46
     DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
 
47
     DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
 
48
     DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
 
49
     DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
 
50
     DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
 
51
     DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
 
52
     DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
 
53
     DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
 
54
     DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
 
55
     DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
 
56
     DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
 
57
     DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
 
58
     DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
 
59
     DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
 
60
     DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
 
61
     DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
 
62
     DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
 
63
     DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
 
64
     DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
 
65
     DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
 
66
     DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
 
67
     DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
 
68
     DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
 
69
     DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
 
70
     DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
 
71
     DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
 
72
     DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
 
73
     DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
 
74
     DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
 
75
     DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
 
76
     DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
 
77
     DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
 
78
     DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
 
79
     DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
 
80
     DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
 
81
     DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
 
82
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
 
83
     DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
 
84
     DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
 
85
     DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
 
86
     DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
 
87
     DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
 
88
     DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
 
89
     DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
 
90
     DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
 
91
     DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
 
92
     DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
 
93
     DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
 
94
     DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
 
95
     DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
 
96
     DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
 
97
     DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
 
98
     DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
 
99
     DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
 
100
     DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
 
101
     DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
 
102
     DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
 
103
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
 
104
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
 
105
     DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
 
106
     {
 
107
          INT m;
 
108
          R *x;
 
109
          x = ri;
 
110
          for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(rs)) {
 
111
               V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X;
 
112
               {
 
113
                    V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M;
 
114
                    V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11;
 
115
                    V T1b, T1c, T16;
 
116
                    {
 
117
                         V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M;
 
118
                         {
 
119
                              V T7, T9, T2, T4, T1f, T1h, T1o;
 
120
                              T1 = LD(&(x[0]), ms, &(x[0]));
 
121
                              T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
 
122
                              T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
 
123
                              T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
 
124
                              T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
 
125
                              T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
 
126
                              T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
 
127
                              T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
 
128
                              {
 
129
                                   V T8, Ta, T3, T5, T1j;
 
130
                                   T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
 
131
                                   T8 = BYTWJ(&(W[TWVL * 18]), T7);
 
132
                                   Ta = BYTWJ(&(W[TWVL * 28]), T9);
 
133
                                   T3 = BYTWJ(&(W[TWVL * 8]), T2);
 
134
                                   T5 = BYTWJ(&(W[TWVL * 38]), T4);
 
135
                                   T1g = BYTWJ(&(W[TWVL * 4]), T1f);
 
136
                                   T1i = BYTWJ(&(W[TWVL * 14]), T1h);
 
137
                                   T1p = BYTWJ(&(W[TWVL * 34]), T1o);
 
138
                                   T1k = BYTWJ(&(W[TWVL * 44]), T1j);
 
139
                                   T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
 
140
                                   Tb = VADD(T8, Ta);
 
141
                                   T1N = VSUB(T8, Ta);
 
142
                                   T6 = VADD(T3, T5);
 
143
                                   T1M = VSUB(T3, T5);
 
144
                              }
 
145
                         }
 
146
                         {
 
147
                              V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg;
 
148
                              Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
 
149
                              {
 
150
                                   V Tc, Ti, T1n, Tp;
 
151
                                   Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
 
152
                                   T1v = VSUB(T1i, T1k);
 
153
                                   T1l = VADD(T1i, T1k);
 
154
                                   T1n = BYTWJ(&(W[TWVL * 24]), T1m);
 
155
                                   Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
 
156
                                   T1O = VFMA(LDK(KP618033988), T1N, T1M);
 
157
                                   T2X = VFNMS(LDK(KP618033988), T1M, T1N);
 
158
                                   Te = VSUB(T6, Tb);
 
159
                                   Tc = VADD(T6, Tb);
 
160
                                   Th = BYTWJ(&(W[0]), Tg);
 
161
                                   Tj = BYTWJ(&(W[TWVL * 10]), Ti);
 
162
                                   T1w = VSUB(T1n, T1p);
 
163
                                   T1q = VADD(T1n, T1p);
 
164
                                   Tq = BYTWJ(&(W[TWVL * 30]), Tp);
 
165
                                   Tk = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
 
166
                                   T3L = VADD(T1, Tc);
 
167
                                   Td = VFNMS(LDK(KP250000000), Tc, T1);
 
168
                                   Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
 
169
                              }
 
170
                              {
 
171
                                   V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL;
 
172
                                   TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
 
173
                                   {
 
174
                                        V T1t, Tl, To, TH, T1s, T1r, TA, TC;
 
175
                                        TA = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
 
176
                                        T1r = VADD(T1l, T1q);
 
177
                                        T1t = VSUB(T1q, T1l);
 
178
                                        T1x = VFMA(LDK(KP618033988), T1w, T1v);
 
179
                                        T2K = VFNMS(LDK(KP618033988), T1v, T1w);
 
180
                                        Tl = BYTWJ(&(W[TWVL * 40]), Tk);
 
181
                                        To = BYTWJ(&(W[TWVL * 20]), Tn);
 
182
                                        TM = BYTWJ(&(W[TWVL * 6]), TL);
 
183
                                        TB = BYTWJ(&(W[TWVL * 46]), TA);
 
184
                                        TH = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
 
185
                                        T1s = VFNMS(LDK(KP250000000), T1r, T1g);
 
186
                                        T3Q = VADD(T1g, T1r);
 
187
                                        TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
 
188
                                        Tw = VSUB(Tj, Tl);
 
189
                                        Tm = VADD(Tj, Tl);
 
190
                                        Tx = VSUB(Tq, To);
 
191
                                        Tr = VADD(To, Tq);
 
192
                                        TI = BYTWJ(&(W[TWVL * 26]), TH);
 
193
                                        T2L = VFMA(LDK(KP559016994), T1t, T1s);
 
194
                                        T1u = VFNMS(LDK(KP559016994), T1t, T1s);
 
195
                                        TD = BYTWJ(&(W[TWVL * 16]), TC);
 
196
                                        TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
 
197
                                   }
 
198
                                   {
 
199
                                        V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts;
 
200
                                        TV = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
 
201
                                        Ts = VADD(Tm, Tr);
 
202
                                        Tu = VSUB(Tm, Tr);
 
203
                                        Ty = VFNMS(LDK(KP618033988), Tx, Tw);
 
204
                                        T2E = VFMA(LDK(KP618033988), Tw, Tx);
 
205
                                        T3j = VFNMS(LDK(KP059835404), T2K, T2L);
 
206
                                        T3b = VFMA(LDK(KP066152395), T2L, T2K);
 
207
                                        T2R = VFNMS(LDK(KP786782374), T2K, T2L);
 
208
                                        T2M = VFMA(LDK(KP869845200), T2L, T2K);
 
209
                                        T2f = VFMA(LDK(KP132830569), T1u, T1x);
 
210
                                        T27 = VFNMS(LDK(KP120146378), T1x, T1u);
 
211
                                        T1y = VFNMS(LDK(KP893101515), T1x, T1u);
 
212
                                        T1H = VFMA(LDK(KP987388751), T1u, T1x);
 
213
                                        TE = VSUB(TB, TD);
 
214
                                        TN = VADD(TD, TB);
 
215
                                        TG = BYTWJ(&(W[TWVL * 36]), TF);
 
216
                                        Tt = VFNMS(LDK(KP250000000), Ts, Th);
 
217
                                        T3M = VADD(Th, Ts);
 
218
                                        TW = BYTWJ(&(W[TWVL * 2]), TV);
 
219
                                        {
 
220
                                             V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP;
 
221
                                             {
 
222
                                                  V TX, T14, TZ, T12;
 
223
                                                  TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
 
224
                                                  T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
 
225
                                                  TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
 
226
                                                  T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
 
227
                                                  TJ = VSUB(TG, TI);
 
228
                                                  TO = VADD(TI, TG);
 
229
                                                  Tv = VFMA(LDK(KP559016994), Tu, Tt);
 
230
                                                  T2D = VFNMS(LDK(KP559016994), Tu, Tt);
 
231
                                                  TY = BYTWJ(&(W[TWVL * 12]), TX);
 
232
                                                  T15 = BYTWJ(&(W[TWVL * 32]), T14);
 
233
                                                  T10 = BYTWJ(&(W[TWVL * 42]), TZ);
 
234
                                                  T13 = BYTWJ(&(W[TWVL * 22]), T12);
 
235
                                             }
 
236
                                             TP = VADD(TN, TO);
 
237
                                             TR = VSUB(TN, TO);
 
238
                                             TK = VFMA(LDK(KP618033988), TJ, TE);
 
239
                                             T2B = VFNMS(LDK(KP618033988), TE, TJ);
 
240
                                             T3n = VFMA(LDK(KP578046249), T2D, T2E);
 
241
                                             T3e = VFNMS(LDK(KP522847744), T2E, T2D);
 
242
                                             T2U = VFNMS(LDK(KP987388751), T2D, T2E);
 
243
                                             T2F = VFMA(LDK(KP893101515), T2E, T2D);
 
244
                                             T2i = VFNMS(LDK(KP603558818), Ty, Tv);
 
245
                                             T2a = VFMA(LDK(KP667278218), Tv, Ty);
 
246
                                             Tz = VFNMS(LDK(KP244189809), Ty, Tv);
 
247
                                             T1C = VFMA(LDK(KP269969613), Tv, Ty);
 
248
                                             T3N = VADD(TM, TP);
 
249
                                             TQ = VFMS(LDK(KP250000000), TP, TM);
 
250
                                             T11 = VADD(TY, T10);
 
251
                                             T1b = VSUB(TY, T10);
 
252
                                             T1c = VSUB(T15, T13);
 
253
                                             T16 = VADD(T13, T15);
 
254
                                        }
 
255
                                   }
 
256
                              }
 
257
                         }
 
258
                    }
 
259
                    {
 
260
                         V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z;
 
261
                         V T3A, T3g, T2I, T1a, T3R, T3X;
 
262
                         T2z = VFNMS(LDK(KP559016994), Te, Td);
 
263
                         Tf = VFMA(LDK(KP559016994), Te, Td);
 
264
                         {
 
265
                              V TS, T2A, T17, T19;
 
266
                              TS = VFNMS(LDK(KP559016994), TR, TQ);
 
267
                              T2A = VFMA(LDK(KP559016994), TR, TQ);
 
268
                              T3W = VSUB(T3M, T3N);
 
269
                              T3O = VADD(T3M, T3N);
 
270
                              T1d = VFNMS(LDK(KP618033988), T1c, T1b);
 
271
                              T2H = VFMA(LDK(KP618033988), T1b, T1c);
 
272
                              T17 = VADD(T11, T16);
 
273
                              T19 = VSUB(T16, T11);
 
274
                              {
 
275
                                   V T3f, T2T, T2C, T18, T3P;
 
276
                                   T3m = VFMA(LDK(KP447533225), T2B, T2A);
 
277
                                   T3f = VFNMS(LDK(KP494780565), T2A, T2B);
 
278
                                   T2T = VFNMS(LDK(KP132830569), T2A, T2B);
 
279
                                   T2C = VFMA(LDK(KP120146378), T2B, T2A);
 
280
                                   T2j = VFNMS(LDK(KP786782374), TK, TS);
 
281
                                   T2b = VFMA(LDK(KP869845200), TS, TK);
 
282
                                   TT = VFNMS(LDK(KP667278218), TS, TK);
 
283
                                   T1D = VFMA(LDK(KP603558818), TK, TS);
 
284
                                   T18 = VFNMS(LDK(KP250000000), T17, TW);
 
285
                                   T3P = VADD(TW, T17);
 
286
                                   T2G = VFMA(LDK(KP734762448), T2F, T2C);
 
287
                                   T35 = VFNMS(LDK(KP734762448), T2F, T2C);
 
288
                                   T2V = VFNMS(LDK(KP734762448), T2U, T2T);
 
289
                                   T2Z = VFMA(LDK(KP734762448), T2U, T2T);
 
290
                                   T3A = VFMA(LDK(KP982009705), T3f, T3e);
 
291
                                   T3g = VFNMS(LDK(KP982009705), T3f, T3e);
 
292
                                   T2I = VFMA(LDK(KP559016994), T19, T18);
 
293
                                   T1a = VFNMS(LDK(KP559016994), T19, T18);
 
294
                                   T3R = VADD(T3P, T3Q);
 
295
                                   T3X = VSUB(T3P, T3Q);
 
296
                              }
 
297
                         }
 
298
                         {
 
299
                              V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q;
 
300
                              {
 
301
                                   V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z;
 
302
                                   V T3o, T3t;
 
303
                                   T2n = VFNMS(LDK(KP912575812), T2j, T2i);
 
304
                                   T2k = VFMA(LDK(KP912575812), T2j, T2i);
 
305
                                   T3o = VFNMS(LDK(KP921078979), T3n, T3m);
 
306
                                   T3t = VFMA(LDK(KP921078979), T3n, T3m);
 
307
                                   {
 
308
                                        V T3c, T2Q, T2J, T3k, T1e;
 
309
                                        T3c = VFNMS(LDK(KP667278218), T2I, T2H);
 
310
                                        T2Q = VFNMS(LDK(KP059835404), T2H, T2I);
 
311
                                        T2J = VFMA(LDK(KP066152395), T2I, T2H);
 
312
                                        T3k = VFMA(LDK(KP603558818), T2H, T2I);
 
313
                                        T1G = VFMA(LDK(KP578046249), T1a, T1d);
 
314
                                        T1e = VFNMS(LDK(KP522847744), T1d, T1a);
 
315
                                        T28 = VFNMS(LDK(KP494780565), T1a, T1d);
 
316
                                        T2g = VFMA(LDK(KP447533225), T1d, T1a);
 
317
                                        {
 
318
                                             V T3U, T3S, T40, T3Y;
 
319
                                             T3U = VSUB(T3O, T3R);
 
320
                                             T3S = VADD(T3O, T3R);
 
321
                                             T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X));
 
322
                                             T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W));
 
323
                                             {
 
324
                                                  V T3s, T3l, T2N, T36;
 
325
                                                  T3s = VFNMS(LDK(KP845997307), T3k, T3j);
 
326
                                                  T3l = VFMA(LDK(KP845997307), T3k, T3j);
 
327
                                                  T2N = VFNMS(LDK(KP772036680), T2M, T2J);
 
328
                                                  T36 = VFMA(LDK(KP772036680), T2M, T2J);
 
329
                                                  {
 
330
                                                       V T30, T2S, T3d, T3z, T3T;
 
331
                                                       T30 = VFNMS(LDK(KP772036680), T2R, T2Q);
 
332
                                                       T2S = VFMA(LDK(KP772036680), T2R, T2Q);
 
333
                                                       T3d = VFNMS(LDK(KP845997307), T3c, T3b);
 
334
                                                       T3z = VFMA(LDK(KP845997307), T3c, T3b);
 
335
                                                       ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0]));
 
336
                                                       T3T = VFNMS(LDK(KP250000000), T3S, T3L);
 
337
                                                       {
 
338
                                                            V T3C, T3p, T2O, T37;
 
339
                                                            T3C = VFMA(LDK(KP906616052), T3o, T3l);
 
340
                                                            T3p = VFNMS(LDK(KP906616052), T3o, T3l);
 
341
                                                            T2O = VFMA(LDK(KP956723877), T2N, T2G);
 
342
                                                            T37 = VFMA(LDK(KP522616830), T2V, T36);
 
343
                                                            {
 
344
                                                                 V T31, T2W, T3u, T3h;
 
345
                                                                 T31 = VFNMS(LDK(KP522616830), T2G, T30);
 
346
                                                                 T2W = VFMA(LDK(KP945422727), T2V, T2S);
 
347
                                                                 T3u = VFNMS(LDK(KP923225144), T3g, T3d);
 
348
                                                                 T3h = VFMA(LDK(KP923225144), T3g, T3d);
 
349
                                                                 {
 
350
                                                                      V T3I, T3B, T3V, T3Z;
 
351
                                                                      T3I = VFNMS(LDK(KP669429328), T3z, T3A);
 
352
                                                                      T3B = VFMA(LDK(KP570584518), T3A, T3z);
 
353
                                                                      T3V = VFMA(LDK(KP559016994), T3U, T3T);
 
354
                                                                      T3Z = VFNMS(LDK(KP559016994), T3U, T3T);
 
355
                                                                      {
 
356
                                                                           V T3y, T3q, T2P, T38;
 
357
                                                                           T3y = VFMA(LDK(KP262346850), T3p, T2X);
 
358
                                                                           T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p));
 
359
                                                                           T2P = VFMA(LDK(KP992114701), T2O, T2z);
 
360
                                                                           T38 = VFNMS(LDK(KP690983005), T37, T2S);
 
361
                                                                           {
 
362
                                                                                V T32, T2Y, T3v, T3F;
 
363
                                                                                T32 = VFMA(LDK(KP763932022), T31, T2N);
 
364
                                                                                T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W));
 
365
                                                                                T3v = VFNMS(LDK(KP997675361), T3u, T3t);
 
366
                                                                                T3F = VFNMS(LDK(KP904508497), T3u, T3s);
 
367
                                                                                {
 
368
                                                                                     V T3i, T3r, T3J, T3D;
 
369
                                                                                     T3i = VFMA(LDK(KP949179823), T3h, T2z);
 
370
                                                                                     T3r = VFNMS(LDK(KP237294955), T3h, T2z);
 
371
                                                                                     T3J = VFNMS(LDK(KP669429328), T3C, T3I);
 
372
                                                                                     T3D = VFMA(LDK(KP618033988), T3C, T3B);
 
373
                                                                                     ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0]));
 
374
                                                                                     ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)]));
 
375
                                                                                     ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)]));
 
376
                                                                                     ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0]));
 
377
                                                                                     {
 
378
                                                                                          V T39, T33, T3w, T3G;
 
379
                                                                                          T39 = VFMA(LDK(KP855719849), T38, T35);
 
380
                                                                                          T33 = VFNMS(LDK(KP855719849), T32, T2Z);
 
381
                                                                                          ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0]));
 
382
                                                                                          ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)]));
 
383
                                                                                          T3w = VFMA(LDK(KP560319534), T3v, T3s);
 
384
                                                                                          T3G = VFNMS(LDK(KP681693190), T3F, T3t);
 
385
                                                                                          ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)]));
 
386
                                                                                          ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0]));
 
387
                                                                                          T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y));
 
388
                                                                                          T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y));
 
389
                                                                                          T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X));
 
390
                                                                                          T34 = VFMA(LDK(KP897376177), T33, T2z);
 
391
                                                                                          T3x = VFNMS(LDK(KP949179823), T3w, T3r);
 
392
                                                                                          T3H = VFNMS(LDK(KP860541664), T3G, T3r);
 
393
                                                                                          T2t = VFNMS(LDK(KP912575812), T2b, T2a);
 
394
                                                                                          T2c = VFMA(LDK(KP912575812), T2b, T2a);
 
395
                                                                                          TU = VFMA(LDK(KP829049696), TT, Tz);
 
396
                                                                                          T1T = VFNMS(LDK(KP829049696), TT, Tz);
 
397
                                                                                          T1U = VFNMS(LDK(KP831864738), T1y, T1e);
 
398
                                                                                          T1z = VFMA(LDK(KP831864738), T1y, T1e);
 
399
                                                                                     }
 
400
                                                                                }
 
401
                                                                           }
 
402
                                                                      }
 
403
                                                                 }
 
404
                                                            }
 
405
                                                       }
 
406
                                                  }
 
407
                                             }
 
408
                                        }
 
409
                                   }
 
410
                                   {
 
411
                                        V T2o, T2h, T29, T2u, T2v, T2p;
 
412
                                        T2o = VFNMS(LDK(KP958953096), T2g, T2f);
 
413
                                        T2h = VFMA(LDK(KP958953096), T2g, T2f);
 
414
                                        ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)]));
 
415
                                        ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0]));
 
416
                                        ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0]));
 
417
                                        ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)]));
 
418
                                        ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0]));
 
419
                                        ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)]));
 
420
                                        T1V = VFMA(LDK(KP559154169), T1U, T1T);
 
421
                                        T22 = VFNMS(LDK(KP683113946), T1T, T1U);
 
422
                                        T29 = VFNMS(LDK(KP867381224), T28, T27);
 
423
                                        T2u = VFMA(LDK(KP867381224), T28, T27);
 
424
                                        T2l = VFMA(LDK(KP894834959), T2k, T2h);
 
425
                                        T2v = VFMA(LDK(KP447417479), T2k, T2u);
 
426
                                        T2d = VFNMS(LDK(KP809385824), T2c, T29);
 
427
                                        T2p = VFMA(LDK(KP447417479), T2c, T2o);
 
428
                                        T1Q = VFMA(LDK(KP831864738), T1H, T1G);
 
429
                                        T1I = VFNMS(LDK(KP831864738), T1H, T1G);
 
430
                                        T2w = VFNMS(LDK(KP763932022), T2v, T2h);
 
431
                                        T1A = VFMA(LDK(KP904730450), T1z, TU);
 
432
                                        T1F = VFNMS(LDK(KP904730450), T1z, TU);
 
433
                                        T2q = VFMA(LDK(KP690983005), T2p, T29);
 
434
                                   }
 
435
                              }
 
436
                              {
 
437
                                   V T2e, T1E, T1P, T2m;
 
438
                                   T2e = VFNMS(LDK(KP992114701), T2d, Tf);
 
439
                                   T1E = VFMA(LDK(KP916574801), T1D, T1C);
 
440
                                   T1P = VFNMS(LDK(KP916574801), T1D, T1C);
 
441
                                   T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O));
 
442
                                   {
 
443
                                        V T1J, T2r, T1R, T1W, T1Z, T2x;
 
444
                                        T2x = VFNMS(LDK(KP999544308), T2w, T2t);
 
445
                                        T1J = VFNMS(LDK(KP904730450), T1I, T1F);
 
446
                                        T25 = VFMA(LDK(KP968583161), T1A, Tf);
 
447
                                        T1B = VFNMS(LDK(KP242145790), T1A, Tf);
 
448
                                        T2r = VFNMS(LDK(KP999544308), T2q, T2n);
 
449
                                        T1R = VFMA(LDK(KP904730450), T1Q, T1P);
 
450
                                        T1W = VFNMS(LDK(KP904730450), T1Q, T1P);
 
451
                                        T1Z = VADD(T1E, T1F);
 
452
                                        ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)]));
 
453
                                        ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0]));
 
454
                                        T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O));
 
455
                                        T1K = VFNMS(LDK(KP618033988), T1J, T1E);
 
456
                                        T2s = VFNMS(LDK(KP803003575), T2r, Tf);
 
457
                                        T23 = VFMA(LDK(KP617882369), T1W, T22);
 
458
                                        T1S = VFNMS(LDK(KP242145790), T1R, T1O);
 
459
                                        T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O));
 
460
                                        T20 = VFNMS(LDK(KP683113946), T1Z, T1I);
 
461
                                        T1X = VFMA(LDK(KP559016994), T1W, T1V);
 
462
                                   }
 
463
                              }
 
464
                         }
 
465
                    }
 
466
               }
 
467
               {
 
468
                    V T1L, T24, T21, T1Y;
 
469
                    T1L = VFNMS(LDK(KP876091699), T1K, T1B);
 
470
                    ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)]));
 
471
                    ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0]));
 
472
                    T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S));
 
473
                    ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0]));
 
474
                    ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)]));
 
475
                    T21 = VFMA(LDK(KP792626838), T20, T1B);
 
476
                    T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S));
 
477
                    ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)]));
 
478
                    ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0]));
 
479
                    ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)]));
 
480
                    ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0]));
 
481
               }
 
482
          }
 
483
     }
 
484
     VLEAVE();
 
485
}
 
486
 
 
487
static const tw_instr twinstr[] = {
 
488
     VTW(0, 1),
 
489
     VTW(0, 2),
 
490
     VTW(0, 3),
 
491
     VTW(0, 4),
 
492
     VTW(0, 5),
 
493
     VTW(0, 6),
 
494
     VTW(0, 7),
 
495
     VTW(0, 8),
 
496
     VTW(0, 9),
 
497
     VTW(0, 10),
 
498
     VTW(0, 11),
 
499
     VTW(0, 12),
 
500
     VTW(0, 13),
 
501
     VTW(0, 14),
 
502
     VTW(0, 15),
 
503
     VTW(0, 16),
 
504
     VTW(0, 17),
 
505
     VTW(0, 18),
 
506
     VTW(0, 19),
 
507
     VTW(0, 20),
 
508
     VTW(0, 21),
 
509
     VTW(0, 22),
 
510
     VTW(0, 23),
 
511
     VTW(0, 24),
 
512
     {TW_NEXT, VL, 0}
 
513
};
 
514
 
 
515
/*
 * Descriptor handed to the registration call for the FMA variant of t2fv_25.
 * It carries the value 25 (the generator was invoked with -n 25), the codelet
 * name string, the twiddle instruction table and the genus.  The first three
 * entries of {67, 60, 181, 0} match the additions / multiplications / fused
 * multiply-add counts quoted in this variant's header comment.
 * NOTE(review): the meaning of the remaining zero fields is not visible here.
 */
static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 };
 
516
 
 
517
/*
 * Registration entry point: passes the t2fv_25 kernel and its descriptor to
 * kdft_dit_register for planner `p`.
 */
void XSIMD(codelet_t2fv_25) (planner *p) {
     X(kdft_dit_register) (p, t2fv_25, &desc);
}
 
520
#else                           /* HAVE_FMA */
 
521
 
 
522
/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */
 
523
 
 
524
/*
 * This function contains 248 FP additions, 188 FP multiplications,
 * (or, 170 additions, 110 multiplications, 78 fused multiply/add),
 * 99 stack variables, 40 constants, and 50 memory accesses
 */
 
529
#include "t2f.h"
 
530
 
 
531
/*
 * Non-FMA variant of the size-25 SIMD twiddle codelet (machine generated).
 *
 * Parameters:
 *   ri     - base of the data; all loads and stores go through x = ri.
 *   ii     - not referenced by this kernel.
 *   W      - twiddle table; advanced by TWVL * 48 per loop iteration after an
 *            initial offset of mb * ((TWVL / VL) * 48).
 *   rs     - stride between the 25 elements of one transform (used via WS).
 *   mb, me - loop bounds; m runs from mb up to (but excluding) me in steps
 *            of VL.
 *   ms     - stride between successive transforms; x advances by VL * ms.
 *
 * Each iteration loads x[0] and x[WS(rs, k)] for k = 1..24, multiplies every
 * element except x[0] by a twiddle read from W[TWVL * 2 * (k - 1)] through
 * BYTWJ, combines the values, and stores the 25 results back in place.
 */
static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
     DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
     DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
     DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
     DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
     DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
     DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
     DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
     DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
     DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
     DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
     DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
     DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
     DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
     DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
     DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
     DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
     DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
     DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
     DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
     DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
     DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
     DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
     DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
     DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
     DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
     DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
     DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
     DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
     DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
     DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
     DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
     DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
     DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
     DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
     DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
     {
          INT m;
          R *x;
          x = ri;
          for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(rs)) {
               V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv;
               V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q;
               /* Inputs 0, 5, 10, 15, 20. */
               {
                    V T7, T9, Ta, T2, T4, T5, T1D;
                    Tc = LD(&(x[0]), ms, &(x[0]));
                    {
                         V T6, T8, T1, T3;
                         T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
                         T7 = BYTWJ(&(W[TWVL * 18]), T6);
                         T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
                         T9 = BYTWJ(&(W[TWVL * 28]), T8);
                         Ta = VADD(T7, T9);
                         T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
                         T2 = BYTWJ(&(W[TWVL * 8]), T1);
                         T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
                         T4 = BYTWJ(&(W[TWVL * 38]), T3);
                         T5 = VADD(T2, T4);
                    }
                    Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta));
                    Td = VADD(T5, Ta);
                    Te = VFNMS(LDK(KP250000000), Td, Tc);
                    T1C = VSUB(T2, T4);
                    T1D = VSUB(T7, T9);
                    T2t = VMUL(LDK(KP951056516), T1D);
                    T1E = VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D));
               }
               /* Inputs 3, 8, 13, 18, 23. */
               {
                    V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q;
                    T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
                    T1r = BYTWJ(&(W[TWVL * 4]), T1q);
                    {
                         V T1k, T1m, T1f, T1h;
                         T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
                         T1l = BYTWJ(&(W[TWVL * 24]), T1k);
                         T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
                         T1n = BYTWJ(&(W[TWVL * 34]), T1m);
                         T1o = VADD(T1l, T1n);
                         T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
                         T1g = BYTWJ(&(W[TWVL * 14]), T1f);
                         T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
                         T1i = BYTWJ(&(W[TWVL * 44]), T1h);
                         T1j = VADD(T1g, T1i);
                    }
                    {
                         V T1v, T1w, T1p, T1s, T1t;
                         T1v = VSUB(T1g, T1i);
                         T1w = VSUB(T1l, T1n);
                         T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w));
                         T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w));
                         T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o));
                         T1s = VADD(T1j, T1o);
                         T1t = VFNMS(LDK(KP250000000), T1s, T1r);
                         T1u = VADD(T1p, T1t);
                         T3c = VADD(T1r, T1s);
                         T2n = VSUB(T1t, T1p);
                    }
               }
               /* Inputs 1, 6, 11, 16, 21. */
               {
                    V Ts, Tm, To, Tp, Th, Tj, Tk, Tr;
                    Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
                    Ts = BYTWJ(&(W[0]), Tr);
                    {
                         V Tl, Tn, Tg, Ti;
                         Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
                         Tm = BYTWJ(&(W[TWVL * 20]), Tl);
                         Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
                         To = BYTWJ(&(W[TWVL * 30]), Tn);
                         Tp = VADD(Tm, To);
                         Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
                         Th = BYTWJ(&(W[TWVL * 10]), Tg);
                         Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
                         Tj = BYTWJ(&(W[TWVL * 40]), Ti);
                         Tk = VADD(Th, Tj);
                    }
                    {
                         V Tw, Tx, Tq, Tt, Tu;
                         Tw = VSUB(Th, Tj);
                         Tx = VSUB(Tm, To);
                         Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx));
                         T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx));
                         Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp));
                         Tt = VADD(Tk, Tp);
                         Tu = VFNMS(LDK(KP250000000), Tt, Ts);
                         Tv = VADD(Tq, Tu);
                         T38 = VADD(Ts, Tt);
                         T2j = VSUB(Tu, Tq);
                    }
               }
               /* Inputs 4, 9, 14, 19, 24. */
               {
                    V TM, TG, TI, TJ, TB, TD, TE, TL;
                    TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
                    TM = BYTWJ(&(W[TWVL * 6]), TL);
                    {
                         V TF, TH, TA, TC;
                         TF = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
                         TG = BYTWJ(&(W[TWVL * 26]), TF);
                         TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
                         TI = BYTWJ(&(W[TWVL * 36]), TH);
                         TJ = VADD(TG, TI);
                         TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
                         TB = BYTWJ(&(W[TWVL * 16]), TA);
                         TC = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
                         TD = BYTWJ(&(W[TWVL * 46]), TC);
                         TE = VADD(TB, TD);
                    }
                    {
                         V TQ, TR, TK, TN, TO;
                         TQ = VSUB(TB, TD);
                         TR = VSUB(TG, TI);
                         TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR));
                         T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR));
                         TK = VMUL(LDK(KP559016994), VSUB(TE, TJ));
                         TN = VADD(TE, TJ);
                         TO = VFNMS(LDK(KP250000000), TN, TM);
                         TP = VADD(TK, TO);
                         T39 = VADD(TM, TN);
                         T2g = VSUB(TO, TK);
                    }
               }
               /* Inputs 2, 7, 12, 17, 22. */
               {
                    V T17, T11, T13, T14, TW, TY, TZ, T16;
                    T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
                    T17 = BYTWJ(&(W[TWVL * 2]), T16);
                    {
                         V T10, T12, TV, TX;
                         T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
                         T11 = BYTWJ(&(W[TWVL * 22]), T10);
                         T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
                         T13 = BYTWJ(&(W[TWVL * 32]), T12);
                         T14 = VADD(T11, T13);
                         TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
                         TW = BYTWJ(&(W[TWVL * 12]), TV);
                         TX = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
                         TY = BYTWJ(&(W[TWVL * 42]), TX);
                         TZ = VADD(TW, TY);
                    }
                    {
                         V T1b, T1c, T15, T18, T19;
                         T1b = VSUB(TW, TY);
                         T1c = VSUB(T11, T13);
                         T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c));
                         T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c));
                         T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14));
                         T18 = VADD(TZ, T14);
                         T19 = VFNMS(LDK(KP250000000), T18, T17);
                         T1a = VADD(T15, T19);
                         T3b = VADD(T17, T18);
                         T2q = VSUB(T19, T15);
                    }
               }
               /* Outputs 0, 5, 10, 15, 20, built from the five group sums. */
               {
                    V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i;
                    {
                         V T3j, T3k, T3a, T3d;
                         T3j = VSUB(T38, T39);
                         T3k = VSUB(T3b, T3c);
                         T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k)));
                         T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k)));
                         T3f = VADD(Tc, Td);
                         T3a = VADD(T38, T39);
                         T3d = VADD(T3b, T3c);
                         T3g = VADD(T3a, T3d);
                         T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d));
                         T3h = VFNMS(LDK(KP250000000), T3g, T3f);
                    }
                    ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0]));
                    T3n = VSUB(T3h, T3e);
                    ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0]));
                    ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)]));
                    T3i = VADD(T3e, T3h);
                    ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)]));
                    ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0]));
               }
               /* Outputs 1, 4, 6, 9, 11, 14, 16, 19, 21, 24. */
               {
                    V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U;
                    V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e;
                    Tf = VADD(Tb, Te);
                    T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv));
                    T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP));
                    T21 = VSUB(T1Z, T20);
                    T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a));
                    T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u));
                    T2b = VADD(T29, T2a);
                    T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty));
                    T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP));
                    T28 = VADD(T26, T27);
                    T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d));
                    T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x));
                    T24 = VADD(T22, T23);
                    {
                         V T1F, T1G, T1H, T1I, T1J, T1K;
                         T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv));
                         T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP));
                         T1H = VADD(T1F, T1G);
                         T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a));
                         T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u));
                         T1K = VADD(T1I, T1J);
                         T1L = VADD(T1H, T1K);
                         T1U = VSUB(T1J, T1I);
                         T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H));
                         T1S = VSUB(T1G, T1F);
                    }
                    {
                         V Tz, TT, TU, T1e, T1y, T1z;
                         Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv));
                         TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP));
                         TU = VADD(Tz, TT);
                         T1e = VFNMS(LDK(KP963507348), T1d, VMUL(LDK(KP876306680), T1a));
                         T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u));
                         T1z = VADD(T1e, T1y);
                         T1A = VADD(TU, T1z);
                         T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z));
                         T1N = VSUB(TT, Tz);
                         T1O = VSUB(T1e, T1y);
                    }
                    {
                         V T1B, T1M, T25, T2c;
                         T1B = VADD(Tf, T1A);
                         T1M = VBYI(VADD(T1E, T1L));
                         ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0]));
                         T25 = VADD(Tf, VADD(T21, T24));
                         T2c = VBYI(VADD(T1E, VSUB(T28, T2b)));
                         ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0]));
                    }
                    T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b))))));
                    T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf))));
                    ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)]));
                    ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0]));
                    {
                         V T1R, T1X, T1W, T1Y, T1P, T1T;
                         /* VFMS here (0.25 * T1L - T1E) is intentional: T1E enters these outputs negated. */
                         T1P = VFMS(LDK(KP250000000), T1L, T1E);
                         T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q)));
                         T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q)));
                         T1T = VFNMS(LDK(KP250000000), T1A, Tf);
                         T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V)));
                         T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T)));
                         ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0]));
                         ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0]));
                    }
               }
               /* Outputs 2, 3, 7, 8, 12, 13, 17, 18, 22, 23. */
               {
                    V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M;
                    V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F;
                    T2u = VFNMS(LDK(KP587785252), T1C, T2t);
                    T2w = VSUB(Te, Tb);
                    T2h = VFNMS(LDK(KP125333233), T2g, VMUL(LDK(KP1_984229402), T2f));
                    T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j));
                    T2l = VSUB(T2h, T2k);
                    T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q));
                    T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n));
                    T2C = VSUB(T2A, T2B);
                    T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m));
                    T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q));
                    T2s = VSUB(T2o, T2r);
                    T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j));
                    T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g));
                    T2z = VSUB(T2x, T2y);
                    {
                         V T2G, T2H, T2I, T2J, T2K, T2L;
                         T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i));
                         T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g));
                         T2I = VSUB(T2G, T2H);
                         T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p));
                         T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m));
                         T2L = VADD(T2J, T2K);
                         T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L));
                         T2X = VSUB(T2J, T2K);
                         T2N = VADD(T2I, T2L);
                         T2W = VADD(T2G, T2H);
                    }
                    {
                         V T2P, T2Q, T2Y, T2S, T2T, T2Z;
                         T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f));
                         T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j));
                         T2Y = VADD(T2Q, T2P);
                         T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q));
                         T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n));
                         T2Z = VADD(T2S, T2T);
                         T2R = VSUB(T2P, T2Q);
                         T31 = VADD(T2Y, T2Z);
                         T2U = VSUB(T2S, T2T);
                         T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z));
                    }
                    {
                         V T36, T37, T2v, T2D;
                         T36 = VBYI(VADD(T2u, T2N));
                         T37 = VADD(T2w, T31);
                         ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0]));
                         ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)]));
                         T2v = VBYI(VSUB(VADD(T2l, T2s), T2u));
                         T2D = VADD(T2w, VADD(T2z, T2C));
                         ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0]));
                    }
                    T2E = VFMA(LDK(KP309016994), T2z, VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w))));
                    T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u));
                    ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)]));
                    ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0]));
                    {
                         V T2V, T34, T33, T35, T2O, T32;
                         T2O = VFNMS(LDK(KP250000000), T2N, T2u);
                         T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R)))));
                         T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M)));
                         T32 = VFNMS(LDK(KP250000000), T31, T2w);
                         T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32)));
                         T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30));
                         ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0]));
                         ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0]));
                    }
               }
          }
     }
     VLEAVE();
}
 
898
 
 
899
static const tw_instr twinstr[] = {
 
900
     VTW(0, 1),
 
901
     VTW(0, 2),
 
902
     VTW(0, 3),
 
903
     VTW(0, 4),
 
904
     VTW(0, 5),
 
905
     VTW(0, 6),
 
906
     VTW(0, 7),
 
907
     VTW(0, 8),
 
908
     VTW(0, 9),
 
909
     VTW(0, 10),
 
910
     VTW(0, 11),
 
911
     VTW(0, 12),
 
912
     VTW(0, 13),
 
913
     VTW(0, 14),
 
914
     VTW(0, 15),
 
915
     VTW(0, 16),
 
916
     VTW(0, 17),
 
917
     VTW(0, 18),
 
918
     VTW(0, 19),
 
919
     VTW(0, 20),
 
920
     VTW(0, 21),
 
921
     VTW(0, 22),
 
922
     VTW(0, 23),
 
923
     VTW(0, 24),
 
924
     {TW_NEXT, VL, 0}
 
925
};
 
926
 
 
927
/*
 * Descriptor handed to the registration call for the non-FMA variant of
 * t2fv_25.  It carries the value 25 (the generator was invoked with -n 25),
 * the codelet name string, the twiddle instruction table and the genus.  The
 * first three entries of {170, 110, 78, 0} match the additions /
 * multiplications / fused multiply-add counts quoted in the comment above the
 * kernel.
 * NOTE(review): the meaning of the remaining zero fields is not visible here.
 */
static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {170, 110, 78, 0}, 0, 0, 0 };
 
928
 
 
929
/*
 * Registration entry point for the non-FMA build: passes the t2fv_25 kernel
 * and its descriptor to kdft_dit_register for planner `p`.
 */
void XSIMD(codelet_t2fv_25) (planner *p) {
     X(kdft_dit_register) (p, t2fv_25, &desc);
}
 
932
#endif                          /* HAVE_FMA */