~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cf/hc2cfdft_8.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added

removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:45:13 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:17:53 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */
 
28
/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */
29
29
 
30
30
/*
31
31
 * This function contains 82 FP additions, 52 FP multiplications,
38
38
{
39
39
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
40
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
41
 
     INT m;
42
 
     for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
43
 
          E T1A, T1w, T1z, T1x, T1H, T1v, T1L, T1F;
44
 
          {
45
 
               E Ty, T14, TO, T1o, Tv, TG, T16, T1m, Ta, T19, T1h, TV, T10, TX, TZ;
46
 
               E Tk, T1i, TY, T1b, TF, TB, T1l;
47
 
               {
48
 
                    E TH, TN, TK, TM;
49
 
                    {
50
 
                         E Tw, Tx, TI, TJ;
51
 
                         Tw = Ip[0];
52
 
                         Tx = Im[0];
53
 
                         TI = Rm[0];
54
 
                         TJ = Rp[0];
55
 
                         TH = W[0];
56
 
                         Ty = Tw - Tx;
57
 
                         TN = Tw + Tx;
58
 
                         T14 = TJ + TI;
59
 
                         TK = TI - TJ;
60
 
                         TM = W[1];
61
 
                    }
62
 
                    {
63
 
                         E Ts, Tp, Tt, Tm, Tr;
64
 
                         {
65
 
                              E Tn, To, TL, T1n;
66
 
                              Tn = Ip[WS(rs, 2)];
67
 
                              To = Im[WS(rs, 2)];
68
 
                              TL = TH * TK;
69
 
                              T1n = TM * TK;
70
 
                              Ts = Rp[WS(rs, 2)];
71
 
                              TF = Tn + To;
72
 
                              Tp = Tn - To;
73
 
                              TO = FNMS(TM, TN, TL);
74
 
                              T1o = FMA(TH, TN, T1n);
75
 
                              Tt = Rm[WS(rs, 2)];
76
 
                         }
77
 
                         Tm = W[6];
78
 
                         Tr = W[7];
79
 
                         {
80
 
                              E TE, TD, T15, TC, Tu, Tq;
81
 
                              TB = W[8];
82
 
                              TC = Tt - Ts;
83
 
                              Tu = Ts + Tt;
84
 
                              Tq = Tm * Tp;
85
 
                              TE = W[9];
86
 
                              TD = TB * TC;
87
 
                              T15 = Tm * Tu;
88
 
                              Tv = FNMS(Tr, Tu, Tq);
89
 
                              T1l = TE * TC;
90
 
                              TG = FNMS(TE, TF, TD);
91
 
                              T16 = FMA(Tr, Tp, T15);
92
 
                         }
93
 
                    }
94
 
               }
95
 
               {
96
 
                    E TU, TR, TT, T1g, TS;
97
 
                    {
98
 
                         E T2, T3, T7, T8;
99
 
                         T2 = Ip[WS(rs, 1)];
100
 
                         T1m = FMA(TB, TF, T1l);
101
 
                         T3 = Im[WS(rs, 1)];
102
 
                         T7 = Rp[WS(rs, 1)];
103
 
                         T8 = Rm[WS(rs, 1)];
104
 
                         {
105
 
                              E T1, T4, T9, T6, T5, TQ, T18;
106
 
                              T1 = W[2];
107
 
                              TU = T2 + T3;
108
 
                              T4 = T2 - T3;
109
 
                              TR = T7 - T8;
110
 
                              T9 = T7 + T8;
111
 
                              T6 = W[3];
112
 
                              T5 = T1 * T4;
113
 
                              TQ = W[4];
114
 
                              T18 = T1 * T9;
115
 
                              TT = W[5];
116
 
                              Ta = FNMS(T6, T9, T5);
117
 
                              T1g = TQ * TU;
118
 
                              TS = TQ * TR;
119
 
                              T19 = FMA(T6, T4, T18);
120
 
                         }
121
 
                    }
122
 
                    {
123
 
                         E Tc, Td, Th, Ti;
124
 
                         Tc = Ip[WS(rs, 3)];
125
 
                         T1h = FNMS(TT, TR, T1g);
126
 
                         TV = FMA(TT, TU, TS);
127
 
                         Td = Im[WS(rs, 3)];
128
 
                         Th = Rp[WS(rs, 3)];
129
 
                         Ti = Rm[WS(rs, 3)];
130
 
                         {
131
 
                              E Tb, Te, Tj, Tg, Tf, TW, T1a;
132
 
                              Tb = W[10];
133
 
                              T10 = Tc + Td;
134
 
                              Te = Tc - Td;
135
 
                              TX = Th - Ti;
136
 
                              Tj = Th + Ti;
137
 
                              Tg = W[11];
138
 
                              Tf = Tb * Te;
139
 
                              TW = W[12];
140
 
                              T1a = Tb * Tj;
141
 
                              TZ = W[13];
142
 
                              Tk = FNMS(Tg, Tj, Tf);
143
 
                              T1i = TW * T10;
144
 
                              TY = TW * TX;
145
 
                              T1b = FMA(Tg, Te, T1a);
146
 
                         }
147
 
                    }
148
 
               }
149
 
               {
150
 
                    E T1E, T1t, TA, T1s, T1D, T1u, T1e, T13, T1r, T1d;
151
 
                    {
152
 
                         E TP, T1f, T1q, T12, T17, T1c;
153
 
                         {
154
 
                              E Tl, T11, Tz, T1p, T1k, T1j;
155
 
                              T1E = Ta - Tk;
156
 
                              Tl = Ta + Tk;
157
 
                              T1j = FNMS(TZ, TX, T1i);
158
 
                              T11 = FMA(TZ, T10, TY);
159
 
                              Tz = Tv + Ty;
160
 
                              T1t = Ty - Tv;
161
 
                              T1A = T1o - T1m;
162
 
                              T1p = T1m + T1o;
163
 
                              T1k = T1h + T1j;
164
 
                              T1w = T1j - T1h;
165
 
                              T1z = TO - TG;
166
 
                              TP = TG + TO;
167
 
                              T1f = Tz - Tl;
168
 
                              TA = Tl + Tz;
169
 
                              T1s = T1k + T1p;
170
 
                              T1q = T1k - T1p;
171
 
                              T12 = TV + T11;
172
 
                              T1x = TV - T11;
173
 
                              T1D = T14 - T16;
174
 
                              T17 = T14 + T16;
175
 
                              T1c = T19 + T1b;
176
 
                              T1u = T19 - T1b;
177
 
                         }
178
 
                         Im[WS(rs, 1)] = KP500000000 * (T1q - T1f);
179
 
                         T1e = T12 + TP;
180
 
                         T13 = TP - T12;
181
 
                         T1r = T17 + T1c;
182
 
                         T1d = T17 - T1c;
183
 
                         Ip[WS(rs, 2)] = KP500000000 * (T1f + T1q);
184
 
                    }
185
 
                    Im[WS(rs, 3)] = KP500000000 * (T13 - TA);
186
 
                    Ip[0] = KP500000000 * (TA + T13);
187
 
                    Rm[WS(rs, 3)] = KP500000000 * (T1r - T1s);
188
 
                    Rp[0] = KP500000000 * (T1r + T1s);
189
 
                    Rp[WS(rs, 2)] = KP500000000 * (T1d + T1e);
190
 
                    Rm[WS(rs, 1)] = KP500000000 * (T1d - T1e);
191
 
                    T1H = T1u + T1t;
192
 
                    T1v = T1t - T1u;
193
 
                    T1L = T1D + T1E;
194
 
                    T1F = T1D - T1E;
195
 
               }
196
 
          }
197
 
          {
198
 
               E T1y, T1I, T1B, T1J;
199
 
               T1y = T1w + T1x;
200
 
               T1I = T1w - T1x;
201
 
               T1B = T1z - T1A;
202
 
               T1J = T1z + T1A;
203
 
               {
204
 
                    E T1M, T1K, T1C, T1G;
205
 
                    T1M = T1I + T1J;
206
 
                    T1K = T1I - T1J;
207
 
                    T1C = T1y + T1B;
208
 
                    T1G = T1B - T1y;
209
 
                    Im[0] = -(KP500000000 * (FNMS(KP707106781, T1K, T1H)));
210
 
                    Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1K, T1H));
211
 
                    Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1M, T1L));
212
 
                    Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1M, T1L));
213
 
                    Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1G, T1F));
214
 
                    Rm[0] = KP500000000 * (FNMS(KP707106781, T1G, T1F));
215
 
                    Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1C, T1v)));
216
 
                    Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1C, T1v));
 
41
     {
 
42
          INT m;
 
43
          for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
 
44
               E T1A, T1w, T1z, T1x, T1H, T1v, T1L, T1F;
 
45
               {
 
46
                    E Ty, T14, TO, T1o, Tv, TG, T16, T1m, Ta, T19, T1h, TV, T10, TX, TZ;
 
47
                    E Tk, T1i, TY, T1b, TF, TB, T1l;
 
48
                    {
 
49
                         E TH, TN, TK, TM;
 
50
                         {
 
51
                              E Tw, Tx, TI, TJ;
 
52
                              Tw = Ip[0];
 
53
                              Tx = Im[0];
 
54
                              TI = Rm[0];
 
55
                              TJ = Rp[0];
 
56
                              TH = W[0];
 
57
                              Ty = Tw - Tx;
 
58
                              TN = Tw + Tx;
 
59
                              T14 = TJ + TI;
 
60
                              TK = TI - TJ;
 
61
                              TM = W[1];
 
62
                         }
 
63
                         {
 
64
                              E Ts, Tp, Tt, Tm, Tr;
 
65
                              {
 
66
                                   E Tn, To, TL, T1n;
 
67
                                   Tn = Ip[WS(rs, 2)];
 
68
                                   To = Im[WS(rs, 2)];
 
69
                                   TL = TH * TK;
 
70
                                   T1n = TM * TK;
 
71
                                   Ts = Rp[WS(rs, 2)];
 
72
                                   TF = Tn + To;
 
73
                                   Tp = Tn - To;
 
74
                                   TO = FNMS(TM, TN, TL);
 
75
                                   T1o = FMA(TH, TN, T1n);
 
76
                                   Tt = Rm[WS(rs, 2)];
 
77
                              }
 
78
                              Tm = W[6];
 
79
                              Tr = W[7];
 
80
                              {
 
81
                                   E TE, TD, T15, TC, Tu, Tq;
 
82
                                   TB = W[8];
 
83
                                   TC = Tt - Ts;
 
84
                                   Tu = Ts + Tt;
 
85
                                   Tq = Tm * Tp;
 
86
                                   TE = W[9];
 
87
                                   TD = TB * TC;
 
88
                                   T15 = Tm * Tu;
 
89
                                   Tv = FNMS(Tr, Tu, Tq);
 
90
                                   T1l = TE * TC;
 
91
                                   TG = FNMS(TE, TF, TD);
 
92
                                   T16 = FMA(Tr, Tp, T15);
 
93
                              }
 
94
                         }
 
95
                    }
 
96
                    {
 
97
                         E TU, TR, TT, T1g, TS;
 
98
                         {
 
99
                              E T2, T3, T7, T8;
 
100
                              T2 = Ip[WS(rs, 1)];
 
101
                              T1m = FMA(TB, TF, T1l);
 
102
                              T3 = Im[WS(rs, 1)];
 
103
                              T7 = Rp[WS(rs, 1)];
 
104
                              T8 = Rm[WS(rs, 1)];
 
105
                              {
 
106
                                   E T1, T4, T9, T6, T5, TQ, T18;
 
107
                                   T1 = W[2];
 
108
                                   TU = T2 + T3;
 
109
                                   T4 = T2 - T3;
 
110
                                   TR = T7 - T8;
 
111
                                   T9 = T7 + T8;
 
112
                                   T6 = W[3];
 
113
                                   T5 = T1 * T4;
 
114
                                   TQ = W[4];
 
115
                                   T18 = T1 * T9;
 
116
                                   TT = W[5];
 
117
                                   Ta = FNMS(T6, T9, T5);
 
118
                                   T1g = TQ * TU;
 
119
                                   TS = TQ * TR;
 
120
                                   T19 = FMA(T6, T4, T18);
 
121
                              }
 
122
                         }
 
123
                         {
 
124
                              E Tc, Td, Th, Ti;
 
125
                              Tc = Ip[WS(rs, 3)];
 
126
                              T1h = FNMS(TT, TR, T1g);
 
127
                              TV = FMA(TT, TU, TS);
 
128
                              Td = Im[WS(rs, 3)];
 
129
                              Th = Rp[WS(rs, 3)];
 
130
                              Ti = Rm[WS(rs, 3)];
 
131
                              {
 
132
                                   E Tb, Te, Tj, Tg, Tf, TW, T1a;
 
133
                                   Tb = W[10];
 
134
                                   T10 = Tc + Td;
 
135
                                   Te = Tc - Td;
 
136
                                   TX = Th - Ti;
 
137
                                   Tj = Th + Ti;
 
138
                                   Tg = W[11];
 
139
                                   Tf = Tb * Te;
 
140
                                   TW = W[12];
 
141
                                   T1a = Tb * Tj;
 
142
                                   TZ = W[13];
 
143
                                   Tk = FNMS(Tg, Tj, Tf);
 
144
                                   T1i = TW * T10;
 
145
                                   TY = TW * TX;
 
146
                                   T1b = FMA(Tg, Te, T1a);
 
147
                              }
 
148
                         }
 
149
                    }
 
150
                    {
 
151
                         E T1E, T1t, TA, T1s, T1D, T1u, T1e, T13, T1r, T1d;
 
152
                         {
 
153
                              E TP, T1f, T1q, T12, T17, T1c;
 
154
                              {
 
155
                                   E Tl, T11, Tz, T1p, T1k, T1j;
 
156
                                   T1E = Ta - Tk;
 
157
                                   Tl = Ta + Tk;
 
158
                                   T1j = FNMS(TZ, TX, T1i);
 
159
                                   T11 = FMA(TZ, T10, TY);
 
160
                                   Tz = Tv + Ty;
 
161
                                   T1t = Ty - Tv;
 
162
                                   T1A = T1o - T1m;
 
163
                                   T1p = T1m + T1o;
 
164
                                   T1k = T1h + T1j;
 
165
                                   T1w = T1j - T1h;
 
166
                                   T1z = TO - TG;
 
167
                                   TP = TG + TO;
 
168
                                   T1f = Tz - Tl;
 
169
                                   TA = Tl + Tz;
 
170
                                   T1s = T1k + T1p;
 
171
                                   T1q = T1k - T1p;
 
172
                                   T12 = TV + T11;
 
173
                                   T1x = TV - T11;
 
174
                                   T1D = T14 - T16;
 
175
                                   T17 = T14 + T16;
 
176
                                   T1c = T19 + T1b;
 
177
                                   T1u = T19 - T1b;
 
178
                              }
 
179
                              Im[WS(rs, 1)] = KP500000000 * (T1q - T1f);
 
180
                              T1e = T12 + TP;
 
181
                              T13 = TP - T12;
 
182
                              T1r = T17 + T1c;
 
183
                              T1d = T17 - T1c;
 
184
                              Ip[WS(rs, 2)] = KP500000000 * (T1f + T1q);
 
185
                         }
 
186
                         Im[WS(rs, 3)] = KP500000000 * (T13 - TA);
 
187
                         Ip[0] = KP500000000 * (TA + T13);
 
188
                         Rm[WS(rs, 3)] = KP500000000 * (T1r - T1s);
 
189
                         Rp[0] = KP500000000 * (T1r + T1s);
 
190
                         Rp[WS(rs, 2)] = KP500000000 * (T1d + T1e);
 
191
                         Rm[WS(rs, 1)] = KP500000000 * (T1d - T1e);
 
192
                         T1H = T1u + T1t;
 
193
                         T1v = T1t - T1u;
 
194
                         T1L = T1D + T1E;
 
195
                         T1F = T1D - T1E;
 
196
                    }
 
197
               }
 
198
               {
 
199
                    E T1y, T1I, T1B, T1J;
 
200
                    T1y = T1w + T1x;
 
201
                    T1I = T1w - T1x;
 
202
                    T1B = T1z - T1A;
 
203
                    T1J = T1z + T1A;
 
204
                    {
 
205
                         E T1M, T1K, T1C, T1G;
 
206
                         T1M = T1I + T1J;
 
207
                         T1K = T1I - T1J;
 
208
                         T1C = T1y + T1B;
 
209
                         T1G = T1B - T1y;
 
210
                         Im[0] = -(KP500000000 * (FNMS(KP707106781, T1K, T1H)));
 
211
                         Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1K, T1H));
 
212
                         Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1M, T1L));
 
213
                         Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1M, T1L));
 
214
                         Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1G, T1F));
 
215
                         Rm[0] = KP500000000 * (FNMS(KP707106781, T1G, T1F));
 
216
                         Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1C, T1v)));
 
217
                         Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1C, T1v));
 
218
                    }
217
219
               }
218
220
          }
219
221
     }
231
233
}
232
234
#else                           /* HAVE_FMA */
233
235
 
234
 
/* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */
 
236
/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */
235
237
 
236
238
/*
237
239
 * This function contains 82 FP additions, 44 FP multiplications,
244
246
{
245
247
     DK(KP353553390, +0.353553390593273762200422181052424519642417969);
246
248
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
247
 
     INT m;
248
 
     for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
249
 
          E Tv, TX, Ts, TY, TE, T1a, TJ, T19, T1l, T1m, T9, T10, Ti, T11, TP;
250
 
          E T16, TU, T17, T1i, T1j;
251
 
          {
252
 
               E Tt, Tu, TD, Tz, TA, TB, Tn, TI, Tr, TG, Tk, To;
253
 
               Tt = Ip[0];
254
 
               Tu = Im[0];
255
 
               TD = Tt + Tu;
256
 
               Tz = Rm[0];
257
 
               TA = Rp[0];
258
 
               TB = Tz - TA;
259
 
               {
260
 
                    E Tl, Tm, Tp, Tq;
261
 
                    Tl = Ip[WS(rs, 2)];
262
 
                    Tm = Im[WS(rs, 2)];
263
 
                    Tn = Tl - Tm;
264
 
                    TI = Tl + Tm;
265
 
                    Tp = Rp[WS(rs, 2)];
266
 
                    Tq = Rm[WS(rs, 2)];
267
 
                    Tr = Tp + Tq;
268
 
                    TG = Tp - Tq;
269
 
               }
270
 
               Tv = Tt - Tu;
271
 
               TX = TA + Tz;
272
 
               Tk = W[6];
273
 
               To = W[7];
274
 
               Ts = FNMS(To, Tr, Tk * Tn);
275
 
               TY = FMA(Tk, Tr, To * Tn);
276
 
               {
277
 
                    E Ty, TC, TF, TH;
278
 
                    Ty = W[0];
279
 
                    TC = W[1];
280
 
                    TE = FNMS(TC, TD, Ty * TB);
281
 
                    T1a = FMA(TC, TB, Ty * TD);
282
 
                    TF = W[8];
283
 
                    TH = W[9];
284
 
                    TJ = FMA(TF, TG, TH * TI);
285
 
                    T19 = FNMS(TH, TG, TF * TI);
286
 
               }
287
 
               T1l = TJ + TE;
288
 
               T1m = T1a - T19;
289
 
          }
290
 
          {
291
 
               E T4, TO, T8, TM, Td, TT, Th, TR;
292
 
               {
293
 
                    E T2, T3, T6, T7;
294
 
                    T2 = Ip[WS(rs, 1)];
295
 
                    T3 = Im[WS(rs, 1)];
296
 
                    T4 = T2 - T3;
297
 
                    TO = T2 + T3;
298
 
                    T6 = Rp[WS(rs, 1)];
299
 
                    T7 = Rm[WS(rs, 1)];
300
 
                    T8 = T6 + T7;
301
 
                    TM = T6 - T7;
302
 
               }
303
 
               {
304
 
                    E Tb, Tc, Tf, Tg;
305
 
                    Tb = Ip[WS(rs, 3)];
306
 
                    Tc = Im[WS(rs, 3)];
307
 
                    Td = Tb - Tc;
308
 
                    TT = Tb + Tc;
309
 
                    Tf = Rp[WS(rs, 3)];
310
 
                    Tg = Rm[WS(rs, 3)];
311
 
                    Th = Tf + Tg;
312
 
                    TR = Tf - Tg;
313
 
               }
314
 
               {
315
 
                    E T1, T5, Ta, Te;
316
 
                    T1 = W[2];
317
 
                    T5 = W[3];
318
 
                    T9 = FNMS(T5, T8, T1 * T4);
319
 
                    T10 = FMA(T1, T8, T5 * T4);
320
 
                    Ta = W[10];
321
 
                    Te = W[11];
322
 
                    Ti = FNMS(Te, Th, Ta * Td);
323
 
                    T11 = FMA(Ta, Th, Te * Td);
324
 
                    {
325
 
                         E TL, TN, TQ, TS;
326
 
                         TL = W[4];
327
 
                         TN = W[5];
328
 
                         TP = FMA(TL, TM, TN * TO);
329
 
                         T16 = FNMS(TN, TM, TL * TO);
330
 
                         TQ = W[12];
331
 
                         TS = W[13];
332
 
                         TU = FMA(TQ, TR, TS * TT);
333
 
                         T17 = FNMS(TS, TR, TQ * TT);
334
 
                    }
335
 
                    T1i = T17 - T16;
336
 
                    T1j = TP - TU;
337
 
               }
338
 
          }
339
 
          {
340
 
               E T1h, T1t, T1w, T1y, T1o, T1s, T1r, T1x;
341
 
               {
342
 
                    E T1f, T1g, T1u, T1v;
343
 
                    T1f = Tv - Ts;
344
 
                    T1g = T10 - T11;
345
 
                    T1h = KP500000000 * (T1f - T1g);
346
 
                    T1t = KP500000000 * (T1g + T1f);
347
 
                    T1u = T1i - T1j;
348
 
                    T1v = T1l + T1m;
349
 
                    T1w = KP353553390 * (T1u - T1v);
350
 
                    T1y = KP353553390 * (T1u + T1v);
351
 
               }
352
 
               {
353
 
                    E T1k, T1n, T1p, T1q;
354
 
                    T1k = T1i + T1j;
355
 
                    T1n = T1l - T1m;
356
 
                    T1o = KP353553390 * (T1k + T1n);
357
 
                    T1s = KP353553390 * (T1n - T1k);
358
 
                    T1p = TX - TY;
359
 
                    T1q = T9 - Ti;
360
 
                    T1r = KP500000000 * (T1p - T1q);
361
 
                    T1x = KP500000000 * (T1p + T1q);
362
 
               }
363
 
               Ip[WS(rs, 1)] = T1h + T1o;
364
 
               Rp[WS(rs, 1)] = T1x + T1y;
365
 
               Im[WS(rs, 2)] = T1o - T1h;
366
 
               Rm[WS(rs, 2)] = T1x - T1y;
367
 
               Rm[0] = T1r - T1s;
368
 
               Im[0] = T1w - T1t;
369
 
               Rp[WS(rs, 3)] = T1r + T1s;
370
 
               Ip[WS(rs, 3)] = T1t + T1w;
371
 
          }
372
 
          {
373
 
               E Tx, T15, T1c, T1e, TW, T14, T13, T1d;
374
 
               {
375
 
                    E Tj, Tw, T18, T1b;
376
 
                    Tj = T9 + Ti;
377
 
                    Tw = Ts + Tv;
378
 
                    Tx = Tj + Tw;
379
 
                    T15 = Tw - Tj;
380
 
                    T18 = T16 + T17;
381
 
                    T1b = T19 + T1a;
382
 
                    T1c = T18 - T1b;
383
 
                    T1e = T18 + T1b;
384
 
               }
385
 
               {
386
 
                    E TK, TV, TZ, T12;
387
 
                    TK = TE - TJ;
388
 
                    TV = TP + TU;
389
 
                    TW = TK - TV;
390
 
                    T14 = TV + TK;
391
 
                    TZ = TX + TY;
392
 
                    T12 = T10 + T11;
393
 
                    T13 = TZ - T12;
394
 
                    T1d = TZ + T12;
395
 
               }
396
 
               Ip[0] = KP500000000 * (Tx + TW);
397
 
               Rp[0] = KP500000000 * (T1d + T1e);
398
 
               Im[WS(rs, 3)] = KP500000000 * (TW - Tx);
399
 
               Rm[WS(rs, 3)] = KP500000000 * (T1d - T1e);
400
 
               Rm[WS(rs, 1)] = KP500000000 * (T13 - T14);
401
 
               Im[WS(rs, 1)] = KP500000000 * (T1c - T15);
402
 
               Rp[WS(rs, 2)] = KP500000000 * (T13 + T14);
403
 
               Ip[WS(rs, 2)] = KP500000000 * (T15 + T1c);
 
249
     {
 
250
          INT m;
 
251
          for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
 
252
               E Tv, TX, Ts, TY, TE, T1a, TJ, T19, T1l, T1m, T9, T10, Ti, T11, TP;
 
253
               E T16, TU, T17, T1i, T1j;
 
254
               {
 
255
                    E Tt, Tu, TD, Tz, TA, TB, Tn, TI, Tr, TG, Tk, To;
 
256
                    Tt = Ip[0];
 
257
                    Tu = Im[0];
 
258
                    TD = Tt + Tu;
 
259
                    Tz = Rm[0];
 
260
                    TA = Rp[0];
 
261
                    TB = Tz - TA;
 
262
                    {
 
263
                         E Tl, Tm, Tp, Tq;
 
264
                         Tl = Ip[WS(rs, 2)];
 
265
                         Tm = Im[WS(rs, 2)];
 
266
                         Tn = Tl - Tm;
 
267
                         TI = Tl + Tm;
 
268
                         Tp = Rp[WS(rs, 2)];
 
269
                         Tq = Rm[WS(rs, 2)];
 
270
                         Tr = Tp + Tq;
 
271
                         TG = Tp - Tq;
 
272
                    }
 
273
                    Tv = Tt - Tu;
 
274
                    TX = TA + Tz;
 
275
                    Tk = W[6];
 
276
                    To = W[7];
 
277
                    Ts = FNMS(To, Tr, Tk * Tn);
 
278
                    TY = FMA(Tk, Tr, To * Tn);
 
279
                    {
 
280
                         E Ty, TC, TF, TH;
 
281
                         Ty = W[0];
 
282
                         TC = W[1];
 
283
                         TE = FNMS(TC, TD, Ty * TB);
 
284
                         T1a = FMA(TC, TB, Ty * TD);
 
285
                         TF = W[8];
 
286
                         TH = W[9];
 
287
                         TJ = FMA(TF, TG, TH * TI);
 
288
                         T19 = FNMS(TH, TG, TF * TI);
 
289
                    }
 
290
                    T1l = TJ + TE;
 
291
                    T1m = T1a - T19;
 
292
               }
 
293
               {
 
294
                    E T4, TO, T8, TM, Td, TT, Th, TR;
 
295
                    {
 
296
                         E T2, T3, T6, T7;
 
297
                         T2 = Ip[WS(rs, 1)];
 
298
                         T3 = Im[WS(rs, 1)];
 
299
                         T4 = T2 - T3;
 
300
                         TO = T2 + T3;
 
301
                         T6 = Rp[WS(rs, 1)];
 
302
                         T7 = Rm[WS(rs, 1)];
 
303
                         T8 = T6 + T7;
 
304
                         TM = T6 - T7;
 
305
                    }
 
306
                    {
 
307
                         E Tb, Tc, Tf, Tg;
 
308
                         Tb = Ip[WS(rs, 3)];
 
309
                         Tc = Im[WS(rs, 3)];
 
310
                         Td = Tb - Tc;
 
311
                         TT = Tb + Tc;
 
312
                         Tf = Rp[WS(rs, 3)];
 
313
                         Tg = Rm[WS(rs, 3)];
 
314
                         Th = Tf + Tg;
 
315
                         TR = Tf - Tg;
 
316
                    }
 
317
                    {
 
318
                         E T1, T5, Ta, Te;
 
319
                         T1 = W[2];
 
320
                         T5 = W[3];
 
321
                         T9 = FNMS(T5, T8, T1 * T4);
 
322
                         T10 = FMA(T1, T8, T5 * T4);
 
323
                         Ta = W[10];
 
324
                         Te = W[11];
 
325
                         Ti = FNMS(Te, Th, Ta * Td);
 
326
                         T11 = FMA(Ta, Th, Te * Td);
 
327
                         {
 
328
                              E TL, TN, TQ, TS;
 
329
                              TL = W[4];
 
330
                              TN = W[5];
 
331
                              TP = FMA(TL, TM, TN * TO);
 
332
                              T16 = FNMS(TN, TM, TL * TO);
 
333
                              TQ = W[12];
 
334
                              TS = W[13];
 
335
                              TU = FMA(TQ, TR, TS * TT);
 
336
                              T17 = FNMS(TS, TR, TQ * TT);
 
337
                         }
 
338
                         T1i = T17 - T16;
 
339
                         T1j = TP - TU;
 
340
                    }
 
341
               }
 
342
               {
 
343
                    E T1h, T1t, T1w, T1y, T1o, T1s, T1r, T1x;
 
344
                    {
 
345
                         E T1f, T1g, T1u, T1v;
 
346
                         T1f = Tv - Ts;
 
347
                         T1g = T10 - T11;
 
348
                         T1h = KP500000000 * (T1f - T1g);
 
349
                         T1t = KP500000000 * (T1g + T1f);
 
350
                         T1u = T1i - T1j;
 
351
                         T1v = T1l + T1m;
 
352
                         T1w = KP353553390 * (T1u - T1v);
 
353
                         T1y = KP353553390 * (T1u + T1v);
 
354
                    }
 
355
                    {
 
356
                         E T1k, T1n, T1p, T1q;
 
357
                         T1k = T1i + T1j;
 
358
                         T1n = T1l - T1m;
 
359
                         T1o = KP353553390 * (T1k + T1n);
 
360
                         T1s = KP353553390 * (T1n - T1k);
 
361
                         T1p = TX - TY;
 
362
                         T1q = T9 - Ti;
 
363
                         T1r = KP500000000 * (T1p - T1q);
 
364
                         T1x = KP500000000 * (T1p + T1q);
 
365
                    }
 
366
                    Ip[WS(rs, 1)] = T1h + T1o;
 
367
                    Rp[WS(rs, 1)] = T1x + T1y;
 
368
                    Im[WS(rs, 2)] = T1o - T1h;
 
369
                    Rm[WS(rs, 2)] = T1x - T1y;
 
370
                    Rm[0] = T1r - T1s;
 
371
                    Im[0] = T1w - T1t;
 
372
                    Rp[WS(rs, 3)] = T1r + T1s;
 
373
                    Ip[WS(rs, 3)] = T1t + T1w;
 
374
               }
 
375
               {
 
376
                    E Tx, T15, T1c, T1e, TW, T14, T13, T1d;
 
377
                    {
 
378
                         E Tj, Tw, T18, T1b;
 
379
                         Tj = T9 + Ti;
 
380
                         Tw = Ts + Tv;
 
381
                         Tx = Tj + Tw;
 
382
                         T15 = Tw - Tj;
 
383
                         T18 = T16 + T17;
 
384
                         T1b = T19 + T1a;
 
385
                         T1c = T18 - T1b;
 
386
                         T1e = T18 + T1b;
 
387
                    }
 
388
                    {
 
389
                         E TK, TV, TZ, T12;
 
390
                         TK = TE - TJ;
 
391
                         TV = TP + TU;
 
392
                         TW = TK - TV;
 
393
                         T14 = TV + TK;
 
394
                         TZ = TX + TY;
 
395
                         T12 = T10 + T11;
 
396
                         T13 = TZ - T12;
 
397
                         T1d = TZ + T12;
 
398
                    }
 
399
                    Ip[0] = KP500000000 * (Tx + TW);
 
400
                    Rp[0] = KP500000000 * (T1d + T1e);
 
401
                    Im[WS(rs, 3)] = KP500000000 * (TW - Tx);
 
402
                    Rm[WS(rs, 3)] = KP500000000 * (T1d - T1e);
 
403
                    Rm[WS(rs, 1)] = KP500000000 * (T13 - T14);
 
404
                    Im[WS(rs, 1)] = KP500000000 * (T1c - T15);
 
405
                    Rp[WS(rs, 2)] = KP500000000 * (T13 + T14);
 
406
                    Ip[WS(rs, 2)] = KP500000000 * (T15 + T1c);
 
407
               }
404
408
          }
405
409
     }
406
410
}