40
40
DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
41
41
DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
42
42
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
44
for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
47
E Tx, T6, Te, Tb, T5, Tw, Ts, To, Th, Ti, T9, TA;
49
E T1, Tq, Tc, Td, T4, T2, T3, T7, T8, Tr;
45
for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
48
E Tx, T6, Te, Tb, T5, Tw, Ts, To, Th, Ti, T9, TA;
50
E T1, Tq, Tc, Td, T4, T2, T3, T7, T8, Tr;
62
Tb = FNMS(KP2_000000000, T1, T4);
65
Tw = FMA(KP2_000000000, Tq, Tr);
80
Tl = FNMS(KP1_732050807, Te, Tb);
81
Tf = FMA(KP1_732050807, Te, Tb);
84
Tg = FNMS(KP2_000000000, T6, T9);
88
Ty = FMA(KP1_732050807, Tx, Tw);
89
TE = FNMS(KP1_732050807, Tx, Tw);
90
Tz = FMA(KP2_000000000, To, Tn);
92
Tm = FMA(KP1_732050807, Tj, Tg);
93
Tk = FNMS(KP1_732050807, Tj, Tg);
95
R0[0] = KP2_000000000 * (T5 + Ta);
97
R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
99
TD = FMA(KP1_732050807, TA, Tz);
100
TB = FNMS(KP1_732050807, TA, Tz);
101
R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
102
R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
105
R0[WS(rs, 2)] = Tf + Tk;
107
R0[WS(rs, 5)] = TB - Ty;
109
R1[WS(rs, 3)] = KP707106781 * (Tv + TC);
110
R1[0] = KP707106781 * (Tv - TC);
112
R0[WS(rs, 4)] = -(Tl + Tm);
115
R0[WS(rs, 1)] = TD - TE;
117
R1[WS(rs, 5)] = KP707106781 * (TF - TG);
118
R1[WS(rs, 2)] = KP707106781 * (TF + TG);
123
static const kr2c_desc desc = { 12, "r2cbIII_12", {30, 8, 12, 0}, &GENUS };
125
void X(codelet_r2cbIII_12) (planner *p) {
126
X(kr2c_register) (p, r2cbIII_12, &desc);
131
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include r2cbIII.h */
134
* This function contains 42 FP additions, 20 FP multiplications,
135
* (or, 38 additions, 16 multiplications, 4 fused multiply/add),
136
* 25 stack variables, 4 constants, and 24 memory accesses
140
static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
142
DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
143
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
144
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
145
DK(KP866025403, +0.866025403784438646763723170752936183471402627);
148
for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
149
E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
50
152
T1 = Cr[WS(csr, 1)];
51
153
T2 = Cr[WS(csr, 5)];
52
154
T3 = Cr[WS(csr, 2)];
157
Tw = KP866025403 * (T2 - T3);
158
Tb = FNMS(KP500000000, T4, T1);
53
162
Tq = Ci[WS(csi, 1)];
54
163
Tc = Ci[WS(csi, 5)];
55
164
Td = Ci[WS(csi, 2)];
166
Te = KP866025403 * (Tc + Td);
167
Tx = FMA(KP500000000, Tr, Tq);
58
172
T6 = Cr[WS(csr, 4)];
61
Tb = FNMS(KP2_000000000, T1, T4);
64
Tw = FMA(KP2_000000000, Tq, Tr);
66
174
T8 = Cr[WS(csr, 3)];
177
TA = KP866025403 * (T7 - T8);
178
Tg = FNMS(KP500000000, T9, T6);
67
182
To = Ci[WS(csi, 4)];
69
184
Ti = Ci[WS(csi, 3)];
79
Tl = FNMS(KP1_732050807, Te, Tb);
80
Tf = FMA(KP1_732050807, Te, Tb);
83
Tg = FNMS(KP2_000000000, T6, T9);
87
Ty = FMA(KP1_732050807, Tx, Tw);
88
TE = FNMS(KP1_732050807, Tx, Tw);
89
Tz = FMA(KP2_000000000, To, Tn);
91
Tm = FMA(KP1_732050807, Tj, Tg);
92
Tk = FNMS(KP1_732050807, Tj, Tg);
94
R0[0] = KP2_000000000 * (T5 + Ta);
96
R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
98
TD = FMA(KP1_732050807, TA, Tz);
99
TB = FNMS(KP1_732050807, TA, Tz);
100
R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
101
R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
104
R0[WS(rs, 2)] = Tf + Tk;
106
R0[WS(rs, 5)] = TB - Ty;
108
R1[WS(rs, 3)] = KP707106781 * (Tv + TC);
109
R1[0] = KP707106781 * (Tv - TC);
111
R0[WS(rs, 4)] = -(Tl + Tm);
114
R0[WS(rs, 1)] = TD - TE;
116
R1[WS(rs, 5)] = KP707106781 * (TF - TG);
117
R1[WS(rs, 2)] = KP707106781 * (TF + TG);
121
static const kr2c_desc desc = { 12, "r2cbIII_12", {30, 8, 12, 0}, &GENUS };
123
void X(codelet_r2cbIII_12) (planner *p) {
124
X(kr2c_register) (p, r2cbIII_12, &desc);
129
/* Generated by: ../../../genfft/gen_r2cb -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include r2cbIII.h */
132
* This function contains 42 FP additions, 20 FP multiplications,
133
* (or, 38 additions, 16 multiplications, 4 fused multiply/add),
134
* 25 stack variables, 4 constants, and 24 memory accesses
138
static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
140
DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
141
DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
142
DK(KP500000000, +0.500000000000000000000000000000000000000000000);
143
DK(KP866025403, +0.866025403784438646763723170752936183471402627);
145
for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
146
E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
154
Tw = KP866025403 * (T2 - T3);
155
Tb = FNMS(KP500000000, T4, T1);
163
Te = KP866025403 * (Tc + Td);
164
Tx = FMA(KP500000000, Tr, Tq);
174
TA = KP866025403 * (T7 - T8);
175
Tg = FNMS(KP500000000, T9, T6);
183
Tj = KP866025403 * (Th + Ti);
184
Tz = FMA(KP500000000, Tn, To);
187
R0[0] = KP2_000000000 * (T5 + Ta);
188
R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
191
R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
192
R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
194
E Tf, Tk, Tv, Ty, TB, TC;
201
R0[WS(rs, 2)] = -(KP2_000000000 * (Tf + Tk));
202
R0[WS(rs, 5)] = KP2_000000000 * (TB - Ty);
203
R1[0] = KP1_414213562 * (Tv - TC);
204
R1[WS(rs, 3)] = KP1_414213562 * (Tv + TC);
207
E Tl, Tm, TF, TD, TE, TG;
214
R0[WS(rs, 4)] = KP2_000000000 * (Tl + Tm);
215
R1[WS(rs, 2)] = KP1_414213562 * (TF + TG);
216
R0[WS(rs, 1)] = KP2_000000000 * (TD - TE);
217
R1[WS(rs, 5)] = KP1_414213562 * (TF - TG);
186
Tj = KP866025403 * (Th + Ti);
187
Tz = FMA(KP500000000, Tn, To);
190
R0[0] = KP2_000000000 * (T5 + Ta);
191
R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
194
R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
195
R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
197
E Tf, Tk, Tv, Ty, TB, TC;
204
R0[WS(rs, 2)] = -(KP2_000000000 * (Tf + Tk));
205
R0[WS(rs, 5)] = KP2_000000000 * (TB - Ty);
206
R1[0] = KP1_414213562 * (Tv - TC);
207
R1[WS(rs, 3)] = KP1_414213562 * (Tv + TC);
210
E Tl, Tm, TF, TD, TE, TG;
217
R0[WS(rs, 4)] = KP2_000000000 * (Tl + Tm);
218
R1[WS(rs, 2)] = KP1_414213562 * (TF + TG);
219
R0[WS(rs, 1)] = KP2_000000000 * (TD - TE);
220
R1[WS(rs, 5)] = KP1_414213562 * (TF - TG);