21
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jul 5 21:43:43 EDT 2003 */
22
/* Generated on Tue Mar 7 10:20:30 EST 2006 */
24
24
#include "codelet-dft.h"
26
/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle_c -simd -compact -variables 4 -n 8 -name t1bv_8 -include t1b.h -sign 1 */
28
/* Generated by: ../../../genfft/gen_twiddle_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */
31
* This function contains 33 FP additions, 24 FP multiplications,
32
* (or, 23 additions, 14 multiplications, 10 fused multiply/add),
33
* 36 stack variables, and 16 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
44
static const R *t1bv_8(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
46
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
50
for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(ios)) {
51
V T1, T2, Th, Tj, T5, T7, Ta, Tc;
52
T1 = LD(&(x[0]), dist, &(x[0]));
53
T2 = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
54
Th = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
55
Tj = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
56
T5 = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
57
T7 = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
58
Ta = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
59
Tc = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
61
V T3, Ti, Tk, T6, T8, Tb, Td;
62
T3 = BYTW(&(W[TWVL * 6]), T2);
63
Ti = BYTW(&(W[TWVL * 2]), Th);
64
Tk = BYTW(&(W[TWVL * 10]), Tj);
65
T6 = BYTW(&(W[0]), T5);
66
T8 = BYTW(&(W[TWVL * 8]), T7);
67
Tb = BYTW(&(W[TWVL * 12]), Ta);
68
Td = BYTW(&(W[TWVL * 4]), Tc);
70
V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts;
89
ST(&(x[0]), VADD(Tw, Tx), dist, &(x[0]));
90
ST(&(x[WS(ios, 4)]), VSUB(Tw, Tx), dist, &(x[0]));
91
ST(&(x[WS(ios, 2)]), VFMAI(Tv, Ts), dist, &(x[0]));
92
ST(&(x[WS(ios, 6)]), VFNMSI(Tv, Ts), dist, &(x[0]));
93
Tp = VFMA(LDK(KP707106781), Tm, Tl);
94
Tn = VFNMS(LDK(KP707106781), Tm, Tl);
95
To = VFMA(LDK(KP707106781), Tf, T4);
96
Tg = VFNMS(LDK(KP707106781), Tf, T4);
97
ST(&(x[WS(ios, 1)]), VFMAI(Tp, To), dist, &(x[WS(ios, 1)]));
98
ST(&(x[WS(ios, 7)]), VFNMSI(Tp, To), dist, &(x[WS(ios, 1)]));
99
ST(&(x[WS(ios, 5)]), VFMAI(Tn, Tg), dist, &(x[WS(ios, 1)]));
100
ST(&(x[WS(ios, 3)]), VFNMSI(Tn, Tg), dist, &(x[WS(ios, 1)]));
109
static const tw_instr twinstr[] = {
120
static const ct_desc desc = { 8, "t1bv_8", twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 };
122
void X(codelet_t1bv_8) (planner *p) {
123
X(kdft_dit_register) (p, t1bv_8, &desc);
127
/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */
29
130
* This function contains 33 FP additions, 16 FP multiplications,
34
135
* Generator Id's :
35
* $Id: algsimp.ml,v 1.7 2003/03/15 20:29:42 stevenj Exp $
36
* $Id: fft.ml,v 1.2 2003/03/15 20:29:42 stevenj Exp $
37
* $Id: gen_twiddle_c.ml,v 1.7 2003/04/16 19:51:27 athena Exp $
136
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
137
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
138
* $Id: gen_twiddle_c.ml,v 1.14 2006-02-12 23:34:12 athena Exp $
42
static const R *t1bv_8(R *ri, R *ii, const R *W, stride ios, int m, int dist)
143
static const R *t1bv_8(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
44
145
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
49
for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 14)) {
149
for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(ios)) {
50
150
V Tl, Tq, Tg, Tr, T5, Tt, Ta, Tu, Ti, Tk, Tj;
51
151
Ti = LD(&(x[0]), dist, &(x[0]));
52
152
Tj = LD(&(x[WS(ios, 4)]), dist, &(x[0]));