21
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jul 5 21:40:41 EDT 2003 */
22
/* Generated on Tue Mar 7 09:59:31 EST 2006 */
24
24
#include "codelet-dft.h"
26
/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw_c -simd -compact -variables 4 -sign 1 -n 4 -name n2bv_4 -with-ostride 2 -include n2b.h */
28
/* Generated by: ../../../genfft/gen_notw_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n2bv_4 -with-ostride 2 -include n2b.h -store-multiple 2 */
31
* This function contains 8 FP additions, 2 FP multiplications,
32
* (or, 6 additions, 0 multiplications, 2 fused multiply/add),
33
* 15 stack variables, and 10 memory accesses
37
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
* $Id: gen_notw_c.ml,v 1.17 2006-02-12 23:34:12 athena Exp $
44
static void n2bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
51
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
53
T1 = LD(&(xi[0]), ivs, &(xi[0]));
54
T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
55
T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
56
T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
66
STM2(&(xo[4]), T9, ovs, &(xo[0]));
68
STM2(&(xo[0]), Ta, ovs, &(xo[0]));
70
STM2(&(xo[2]), Tb, ovs, &(xo[2]));
71
STN2(&(xo[0]), Ta, Tb, ovs);
73
STM2(&(xo[6]), Tc, ovs, &(xo[2]));
74
STN2(&(xo[4]), T9, Tc, ovs);
80
static const kdft_desc desc = { 4, "n2bv_4", {6, 0, 2, 0}, &GENUS, 0, 2, 0, 0 };
81
void X(codelet_n2bv_4) (planner *p) {
82
X(kdft_register) (p, n2bv_4, &desc);
87
/* Generated by: ../../../genfft/gen_notw_c -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n2bv_4 -with-ostride 2 -include n2b.h -store-multiple 2 */
29
90
* This function contains 8 FP additions, 0 FP multiplications,
30
91
* (or, 8 additions, 0 multiplications, 0 fused multiply/add),
31
* 11 stack variables, and 8 memory accesses
92
* 11 stack variables, and 10 memory accesses
35
* $Id: algsimp.ml,v 1.7 2003/03/15 20:29:42 stevenj Exp $
36
* $Id: fft.ml,v 1.2 2003/03/15 20:29:42 stevenj Exp $
37
* $Id: gen_notw_c.ml,v 1.9 2003/04/16 21:21:53 athena Exp $
96
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
97
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
98
* $Id: gen_notw_c.ml,v 1.17 2006-02-12 23:34:12 athena Exp $
42
static void n2bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
103
static void n2bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
50
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs)) {
110
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
60
120
T6 = VBYI(VSUB(T4, T5));
61
121
T8 = VADD(T4, T5);
63
ST(&(xo[6]), VSUB(T3, T6), ovs, &(xo[2]));
64
ST(&(xo[0]), VADD(T7, T8), ovs, &(xo[0]));
65
ST(&(xo[2]), VADD(T3, T6), ovs, &(xo[2]));
66
ST(&(xo[4]), VSUB(T7, T8), ovs, &(xo[0]));
126
STM2(&(xo[6]), T9, ovs, &(xo[2]));
128
STM2(&(xo[0]), Ta, ovs, &(xo[0]));
130
STM2(&(xo[2]), Tb, ovs, &(xo[2]));
131
STN2(&(xo[0]), Ta, Tb, ovs);
133
STM2(&(xo[4]), Tc, ovs, &(xo[0]));
134
STN2(&(xo[4]), Tc, T9, ovs);
71
139
static const kdft_desc desc = { 4, "n2bv_4", {8, 0, 0, 0}, &GENUS, 0, 2, 0, 0 };
72
140
void X(codelet_n2bv_4) (planner *p) {
73
141
X(kdft_register) (p, n2bv_4, &desc);
144
#endif /* HAVE_FMA */