~ubuntu-branches/ubuntu/utopic/fftw3/utopic

« back to all changes in this revision

Viewing changes to rdft/scalar/r2cf/hc2cfdft2_4.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2011-12-14 13:21:22 UTC
  • mfrom: (3.1.5 sid)
  • Revision ID: package-import@ubuntu.com-20111214132122-l4avyl2kkr7vq5aj
Tags: 3.3-1ubuntu1
* Merge with Debian; remaining changes:
  - Revert the ARM workaround.

Show diffs side-by-side

added

removed

Lines of Context:
1
1
/*
2
 
 * Copyright (c) 2003, 2007-8 Matteo Frigo
3
 
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 
2
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 
3
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4
4
 *
5
5
 * This program is free software; you can redistribute it and/or modify
6
6
 * it under the terms of the GNU General Public License as published by
19
19
 */
20
20
 
21
21
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul 12 06:45:17 EDT 2009 */
 
22
/* Generated on Wed Jul 27 06:18:06 EDT 2011 */
23
23
 
24
24
#include "codelet-rdft.h"
25
25
 
26
26
#ifdef HAVE_FMA
27
27
 
28
 
/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
 
28
/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
29
29
 
30
30
/*
31
31
 * This function contains 32 FP additions, 24 FP multiplications,
37
37
static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
38
38
{
39
39
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
40
 
     INT m;
41
 
     for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs)) {
42
 
          E T1, T5, T2, T4;
43
 
          T1 = W[0];
44
 
          T5 = W[3];
45
 
          T2 = W[2];
46
 
          T4 = W[1];
47
 
          {
48
 
               E Tc, T6, Tp, Tj, Tw, Tt, T9, TE, To, TC, Ta, Tr, Tf, Tl, Tm;
49
 
               {
50
 
                    E Th, Tb, T3, Ti;
51
 
                    Th = Ip[0];
52
 
                    Tb = T1 * T5;
53
 
                    T3 = T1 * T2;
54
 
                    Ti = Im[0];
55
 
                    Tl = Rm[0];
56
 
                    Tc = FNMS(T4, T2, Tb);
57
 
                    T6 = FMA(T4, T5, T3);
58
 
                    Tp = Th + Ti;
59
 
                    Tj = Th - Ti;
60
 
                    Tm = Rp[0];
61
 
               }
62
 
               {
63
 
                    E T7, T8, Td, Tn, Te;
64
 
                    T7 = Ip[WS(rs, 1)];
65
 
                    T8 = Im[WS(rs, 1)];
66
 
                    Td = Rp[WS(rs, 1)];
67
 
                    Tw = Tm + Tl;
68
 
                    Tn = Tl - Tm;
69
 
                    Tt = T7 + T8;
70
 
                    T9 = T7 - T8;
71
 
                    Te = Rm[WS(rs, 1)];
72
 
                    TE = T4 * Tn;
73
 
                    To = T1 * Tn;
74
 
                    TC = T2 * Tt;
75
 
                    Ta = T6 * T9;
76
 
                    Tr = Td - Te;
77
 
                    Tf = Td + Te;
78
 
               }
79
 
               {
80
 
                    E Tq, Tk, TB, Ty, Tu, TI, TG, TF;
81
 
                    Tq = FNMS(T4, Tp, To);
82
 
                    TF = FMA(T1, Tp, TE);
83
 
                    {
84
 
                         E Tg, Tx, TD, Ts;
85
 
                         Tg = FNMS(Tc, Tf, Ta);
86
 
                         Tx = T6 * Tf;
87
 
                         TD = FNMS(T5, Tr, TC);
88
 
                         Ts = T2 * Tr;
89
 
                         Tk = Tg + Tj;
90
 
                         TB = Tj - Tg;
91
 
                         Ty = FMA(Tc, T9, Tx);
92
 
                         Tu = FMA(T5, Tt, Ts);
93
 
                         TI = TD + TF;
94
 
                         TG = TD - TF;
95
 
                    }
96
 
                    {
97
 
                         E Tz, TH, Tv, TA;
98
 
                         Tz = Tw - Ty;
99
 
                         TH = Tw + Ty;
100
 
                         Tv = Tq - Tu;
101
 
                         TA = Tu + Tq;
102
 
                         Rp[0] = KP500000000 * (TH + TI);
103
 
                         Rm[WS(rs, 1)] = KP500000000 * (TH - TI);
104
 
                         Rm[0] = KP500000000 * (Tz - TA);
105
 
                         Im[WS(rs, 1)] = KP500000000 * (Tv - Tk);
106
 
                         Ip[0] = KP500000000 * (Tk + Tv);
107
 
                         Im[0] = KP500000000 * (TG - TB);
108
 
                         Rp[WS(rs, 1)] = KP500000000 * (Tz + TA);
109
 
                         Ip[WS(rs, 1)] = KP500000000 * (TB + TG);
 
40
     {
 
41
          INT m;
 
42
          for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs)) {
 
43
               E T1, T5, T2, T4;
 
44
               T1 = W[0];
 
45
               T5 = W[3];
 
46
               T2 = W[2];
 
47
               T4 = W[1];
 
48
               {
 
49
                    E Tc, T6, Tp, Tj, Tw, Tt, T9, TE, To, TC, Ta, Tr, Tf, Tl, Tm;
 
50
                    {
 
51
                         E Th, Tb, T3, Ti;
 
52
                         Th = Ip[0];
 
53
                         Tb = T1 * T5;
 
54
                         T3 = T1 * T2;
 
55
                         Ti = Im[0];
 
56
                         Tl = Rm[0];
 
57
                         Tc = FNMS(T4, T2, Tb);
 
58
                         T6 = FMA(T4, T5, T3);
 
59
                         Tp = Th + Ti;
 
60
                         Tj = Th - Ti;
 
61
                         Tm = Rp[0];
 
62
                    }
 
63
                    {
 
64
                         E T7, T8, Td, Tn, Te;
 
65
                         T7 = Ip[WS(rs, 1)];
 
66
                         T8 = Im[WS(rs, 1)];
 
67
                         Td = Rp[WS(rs, 1)];
 
68
                         Tw = Tm + Tl;
 
69
                         Tn = Tl - Tm;
 
70
                         Tt = T7 + T8;
 
71
                         T9 = T7 - T8;
 
72
                         Te = Rm[WS(rs, 1)];
 
73
                         TE = T4 * Tn;
 
74
                         To = T1 * Tn;
 
75
                         TC = T2 * Tt;
 
76
                         Ta = T6 * T9;
 
77
                         Tr = Td - Te;
 
78
                         Tf = Td + Te;
 
79
                    }
 
80
                    {
 
81
                         E Tq, Tk, TB, Ty, Tu, TI, TG, TF;
 
82
                         Tq = FNMS(T4, Tp, To);
 
83
                         TF = FMA(T1, Tp, TE);
 
84
                         {
 
85
                              E Tg, Tx, TD, Ts;
 
86
                              Tg = FNMS(Tc, Tf, Ta);
 
87
                              Tx = T6 * Tf;
 
88
                              TD = FNMS(T5, Tr, TC);
 
89
                              Ts = T2 * Tr;
 
90
                              Tk = Tg + Tj;
 
91
                              TB = Tj - Tg;
 
92
                              Ty = FMA(Tc, T9, Tx);
 
93
                              Tu = FMA(T5, Tt, Ts);
 
94
                              TI = TD + TF;
 
95
                              TG = TD - TF;
 
96
                         }
 
97
                         {
 
98
                              E Tz, TH, Tv, TA;
 
99
                              Tz = Tw - Ty;
 
100
                              TH = Tw + Ty;
 
101
                              Tv = Tq - Tu;
 
102
                              TA = Tu + Tq;
 
103
                              Rp[0] = KP500000000 * (TH + TI);
 
104
                              Rm[WS(rs, 1)] = KP500000000 * (TH - TI);
 
105
                              Rm[0] = KP500000000 * (Tz - TA);
 
106
                              Im[WS(rs, 1)] = KP500000000 * (Tv - Tk);
 
107
                              Ip[0] = KP500000000 * (Tk + Tv);
 
108
                              Im[0] = KP500000000 * (TG - TB);
 
109
                              Rp[WS(rs, 1)] = KP500000000 * (Tz + TA);
 
110
                              Ip[WS(rs, 1)] = KP500000000 * (TB + TG);
 
111
                         }
110
112
                    }
111
113
               }
112
114
          }
126
128
}
127
129
#else                           /* HAVE_FMA */
128
130
 
129
 
/* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
 
131
/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
130
132
 
131
133
/*
132
134
 * This function contains 32 FP additions, 24 FP multiplications,
138
140
static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
139
141
{
140
142
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
141
 
     INT m;
142
 
     for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs)) {
143
 
          E T1, T3, T2, T4, T5, T9;
144
 
          T1 = W[0];
145
 
          T3 = W[1];
146
 
          T2 = W[2];
147
 
          T4 = W[3];
148
 
          T5 = FMA(T1, T2, T3 * T4);
149
 
          T9 = FNMS(T3, T2, T1 * T4);
150
 
          {
151
 
               E Tg, Tr, Tm, Tx, Td, Tw, Tp, Ts;
152
 
               {
153
 
                    E Te, Tf, Tl, Ti, Tj, Tk;
154
 
                    Te = Ip[0];
155
 
                    Tf = Im[0];
156
 
                    Tl = Te + Tf;
157
 
                    Ti = Rm[0];
158
 
                    Tj = Rp[0];
159
 
                    Tk = Ti - Tj;
160
 
                    Tg = Te - Tf;
161
 
                    Tr = Tj + Ti;
162
 
                    Tm = FNMS(T3, Tl, T1 * Tk);
163
 
                    Tx = FMA(T3, Tk, T1 * Tl);
164
 
               }
165
 
               {
166
 
                    E T8, To, Tc, Tn;
167
 
                    {
168
 
                         E T6, T7, Ta, Tb;
169
 
                         T6 = Ip[WS(rs, 1)];
170
 
                         T7 = Im[WS(rs, 1)];
171
 
                         T8 = T6 - T7;
172
 
                         To = T6 + T7;
173
 
                         Ta = Rp[WS(rs, 1)];
174
 
                         Tb = Rm[WS(rs, 1)];
175
 
                         Tc = Ta + Tb;
176
 
                         Tn = Ta - Tb;
177
 
                    }
178
 
                    Td = FNMS(T9, Tc, T5 * T8);
179
 
                    Tw = FNMS(T4, Tn, T2 * To);
180
 
                    Tp = FMA(T2, Tn, T4 * To);
181
 
                    Ts = FMA(T5, Tc, T9 * T8);
182
 
               }
183
 
               {
184
 
                    E Th, Tq, Tz, TA;
185
 
                    Th = Td + Tg;
186
 
                    Tq = Tm - Tp;
187
 
                    Ip[0] = KP500000000 * (Th + Tq);
188
 
                    Im[WS(rs, 1)] = KP500000000 * (Tq - Th);
189
 
                    Tz = Tr + Ts;
190
 
                    TA = Tw + Tx;
191
 
                    Rm[WS(rs, 1)] = KP500000000 * (Tz - TA);
192
 
                    Rp[0] = KP500000000 * (Tz + TA);
193
 
               }
194
 
               {
195
 
                    E Tt, Tu, Tv, Ty;
196
 
                    Tt = Tr - Ts;
197
 
                    Tu = Tp + Tm;
198
 
                    Rm[0] = KP500000000 * (Tt - Tu);
199
 
                    Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu);
200
 
                    Tv = Tg - Td;
201
 
                    Ty = Tw - Tx;
202
 
                    Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty);
203
 
                    Im[0] = KP500000000 * (Ty - Tv);
 
143
     {
 
144
          INT m;
 
145
          for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs)) {
 
146
               E T1, T3, T2, T4, T5, T9;
 
147
               T1 = W[0];
 
148
               T3 = W[1];
 
149
               T2 = W[2];
 
150
               T4 = W[3];
 
151
               T5 = FMA(T1, T2, T3 * T4);
 
152
               T9 = FNMS(T3, T2, T1 * T4);
 
153
               {
 
154
                    E Tg, Tr, Tm, Tx, Td, Tw, Tp, Ts;
 
155
                    {
 
156
                         E Te, Tf, Tl, Ti, Tj, Tk;
 
157
                         Te = Ip[0];
 
158
                         Tf = Im[0];
 
159
                         Tl = Te + Tf;
 
160
                         Ti = Rm[0];
 
161
                         Tj = Rp[0];
 
162
                         Tk = Ti - Tj;
 
163
                         Tg = Te - Tf;
 
164
                         Tr = Tj + Ti;
 
165
                         Tm = FNMS(T3, Tl, T1 * Tk);
 
166
                         Tx = FMA(T3, Tk, T1 * Tl);
 
167
                    }
 
168
                    {
 
169
                         E T8, To, Tc, Tn;
 
170
                         {
 
171
                              E T6, T7, Ta, Tb;
 
172
                              T6 = Ip[WS(rs, 1)];
 
173
                              T7 = Im[WS(rs, 1)];
 
174
                              T8 = T6 - T7;
 
175
                              To = T6 + T7;
 
176
                              Ta = Rp[WS(rs, 1)];
 
177
                              Tb = Rm[WS(rs, 1)];
 
178
                              Tc = Ta + Tb;
 
179
                              Tn = Ta - Tb;
 
180
                         }
 
181
                         Td = FNMS(T9, Tc, T5 * T8);
 
182
                         Tw = FNMS(T4, Tn, T2 * To);
 
183
                         Tp = FMA(T2, Tn, T4 * To);
 
184
                         Ts = FMA(T5, Tc, T9 * T8);
 
185
                    }
 
186
                    {
 
187
                         E Th, Tq, Tz, TA;
 
188
                         Th = Td + Tg;
 
189
                         Tq = Tm - Tp;
 
190
                         Ip[0] = KP500000000 * (Th + Tq);
 
191
                         Im[WS(rs, 1)] = KP500000000 * (Tq - Th);
 
192
                         Tz = Tr + Ts;
 
193
                         TA = Tw + Tx;
 
194
                         Rm[WS(rs, 1)] = KP500000000 * (Tz - TA);
 
195
                         Rp[0] = KP500000000 * (Tz + TA);
 
196
                    }
 
197
                    {
 
198
                         E Tt, Tu, Tv, Ty;
 
199
                         Tt = Tr - Ts;
 
200
                         Tu = Tp + Tm;
 
201
                         Rm[0] = KP500000000 * (Tt - Tu);
 
202
                         Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu);
 
203
                         Tv = Tg - Td;
 
204
                         Ty = Tw - Tx;
 
205
                         Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty);
 
206
                         Im[0] = KP500000000 * (Ty - Tv);
 
207
                    }
204
208
               }
205
209
          }
206
210
     }