~siretart/ubuntu/utopic/blender/libav10

« back to all changes in this revision

Viewing changes to extern/fftw/rdft/codelets/hc2r/hb_12.c

  • Committer: Bazaar Package Importer
  • Author(s): Kevin Roy
  • Date: 2011-02-08 22:20:54 UTC
  • mfrom: (1.4.2 upstream)
  • mto: (14.2.6 sid) (1.5.1)
  • mto: This revision was merged to the branch mainline in revision 27.
  • Revision ID: james.westby@ubuntu.com-20110208222054-kk0gwa4bu8h5lyq4
Tags: upstream-2.56.1-beta-svn34076
Import upstream version 2.56.1-beta-svn34076

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright (c) 2003, 2006 Matteo Frigo
3
 
 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology
4
 
 *
5
 
 * This program is free software; you can redistribute it and/or modify
6
 
 * it under the terms of the GNU General Public License as published by
7
 
 * the Free Software Foundation; either version 2 of the License, or
8
 
 * (at your option) any later version.
9
 
 *
10
 
 * This program is distributed in the hope that it will be useful,
11
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 
 * GNU General Public License for more details.
14
 
 *
15
 
 * You should have received a copy of the GNU General Public License
16
 
 * along with this program; if not, write to the Free Software
17
 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 
 *
19
 
 */
20
 
 
21
 
/* This file was automatically generated --- DO NOT EDIT */
22
 
/* Generated on Sun Jul  2 16:31:20 EDT 2006 */
23
 
 
24
 
#include "codelet-rdft.h"
25
 
 
26
 
#ifdef HAVE_FMA
27
 
 
28
 
/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */
29
 
 
30
 
/*
31
 
 * This function contains 118 FP additions, 68 FP multiplications,
32
 
 * (or, 72 additions, 22 multiplications, 46 fused multiply/add),
33
 
 * 66 stack variables, and 48 memory accesses
34
 
 */
35
 
/*
36
 
 * Generator Id's : 
37
 
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
38
 
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
39
 
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
40
 
 */
41
 
 
42
 
#include "hb.h"
43
 
 
44
 
/*
 * hb_12, FMA variant (this copy is compiled when HAVE_FMA is defined).
 *
 * Size-12 codelet that works in place on the rio/iio arrays.  Each loop
 * iteration:
 *   - reads rio[WS(ios, k)] and iio[-WS(ios, k)] for k = 0..11 (24 values);
 *   - combines them with additions/subtractions and the two constants
 *     0.5 and 0.866025403... (numerically sqrt(3)/2);
 *   - stores output pair k into rio[WS(ios, k)] and iio[-WS(ios, 11 - k)].
 *     Pair 0 (rio[0], iio[-WS(ios, 11)]) is stored as a plain sum; pairs
 *     k = 1..11 are first combined with the twiddle entries W[2k - 2] and
 *     W[2k - 1].
 *
 * The loop starts at i = m - 2 and continues while i > 0, decreasing i by 2.
 * On every step rio moves forward by dist, iio moves backward by dist and W
 * moves forward by 22 entries (the 22 twiddle values used above).
 *
 * Parameters:
 *   rio, iio - data pointers, read and overwritten in place; rio is indexed
 *              with non-negative WS() offsets, iio with non-positive ones
 *   W        - twiddle table; 22 entries are consumed per iteration
 *   ios      - stride handed to WS() to turn an index into an offset
 *   m        - determines the number of iterations (see the loop bounds)
 *   dist     - per-iteration step applied to rio (+dist) and iio (-dist)
 *
 * Returns the W pointer after it has been advanced by the loop.
 *
 * NOTE(review): FMA and FNMS are macros defined outside this file;
 * presumably FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b.  Confirm
 * against the project headers before relying on this.
 */
static const R *hb_12(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
45
 
{
46
 
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
47
 
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
48
 
     INT i;
49
 
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 22, MAKE_VOLATILE_STRIDE(ios)) {
50
 
          E T18, T20, T21, T1b, T2a, T1s, T29, T1p, Tz, T11, TD, Tb, Tg, T23, T1f;
51
 
          E Tl, TN, TI, T1i, T24, T1z, T2d, T1w, T2c;
52
 
          /* Input phase: load all 24 values and form the intermediate sums and differences (no stores here). */
          {
53
 
               E T5, Ta, Ty, Tt;
54
 
               {
55
 
                    E T1, Tp, T6, Tu, T7, T1o, T4, T17, Ts, T8, Tv, Tw;
56
 
                    T1 = rio[0];
57
 
                    Tp = iio[0];
58
 
                    T6 = iio[-WS(ios, 6)];
59
 
                    Tu = rio[WS(ios, 6)];
60
 
                    {
61
 
                         E T2, T3, Tq, Tr;
62
 
                         T2 = rio[WS(ios, 4)];
63
 
                         T3 = iio[-WS(ios, 8)];
64
 
                         Tq = iio[-WS(ios, 4)];
65
 
                         Tr = rio[WS(ios, 8)];
66
 
                         T7 = iio[-WS(ios, 10)];
67
 
                         T1o = T2 - T3;
68
 
                         T4 = T2 + T3;
69
 
                         T17 = Tr + Tq;
70
 
                         Ts = Tq - Tr;
71
 
                         T8 = rio[WS(ios, 2)];
72
 
                         Tv = rio[WS(ios, 10)];
73
 
                         Tw = iio[-WS(ios, 2)];
74
 
                    }
75
 
                    {
76
 
                         E T1r, T1a, T19, T1q, T9, Tx, T16, T1n;
77
 
                         T5 = T1 + T4;
78
 
                         T16 = FNMS(KP500000000, T4, T1);
79
 
                         T1r = T7 - T8;
80
 
                         T9 = T7 + T8;
81
 
                         T1a = Tv + Tw;
82
 
                         Tx = Tv - Tw;
83
 
                         T18 = FMA(KP866025403, T17, T16);
84
 
                         T20 = FNMS(KP866025403, T17, T16);
85
 
                         T19 = FNMS(KP500000000, T9, T6);
86
 
                         Ta = T6 + T9;
87
 
                         Ty = Tu + Tx;
88
 
                         T1q = FNMS(KP500000000, Tx, Tu);
89
 
                         T1n = FNMS(KP500000000, Ts, Tp);
90
 
                         Tt = Tp + Ts;
91
 
                         T21 = FMA(KP866025403, T1a, T19);
92
 
                         T1b = FNMS(KP866025403, T1a, T19);
93
 
                         T2a = FNMS(KP866025403, T1r, T1q);
94
 
                         T1s = FMA(KP866025403, T1r, T1q);
95
 
                         T29 = FMA(KP866025403, T1o, T1n);
96
 
                         T1p = FNMS(KP866025403, T1o, T1n);
97
 
                    }
98
 
               }
99
 
               {
100
 
                    E Tc, TE, Th, TM, Ti, Tf, T1v, TH, T1e, Tj, TJ, TK;
101
 
                    Tc = rio[WS(ios, 3)];
102
 
                    Tz = Tt + Ty;
103
 
                    T11 = Tt - Ty;
104
 
                    TE = iio[-WS(ios, 3)];
105
 
                    TD = T5 - Ta;
106
 
                    Tb = T5 + Ta;
107
 
                    Th = iio[-WS(ios, 9)];
108
 
                    TM = rio[WS(ios, 9)];
109
 
                    {
110
 
                         E Td, Te, TF, TG;
111
 
                         Td = iio[-WS(ios, 7)];
112
 
                         Te = iio[-WS(ios, 11)];
113
 
                         TF = rio[WS(ios, 7)];
114
 
                         TG = rio[WS(ios, 11)];
115
 
                         Ti = rio[WS(ios, 1)];
116
 
                         Tf = Td + Te;
117
 
                         T1v = Td - Te;
118
 
                         TH = TF + TG;
119
 
                         T1e = TF - TG;
120
 
                         Tj = rio[WS(ios, 5)];
121
 
                         TJ = iio[-WS(ios, 5)];
122
 
                         TK = iio[-WS(ios, 1)];
123
 
                    }
124
 
                    {
125
 
                         E T1y, T1h, T1g, T1x, Tk, TL, T1d, T1u;
126
 
                         T1d = FNMS(KP500000000, Tf, Tc);
127
 
                         Tg = Tc + Tf;
128
 
                         Tk = Ti + Tj;
129
 
                         T1y = Ti - Tj;
130
 
                         TL = TJ + TK;
131
 
                         T1h = TJ - TK;
132
 
                         T23 = FMA(KP866025403, T1e, T1d);
133
 
                         T1f = FNMS(KP866025403, T1e, T1d);
134
 
                         Tl = Th + Tk;
135
 
                         T1g = FNMS(KP500000000, Tk, Th);
136
 
                         T1x = FMA(KP500000000, TL, TM);
137
 
                         TN = TL - TM;
138
 
                         TI = TE - TH;
139
 
                         T1u = FMA(KP500000000, TH, TE);
140
 
                         T1i = FNMS(KP866025403, T1h, T1g);
141
 
                         T24 = FMA(KP866025403, T1h, T1g);
142
 
                         T1z = FMA(KP866025403, T1y, T1x);
143
 
                         T2d = FNMS(KP866025403, T1y, T1x);
144
 
                         T1w = FNMS(KP866025403, T1v, T1u);
145
 
                         T2c = FMA(KP866025403, T1v, T1u);
146
 
                    }
147
 
               }
148
 
          }
149
 
          /* Output phase: finish the combinations, load the twiddle entries from W and store all 24 results. */
          {
150
 
               E TY, T13, TX, T10;
151
 
               {
152
 
                    E Tn, T12, TC, Tm, To, TS, TP, TO;
153
 
                    Tn = W[16];
154
 
                    T12 = TI + TN;
155
 
                    TO = TI - TN;
156
 
                    TC = W[17];
157
 
                    Tm = Tg + Tl;
158
 
                    To = Tg - Tl;
159
 
                    TS = TD + TO;
160
 
                    TP = TD - TO;
161
 
                    {
162
 
                         E TV, TU, TW, TT;
163
 
                         {
164
 
                              E TB, TR, TA, TQ;
165
 
                              TV = Tz - To;
166
 
                              TA = To + Tz;
167
 
                              /* Output pair 0 is stored without any twiddle factor (see iio[-WS(ios, 11)] below). */
                              rio[0] = Tb + Tm;
168
 
                              TQ = Tn * TP;
169
 
                              TB = Tn * TA;
170
 
                              TR = W[4];
171
 
                              rio[WS(ios, 9)] = FNMS(TC, TA, TQ);
172
 
                              TU = W[5];
173
 
                              iio[-WS(ios, 2)] = FMA(TC, TP, TB);
174
 
                              TW = TR * TV;
175
 
                              TT = TR * TS;
176
 
                         }
177
 
                         iio[-WS(ios, 8)] = FMA(TU, TS, TW);
178
 
                         rio[WS(ios, 3)] = FNMS(TU, TV, TT);
179
 
                         TY = Tb - Tm;
180
 
                         T13 = T11 - T12;
181
 
                         TX = W[10];
182
 
                         T10 = W[11];
183
 
                         iio[-WS(ios, 11)] = T11 + T12;
184
 
                    }
185
 
               }
186
 
               {
187
 
                    E T1c, T1A, T1t, T1j, T22, T2e, T2b, T2B, T2q, T25, T2s, T2y, T2C, T2z, T2w;
188
 
                    E T2A;
189
 
                    {
190
 
                         E T1X, T1M, T1O, T1U, T1Y, T1V, T1S, T1W, T1P, T1Q;
191
 
                         {
192
 
                              E T1K, TZ, T14, T1L;
193
 
                              T1c = T18 + T1b;
194
 
                              T1K = T18 - T1b;
195
 
                              TZ = TX * TY;
196
 
                              T14 = T10 * TY;
197
 
                              T1L = T1w + T1z;
198
 
                              T1A = T1w - T1z;
199
 
                              T1t = T1p - T1s;
200
 
                              T1P = T1p + T1s;
201
 
                              rio[WS(ios, 6)] = FNMS(T10, T13, TZ);
202
 
                              iio[-WS(ios, 5)] = FMA(TX, T13, T14);
203
 
                              T1X = T1K + T1L;
204
 
                              T1M = T1K - T1L;
205
 
                              T1Q = T1f - T1i;
206
 
                              T1j = T1f + T1i;
207
 
                         }
208
 
                         {
209
 
                              E T1J, T1T, T1R, T1N;
210
 
                              T1J = W[8];
211
 
                              T1O = W[9];
212
 
                              T1T = W[20];
213
 
                              T1U = T1P - T1Q;
214
 
                              T1R = T1P + T1Q;
215
 
                              T1N = T1J * T1M;
216
 
                              T1Y = T1T * T1X;
217
 
                              T1V = T1T * T1U;
218
 
                              T1S = T1J * T1R;
219
 
                              rio[WS(ios, 5)] = FNMS(T1O, T1R, T1N);
220
 
                              T1W = W[21];
221
 
                         }
222
 
                         {
223
 
                              E T2t, T2u, T2o, T2p;
224
 
                              T2o = T20 - T21;
225
 
                              T22 = T20 + T21;
226
 
                              iio[-WS(ios, 6)] = FMA(T1O, T1M, T1S);
227
 
                              T2p = T2c + T2d;
228
 
                              T2e = T2c - T2d;
229
 
                              rio[WS(ios, 11)] = FNMS(T1W, T1U, T1Y);
230
 
                              iio[0] = FMA(T1W, T1X, T1V);
231
 
                              T2b = T29 - T2a;
232
 
                              T2t = T29 + T2a;
233
 
                              T2B = T2o + T2p;
234
 
                              T2q = T2o - T2p;
235
 
                              T2u = T23 - T24;
236
 
                              T25 = T23 + T24;
237
 
                              {
238
 
                                   E T2n, T2x, T2v, T2r;
239
 
                                   T2n = W[0];
240
 
                                   T2s = W[1];
241
 
                                   T2x = W[12];
242
 
                                   T2y = T2t - T2u;
243
 
                                   T2v = T2t + T2u;
244
 
                                   T2r = T2n * T2q;
245
 
                                   T2C = T2x * T2B;
246
 
                                   T2z = T2x * T2y;
247
 
                                   T2w = T2n * T2v;
248
 
                                   rio[WS(ios, 1)] = FNMS(T2s, T2v, T2r);
249
 
                                   T2A = W[13];
250
 
                              }
251
 
                         }
252
 
                    }
253
 
                    {
254
 
                         E T2i, T2h, T2l, T2j, T2k, T26;
255
 
                         iio[-WS(ios, 10)] = FMA(T2s, T2q, T2w);
256
 
                         rio[WS(ios, 7)] = FNMS(T2A, T2y, T2C);
257
 
                         iio[-WS(ios, 4)] = FMA(T2A, T2B, T2z);
258
 
                         T2i = T22 + T25;
259
 
                         T26 = T22 - T25;
260
 
                         {
261
 
                              E T1Z, T28, T2f, T27, T2g;
262
 
                              T1Z = W[18];
263
 
                              T28 = W[19];
264
 
                              T2h = W[6];
265
 
                              T2l = T2b + T2e;
266
 
                              T2f = T2b - T2e;
267
 
                              T27 = T1Z * T26;
268
 
                              T2g = T28 * T26;
269
 
                              T2j = T2h * T2i;
270
 
                              T2k = W[7];
271
 
                              rio[WS(ios, 10)] = FNMS(T28, T2f, T27);
272
 
                              iio[-WS(ios, 1)] = FMA(T1Z, T2f, T2g);
273
 
                         }
274
 
                         {
275
 
                              E T1k, T1E, T1H, T1B, T2m, T15, T1m;
276
 
                              rio[WS(ios, 4)] = FNMS(T2k, T2l, T2j);
277
 
                              T2m = T2k * T2i;
278
 
                              iio[-WS(ios, 7)] = FMA(T2h, T2l, T2m);
279
 
                              T1k = T1c - T1j;
280
 
                              T1E = T1c + T1j;
281
 
                              T1H = T1t + T1A;
282
 
                              T1B = T1t - T1A;
283
 
                              T15 = W[2];
284
 
                              T1m = W[3];
285
 
                              {
286
 
                                   E T1D, T1G, T1l, T1C, T1F, T1I;
287
 
                                   T1D = W[14];
288
 
                                   T1G = W[15];
289
 
                                   T1l = T15 * T1k;
290
 
                                   T1C = T1m * T1k;
291
 
                                   T1F = T1D * T1E;
292
 
                                   T1I = T1G * T1E;
293
 
                                   rio[WS(ios, 2)] = FNMS(T1m, T1B, T1l);
294
 
                                   iio[-WS(ios, 9)] = FMA(T15, T1B, T1C);
295
 
                                   rio[WS(ios, 8)] = FNMS(T1G, T1H, T1F);
296
 
                                   iio[-WS(ios, 3)] = FMA(T1D, T1H, T1I);
297
 
                              }
298
 
                         }
299
 
                    }
300
 
               }
301
 
          }
302
 
     }
303
 
     /* W now points past the twiddle entries consumed by the loop. */
     return W;
304
 
}
305
 
 
306
 
/*
 * Twiddle instruction table referenced by the descriptor `desc` below:
 * one TW_FULL entry carrying the values 0 and 12, followed by a TW_NEXT
 * entry carrying 1 and 0.
 * NOTE(review): the meaning of the fields is defined by tw_instr, declared
 * outside this file; 12 presumably is the codelet size, consistent with
 * hb_12 consuming 22 = 2 * (12 - 1) entries of W per iteration -- confirm.
 */
static const tw_instr twinstr[] = {
307
 
     {TW_FULL, 0, 12},
308
 
     {TW_NEXT, 1, 0}
309
 
};
310
 
 
311
 
/*
 * Descriptor passed to the registration call for the FMA variant: the value
 * 12, the name "hb_12", the twiddle table, &GENUS, the counts {72, 22, 46, 0}
 * (the same 72 / 22 / 46 figures quoted as additions / multiplications /
 * fused multiply-adds in the generator comment of this variant), and three
 * trailing zeros.
 * NOTE(review): field meanings come from hc2hc_desc, declared elsewhere.
 */
static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {72, 22, 46, 0}, 0, 0, 0 };
312
 
 
313
 
/*
 * Registration entry point (FMA variant): hands the hb_12 function and its
 * descriptor `desc` to X(khc2hc_register) for planner p.
 */
void X(codelet_hb_12) (planner *p) {
314
 
     X(khc2hc_register) (p, hb_12, &desc);
315
 
}
316
 
#else                           /* HAVE_FMA */
317
 
 
318
 
/* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */
319
 
 
320
 
/*
321
 
 * This function contains 118 FP additions, 60 FP multiplications,
322
 
 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
323
 
 * 39 stack variables, and 48 memory accesses
324
 
 */
325
 
/*
326
 
 * Generator Id's : 
327
 
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
328
 
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
329
 
 * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $
330
 
 */
331
 
 
332
 
#include "hb.h"
333
 
 
334
 
/*
 * hb_12, non-FMA variant (this copy is compiled when HAVE_FMA is not
 * defined).
 *
 * Size-12 codelet that works in place on the rio/iio arrays.  Each loop
 * iteration:
 *   - reads rio[WS(ios, k)] and iio[-WS(ios, k)] for k = 0..11 (24 values);
 *   - combines them with additions/subtractions and the two constants
 *     0.5 and 0.866025403... (numerically sqrt(3)/2);
 *   - stores output pair k into rio[WS(ios, k)] and iio[-WS(ios, 11 - k)].
 *     Pair 0 (rio[0], iio[-WS(ios, 11)]) is stored as a plain sum; pairs
 *     k = 1..11 are first combined with the twiddle entries W[2k - 2] and
 *     W[2k - 1].
 *
 * The loop starts at i = m - 2 and continues while i > 0, decreasing i by 2.
 * On every step rio moves forward by dist, iio moves backward by dist and W
 * moves forward by 22 entries (the 22 twiddle values used above).
 *
 * Parameters:
 *   rio, iio - data pointers, read and overwritten in place; rio is indexed
 *              with non-negative WS() offsets, iio with non-positive ones
 *   W        - twiddle table; 22 entries are consumed per iteration
 *   ios      - stride handed to WS() to turn an index into an offset
 *   m        - determines the number of iterations (see the loop bounds)
 *   dist     - per-iteration step applied to rio (+dist) and iio (-dist)
 *
 * Returns the W pointer after it has been advanced by the loop.
 *
 * NOTE(review): FMA, FNMS and FMS are macros defined outside this file;
 * presumably FMA(a, b, c) is a*b + c, FNMS(a, b, c) is c - a*b and
 * FMS(a, b, c) is a*b - c.  Confirm against the project headers.
 */
static const R *hb_12(R *rio, R *iio, const R *W, stride ios, INT m, INT dist)
335
 
{
336
 
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
337
 
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
338
 
     INT i;
339
 
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 22, MAKE_VOLATILE_STRIDE(ios)) {
340
 
          E T5, Tt, T12, T1M, T1i, T1U, Tl, TM, T1c, T1Y, T1s, T1Q, Ta, Ty, T15;
341
 
          E T1N, T1l, T1V, Tg, TH, T19, T1X, T1p, T1P;
342
 
          /* Input group 1: rio at 0, 4, 8 and iio at 0, -8, -4. */
          {
343
 
               E T1, Tp, T4, T1g, Ts, T11, T10, T1h;
344
 
               T1 = rio[0];
345
 
               Tp = iio[0];
346
 
               {
347
 
                    E T2, T3, Tq, Tr;
348
 
                    T2 = rio[WS(ios, 4)];
349
 
                    T3 = iio[-WS(ios, 8)];
350
 
                    T4 = T2 + T3;
351
 
                    T1g = KP866025403 * (T2 - T3);
352
 
                    Tq = rio[WS(ios, 8)];
353
 
                    Tr = iio[-WS(ios, 4)];
354
 
                    Ts = Tq - Tr;
355
 
                    T11 = KP866025403 * (Tq + Tr);
356
 
               }
357
 
               T5 = T1 + T4;
358
 
               Tt = Tp - Ts;
359
 
               T10 = FNMS(KP500000000, T4, T1);
360
 
               T12 = T10 - T11;
361
 
               T1M = T10 + T11;
362
 
               T1h = FMA(KP500000000, Ts, Tp);
363
 
               T1i = T1g + T1h;
364
 
               T1U = T1h - T1g;
365
 
          }
366
 
          /* Input group 2: rio at 9, 1, 5 and iio at -9, -5, -1. */
          {
367
 
               E Th, TL, Tk, T1a, TK, T1r, T1b, T1q;
368
 
               Th = iio[-WS(ios, 9)];
369
 
               TL = rio[WS(ios, 9)];
370
 
               {
371
 
                    E Ti, Tj, TI, TJ;
372
 
                    Ti = rio[WS(ios, 1)];
373
 
                    Tj = rio[WS(ios, 5)];
374
 
                    Tk = Ti + Tj;
375
 
                    T1a = KP866025403 * (Ti - Tj);
376
 
                    TI = iio[-WS(ios, 5)];
377
 
                    TJ = iio[-WS(ios, 1)];
378
 
                    TK = TI + TJ;
379
 
                    T1r = KP866025403 * (TI - TJ);
380
 
               }
381
 
               Tl = Th + Tk;
382
 
               TM = TK - TL;
383
 
               T1b = FMA(KP500000000, TK, TL);
384
 
               T1c = T1a - T1b;
385
 
               T1Y = T1a + T1b;
386
 
               T1q = FNMS(KP500000000, Tk, Th);
387
 
               T1s = T1q + T1r;
388
 
               T1Q = T1q - T1r;
389
 
          }
390
 
          /* Input group 3: rio at 6, 2, 10 and iio at -6, -10, -2. */
          {
391
 
               E T6, Tx, T9, T1j, Tw, T14, T13, T1k;
392
 
               T6 = iio[-WS(ios, 6)];
393
 
               Tx = rio[WS(ios, 6)];
394
 
               {
395
 
                    E T7, T8, Tu, Tv;
396
 
                    T7 = iio[-WS(ios, 10)];
397
 
                    T8 = rio[WS(ios, 2)];
398
 
                    T9 = T7 + T8;
399
 
                    T1j = KP866025403 * (T7 - T8);
400
 
                    Tu = rio[WS(ios, 10)];
401
 
                    Tv = iio[-WS(ios, 2)];
402
 
                    Tw = Tu - Tv;
403
 
                    T14 = KP866025403 * (Tu + Tv);
404
 
               }
405
 
               Ta = T6 + T9;
406
 
               Ty = Tw + Tx;
407
 
               T13 = FNMS(KP500000000, T9, T6);
408
 
               T15 = T13 + T14;
409
 
               T1N = T13 - T14;
410
 
               T1k = FMS(KP500000000, Tw, Tx);
411
 
               T1l = T1j + T1k;
412
 
               T1V = T1k - T1j;
413
 
          }
414
 
          /* Input group 4: rio at 3, 7, 11 and iio at -3, -7, -11. */
          {
415
 
               E Tc, TD, Tf, T17, TG, T1o, T18, T1n;
416
 
               Tc = rio[WS(ios, 3)];
417
 
               TD = iio[-WS(ios, 3)];
418
 
               {
419
 
                    E Td, Te, TE, TF;
420
 
                    Td = iio[-WS(ios, 7)];
421
 
                    Te = iio[-WS(ios, 11)];
422
 
                    Tf = Td + Te;
423
 
                    T17 = KP866025403 * (Td - Te);
424
 
                    TE = rio[WS(ios, 7)];
425
 
                    TF = rio[WS(ios, 11)];
426
 
                    TG = TE + TF;
427
 
                    T1o = KP866025403 * (TE - TF);
428
 
               }
429
 
               Tg = Tc + Tf;
430
 
               TH = TD - TG;
431
 
               T18 = FMA(KP500000000, TG, TD);
432
 
               T19 = T17 + T18;
433
 
               T1X = T18 - T17;
434
 
               T1n = FNMS(KP500000000, Tf, Tc);
435
 
               T1p = T1n + T1o;
436
 
               T1P = T1n - T1o;
437
 
          }
438
 
          /* Outputs 0 (stored without twiddle) and 6 (uses W[10], W[11]). */
          {
439
 
               E Tb, Tm, TU, TW, TX, TY, TT, TV;
440
 
               Tb = T5 + Ta;
441
 
               Tm = Tg + Tl;
442
 
               TU = Tb - Tm;
443
 
               TW = Tt - Ty;
444
 
               TX = TH + TM;
445
 
               TY = TW - TX;
446
 
               rio[0] = Tb + Tm;
447
 
               iio[-WS(ios, 11)] = TW + TX;
448
 
               TT = W[10];
449
 
               TV = W[11];
450
 
               rio[WS(ios, 6)] = FNMS(TV, TY, TT * TU);
451
 
               iio[-WS(ios, 5)] = FMA(TV, TU, TT * TY);
452
 
          }
453
 
          /* Outputs 5 (W[8], W[9]) and 11 (W[20], W[21]). */
          {
454
 
               E T28, T2g, T2c, T2e;
455
 
               {
456
 
                    E T26, T27, T2a, T2b;
457
 
                    T26 = T1M - T1N;
458
 
                    T27 = T1X + T1Y;
459
 
                    T28 = T26 - T27;
460
 
                    T2g = T26 + T27;
461
 
                    T2a = T1U - T1V;
462
 
                    T2b = T1P - T1Q;
463
 
                    T2c = T2a + T2b;
464
 
                    T2e = T2a - T2b;
465
 
               }
466
 
               {
467
 
                    E T25, T29, T2d, T2f;
468
 
                    T25 = W[8];
469
 
                    T29 = W[9];
470
 
                    rio[WS(ios, 5)] = FNMS(T29, T2c, T25 * T28);
471
 
                    iio[-WS(ios, 6)] = FMA(T25, T2c, T29 * T28);
472
 
                    T2d = W[20];
473
 
                    T2f = W[21];
474
 
                    iio[0] = FMA(T2d, T2e, T2f * T2g);
475
 
                    rio[WS(ios, 11)] = FNMS(T2f, T2e, T2d * T2g);
476
 
               }
477
 
          }
478
 
          /* Outputs 9 (W[16], W[17]) and 3 (W[4], W[5]). */
          {
479
 
               E TA, TS, TO, TQ;
480
 
               {
481
 
                    E To, Tz, TC, TN;
482
 
                    To = Tg - Tl;
483
 
                    Tz = Tt + Ty;
484
 
                    TA = To + Tz;
485
 
                    TS = Tz - To;
486
 
                    TC = T5 - Ta;
487
 
                    TN = TH - TM;
488
 
                    TO = TC - TN;
489
 
                    TQ = TC + TN;
490
 
               }
491
 
               {
492
 
                    E Tn, TB, TP, TR;
493
 
                    Tn = W[16];
494
 
                    TB = W[17];
495
 
                    iio[-WS(ios, 2)] = FMA(Tn, TA, TB * TO);
496
 
                    rio[WS(ios, 9)] = FNMS(TB, TA, Tn * TO);
497
 
                    TP = W[4];
498
 
                    TR = W[5];
499
 
                    rio[WS(ios, 3)] = FNMS(TR, TS, TP * TQ);
500
 
                    iio[-WS(ios, 8)] = FMA(TP, TS, TR * TQ);
501
 
               }
502
 
          }
503
 
          /* Outputs 2 (W[2], W[3]) and 8 (W[14], W[15]). */
          {
504
 
               E T1S, T22, T20, T24;
505
 
               {
506
 
                    E T1O, T1R, T1W, T1Z;
507
 
                    T1O = T1M + T1N;
508
 
                    T1R = T1P + T1Q;
509
 
                    T1S = T1O - T1R;
510
 
                    T22 = T1O + T1R;
511
 
                    T1W = T1U + T1V;
512
 
                    T1Z = T1X - T1Y;
513
 
                    T20 = T1W - T1Z;
514
 
                    T24 = T1W + T1Z;
515
 
               }
516
 
               {
517
 
                    E T1L, T1T, T21, T23;
518
 
                    T1L = W[2];
519
 
                    T1T = W[3];
520
 
                    rio[WS(ios, 2)] = FNMS(T1T, T20, T1L * T1S);
521
 
                    iio[-WS(ios, 9)] = FMA(T1T, T1S, T1L * T20);
522
 
                    T21 = W[14];
523
 
                    T23 = W[15];
524
 
                    rio[WS(ios, 8)] = FNMS(T23, T24, T21 * T22);
525
 
                    iio[-WS(ios, 3)] = FMA(T23, T22, T21 * T24);
526
 
               }
527
 
          }
528
 
          /* Outputs 10 (W[18], W[19]) and 4 (W[6], W[7]). */
          {
529
 
               E T1C, T1I, T1G, T1K;
530
 
               {
531
 
                    E T1A, T1B, T1E, T1F;
532
 
                    T1A = T12 + T15;
533
 
                    T1B = T1p + T1s;
534
 
                    T1C = T1A - T1B;
535
 
                    T1I = T1A + T1B;
536
 
                    T1E = T1i + T1l;
537
 
                    T1F = T19 + T1c;
538
 
                    T1G = T1E - T1F;
539
 
                    T1K = T1E + T1F;
540
 
               }
541
 
               {
542
 
                    E T1z, T1D, T1H, T1J;
543
 
                    T1z = W[18];
544
 
                    T1D = W[19];
545
 
                    rio[WS(ios, 10)] = FNMS(T1D, T1G, T1z * T1C);
546
 
                    iio[-WS(ios, 1)] = FMA(T1D, T1C, T1z * T1G);
547
 
                    T1H = W[6];
548
 
                    T1J = W[7];
549
 
                    rio[WS(ios, 4)] = FNMS(T1J, T1K, T1H * T1I);
550
 
                    iio[-WS(ios, 7)] = FMA(T1J, T1I, T1H * T1K);
551
 
               }
552
 
          }
553
 
          /* Outputs 1 (W[0], W[1]) and 7 (W[12], W[13]). */
          {
554
 
               E T1e, T1y, T1u, T1w;
555
 
               {
556
 
                    E T16, T1d, T1m, T1t;
557
 
                    T16 = T12 - T15;
558
 
                    T1d = T19 - T1c;
559
 
                    T1e = T16 - T1d;
560
 
                    T1y = T16 + T1d;
561
 
                    T1m = T1i - T1l;
562
 
                    T1t = T1p - T1s;
563
 
                    T1u = T1m + T1t;
564
 
                    T1w = T1m - T1t;
565
 
               }
566
 
               {
567
 
                    E TZ, T1f, T1v, T1x;
568
 
                    TZ = W[0];
569
 
                    T1f = W[1];
570
 
                    rio[WS(ios, 1)] = FNMS(T1f, T1u, TZ * T1e);
571
 
                    iio[-WS(ios, 10)] = FMA(TZ, T1u, T1f * T1e);
572
 
                    T1v = W[12];
573
 
                    T1x = W[13];
574
 
                    iio[-WS(ios, 4)] = FMA(T1v, T1w, T1x * T1y);
575
 
                    rio[WS(ios, 7)] = FNMS(T1x, T1w, T1v * T1y);
576
 
               }
577
 
          }
578
 
     }
579
 
     /* W now points past the twiddle entries consumed by the loop. */
     return W;
580
 
}
581
 
 
582
 
/*
 * Twiddle instruction table referenced by the descriptor `desc` below:
 * one TW_FULL entry carrying the values 0 and 12, followed by a TW_NEXT
 * entry carrying 1 and 0.
 * NOTE(review): the meaning of the fields is defined by tw_instr, declared
 * outside this file; 12 presumably is the codelet size, consistent with
 * hb_12 consuming 22 = 2 * (12 - 1) entries of W per iteration -- confirm.
 */
static const tw_instr twinstr[] = {
583
 
     {TW_FULL, 0, 12},
584
 
     {TW_NEXT, 1, 0}
585
 
};
586
 
 
587
 
/*
 * Descriptor passed to the registration call for the non-FMA variant: the
 * value 12, the name "hb_12", the twiddle table, &GENUS, the counts
 * {88, 30, 30, 0} (the same 88 / 30 / 30 figures quoted as additions /
 * multiplications / fused multiply-adds in the generator comment of this
 * variant), and three trailing zeros.
 * NOTE(review): field meanings come from hc2hc_desc, declared elsewhere.
 */
static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {88, 30, 30, 0}, 0, 0, 0 };
588
 
 
589
 
/*
 * Registration entry point (non-FMA variant): hands the hb_12 function and
 * its descriptor `desc` to X(khc2hc_register) for planner p.
 */
void X(codelet_hb_12) (planner *p) {
590
 
     X(khc2hc_register) (p, hb_12, &desc);
591
 
}
592
 
#endif                          /* HAVE_FMA */