1
// -------------------------------------------------------------------------
3
// Copyright (C) 2005-2007 Fons Adriaensen <fons@kokkinizita.net>
5
// This program is free software; you can redistribute it and/or modify
6
// it under the terms of the GNU General Public License as published by
7
// the Free Software Foundation; either version 2 of the License, or
8
// (at your option) any later version.
10
// This program is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
// GNU General Public License for more details.
15
// You should have received a copy of the GNU General Public License
16
// along with this program; if not, write to the Free Software
17
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
// -------------------------------------------------------------------------
22
#define _XOPEN_SOURCE 600 // for posix_memalign()
31
#ifdef USE_SSE_AND_3DN
32
extern "C" void mac_sse (fftwf_complex *A, fftwf_complex *B, fftwf_complex *S, int N);
33
extern "C" void mac_3dn (fftwf_complex *A, fftwf_complex *B, fftwf_complex *S, int N);
35
extern "C" void mac_sse (fftwf_complex *A, fftwf_complex *B, fftwf_complex *S, int N) {}
36
extern "C" void mac_3dn (fftwf_complex *A, fftwf_complex *B, fftwf_complex *S, int N) {}
40
Convdata::Convdata (size_t part, size_t size, unsigned int opts) :
41
_refc (0), _part (part), _opts (opts), _nact (0), _buff (0), _plan (0)
45
if ( (part < 0x000040)
47
|| (part & (part - 1))
49
|| (size > 0x1000 * part)) abort ();
51
#ifndef USE_SSE_AND_3DN
55
_npar = (size + part - 1) / part;
57
_fftd = new fftwf_complex * [_npar];
58
for (i = 0; i < _npar; i++) _fftd [i] = 0;
62
Convdata::~Convdata (void)
67
for (i = 0; i < _npar; i++) free (_fftd [i]);
72
void Convdata::prepare_part (unsigned int ipar, float gain, float *data, int step)
76
if (ipar >= _npar) abort ();
83
posix_memalign ((void **)(&_buff), 16, 2 * _part * sizeof (float));
84
memset (_buff + _part, 0, _part * sizeof (float));
85
_plan = fftwf_plan_dft_r2c_1d (2 * _part, _buff, _fftd [ipar], FFTW_ESTIMATE);
87
for (i = 0; i < _part; i++) _buff [i] = data [i * step] * gain;
90
posix_memalign ((void **)(_fftd + ipar), 16, (_part + 1) * sizeof (fftwf_complex));
93
fftwf_execute_dft_r2c (_plan, _buff, _fftd [ipar]);
94
if (_opts) swap (_fftd [ipar], _part);
96
else if (_fftd [ipar])
105
void Convdata::prepare_done (void)
110
fftwf_destroy_plan (_plan);
117
void Convdata::swap (fftwf_complex *p, size_t n)
137
Convolver::Convolver (size_t part, size_t size, unsigned int opts, unsigned int nip, unsigned int nop) :
138
_part (part), _opts (opts), _nip (nip), _nop (nop)
142
if ( (part < 0x000040)
144
|| (part & (part - 1))
146
|| (size > 4096 * part)
149
|| (nip * nop > 1024)) abort ();
151
#ifndef USE_SSE_AND_3DN
155
_npar = (size + part - 1) / part;
156
_conv = new Convdata *[_nip * _nop];
157
for (i = 0; i < _nip * _nop; i++) _conv [i] = 0;
158
posix_memalign ((void **)(&_ip_buff), 16, (_nip + 1) * _part * sizeof (float));
159
posix_memalign ((void **)(&_oA_buff), 16, 2 * _nop * _part * sizeof (float));
160
posix_memalign ((void **)(&_oB_buff), 16, 2 * _nop * _part * sizeof (float));
161
posix_memalign ((void **)(&_mac_data), 16, (_part + 1) * sizeof (fftwf_complex));
162
_fwd_data = new fftwf_complex * [_nip * _npar];
163
for (i = 0; i < _nip * _npar; i++)
165
posix_memalign ((void **)(_fwd_data + i), 16, (_part + 1) * sizeof (fftwf_complex));
167
_fwd_plan = fftwf_plan_dft_r2c_1d (2 * part, _ip_buff, _mac_data, FFTW_ESTIMATE);
168
_rev_plan = fftwf_plan_dft_c2r_1d (2 * part, _mac_data, _oA_buff, FFTW_ESTIMATE);
174
Convolver::~Convolver (void)
178
fftwf_destroy_plan (_fwd_plan);
179
fftwf_destroy_plan (_rev_plan);
180
for (i = 0; i < _nip * _npar; i++) free (_fwd_data [i]);
186
for (i = 0; i < _nip * _nop; i++)
188
if (_conv [i] && (_conv [i]->dec_refc () == 0)) delete _conv [i];
194
int Convolver::set_conv (unsigned int ip, unsigned int op, Convdata *C)
198
if ((ip >= _nip) || (op >= _nop)) return -1;
201
if (_conv [i] && (_conv [i]->dec_refc () == 0))
208
if (C->_part != _part) return -1;
216
void Convolver::reset ()
220
memset (_ip_buff, 0, (_nip + 1) * _part * sizeof (float));
221
memset (_oA_buff, 0, 2 * _nop * _part * sizeof (float));
222
memset (_oB_buff, 0, 2 * _nop * _part * sizeof (float));
223
for (i = 0; i < _nip * _npar; i++)
225
memset (_fwd_data [i], 0, (_part + 1) * sizeof (fftwf_complex));
233
void Convolver::process ()
235
unsigned int g, h, i, j;
237
fftwf_complex *q1, *q2;
240
if (_offs == 0) _offs = _npar;
243
p1 = _ip_buff + _nip * _part;
244
for (i = 0; i < _nip; i++)
246
memset (p1, 0, _part * sizeof (float));
248
q1 = _fwd_data [_offs + (_nip - 1 - i) * _npar];
249
fftwf_execute_dft_r2c (_fwd_plan, p1, q1);
250
if (_opts) Convdata::swap (q1, _part);
256
p2 = _oB_buff + _part;
261
p2 = _oA_buff + _part;
264
for (j = 0; j < _nop; j++)
266
memset (_mac_data, 0, (_part + 1) * sizeof (fftwf_complex));
267
for (i = 0; i < _nip; i++)
269
C = _conv [i + j * _nip];
272
for (h = 0; h < _npar; h++)
278
if (g >= _npar) g -= _npar;
279
q1 = _fwd_data [g + i * _npar];
280
if (_opts == CONV_OPT_SSE) mac_sse (q1, q2, _mac_data, _part >> 2);
281
else if (_opts == CONV_OPT_3DN) mac_3dn (q1, q2, _mac_data, _part >> 2);
282
else for (g = 0; g <= _part; g++)
284
_mac_data [g][0] += q1 [g][0] * q2 [g][0] - q1 [g][1] * q2 [g][1];
285
_mac_data [g][1] += q1 [g][0] * q2 [g][1] + q1 [g][1] * q2 [g][0];
292
if (_opts) Convdata::swap (_mac_data, _part);
293
fftwf_execute_dft_c2r (_rev_plan, _mac_data, p1);
294
for (g = 0; g < _part; g++) *p1++ += *p2++;