~ubuntu-branches/ubuntu/vivid/soundscaperenderer/vivid

« back to all changes in this revision

Viewing changes to apf/apf/convolver.h

Committer: Package Import Robot
Author(s): IOhannes m zmölnig (Debian/GNU)
Date: 2014-05-08 16:58:09 UTC
Revision ID: package-import@ubuntu.com-20140508165809-7tz9dhu5pvo5wy25

Tags: upstream-0.4.1~dfsg

Import upstream version 0.4.1~dfsg

files added:

.gitignore

AUTHORS

COPYING

INSTALL

Makefile.am

NEWS

README

apf/AUTHORS

apf/COPYING

apf/NEWS

apf/README

apf/apf

apf/apf/biquad.h

apf/apf/blockdelayline.h

apf/apf/combine_channels.h

apf/apf/commandqueue.h

apf/apf/container.h

apf/apf/convolver.h

apf/apf/denormalprevention.h

apf/apf/dummy_thread_policy.h

apf/apf/fftwtools.h

apf/apf/iterator.h

apf/apf/jack_policy.h

apf/apf/jackclient.h

apf/apf/lockfreefifo.h

apf/apf/math.h

apf/apf/mextools.h

apf/apf/mimoprocessor.h

apf/apf/mimoprocessor_file_io.h

apf/apf/misc.h

apf/apf/parameter_map.h

apf/apf/pointer_policy.h

apf/apf/portaudio_policy.h

apf/apf/posix_thread_policy.h

apf/apf/rtlist.h

apf/apf/shareddata.h

apf/apf/sndfiletools.h

apf/apf/stopwatch.h

apf/apf/stringtools.h

apf/doc

apf/doc/Doxyfile

apf/doc/documentation.cpp

apf/doc/mimoprocessor.cpp

apf/examples

apf/examples/Makefile

apf/examples/README

apf/examples/audiofile_simpleprocessor.cpp

apf/examples/dummy_example.cpp

apf/examples/flext_simpleprocessor.cpp

apf/examples/jack_change_volume.cpp

apf/examples/jack_connections.cpp

apf/examples/jack_convolver.cpp

apf/examples/jack_dynamic_inputs.cpp

apf/examples/jack_dynamic_outputs.cpp

apf/examples/jack_matrix.cpp

apf/examples/jack_minimal.cpp

apf/examples/jack_query_thread.cpp

apf/examples/jack_simpleprocessor.cpp

apf/examples/mex_simpleprocessor.cpp

apf/examples/package.txt

apf/examples/portaudio_simpleprocessor.cpp

apf/examples/simpleprocessor.h

apf/examples/startpd.sh

apf/examples/test_simpleprocessor.pd

apf/misc

apf/misc/Makefile.dependencies

apf/performance_tests

apf/performance_tests/Makefile

apf/performance_tests/biquad_count_denormals.cpp

apf/performance_tests/biquad_denormals.cpp

apf/performance_tests/crossfade.cpp

apf/performance_tests/interpolation.cpp

apf/unit_tests

apf/unit_tests/Makefile

apf/unit_tests/catch

apf/unit_tests/catch/README

apf/unit_tests/catch/catch.hpp

apf/unit_tests/iterator_test_macros.h

apf/unit_tests/main.cpp

apf/unit_tests/test_accumulating_iterator.cpp

apf/unit_tests/test_biquad.cpp

apf/unit_tests/test_blockdelayline.cpp

apf/unit_tests/test_cast_iterator.cpp

apf/unit_tests/test_circular_iterator.cpp

apf/unit_tests/test_combine_channels.cpp

apf/unit_tests/test_container.cpp

apf/unit_tests/test_convolver.cpp

apf/unit_tests/test_discard_iterator.cpp

apf/unit_tests/test_dual_iterator.cpp

apf/unit_tests/test_fftwtools.cpp

apf/unit_tests/test_index_iterator.cpp

apf/unit_tests/test_iterator.cpp

apf/unit_tests/test_iterator_combinations.cpp

apf/unit_tests/test_math.cpp

apf/unit_tests/test_mimoprocessor.cpp

apf/unit_tests/test_misc.cpp

apf/unit_tests/test_stride_iterator.cpp

apf/unit_tests/test_stringtools.cpp

apf/unit_tests/test_transform_iterator.cpp

apf/unit_tests/test_trivial_iterator.cpp

autogen.sh

cleanse.sh

configure.ac

data

data/MacOSX

data/MacOSX/.background

data/MacOSX/.background/background.png

data/MacOSX/DMG-Layout.applescript.in

data/MacOSX/Getting-Started.txt

data/MacOSX/Info.plist.in

data/MacOSX/Jack.webloc

data/MacOSX/Makefile.am

data/MacOSX/SSRIcon.icns

data/MacOSX/dylibbundler

data/MacOSX/dylibbundler/dylibbundler.png

data/MacOSX/dylibbundler/index.html

data/MacOSX/dylibbundler/maclib.jpg

data/MacOSX/dylibbundler/makefile

data/MacOSX/dylibbundler/src

data/MacOSX/dylibbundler/src/Dependency.cpp

data/MacOSX/dylibbundler/src/Dependency.h

data/MacOSX/dylibbundler/src/DylibBundler.cpp

data/MacOSX/dylibbundler/src/DylibBundler.h

data/MacOSX/dylibbundler/src/Settings.cpp

data/MacOSX/dylibbundler/src/Settings.h

data/MacOSX/dylibbundler/src/Utils.cpp

data/MacOSX/dylibbundler/src/Utils.h

data/MacOSX/dylibbundler/src/main.cpp

data/MacOSX/qt.conf

data/MacOSX/run-ssr.scpt

data/MacOSX/run-ssr.sh

data/Makefile.am

data/asdf.xsd

data/images

data/images/listener.png

data/images/listener_background.png

data/images/listener_shadow.png

data/images/pause_button.png

data/images/pause_button_pressed.png

data/images/play_button.png

data/images/play_button_pressed.png

data/images/processing_button.png

data/images/processing_button_pressed.png

data/images/scene_menu_item.png

data/images/scene_menu_item_selected.png

data/images/skip_back_button.png

data/images/skip_back_button_pressed.png

data/images/source_shadow.png

data/images/ssr_logo.png

data/images/ssr_logo_large.png

data/impulse_responses

data/impulse_responses/hrirs

data/impulse_responses/hrirs/hrirs_fabian.wav

data/impulse_responses/wfs_prefilters

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1300_44100.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1300_48000.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1800_44100.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1800_48000.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_120_1500_44100.wav

data/local_ssr.sh

data/matlab_scripts

data/matlab_scripts/make_wfs_prefilter.m

data/matlab_scripts/prepare_hrirs_kemar.m

data/reproduction_setups

data/reproduction_setups/2.0.asd

data/reproduction_setups/2.1.asd

data/reproduction_setups/5.1.asd

data/reproduction_setups/asdf2html.xsl

data/reproduction_setups/circle.asd

data/reproduction_setups/loudspeaker_setup_with_nearly_all_features.asd

data/reproduction_setups/rostock_horizontal.asd

data/reproduction_setups/rounded_rectangle.asd

data/scenes

data/scenes/asdf2html.xsl

data/scenes/live_input.asd

data/ssr

data/ssr.conf.example

data/ssr.conf.local

doc/manual

doc/manual/SoundScapeRenderer.tex

doc/manual/general.tex

doc/manual/gui.tex

doc/manual/images

doc/manual/images/anti_aliasing.eps

doc/manual/images/coordinate_system.eps

doc/manual/images/coordinate_system.svg

doc/manual/images/local_coordinate_system.eps

doc/manual/images/local_coordinate_system.svg

doc/manual/images/moving_source_without_doppler.svg

doc/manual/images/screenshot.eps

doc/manual/images/screenshot.png

doc/manual/images/screenshot_spd.eps

doc/manual/images/signal_processing.eps

doc/manual/images/signal_processing.svg

doc/manual/images/ssr_logo.mps

doc/manual/latexmkrc

doc/manual/network.tex

doc/manual/operation.tex

doc/manual/references.bib

doc/manual/renderers.tex

doc/manual/todo.tex

macros.m4

mex/Makefile

mex/README.md

mex/ssr_aap.cpp

mex/ssr_binaural.cpp

mex/ssr_helper.m

mex/ssr_mex.h

mex/ssr_nfc_hoa.cpp

mex/ssr_vbap.cpp

mex/ssr_wfs.cpp

mex/test_ssr.m

release.sh

src/Doxyfile

src/Makefile.am

src/aaprenderer.h

src/audioplayer.cpp

src/audioplayer.h

src/audiorecorder.cpp

src/audiorecorder.h

src/binauralrenderer.h

src/boostnetwork

src/boostnetwork/commandparser.cpp

src/boostnetwork/commandparser.h

src/boostnetwork/connection.cpp

src/boostnetwork/connection.h

src/boostnetwork/networksubscriber.cpp

src/boostnetwork/networksubscriber.h

src/boostnetwork/server.cpp

src/boostnetwork/server.h

src/brsrenderer.h

src/coding_style.txt

src/configuration.cpp

src/configuration.h

src/controller.h

src/directionalpoint.cpp

src/directionalpoint.h

src/genericrenderer.h

src/gui

src/gui/qclicktextlabel.cpp

src/gui/qclicktextlabel.h

src/gui/qcpulabel.cpp

src/gui/qcpulabel.h

src/gui/qfilemenulabel.cpp

src/gui/qfilemenulabel.h

src/gui/qgui.cpp

src/gui/qgui.h

src/gui/qguiframe.cpp

src/gui/qguiframe.h

src/gui/qopenglplotter.cpp

src/gui/qopenglplotter.h

src/gui/qscenebutton.cpp

src/gui/qscenebutton.h

src/gui/qsourceproperties.cpp

src/gui/qsourceproperties.h

src/gui/qssrtimeline.cpp

src/gui/qssrtimeline.h

src/gui/qtimeedit.cpp

src/gui/qtimeedit.h

src/gui/quserinterface.cpp

src/gui/quserinterface.h

src/gui/qvolumeslider.cpp

src/gui/qvolumeslider.h

src/gui/qzoomlabel.cpp

src/gui/qzoomlabel.h

src/hoacoefficients.h

src/laplace_coeffs_double.h

src/laplace_coeffs_float.h

src/loudspeaker.h

src/loudspeakerrenderer.h

src/maptools.h

src/nfchoarenderer.h

src/orientation.cpp

src/orientation.h

src/position.cpp

src/position.h

src/posixpathtools.h

src/publisher.h

src/razor-ahrs

src/razor-ahrs/Example.cpp

src/razor-ahrs/GPL.txt

src/razor-ahrs/README.txt

src/razor-ahrs/RazorAHRS.cpp

src/razor-ahrs/RazorAHRS.h

src/rendererbase.h

src/rendersubscriber.h

src/scene.cpp

src/scene.h

src/source.h

src/ssr_aap.cpp

src/ssr_binaural.cpp

src/ssr_brs.cpp

src/ssr_generic.cpp

src/ssr_global.cpp

src/ssr_global.h

src/ssr_nfc_hoa.cpp

src/ssr_vbap.cpp

src/ssr_wfs.cpp

src/subscriber.h

src/timetools.h

src/tracker.h

src/trackerintersense.cpp

src/trackerintersense.h

src/trackerpolhemus.cpp

src/trackerpolhemus.h

src/trackerrazor.cpp

src/trackerrazor.h

src/trackervrpn.cpp

src/trackervrpn.h

src/vbaprenderer.h

src/wfsrenderer.h

src/xmlparser.cpp

src/xmlparser.h

update_apf.txt

Show diffs side-by-side

added added

removed removed

apf/apf/convolver.h

/******************************************************************************

* Telekom Innovation Laboratories, TU Berlin *

* *

* This file is part of the Audio Processing Framework (APF). *

* *

* The APF is free software: you can redistribute it and/or modify it under *

* the terms of the GNU General Public License as published by the Free *

* Software Foundation, either version 3 of the License, or (at your option) *

* any later version. *

* *

* The APF is distributed in the hope that it will be useful, but WITHOUT ANY *

* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS *

* FOR A PARTICULAR PURPOSE. *

* See the GNU General Public License for more details. *

* *

* You should have received a copy of the GNU General Public License along *

* with this program. If not, see <http://www.gnu.org/licenses/>. *

* *

* http://AudioProcessingFramework.github.com *

******************************************************************************/

/// @file

/// Convolution engine.

#ifndef APF_CONVOLVER_H

#define APF_CONVOLVER_H

#include <algorithm> // for std::transform()

#include <functional> // for std::bind()

#include <cassert>

#ifdef __SSE__

#include <xmmintrin.h> // for SSE instrinsics

#endif

#include "apf/math.h"

#include "apf/fftwtools.h" // for fftw_allocator and fftw traits

#include "apf/container.h" // for fixed_vector, fixed_list

#include "apf/iterator.h" // for make_*_iterator()

namespace apf

{

/** Convolution engine.

* A convolution engine normally consists of an Input, a Filter and

* an Output / StaticOutput.

* There are also combinations:

* Input + Output = Convolver; Input + StaticOutput = StaticConvolver

* Uses (uniformly) partitioned convolution.

* TODO: describe thread (un)safety

**/

namespace conv

{

/// Calculate necessary number of partitions for a given filter length

static size_t min_partitions(size_t block_size, size_t filter_size)

{

return (filter_size + block_size - 1) / block_size;

}

/// Two blocks of time-domain or FFT (half-complex) data.

struct fft_node : fixed_vector<float, fftw_allocator<float>>

{

explicit fft_node(size_t n)

: fixed_vector<float, fftw_allocator<float>>(n)

, zero(true)

{}

fft_node(const fft_node&) = delete;

fft_node(fft_node&&) = default;

fft_node& operator=(const fft_node& rhs)

{

assert(this->size() == rhs.size());

if (rhs.zero)

{

this->zero = true;

}

else

{

std::copy(rhs.begin(), rhs.end(), this->begin());

this->zero = false;

}

return *this;

}

// WARNING: The 'zero' flag allows saving computation power, but it also

// raises the risk of programming errors! Handle with care!

/// To avoid unnecessary FFTs and filling buffers with zeros.

/// @note If zero == true, the buffer itself is not necessarily all zeros!

bool zero;

};

100

/// Container holding a number of FFT blocks.

101

struct Filter : fixed_vector<fft_node>

102

{

103

/// Constructor; create empty filter.

104

Filter(size_t block_size_, size_t partitions_)

105

: fixed_vector<fft_node>(partitions_, block_size_ * 2)

106

{

107

assert(this->partitions() > 0);

108

}

109

110

/// Constructor from time domain coefficients.

111

template<typename In>

112

Filter(size_t block_size_, In first, In last, size_t partitions_ = 0);

113

// Implementation below, after definition of Transform

114

115

size_t block_size() const { return this->front().size() / 2; }

116

size_t partition_size() const { return this->front().size(); }

117

size_t partitions() const { return this->size(); }

118

};

119

120

/// Forward-FFT-related functions

121

class TransformBase

122

{

123

public:

124

template<typename In>

125

void prepare_filter(In first, In last, Filter& filter) const;

126

127

size_t block_size() const { return _block_size; }

128

size_t partition_size() const { return _partition_size; }

129

130

template<typename In>

131

In prepare_partition(In first, In last, fft_node& partition) const;

132

133

protected:

134

explicit TransformBase(size_t block_size_);

135

136

TransformBase(TransformBase&&) = default;

137

~TransformBase() = default;

138

139

using scoped_plan = fftw<float>::scoped_plan;

140

using plan_ptr = std::unique_ptr<scoped_plan>;

141

142

plan_ptr _create_plan(float* array) const;

143

144

/// In-place FFT

145

void _fft(float* first) const

146

{

147

fftw<float>::execute_r2r(*_fft_plan, first, first);

148

_sort_coefficients(first);

149

}

150

151

plan_ptr _fft_plan;

152

153

private:

154

void _sort_coefficients(float* first) const;

155

156

const size_t _block_size;

157

const size_t _partition_size;

158

};

159

160

TransformBase::TransformBase(size_t block_size_)

161

: _block_size(block_size_)

162

, _partition_size(2 * _block_size)

163

{

164

if (_block_size % 8 != 0)

165

{

166

throw std::logic_error("Convolver: block size must be a multiple of 8!");

167

}

168

}

169

170

/** Create in-place FFT plan for halfcomplex data format.

171

* @note FFT plans are not re-entrant except when using FFTW_THREADSAFE!

172

* @note Once a plan of a certain size exists, creating further plans

173

* is very fast because "wisdom" is shared (and therefore the creation of

174

* plans is not thread-safe).

175

* It is not necessary to re-use plans in other convolver instances.

176

**/

177

TransformBase::plan_ptr

178

TransformBase::_create_plan(float* array) const

179

{

180

return plan_ptr(new scoped_plan(fftw<float>::plan_r2r_1d, int(_partition_size)

181

, array, array, FFTW_R2HC, FFTW_PATIENT));

182

}

183

184

/** %Transform time-domain samples.

185

* If there are too few input samples, the rest is zero-padded, if there are

186

* too few blocks in the container @p c, the rest of the samples is ignored.

187

* @param first Iterator to first time-domain sample

188

* @param last Past-the-end iterator

189

* @param[out] filter Target container

190

**/

191

template<typename In>

192

void

193

TransformBase::prepare_filter(In first, In last, Filter& filter) const

194

{

195

for (auto& partition: filter)

196

{

197

first = this->prepare_partition(first, last, partition);

198

}

199

}

200

201

/** FFT of one block.

202

* If there are too few coefficients, the rest is zero-padded.

203

* @param first Iterator to first coefficient

204

* @param last Past-the-end iterator

205

* @param[out] partition Target partition

206

* @tparam In Forward iterator

207

* @return Iterator to the first coefficient of the next block (for the next

208

* iteration, if needed)

209

**/

210

template<typename In>

211

212

TransformBase::prepare_partition(In first, In last, fft_node& partition) const

213

{

214

assert(size_t(std::distance(partition.begin(), partition.end()))

215

== _partition_size);

216

217

auto chunk = std::min(_block_size, size_t(std::distance(first, last)));

218

219

if (chunk == 0)

220

{

221

partition.zero = true;

222

// No FFT has to be done (FFT of zero is also zero)

223

}

224

else

225

{

226

std::copy(first, first + chunk, partition.begin());

227

std::fill(partition.begin() + chunk, partition.end(), 0.0f); // zero padding

228

_fft(partition.data());

229

partition.zero = false;

230

}

231

return first + chunk;

232

}

233

234

/** Sort the FFT coefficients to be in proper place for the efficient

235

* multiplication of the spectra.

236

**/

237

void

238

TransformBase::_sort_coefficients(float* data) const

239

{

240

auto buffer = fixed_vector<float>(_partition_size);

241

242

size_t base = 8;

243

244

buffer[0] = data[0];

245

buffer[1] = data[1];

246

buffer[2] = data[2];

247

buffer[3] = data[3];

248

buffer[4] = data[_block_size];

249

buffer[5] = data[_partition_size - 1];

250

buffer[6] = data[_partition_size - 2];

251

buffer[7] = data[_partition_size - 3];

252

253

for (size_t i = 0; i < (_partition_size / 8-1); i++)

254

{

255

for (size_t ii = 0; ii < 4; ii++)

256

{

257

buffer[base+ii] = data[base/2+ii];

258

}

259

260

for (size_t ii = 0; ii < 4; ii++)

261

{

262

buffer[base+4+ii] = data[_partition_size-base/2-ii];

263

}

264

265

base += 8;

266

}

267

268

std::copy(buffer.begin(), buffer.end(), data);

269

}

270

271

/// Helper class to prepare filters

272

struct Transform : TransformBase

273

{

274

Transform(size_t block_size_)

275

: TransformBase(block_size_)

276

{

277

// Temporary memory area for FFTW planning routines

278

fft_node planning_space(this->partition_size());

279

_fft_plan = _create_plan(planning_space.data());

280

}

281

};

282

283

template<typename In>

284

Filter::Filter(size_t block_size_, In first, In last, size_t partitions_)

285

: fixed_vector<fft_node>(partitions_ ? partitions_

286

: min_partitions(block_size_, std::distance(first, last))

287

, block_size_ * 2)

288

{

289

assert(this->partitions() > 0);

290

291

Transform(block_size_).prepare_filter(first, last, *this);

292

}

293

294

/** %Input stage of convolution.

295

* New audio data is fed in here, further processing happens in Output.

296

**/

297

struct Input : TransformBase

298

{

299

/// @param block_size_ audio block size

300

/// @param partitions_ number of partitions

301

Input(size_t block_size_, size_t partitions_)

302

: TransformBase(block_size_)

303

// One additional list element for preparing the upcoming partition:

304

, spectra(partitions_ + 1, this->partition_size())

305

{

306

assert(partitions_ > 0);

307

308

_fft_plan = _create_plan(spectra.front().data());

309

}

310

311

template<typename In>

312

void add_block(In first);

313

314

size_t partitions() const { return spectra.size() - 1; }

315

316

/// Spectra of the partitions (double-blocks) of the input signal to be

317

/// convolved. The first element is the most recent signal chunk.

318

fixed_list<fft_node> spectra;

319

};

320

321

/** Add a block of time-domain input samples.

322

* @param first Iterator to first sample.

323

* @tparam In Forward iterator

324

**/

325

template<typename In>

326

void

327

Input::add_block(In first)

328

{

329

In last = first;

330

std::advance(last, this->block_size());

331

332

// rotate buffers (this->spectra.size() is always at least 2)

333

this->spectra.move(--this->spectra.end(), this->spectra.begin());

334

335

auto& current = this->spectra.front();

336

auto& next = this->spectra.back();

337

338

if (math::has_only_zeros(first, last))

339

{

340

next.zero = true;

341

342

if (current.zero)

343

{

344

// Nothing to be done, actual data is ignored

345

}

346

else

347

{

348

// If first half is not zero, second half must be filled with zeros

349

std::fill(current.begin() + this->block_size(), current.end(), 0.0f);

350

}

351

}

352

else

353

{

354

if (current.zero)

355

{

356

// First half must be actually filled with zeros

357

std::fill(current.begin(), current.begin() + this->block_size(), 0.0f);

358

}

359

360

// Copy data to second half of the current partition

361

std::copy(first, last, current.begin() + this->block_size());

362

current.zero = false;

363

// Copy data to first half of the upcoming partition

364

std::copy(first, last, next.begin());

365

next.zero = false;

366

}

367

368

if (current.zero)

369

{

370

// Nothing to be done, FFT of zero is also zero

371

}

372

else

373

{

374

_fft(current.data());

375

}

376

}

377

378

/// Base class for Output and StaticOutput

379

class OutputBase

380

{

381

public:

382

float* convolve(float weight = 1.0f);

383

384

size_t block_size() const { return _input.block_size(); }

385

size_t partitions() const { return _filter_ptrs.size(); }

386

387

protected:

388

explicit OutputBase(const Input& input);

389

390

// This is non-const to allow automatic move-constructor:

391

fft_node _empty_partition;

392

393

using filter_ptrs_t = fixed_vector<const fft_node*>;

394

filter_ptrs_t _filter_ptrs;

395

396

private:

397

void _multiply_spectra();

398

void _multiply_partition_cpp(const float* signal, const float* filter);

399

#ifdef __SSE__

400

void _multiply_partition_simd(const float* signal, const float* filter);

401

#endif

402

403

void _unsort_coefficients();

404

405

void _ifft();

406

407

const Input& _input;

408

409

const size_t _partition_size;

410

411

fft_node _output_buffer;

412

fftw<float>::scoped_plan _ifft_plan;

413

};

414

415

OutputBase::OutputBase(const Input& input)

416

: _empty_partition(0)

417

// Initialize with empty partition

418

, _filter_ptrs(input.partitions(), &_empty_partition)

419

, _input(input)

420

, _partition_size(input.partition_size())

421

, _output_buffer(_partition_size)

422

, _ifft_plan(fftw<float>::plan_r2r_1d, int(_partition_size)

423

, _output_buffer.data()

424

, _output_buffer.data(), FFTW_HC2R, FFTW_PATIENT)

425

{

426

assert(_filter_ptrs.size() > 0);

427

}

428

429

/** Fast convolution of one audio block.

430

* %Input data has to be supplied with Input::add_block().

431

* @param weight amplitude weighting factor for current audio block.

432

* The filter has to be set in the constructor of StaticOutput or via

433

* Output::set_filter().

434

* @return pointer to the first sample of the convolved (and weighted) signal

435

**/

436

float*

437

OutputBase::convolve(float weight)

438

{

439

_multiply_spectra();

440

441

// The first half will be discarded

442

auto second_half = make_begin_and_end(

443

_output_buffer.begin() + _input.block_size(), _output_buffer.end());

444

445

assert(static_cast<size_t>(

446

std::distance(second_half.begin(), second_half.end()))

447

== _input.block_size());

448

449

if (_output_buffer.zero)

450

{

451

// Nothing to be done, IFFT of zero is also zero.

452

// _output_buffer was already reset to zero in _multiply_spectra().

453

}

454

else

455

{

456

_ifft();

457

458

// normalize buffer (fftw3 does not do this)

459

const auto norm = weight / float(_partition_size);

460

for (auto& x: second_half)

461

{

462

x *= norm;

463

}

464

}

465

return &second_half[0];

466

}

467

468

void

469

OutputBase::_multiply_partition_cpp(const float* signal, const float* filter)

470

{

471

// see http://www.ludd.luth.se/~torger/brutefir.html#bruteconv_4

472

473

auto d1s = _output_buffer[0] + signal[0] * filter[0];

474

auto d2s = _output_buffer[4] + signal[4] * filter[4];

475

476

for (size_t nn = 0; nn < _partition_size; nn += 8)

477

{

478

// real parts

479

_output_buffer[nn+0] += signal[nn+0] * filter[nn + 0] -

480

signal[nn+4] * filter[nn + 4];

481

_output_buffer[nn+1] += signal[nn+1] * filter[nn + 1] -

482

signal[nn+5] * filter[nn + 5];

483

_output_buffer[nn+2] += signal[nn+2] * filter[nn + 2] -

484

signal[nn+6] * filter[nn + 6];

485

_output_buffer[nn+3] += signal[nn+3] * filter[nn + 3] -

486

signal[nn+7] * filter[nn + 7];

487

488

// imaginary parts

489

_output_buffer[nn+4] += signal[nn+0] * filter[nn + 4] +

490

signal[nn+4] * filter[nn + 0];

491

_output_buffer[nn+5] += signal[nn+1] * filter[nn + 5] +

492

signal[nn+5] * filter[nn + 1];

493

_output_buffer[nn+6] += signal[nn+2] * filter[nn + 6] +

494

signal[nn+6] * filter[nn + 2];

495

_output_buffer[nn+7] += signal[nn+3] * filter[nn + 7] +

496

signal[nn+7] * filter[nn + 3];

497

498

} // for

499

500

_output_buffer[0] = d1s;

501

_output_buffer[4] = d2s;

502

}

503

504

#ifdef __SSE__

505

void

506

OutputBase::_multiply_partition_simd(const float* signal, const float* filter)

507

{

508

// 16 byte alignment is needed for _mm_load_ps()!

509

// This should be the case anyway because fftwf_malloc() is used.

510

511

auto dc = _output_buffer[0] + signal[0] * filter[0];

512

auto ny = _output_buffer[4] + signal[4] * filter[4];

513

514

for(size_t i = 0; i < _partition_size; i += 8)

515

{

516

// load real and imaginary parts of signal and filter

517

__m128 sigr = _mm_load_ps(signal + i);

518

__m128 sigi = _mm_load_ps(signal + i + 4);

519

__m128 filtr = _mm_load_ps(filter + i);

520

__m128 filti = _mm_load_ps(filter + i + 4);

521

522

// multiply and subtract

523

__m128 res1 = _mm_sub_ps(_mm_mul_ps(sigr, filtr), _mm_mul_ps(sigi, filti));

524

525

// multiply and add

526

__m128 res2 = _mm_add_ps(_mm_mul_ps(sigr, filti), _mm_mul_ps(sigi, filtr));

527

528

// load output data for accumulation

529

__m128 acc1 = _mm_load_ps(&_output_buffer[i]);

530

__m128 acc2 = _mm_load_ps(&_output_buffer[i + 4]);

531

532

// accumulate

533

acc1 = _mm_add_ps(acc1, res1);

534

acc2 = _mm_add_ps(acc2, res2);

535

536

// store output data

537

_mm_store_ps(&_output_buffer[i], acc1);

538

_mm_store_ps(&_output_buffer[i + 4], acc2);

539

}

540

541

_output_buffer[0] = dc;

542

_output_buffer[4] = ny;

543

}

544

#endif

545

546

/// Complex multiplication of input and filter spectra

547

void

548

OutputBase::_multiply_spectra()

549

{

550

// Clear IFFT buffer

551

std::fill(_output_buffer.begin(), _output_buffer.end(), 0.0f);

552

_output_buffer.zero = true;

553

554

assert(_filter_ptrs.size() == _input.partitions());

555

556

auto input = _input.spectra.begin();

557

558

for (const auto filter: _filter_ptrs)

559

{

560

assert(filter != nullptr);

561

562

if (input->zero || filter->zero) continue;

563

564

#ifdef __SSE__

565

_multiply_partition_simd(input->data(), filter->data());

566

#else

567

_multiply_partition_cpp(input->data(), filter->data());

568

#endif

569

570

_output_buffer.zero = false;

571

++input;

572

}

573

}

574

575

void

576

OutputBase::_unsort_coefficients()

577

{

578

fixed_vector<float> buffer(_partition_size);

579

580

size_t base = 8;

581

582

buffer[0] = _output_buffer[0];

583

buffer[1] = _output_buffer[1];

584

buffer[2] = _output_buffer[2];

585

buffer[3] = _output_buffer[3];

586

buffer[_input.block_size()] = _output_buffer[4];

587

buffer[_partition_size-1] = _output_buffer[5];

588

buffer[_partition_size-2] = _output_buffer[6];

589

buffer[_partition_size-3] = _output_buffer[7];

590

591

for (size_t i=0; i < (_partition_size / 8-1); i++)

592

{

593

for (size_t ii = 0; ii < 4; ii++)

594

{

595

buffer[base/2+ii] = _output_buffer[base+ii];

596

}

597

598

for (size_t ii = 0; ii < 4; ii++)

599

{

600

buffer[_partition_size-base/2-ii] = _output_buffer[base+4+ii];

601

}

602

603

base += 8;

604

}

605

606

std::copy(buffer.begin(), buffer.end(), _output_buffer.begin());

607

}

608

609

void

610

OutputBase::_ifft()

611

{

612

_unsort_coefficients();

613

fftw<float>::execute(_ifft_plan);

614

}

615

616

/** Convolution engine (output part).

617

* @see Input, StaticOutput

618

**/

619

class Output : public OutputBase

620

{

621

public:

622

Output(const Input& input)

623

: OutputBase(input)

624

, _queues(apf::make_index_iterator(size_t(1))

625

, apf::make_index_iterator(input.partitions()))

626

{}

627

628

void set_filter(const Filter& filter);

629

630

bool queues_empty() const;

631

void rotate_queues();

632

633

private:

634

fixed_vector<filter_ptrs_t> _queues;

635

};

636

637

/** Set a new filter.

638

* The first filter partition is updated immediately, the later partitions are

639

* updated with rotate_queues().

640

* @param filter Container with filter partitions. If too few partitions are

641

* given, the rest is set to zero, if too many are given, the rest is ignored.

642

**/

643

void

644

Output::set_filter(const Filter& filter)

645

{

646

auto partition = filter.begin();

647

648

// First partition has no queue and is updated immediately

649

if (partition != filter.end())

650

{

651

_filter_ptrs.front() = &*partition++;

652

}

653

654

for (size_t i = 0; i < _queues.size(); ++i)

655

{

656

_queues[i][i]

657

= (partition == filter.end()) ? &_empty_partition : &*partition++;

658

}

659

}

660

661

/** Check if there are still valid partitions in the queues.

662

* If this function returns @b false, rotate_queues() should be called.

663

* @note This is important for crossfades: even if set_filter() wasn't used,

664

* older partitions may still change! If the queues are empty, no crossfade is

665

* necessary (except @p weight was changed in convolve()).

666

**/

667

bool

668

Output::queues_empty() const

669

{

670

if (_queues.empty()) return true;

671

672

// It may not be obvious, but that's what the following code does:

673

// If some non-null pointer is found in the last queue, return false

674

675

auto first = _queues.rbegin()->begin();

676

auto last = _queues.rbegin()->end();

677

return std::find_if(first, last, math::identity<const fft_node*>()) == last;

678

}

679

680

/** Update filter queues.

681

* If queues_empty() returns @b true, calling this function is unnecessary.

682

* @note This can lead to artifacts, so a crossfade is recommended.

683

**/

684

void

685

Output::rotate_queues()

686

{

687

auto target = _filter_ptrs.begin();

688

// Skip first element, it doesn't have a queue

689

++target;

690

691

for (auto& queue: _queues)

692

{

693

// If first element is valid, use it

694

if (queue.front()) *target = queue.front();

695

696

std::copy(queue.begin() + 1, queue.end(), queue.begin());

697

*queue.rbegin() = nullptr;

698

++target;

699

}

700

}

701

702

/** %Convolver output stage with static filter.

703

* The filter coefficients are set in the constructor(s) and cannot be changed.

704

* @see Output

705

**/

706

class StaticOutput : public OutputBase

707

{

708

public:

709

/// Constructor from time domain samples

710

template<typename In>

711

StaticOutput(const Input& input, In first, In last)

712

: OutputBase(input)

713

{

714

_filter.reset(new Filter(input.block_size(), first, last

715

, input.partitions()));

716

717

_set_filter(*_filter);

718

}

719

720

/// Constructor from existing frequency domain filter coefficients.

721

/// @attention The filter coefficients are not copied, their lifetime must

722

/// exceed that of the StaticOutput!

723

StaticOutput(const Input& input, const Filter& filter)

724

: OutputBase(input)

725

{

726

_set_filter(filter);

727

}

728

729

private:

730

void _set_filter(const Filter& filter)

731

{

732

auto from = filter.begin();

733

734

for (auto& to: _filter_ptrs)

735

{

736

// If less partitions are given, the rest is set to zero

737

to = (from == filter.end()) ? &_empty_partition : &*from++;

738

}

739

// If further partitions are available, they are ignored

740

}

741

742

// This is only used for the first constructor!

743

std::unique_ptr<Filter> _filter;

744

};

745

746

/// Combination of Input and Output

747

struct Convolver : Input, Output

748

{

749

Convolver(size_t block_size_, size_t partitions_)

750

: Input(block_size_, partitions_)

751

// static_cast to resolve ambiguity

752

, Output(*static_cast<Input*>(this))

753

{}

754

};

755

756

/// Combination of Input and StaticOutput

757

struct StaticConvolver : Input, StaticOutput

758

{

759

template<typename In>

760

StaticConvolver(size_t block_size_, In first, In last, size_t partitions_ = 0)

761

: Input(block_size_, partitions_ ? partitions_

762

: min_partitions(block_size_, std::distance(first, last)))

763

, StaticOutput(*this, first, last)

764

{}

765

766

StaticConvolver(const Filter& filter, size_t partitions_ = 0)

767

: Input(filter.block_size()

768

, partitions_ ? partitions_ : filter.partitions())

769

, StaticOutput(*this, filter)

770

{}

771

};

772

773

/// Apply @c std::transform to a container of fft_node%s

774

template<typename BinaryFunction>

775

void transform_nested(const Filter& in1, const Filter& in2, Filter& out

776

, BinaryFunction f)

777

{

778

auto it1 = in1.begin();

779

auto it2 = in2.begin();

780

781

for (auto& result: out)

782

{

783

if (it1 == in1.end() || it1->zero)

784

{

785

if (it2 == in2.end() || it2->zero)

786

{

787

result.zero = true;

788

}

789

else

790

{

791

assert(it2->size() == result.size());

792

std::transform(it2->begin(), it2->end(), result.begin()

793

, std::bind(f, 0, std::placeholders::_1));

794

result.zero = false;

795

}

796

}

797

else

798

{

799

if (it2 == in2.end() || it2->zero)

800

{

801

assert(it1->size() == result.size());

802

std::transform(it1->begin(), it1->end(), result.begin()

803

, std::bind(f, std::placeholders::_1, 0));

804

result.zero = false;

805

}

806

else

807

{

808

assert(it1->size() == it2->size());

809

assert(it1->size() == result.size());

810

std::transform(it1->begin(), it1->end(), it2->begin(), result.begin()

811

, f);

812

result.zero = false;

813

}

814

}

815

if (it1 != in1.end()) ++it1;

816

if (it2 != in2.end()) ++it2;

817

}

818

}

819

820

} // namespace conv

821

822

} // namespace apf

823

824

#endif

825

826

// Settings for Vim (http://www.vim.org/), please do not remove:

827

// vim:softtabstop=2:shiftwidth=2:expandtab:textwidth=80:cindent

828

// vim:fdm=expr:foldexpr=getline(v\:lnum)=~'/\\*\\*'&&getline(v\:lnum)!~'\\*\\*/'?'a1'\:getline(v\:lnum)=~'\\*\\*/'&&getline(v\:lnum)!~'/\\*\\*'?'s1'\:'='

Older »