~ubuntu-branches/ubuntu/utopic/soundscaperenderer/utopic-proposed

« back to all changes in this revision

Viewing changes to doc/manual/renderers.tex

Committer: Package Import Robot
Author(s): IOhannes m zmölnig (Debian/GNU)
Date: 2014-05-08 16:58:09 UTC
Revision ID: package-import@ubuntu.com-20140508165809-7tz9dhu5pvo5wy25

Tags: upstream-0.4.1~dfsg

Import upstream version 0.4.1~dfsg

files added:

.gitignore

AUTHORS

COPYING

INSTALL

Makefile.am

NEWS

README

apf/AUTHORS

apf/COPYING

apf/NEWS

apf/README

apf/apf

apf/apf/biquad.h

apf/apf/blockdelayline.h

apf/apf/combine_channels.h

apf/apf/commandqueue.h

apf/apf/container.h

apf/apf/convolver.h

apf/apf/denormalprevention.h

apf/apf/dummy_thread_policy.h

apf/apf/fftwtools.h

apf/apf/iterator.h

apf/apf/jack_policy.h

apf/apf/jackclient.h

apf/apf/lockfreefifo.h

apf/apf/math.h

apf/apf/mextools.h

apf/apf/mimoprocessor.h

apf/apf/mimoprocessor_file_io.h

apf/apf/misc.h

apf/apf/parameter_map.h

apf/apf/pointer_policy.h

apf/apf/portaudio_policy.h

apf/apf/posix_thread_policy.h

apf/apf/rtlist.h

apf/apf/shareddata.h

apf/apf/sndfiletools.h

apf/apf/stopwatch.h

apf/apf/stringtools.h

apf/doc

apf/doc/Doxyfile

apf/doc/documentation.cpp

apf/doc/mimoprocessor.cpp

apf/examples

apf/examples/Makefile

apf/examples/README

apf/examples/audiofile_simpleprocessor.cpp

apf/examples/dummy_example.cpp

apf/examples/flext_simpleprocessor.cpp

apf/examples/jack_change_volume.cpp

apf/examples/jack_connections.cpp

apf/examples/jack_convolver.cpp

apf/examples/jack_dynamic_inputs.cpp

apf/examples/jack_dynamic_outputs.cpp

apf/examples/jack_matrix.cpp

apf/examples/jack_minimal.cpp

apf/examples/jack_query_thread.cpp

apf/examples/jack_simpleprocessor.cpp

apf/examples/mex_simpleprocessor.cpp

apf/examples/package.txt

apf/examples/portaudio_simpleprocessor.cpp

apf/examples/simpleprocessor.h

apf/examples/startpd.sh

apf/examples/test_simpleprocessor.pd

apf/misc

apf/misc/Makefile.dependencies

apf/performance_tests

apf/performance_tests/Makefile

apf/performance_tests/biquad_count_denormals.cpp

apf/performance_tests/biquad_denormals.cpp

apf/performance_tests/crossfade.cpp

apf/performance_tests/interpolation.cpp

apf/unit_tests

apf/unit_tests/Makefile

apf/unit_tests/catch

apf/unit_tests/catch/README

apf/unit_tests/catch/catch.hpp

apf/unit_tests/iterator_test_macros.h

apf/unit_tests/main.cpp

apf/unit_tests/test_accumulating_iterator.cpp

apf/unit_tests/test_biquad.cpp

apf/unit_tests/test_blockdelayline.cpp

apf/unit_tests/test_cast_iterator.cpp

apf/unit_tests/test_circular_iterator.cpp

apf/unit_tests/test_combine_channels.cpp

apf/unit_tests/test_container.cpp

apf/unit_tests/test_convolver.cpp

apf/unit_tests/test_discard_iterator.cpp

apf/unit_tests/test_dual_iterator.cpp

apf/unit_tests/test_fftwtools.cpp

apf/unit_tests/test_index_iterator.cpp

apf/unit_tests/test_iterator.cpp

apf/unit_tests/test_iterator_combinations.cpp

apf/unit_tests/test_math.cpp

apf/unit_tests/test_mimoprocessor.cpp

apf/unit_tests/test_misc.cpp

apf/unit_tests/test_stride_iterator.cpp

apf/unit_tests/test_stringtools.cpp

apf/unit_tests/test_transform_iterator.cpp

apf/unit_tests/test_trivial_iterator.cpp

autogen.sh

cleanse.sh

configure.ac

data

data/MacOSX

data/MacOSX/.background

data/MacOSX/.background/background.png

data/MacOSX/DMG-Layout.applescript.in

data/MacOSX/Getting-Started.txt

data/MacOSX/Info.plist.in

data/MacOSX/Jack.webloc

data/MacOSX/Makefile.am

data/MacOSX/SSRIcon.icns

data/MacOSX/dylibbundler

data/MacOSX/dylibbundler/dylibbundler.png

data/MacOSX/dylibbundler/index.html

data/MacOSX/dylibbundler/maclib.jpg

data/MacOSX/dylibbundler/makefile

data/MacOSX/dylibbundler/src

data/MacOSX/dylibbundler/src/Dependency.cpp

data/MacOSX/dylibbundler/src/Dependency.h

data/MacOSX/dylibbundler/src/DylibBundler.cpp

data/MacOSX/dylibbundler/src/DylibBundler.h

data/MacOSX/dylibbundler/src/Settings.cpp

data/MacOSX/dylibbundler/src/Settings.h

data/MacOSX/dylibbundler/src/Utils.cpp

data/MacOSX/dylibbundler/src/Utils.h

data/MacOSX/dylibbundler/src/main.cpp

data/MacOSX/qt.conf

data/MacOSX/run-ssr.scpt

data/MacOSX/run-ssr.sh

data/Makefile.am

data/asdf.xsd

data/images

data/images/listener.png

data/images/listener_background.png

data/images/listener_shadow.png

data/images/pause_button.png

data/images/pause_button_pressed.png

data/images/play_button.png

data/images/play_button_pressed.png

data/images/processing_button.png

data/images/processing_button_pressed.png

data/images/scene_menu_item.png

data/images/scene_menu_item_selected.png

data/images/skip_back_button.png

data/images/skip_back_button_pressed.png

data/images/source_shadow.png

data/images/ssr_logo.png

data/images/ssr_logo_large.png

data/impulse_responses

data/impulse_responses/hrirs

data/impulse_responses/hrirs/hrirs_fabian.wav

data/impulse_responses/wfs_prefilters

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1300_44100.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1300_48000.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1800_44100.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_100_1800_48000.wav

data/impulse_responses/wfs_prefilters/wfs_prefilter_120_1500_44100.wav

data/local_ssr.sh

data/matlab_scripts

data/matlab_scripts/make_wfs_prefilter.m

data/matlab_scripts/prepare_hrirs_kemar.m

data/reproduction_setups

data/reproduction_setups/2.0.asd

data/reproduction_setups/2.1.asd

data/reproduction_setups/5.1.asd

data/reproduction_setups/asdf2html.xsl

data/reproduction_setups/circle.asd

data/reproduction_setups/loudspeaker_setup_with_nearly_all_features.asd

data/reproduction_setups/rostock_horizontal.asd

data/reproduction_setups/rounded_rectangle.asd

data/scenes

data/scenes/asdf2html.xsl

data/scenes/live_input.asd

data/ssr

data/ssr.conf.example

data/ssr.conf.local

doc/manual

doc/manual/SoundScapeRenderer.tex

doc/manual/general.tex

doc/manual/gui.tex

doc/manual/images

doc/manual/images/anti_aliasing.eps

doc/manual/images/coordinate_system.eps

doc/manual/images/coordinate_system.svg

doc/manual/images/local_coordinate_system.eps

doc/manual/images/local_coordinate_system.svg

doc/manual/images/moving_source_without_doppler.svg

doc/manual/images/screenshot.eps

doc/manual/images/screenshot.png

doc/manual/images/screenshot_spd.eps

doc/manual/images/signal_processing.eps

doc/manual/images/signal_processing.svg

doc/manual/images/ssr_logo.mps

doc/manual/latexmkrc

doc/manual/network.tex

doc/manual/operation.tex

doc/manual/references.bib

doc/manual/renderers.tex

doc/manual/todo.tex

macros.m4

mex/Makefile

mex/README.md

mex/ssr_aap.cpp

mex/ssr_binaural.cpp

mex/ssr_helper.m

mex/ssr_mex.h

mex/ssr_nfc_hoa.cpp

mex/ssr_vbap.cpp

mex/ssr_wfs.cpp

mex/test_ssr.m

release.sh

src/Doxyfile

src/Makefile.am

src/aaprenderer.h

src/audioplayer.cpp

src/audioplayer.h

src/audiorecorder.cpp

src/audiorecorder.h

src/binauralrenderer.h

src/boostnetwork

src/boostnetwork/commandparser.cpp

src/boostnetwork/commandparser.h

src/boostnetwork/connection.cpp

src/boostnetwork/connection.h

src/boostnetwork/networksubscriber.cpp

src/boostnetwork/networksubscriber.h

src/boostnetwork/server.cpp

src/boostnetwork/server.h

src/brsrenderer.h

src/coding_style.txt

src/configuration.cpp

src/configuration.h

src/controller.h

src/directionalpoint.cpp

src/directionalpoint.h

src/genericrenderer.h

src/gui

src/gui/qclicktextlabel.cpp

src/gui/qclicktextlabel.h

src/gui/qcpulabel.cpp

src/gui/qcpulabel.h

src/gui/qfilemenulabel.cpp

src/gui/qfilemenulabel.h

src/gui/qgui.cpp

src/gui/qgui.h

src/gui/qguiframe.cpp

src/gui/qguiframe.h

src/gui/qopenglplotter.cpp

src/gui/qopenglplotter.h

src/gui/qscenebutton.cpp

src/gui/qscenebutton.h

src/gui/qsourceproperties.cpp

src/gui/qsourceproperties.h

src/gui/qssrtimeline.cpp

src/gui/qssrtimeline.h

src/gui/qtimeedit.cpp

src/gui/qtimeedit.h

src/gui/quserinterface.cpp

src/gui/quserinterface.h

src/gui/qvolumeslider.cpp

src/gui/qvolumeslider.h

src/gui/qzoomlabel.cpp

src/gui/qzoomlabel.h

src/hoacoefficients.h

src/laplace_coeffs_double.h

src/laplace_coeffs_float.h

src/loudspeaker.h

src/loudspeakerrenderer.h

src/maptools.h

src/nfchoarenderer.h

src/orientation.cpp

src/orientation.h

src/position.cpp

src/position.h

src/posixpathtools.h

src/publisher.h

src/razor-ahrs

src/razor-ahrs/Example.cpp

src/razor-ahrs/GPL.txt

src/razor-ahrs/README.txt

src/razor-ahrs/RazorAHRS.cpp

src/razor-ahrs/RazorAHRS.h

src/rendererbase.h

src/rendersubscriber.h

src/scene.cpp

src/scene.h

src/source.h

src/ssr_aap.cpp

src/ssr_binaural.cpp

src/ssr_brs.cpp

src/ssr_generic.cpp

src/ssr_global.cpp

src/ssr_global.h

src/ssr_nfc_hoa.cpp

src/ssr_vbap.cpp

src/ssr_wfs.cpp

src/subscriber.h

src/timetools.h

src/tracker.h

src/trackerintersense.cpp

src/trackerintersense.h

src/trackerpolhemus.cpp

src/trackerpolhemus.h

src/trackerrazor.cpp

src/trackerrazor.h

src/trackervrpn.cpp

src/trackervrpn.h

src/vbaprenderer.h

src/wfsrenderer.h

src/xmlparser.cpp

src/xmlparser.h

update_apf.txt

Show diffs side-by-side

added added

removed removed

doc/manual/renderers.tex

\section{The Renderers}

\label{sec:renderers}

\subsection{General}

\subsubsection{Reproduction Setups}

\label{sec:reproduction_setups}

The geometry of the actual reproduction setup is specified in

\texttt{.asd} files, just like sound scenes. By default, it is loaded from the

file \texttt{/usr/local/share/ssr/default\_setup.asd}.

Use the \texttt{--setup} command line option to load another reproduction setup file.

Note that the

loudspeaker setups have to be convex. This is not checked by the SSR.

The loudspeakers appear at the outputs of your sound card in the same

order as they are specified in the \texttt{.asd} file, starting with channel 1.

\noindent A sample reproduction setup description:

\begin{verbatim}
<?xml version="1.0"?>
<asdf>
  <header>
    <name>Circular Loudspeaker Array</name>
  </header>
  <reproduction_setup>
    <circular_array number="56">
      <first>
        <position x="1.5" y="0"/>
        <orientation azimuth="-180"/>
      </first>
    </circular_array>
  </reproduction_setup>
</asdf>
\end{verbatim}

\noindent We provide the following setups in the directory

\verb+data/reproduction_setups/+:

\begin{itemize}

\item[-] \texttt{2.0.asd}: standard stereo setup at 1.5 mtrs distance

\item[-] \texttt{2.1.asd}: standard stereo setup at 1.5 mtrs distance plus subwoofer

\item[-] \texttt{5.1.asd}: standard 5.1 setup on circle with a diameter of 3 mtrs

\item[-] \texttt{rounded\_rectangle.asd}: Demonstrates how to combine circular

arcs and linear array segments.

\item[-] \texttt{circle.asd}: This is a circular array of 3 mtrs diameter

composed of 56 loudspeakers.

\item[-] \texttt{loudspeaker\_setup\_with\_nearly\_all\_features.asd}: This

setup describes all supported options, open it with your favorite text

editor and have a look inside.

\end{itemize}

\noindent Note that outputs specified as subwoofers receive a signal having

full bandwidth.

There is some limited freedom in assigning channels to loudspeakers:

If you insert the element \texttt{<skip number="5"/>},

the specified number of output channels are skipped and the following

loudspeakers get higher channel numbers accordingly.

Of course, the binaural and BRS renderers do not load a loudspeaker setup. By

default, they assume the listener to reside in the coordinate origin looking

straight forward.

\subsubsection{A Note on the Timing of the Audio Signals}

The WFS renderer is the only renderer in which the timing of the audio signals is

somewhat peculiar. None of the other renderers imposes any algorithmic delay on

individual source signals. Of course, if you use a renderer which is convolution

based such as the BRS renderer, the employed HRIRs do alter the timing of the

signals due to their inherent properties.

This is different with the WFS renderer. Here, also the propagation duration of

sound from the position of the virtual source to the loudspeaker array is

considered. That means that the farther a virtual source is located, the longer

is the delay imposed on its input signal. This also holds true for plane waves:

Theoretically, plane waves do originate from infinity. However, the SSR does consider

the origin point of the plane wave which is specified in ASDF. This origin point

also specifies the location of the symbol which represents the respective plane

wave in the GUI.

We are aware that this procedure can cause confusion and reduces the ability of

a given scene to translate well between different types of renderers. In the

upcoming version~0.4 of the SSR we will implement an option that will allow you

to specify for each individual source whether the propagation duration of sound

shall be considered by a renderer or not.

\subsubsection{Distance Attenuation}

Note that in all renderers -- except the BRS renderer -- distance attenuation

is handled as $\nicefrac{1}{r}$ with respect to the distance $r$ of the

respective virtual source to the reference position. Sources closer than 0.5

mtrs to the reference position do not experience any increase of amplitude.

Virtual plane waves do not experience any algorithmic distance attenuation in

any renderer.

In future versions of the SSR more freedom in specifying the distance attenuation

will be provided.

The amplitude reference distance, i.e.~the distance from the reference at which

plane waves are as loud as the other source types (like point sources), can be

set in the SSR configuration file (Section~\ref{sec:ssr_configuration_file}).

100

The desired amplitude reference distance for a given sound scene can be

101

specified in the scene description (Section~\ref{sec:asdf}). The default value

102

is 3~m.

103

104

\subsubsection{Doppler Effect}

105

106

In the current version of the SSR the Doppler Effect in moving sources is not

107

supported by any of the renderers.

108

109

\subsubsection{Signal Processing}

110

111

All rendering algorithms are implemented on a frame-wise basis with an internal

112

precision of 32 bit floating point. The signal processing is illustrated in

113

Fig.~\ref{fig:signal_processing}.

114

115

The input signal is divided into individual frames of size \emph{nframes}, whereby

116

\emph{nframes} is the frame size with which JACK is running. Then e.g.\ frame number

117

$n+1$ is processed both with previous rendering parameters $n$ as well as with

118

current parameters $n+1$. It is then crossfaded between both processed frames

119

with cosine-shaped slopes. In other words the effective frame size of the

120

signal processing is $2\cdot\text{\emph{nframes}}$ with 50\% overlap. Due to the fade-in of

121

the frame processed with the current parameters $n+1$, the algorithmic latency

122

is slightly higher than for processing done with frames purely of size

123

\emph{nframes} and no crossfade.

124

125

\begin{figure}

126

\footnotesize \psfrag{input}{\bf input signal} \psfrag{output}{\bf

127

output signal} \psfrag{dots}{\bf \dots} \psfrag{+}{\bf +}

128

\psfrag{n}{frame $n$} \psfrag{n+1}{frame $n\!+\!1$}

129

\psfrag{n+2}{frame $n\!+\!2$} \psfrag{n+3}{frame $n\!+\!3$}

130

\psfrag{pn-1}{parameters $n\!-\!1$} \psfrag{pn}{parameters $n$}

131

\psfrag{pn+1}{parameters $n\!+\!1$} \psfrag{pn+2}{parameters

132

$n\!+\!2$} \psfrag{pn+3}{parameters $n\!+\!3$}

133

\hfill

134

\includegraphics[width=.95\linewidth]{signal_processing}

135

\caption{\label{fig:signal_processing}{Illustration of the

136

frame-wise signal processing as implemented in the SSR renderers

137

(see text).}}

138

\end{figure}

139

140

The implementation approach described above is one version of the standard way

141

of implementing time-varying audio processing. Note however that this means

142

that with \emph{all} renderers, moving sources are not physically correctly

143

reproduced. The physically correct reproduction of moving virtual sources as in

144

\cite{Ahrens08:MOVING_AES,Ahrens08:SUPERSONIC_AES} requires a different

145

implementation approach which is computationally significantly more costly.

146

147

\subsection{Binaural Renderer}

148

\label{sec:binaural_renderer}

149

150

Binaural rendering is a technique where the acoustical influence of the human

151

head is electronically simulated to position virtual sound sources in space.

152

{\bf Be sure that you use headphones to listen.} Note that the current binaural

153

renderer reproduces all virtual sources exclusively as point sources.

154

155

The acoustical influence of the human head is coded in so-called head-related

156

impulse responses (HRIRs). The HRIRs are loaded from the file

157

\texttt{/usr/local/share/ssr/default\_hrirs.wav}. If you want to use different

158

HRIRs then use the \texttt{--hrirs=FILE} command line option or the SSR

159

configuration file (Section~\ref{sec:ssr_configuration_file}) to specify your

160

custom location. The SSR connects its outputs automatically to outputs 1 and 2

161

of your sound card.

162

163

For virtual sound sources which are closer to the reference position (= the

164

listener position) than 0.5 mtrs, the HRIRs are interpolated with a Dirac impulse. This

165

ensures a smooth transition of virtual sources from the outside of the

166

listener's head to the inside.

167

168

SSR uses HRIRs with an angular resolution of 1$^\circ$. Thus, the HRIR file

169

contains 720 impulse responses (360 for each ear) stored as a 720-channel

170

.wav-file. The HRIRs all have to be of equal length and have to be arranged in

171

the following order:

172

173

\begin{itemize}

174

\item[-] 1st channel: left ear, virtual source position 0$^\circ$

175

\item[-] 2nd channel: right ear, virtual source position 0$^\circ$

176

\item[-] 3rd channel: left ear, virtual source position 1$^\circ$

177

\item[-] 4th channel: right ear, virtual source position 1$^\circ$

178

\item[] \dots

179

\item[-] 720th channel: right ear, virtual source position 359$^\circ$

180

\end{itemize}

181

182

If your HRIRs have lower angular resolution you have to interpolate them to the

183

target resolution or use the same HRIR for several adjacent directions in

184

order to fulfill the format requirements. Higher resolution is not supported.

185

Make sure that the sampling rate of the HRIRs matches that of JACK. So far, we

186

know that both 16bit and 24bit word lengths work.

187

188

The SSR automatically loads and uses all HRIR coefficients it finds in the

189

specified file. You can use the \texttt{--hrir-size=VALUE} command line option in order

190

to limit the number of HRIR coefficients read and used to \texttt{VALUE}. You

191

don't need to worry if your specified HRIR length \texttt{VALUE} exceeds the

192

one stored in the file. You will receive a warning telling you what the score

193

is. The SSR will render the audio in any case.

194

195

The actual size of the HRIRs is not restricted (apart from processing power).

196

The SSR cuts them into partitions of size equal to the JACK frame buffer size and

197

zero-pads the last partition if necessary.

198

199

Note that there's some potential to optimize the performance of the SSR by

200

adjusting the JACK frame size and accordingly the number of partitions when a

201

specific number of HRIR taps are desired. The least computational load arises

202

when the audio frames have the same size as the HRIRs. By choosing shorter

203

frames and thus using partitioned convolution the system latency is reduced but

204

computational load is increased.

205

206

The HRIRs \texttt{impulse\_responses/hrirs/hrirs\_fabian.wav} we have included

207

in the SSR are HRIRs of 512 taps of the FABIAN mannequin~\cite{fabian} in an

208

anechoic environment. See the file \texttt{hrirs\_fabian\_documentation.pdf}

209

for details of the measurement.

210

211

\paragraph{Preparing HRIR sets}%

212

213

You can easily prepare your own HRIR sets for use with the SSR by adapting

214

the MATLAB \cite{matlab} script \texttt{data/matlab\_scripts/prepare\_hrirs\_kemar.m}

215

to your needs. This script converts the HRIRs of the KEMAR mannequin included

216

in the CIPIC database \cite{cipic} to the format which the SSR expects. See the script for

217

further information and how to obtain the raw HRIRs.

218

219

220

\subsection{\label{sec:brs}Binaural Room Synthesis Renderer}

221

222

The Binaural Room Synthesis (BRS) renderer is a binaural renderer (refer to

223

Section~\ref{sec:binaural_renderer}) which uses one dedicated HRIR set for each

224

individual sound source. The motivation is to have more realistic reproduction

225

than in simple binaural rendering. In this context HRIRs are typically referred

226

to as binaural room impulse responses (BRIRs).

227

228

Note that the BRS renderer does not consider any specification of a virtual

229

source's position. The positions of the virtual sources (including their

230

distance) are exclusively coded in the BRIRs. Consequently, the BRS renderer

231

does not apply any distance attenuation. It only applies the respective

232

source's gain and the master volume. No interpolation with a Dirac as in the

233

binaural renderer is performed for very close virtual sources. The only

234

quantity which is explicitly considered is the orientation of the receiver,

235

i.e.~the reference. Therefore, specification of meaningful source and receiver

236

positions is only necessary when a correct graphical illustration is desired.

237

238

The BRIRs are stored in a format similar to the one for the HRIRs for the

239

binaural renderer (refer to Section~\ref{sec:binaural_renderer}). However,

240

there is a fundamental difference: In order to be consistent, the different

241

channels do not hold the data for different positions of the virtual sound

242

source but they hold the information for different head orientations.

243

Explicitly,

244

245

\begin{itemize}

246

\item[-] 1st channel: left ear, head orientation 0$^\circ$

247

\item[-] 2nd channel: right ear, head orientation 0$^\circ$

248

\item[-] 3rd channel: left ear, head orientation 1$^\circ$

249

\item[-] 4th channel: right ear, head orientation 1$^\circ$

250

\item[] \dots

251

\item[-] 720th channel: right ear, head orientation 359$^\circ$

252

\end{itemize}

253

254

In order to assign a set of BRIRs to a given sound source an appropriate scene

255

description in \texttt{.asd}-format has to be prepared (refer also to

256

Section~\ref{sec:audio_scenes}). As shown in \texttt{brs\_example.asd} (from

257

the example scenes), a virtual source has the optional property

258

\texttt{properties\_file} which holds the location of the file containing the

259

desired BRIR set. The location to be specified is relative to the folder of the

260

scene file. Note that -- as described above -- specification of the virtual

261

source's position does not affect the audio processing. If you do not specify a

262

BRIR set for each virtual source, then the renderer will complain and refuse

263

to process the respective source.

264

265

We have measured the binaural room impulse responses of the FABIAN

266

mannequin~\cite{fabian} in one of our mid-size meeting rooms called Sputnik

267

with 8 different source positions. Due to the file size, we have not included

268

them in the release. Please contact \contactadress\ to obtain the data.

269

270

271

%\subsection{Binaural Playback Renderer}

272

273

%The binaural playback (BPB) renderer is actually not a renderer but

274

%a playback engine that enables real-time head-tracking in headphone

275

%playback. It is similar to BRS with the only difference that it does

276

%not employ impulse responses that are applied to the input signal.

277

%It is rather such that the entire signals for the two ears for all

278

%desired possible head orientations have to be precomputed and are then

279

%loaded into the memory. During playback, depending on the

280

%instantaneous head orientation of the listener as measured by the

281

%tracking system, the corresponding audio data are replayed. If a

282

%change in head orientation occurs then a crossfade is applied over

283

%the duration of one JACK frame. Playing is automatically looped. To

284

%stop replay, mute the source. When the source is unmuted, replay

285

%starts at the beginning of the data.

286

287

%The BPB renderer was designed for the simulation of time-varying

288

%systems, which are complicated to implement in real-time. The

289

%audio signals can be prepared in any desired software and also

290

%costly algorithms that do not run in real-time can be replayed with

291

%head-tracking.

292

293

%As shown in the example \texttt{bin/scenes/bpb\_example.asd} and

294

%similar to the description of a BRS scene, a virtual source has the

295

%optional property \texttt{properties\_file}, which holds the location

296

%of the file containing the audio data. By default, it is assumed

297

%that the data are stored in a 720-channel audio file the channels of

298

%which are arranged similarly to BRS impulse responses.

299

300

%Loading all 720 channels into memory can result in hundreds of

301

%megabytes even for signals of moderate length. In order to avoid

302

%restrictions due to the available memory caused by possibly unrequired

303

%data it is possible to restrict the interval of head

304

%orientations. This restriction has to be applied symmetrically,

305

%e.g.~$\pm60^\circ$. The resolution between the limits is still

306

%1$^\circ$. The channel arrangement for the $\pm60^\circ$ example

307

%would then be

308

309

%\begin{itemize}

310

%\item[-] 1st channel: left ear, head orientation 0$^\circ$

311

%\item[-] 2nd channel: right ear, head orientation 0$^\circ$

312

%\item[-] 3rd channel: left ear, head orientation 1$^\circ$

313

%\item[-] 4th channel: right ear, head orientation 1$^\circ$

314

%\item[] \dots

315

%\item[-] 121st channel: left ear, head orientation 60$^\circ$

316

%\item[-] 122nd channel: right ear, head orientation 60$^\circ$

317

%\item[-] 123rd channel: left ear, head orientation 300$^\circ$ (i.e.~-60$^\circ$)

318

%\item[-] 124th channel: right ear, head orientation 300$^\circ$ (i.e.~-60$^\circ$)

319

%\item[-] 125th channel: left ear, head orientation 301$^\circ$ (i.e.~-59$^\circ$)

320

%\item[-] 126th channel: right ear, head orientation 301$^\circ$ (i.e.~-59$^\circ$)

321

%\item[] \dots

322

%\item[-] 242nd channel: right ear, head orientation 359$^\circ$ (i.e.~-1$^ \circ$)

323

%\end{itemize}

324

325

%resulting thus in 242 channels. It is not necessary to explicitly

326

%specify the desired interval of possible head orientations. The SSR deduces it

327

%directly from the number of channels of the

328

%\texttt{properties\_file}. If the listener turns the head to

329

%orientations for which no data are available the BPB renderer

330

%automatically replays the data for the closest orientation available.

331

%We assume that this is less disturbing in practice than a full

332

%dropout of the signal.

333

334

%To fulfill the ASDF syntax, the specification of an input signal is

335

%required. In order to avoid the unnecessary opening and replaying of

336

%an audio file, we propose to specify an arbitrary input port such as

337

338

%\begin{verbatim}

339

%<source name="source" properties_file="../audio/binaural_data.wav">

340

%

341

% <port>0</port>

342

%

343

% <position x="-2" y="2"/>

344

%</source>

345

%\end{verbatim}

346

347

\subsection{Vector Base Amplitude Panning Renderer}

348

349

The Vector Base Amplitude Panning (VBAP) renderer uses

350

the algorithm described in

351

\cite{Pulkki97:JAES}. It tries to find a loudspeaker pair between

352

which the phantom source is located (in VBAP you speak of a phantom

353

source rather than a virtual one). If it does find a loudspeaker pair

354

whose angle is smaller than $180^\circ$ then it calculates the weights

355

$g_l$ and $g_r$ for

356

the left and right loudspeaker as

357

358

\begin{equation}

359

g_{l,r} = \frac{\cos\phi \sin \phi_0 \pm \sin \phi \cos \phi_0}

360

{2\cos \phi_0 \sin \phi_0} \ . \nonumber

361

\end{equation}

362

363

$\phi_0$ is half the angle between the two loudspeakers with respect to the

364

listening position, $\phi$ is the angle between the position of the phantom

365

source and the direction ``between the loudspeakers''.

366

367

If the VBAP renderer cannot find a loudspeaker pair whose angle is smaller

368

than $180^\circ$ then it uses the closest loudspeaker provided that the latter

369

is situated within $30^\circ$. If not, then it does not render the source. If

370

you are in verbosity level 2 (i.e.~start the SSR with the \texttt{-vv} option)

371

you'll see a notification about what's happening.

372

373

Note that all virtual source types (i.e.~point and plane sources) are rendered

374

as phantom sources.

375

376

Contrary to WFS, non-uniform distributions of loudspeakers are ok here.

377

Ideally, the loudspeakers should be placed on a circle around the reference

378

position. You can optionally specify a delay for each loudspeaker in order to

379

compensate some amount of misplacement. In the ASDF (refer to

380

Section~\ref{sec:asdf}), each loudspeaker has the optional attribute

381

\texttt{delay} which determines the delay in seconds to be applied to the

382

respective loudspeaker. Note that the specified delay will be rounded to an

383

integer multiple of the temporal sampling period. With 44.1 kHz sampling

384

frequency this corresponds to an accuracy of 22.676 $\mu$s, respectively an

385

accuracy of 7.78 mm in terms of loudspeaker placement. Additionally, you can

386

specify a weight for each loudspeaker in order to compensate for irregular

387

setups. In the ASDF format (refer to Section~\ref{sec:asdf}), each loudspeaker

388

has the optional attribute \texttt{weight} which determines the linear~(!)

389

weight to be applied to the respective loudspeaker. An example would be

390

391

\begin{verbatim}
<loudspeaker delay="0.005" weight="1.1">
  <position x="1.0" y="-2.0"/>
  <orientation azimuth="-30"/>
</loudspeaker>
\end{verbatim}

397

398

Delay defaults to 0 if not specified, weight defaults to~1.

399

400

Although suitable in principle, we do not recommend using our amplitude panning

401

algorithm for dedicated 5.1 (or comparable) mixdowns. Our VBAP renderer only

402

uses adjacent loudspeaker pairs for panning which does not exploit all

403

potentials of such a loudspeaker setup. For the mentioned formats specialized

404

panning processes have been developed also employing non-adjacent loudspeaker

405

pairs if desired.

406

407

The VBAP renderer is rather meant to be used with non-standardized setups.

408

409

\subsection{Wave Field Synthesis Renderer}

410

411

The Wave Field Synthesis (WFS) renderer is the only renderer so far which

412

discriminates between virtual point sources and plane waves. It implements the

413

simple driving function given in~\cite{Spors08:WFS_AES}. Note that we have only

414

implemented a temporary solution to reduce artifacts when virtual sound sources

415

are moved. This topic is subject to ongoing research. We will work on that in

416

the future. In the SSR configuration file

417

(Section~\ref{sec:ssr_configuration_file}) you can specify an overall predelay

418

(this is necessary to render focused sources) and the overall length of the

419

involved delay lines. Both values are given in samples.

420

421

422

\paragraph{Prefiltering}%

423

424

As you might know, WFS requires a spectral correction in addition to the delay

425

and weighting of the input signal. Since this spectral correction is equal for

426

all loudspeakers, it needs to be performed only once on the input. We are

427

working on an automatic generation of the required filter. Until then, we load

428

the impulse response of the desired filter from a .wav-file which is specified

429

via the \texttt{--prefilter=FILE} command line option (see

430

Section~\ref{sec:running_ssr}) or in the SSR configuration file

431

(Section~\ref{sec:ssr_configuration_file}). Make sure that the specified audio

432

file contains only one channel. Files with a differing number of channels will

433

not be loaded. Of course, the sampling rate of the file also has to match that

434

of the JACK server.

435

436

Note that the filter will be zero-padded to the next highest power of 2. If the

437

resulting filter is then shorter than the current JACK frame size, each

438

incoming audio frame will be divided into subframes for prefiltering. That

439

means, if you load a filter of 100 taps and JACK frame size is 1024, the filter

440

will be padded to 128 taps and prefiltering will be done in 8 cycles. This is

441

done in order to save processing power since typical prefilters are much

442

shorter than typical JACK frame sizes. Zero-padding the prefilter to the JACK

443

frame size usually produces large overhead. If the prefilter is longer than the

444

JACK frame buffer size, the filter will be divided into partitions whose length

445

is equal to the JACK frame buffer size.

446

447

If you do not specify a filter, then no prefiltering is performed. This results

448

in a boost of bass frequencies in the reproduced sound field.

449

450

In order to assist you in the design of an appropriate prefilter, we have

451

included the MATLAB \cite{matlab} script

452

\texttt{data/matlab\_scripts/make\_wfs\_prefilter.m} which does the job. In the

453

very top of the file, you can specify the sampling frequency, the desired

454

length of the filter as well as the lower and upper frequency limits of the

455

spectral correction. The lower limit should be chosen such that the subwoofer

456

of your system receives a signal which is not spectrally altered. This is due

457

to the fact that only loudspeakers which are part of an array of loudspeakers

458

need to be corrected. The lower limit is typically around 100 Hz. The upper

459

limit is given by the spatial aliasing frequency. The spatial aliasing is

460

dependent on the mutual distance of the loudspeakers, the distance of the

461

considered listening position to the loudspeakers, and the array geometry. See

462

\cite{Spors06:Aliasing_AES} for detailed information on how to determine the

463

spatial aliasing frequency of a given loudspeaker setup. The spatial aliasing

464

frequency is typically between 1000 Hz and 2000 Hz. For a theoretical treatment

465

of WFS in general and also the prefiltering, see \cite{Spors08:WFS_AES}.

466

467

The script \texttt{make\_wfs\_prefilter.m} will save the impulse response of

468

the designed filter in a file like

469

\texttt{wfs\_prefilter\_120\_1500\_44100.wav}. From the file name you can

470

extract that the spectral correction starts at 120 Hz and goes up to 1500 Hz at

471

a sampling frequency of 44100 Hz. Check the folder

472

\texttt{data/impulse\_responses/wfs\_prefilters} for a small selection of

473

prefilters.

474

475

\paragraph{Tapering}%

476

477

When the listening area is not enclosed by the loudspeaker setup, artifacts

478

arise in the reproduced sound field due to the limited aperture. This problem

479

of spatial truncation can be reduced by so-called tapering. Tapering is

480

essentially an attenuation of the loudspeakers towards the ends of the setup.

481

As a consequence, the boundaries of the aperture become smoother which reduces

482

the artifacts. Of course, no benefit comes without a cost. In this case the

483

cost is amplitude errors to which the human ear fortunately does not seem to

484

be too sensitive.

485

486

In order to taper, you can assign the optional attribute \texttt{weight} to

487

each loudspeaker in ASDF format (refer to Section~\ref{sec:asdf}). The

488

\texttt{weight} determines the linear~(!) weight to be applied to the

489

respective loudspeaker. It defaults to 1 if it is not specified.

490

491

492

\subsection{Ambisonics Amplitude Panning Renderer}

493

494

The Ambisonics Amplitude Panning (AAP) renderer does very simple Ambisonics

495

rendering. It does amplitude panning by simultaneously using all loudspeakers

496

which are not subwoofers to reproduce a virtual source (contrary to the VBAP

497

renderer which uses only two loudspeakers at a time). Note that the

498

loudspeakers should ideally be arranged on a circle and the reference should be

499

the center of the circle. The renderer checks for that and applies delays and

500

amplitude corrections to all loudspeakers which are closer to the reference

501

than the farthest. This also includes subwoofers. If you do not want close

502

loudspeakers to be delayed, then simply specify their location in the same

503

direction as their actual position but at a larger distance from the reference.

504

Then the graphical illustration will not be perfectly aligned with the real

505

setup, but the audio processing will take place as intended. Note that the AAP

506

renderer ignores delays assigned to an individual loudspeaker in ASDF. On the

507

other hand, it does consider weights assigned to the loudspeakers. This allows

508

you to compensate for irregular loudspeaker placement.

509

510

Note finally that AAP does not allow encoding the distance of a virtual sound

511

source since it is a simple panning renderer. All sources will appear at the

512

distance of the loudspeakers.

513

514

If you do not explicitly specify an Ambisonics order, then the maximum order

515

which makes sense on the given loudspeaker setup will be used. The

516

automatically chosen order will be one of \nicefrac{(L-1)}{2} for an odd number

517

$L$ of loudspeakers and accordingly for even numbers.

518

519

You can manually set the order via a command line option

520

(Section~\ref{sec:running_ssr}) or the SSR configuration file

521

(Section~\ref{sec:ssr_configuration_file}). We therefore do not explicitly

522

discriminate between ``higher order'' and ``lower order'' Ambisonics since this

523

is not a fundamental property. And where does ``lower order'' end and ``higher

524

order'' start anyway?

525

526

Note that the graphical user interface will not indicate the activity of the

527

loudspeakers since theoretically all loudspeakers contribute to the sound field

528

of a virtual source at any time.

529

530

\paragraph{Conventional driving function}

531

532

By default we use the standard Ambisonics panning function outlined e.g.~in

533

\cite{Neukom07} reading

534

535

\begin{equation}

536

d(\alpha_0) = \frac{\sin\left ( \frac{2M+1}{2} \ (\alpha_0 -

537

\alpha_\textnormal{s})\right )} {(2M+1) \ \sin \left ( \frac{\alpha_0 -

538

\alpha_\textnormal{s}}{2} \right ) } \ , \nonumber

539

\end{equation}

540

541

where $\alpha_0$ is the polar angle of the position of the considered secondary source,

542

$\alpha_\textnormal{s}$ is the polar angle of the position of the virtual source, and

543

$M$ is the Ambisonics order.

544

545

\paragraph{In-phase driving function}

546

547

The conventional driving function leads to both positive and negative weights

548

for individual loudspeakers. An object (e.g.~a listener) introduced into the

549

listening area can lead to an imperfect interference of the wave fields of the

550

individual loudspeakers and therefore to an inconsistent perception.

551

Furthermore, conventional Ambisonics panning can lead to audible artifacts for

552

fast source motions since it can happen that the weights of two adjacent audio

553

frames have a different algebraic sign.

554

555

These problems can be worked around when only positive weights are applied on

556

the input signal (\emph{in-phase} rendering). This can be accomplished via the

557

in-phase driving function given e.g.~in \cite{Neukom07} reading

558

559

\begin{equation}

560

d(\alpha_0) = \cos^{2M} \left (\frac{\alpha_0 - \alpha_\textnormal{s}}{2} \right ) \ . \nonumber

561

\end{equation}

562

563

Note that in-phase rendering leads to a less precise localization of the virtual source

564

and other unwanted perceptions. You can enable in-phase rendering via the corresponding command-line

565

option or you can set

566

the \texttt{IN\_PHASE\_RENDERING} property in the SSR configuration file (see Section~\ref{sec:ssr_configuration_file}) to be ``\texttt{TRUE}'' or ``\texttt{true}''.

567

568

\subsection{Generic Renderer}

569

570

The generic renderer turns the SSR into a multiple-input-multiple-output

571

convolution engine. You have to use an ASDF file in which the attribute

572

\texttt{properties\_file} of the individual sound source has to be set

573

properly. That means that the indicated file has to be a multichannel file with

574

the same number of channels as there are loudspeakers in the setup. The impulse

575

response in the file at channel 1 represents the driving function for

576

loudspeaker~1 and so on.

577

578

Be sure that you load a reproduction setup with the corresponding number of

579

loudspeakers.

580

581

It is obviously not possible to move virtual sound sources since the loaded

582

impulse responses are static. We use this renderer in order to test advanced

583

methods before implementing them in real-time or to compare two different

584

rendering methods by having one sound source in one method and another sound

585

source in the other method.

586

587

Download the ASDF examples from~\cite{ssr} and check out the file

588

\texttt{generic\_renderer\_example.asd} which comes with all required data.

589

590

\begin{table}%[htbp]

591

\begin{center}

592

\begin{tabular}{| l | c | c |}

593

\hline

594

& individual delay & weight \\

595

\hline

596

binaural renderer & - & - \\

597

BRS renderer & - & - \\

598

VBAP renderer & + & + \\

599

WFS renderer & - & + \\

600

AAP renderer & autom. & + \\

601

generic renderer & - & - \\\hline

602

\end{tabular}

603

\caption{\label{tab:loudspeaker_properties}Loudspeaker properties

604

considered by the different renderers.}

605

\end{center}

606

\end{table}

607

608

\begin{table}%[htbp]

609

\begin{center}%

610

\begin{minipage}{\textwidth}% to enable footnotes

611

\begin{tabular}{| l | c | c | c | c | c |}

612

\hline

613

& gain & mute & position & orientation\footnote{So far, only plane waves have a defined

614

orientation.} & model\\

615

\hline

616

binaural renderer & + & + & + & - & only ampl.\\

617

BRS renderer & + & + & - & - & -\\

618

VBAP renderer & + & + & + & - & only ampl.\\

619

WFS renderer & + & + & + & + & +\\

620

AAP renderer & + & + & + & - & only ampl.\\

621

generic renderer & + & + & - & - & -\\\hline

622

\end{tabular}

623

\end{minipage}

624

\caption{\label{tab:source_properties}Virtual source's properties

625

considered by the different renderers.}

626

\end{center}

627

\end{table}

628

629

630

%\subsection{Parallel Processing Renderers}

631

632

%The renderers as described above do not support parallel processing. We are

633

%currently redesigning the architecture of the SSR in order to support audio

634

%processing in multiple threads so that the power of multi-processor/multi-core machines can be

635

%properly exploited. The current SSR release contains versions of the WFS, the

636

%VBAP, and the binaural renderer which support parallel processing. These

637

%versions are disabled at compile time by default.

638

%If you want to enable these renderers use the option \texttt{./configure

639

%--enable-newrenderer} (Section~\ref{sec:comp_inst}) when configuring. All

640

%renderers other than WFS, VBAP, and binaural will then not be available.

641

642

%\textbf{WARNING:} The parallel processing renderers are under heavy development.

643

%If you encounter unexpected behaviour or bugs, please report them to

644

%\emph{SoundScapeRenderer@telekom.de}. Thank you.

645

646

\subsection{Summary}

647

648

Tables~\ref{tab:loudspeaker_properties}

649

and~\ref{tab:source_properties} summarize the functionality of the

650

SSR renderers.

651

Older »