1
/****************************************************************************
3
* NAME: smsPitchScale.cp
5
* HOME URL: http://www.dspdimension.com
8
* SYNOPSIS: Routine for doing pitch scaling while maintaining
9
* duration using the Short Time Fourier Transform.
11
* DESCRIPTION: The routine takes a pitchScale factor value which is between 0.5
12
* (one octave down) and 2. (one octave up). A value of exactly 1 does not change
13
* the pitch. numSampsToProcess tells the routine how many samples in indata[0...
14
* numSampsToProcess-1] should be pitch scaled and moved to outdata[0 ...
15
* numSampsToProcess-1]. The two buffers can be identical (ie. it can process the
16
* data in-place). fftFrameLength defines the FFT frame size used for the
17
* processing. Typical values are 1024, 2048 and 4096. It may be any value <=
18
* MAX_FFT_FRAME_LENGTH but it MUST be a power of 2. osamp is the STFT
19
* oversampling factor which also determines the overlap between adjacent STFT
20
* frames. It should at least be 4 for moderate scaling ratios. A value of 32 is
21
* recommended for best quality. sampleRate takes the sample rate for the signal
22
* in unit Hz, ie. 44100 for 44.1 kHz audio. The data passed to the routine in
23
* indata[] should be in the range [-1.0, 1.0), which is also the output range
26
* COPYRIGHT 1999 Stephan M. Sprenger <sms@dspdimension.com>
28
* The Wide Open License (WOL)
30
* Permission to use, copy, modify, distribute and sell this software and its
31
* documentation for any purpose is hereby granted without fee, provided that
32
* the above copyright notice and this license appear in all source copies.
33
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
34
* ANY KIND. See http://www.dspguru.com/wol.htm for more information.
36
*****************************************************************************/
39
#include "../config.h"
42
#include "pitchscale.h"
44
static float ps_in[MAX_FRAME_LENGTH*2], ps_out[MAX_FRAME_LENGTH*2];
45
static fft_plan aplan = NULL, splan = NULL;
47
void pitch_scale(sbuffers *buffers, const double pitchScale, const long
48
fftFrameLength, const long osamp, const long numSampsToProcess,
49
const double sampleRate, const float *indata, float *outdata,
50
const int adding, const float gain) {
52
Routine smsPitchScale(). See top of file for explanation Purpose: doing
53
pitch scaling while maintaining duration using the Short Time Fourier
54
Transform. Author: (c)1999 Stephan M. Sprenger <sms@dspdimension.com>
56
double magn, phase, tmp;
57
double freqPerBin, expct, fadeZoneLen;
58
long i,k, qpd, index, inFifoLatency, stepSize,
60
double phaseArr[MAX_FRAME_LENGTH];
63
float *gInFIFO = buffers->gInFIFO;
64
float *gOutFIFO = buffers->gOutFIFO;
65
float *gLastPhase = buffers->gLastPhase;
66
float *gSumPhase = buffers->gSumPhase;
67
float *gOutputAccum = buffers->gOutputAccum;
68
float *gAnaFreq = buffers->gAnaFreq;
69
float *gAnaMagn = buffers->gAnaMagn;
70
float *gSynFreq = buffers->gSynFreq;
71
float *gSynMagn = buffers->gSynMagn;
72
float *gWindow = buffers->gWindow;
73
long gRover = buffers->gRover;
78
for (i=0; i<fftFrameLength; i++) {
79
ps_in[i + fftFrameLength] = 0.0f;
82
aplan = fftwf_plan_r2r_1d(fftFrameLength, ps_in, ps_out, FFTW_R2HC, FFTW_MEASURE);
83
splan = fftwf_plan_r2r_1d(fftFrameLength, ps_in, ps_out, FFTW_HC2R, FFTW_MEASURE);
85
aplan = rfftw_create_plan(fftFrameLength, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
86
splan = rfftw_create_plan(fftFrameLength, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
90
/* set up some handy variables */
91
fadeZoneLen = fftFrameLength/2;
92
fftFrameSize2 = fftFrameLength/2;
93
stepSize = fftFrameLength/osamp;
94
freqPerBin = sampleRate*2.0/(double)fftFrameLength;
95
expct = 2.0*M_PI*(double)stepSize/(double)fftFrameLength;
96
inFifoLatency = fftFrameLength-stepSize;
97
if (gRover == false) gRover = inFifoLatency;
99
/* main processing loop */
100
for (i = 0; i < numSampsToProcess; i++){
102
/* As long as we have not yet collected enough data just read in */
103
gInFIFO[gRover] = indata[i];
105
outdata[i] += (gOutFIFO[gRover-inFifoLatency] * gain);
107
outdata[i] = gOutFIFO[gRover-inFifoLatency];
111
/* As long as we have not yet collected enough data just read in */
112
/* now we have enough data for processing */
113
if (gRover >= fftFrameLength) {
114
gRover = inFifoLatency;
116
/* do windowing and store */
117
for (k = 0; k < fftFrameLength; k++) {
118
ps_in[k] = gInFIFO[k] * gWindow[k];
121
/* As long as we have not yet collected enough data just read in */
123
/* ***************** ANALYSIS ******************* */
126
fftwf_execute(aplan);
128
rfftw_one(aplan, ps_in, ps_out);
131
/* this is the analysis step */
133
/* Hard math first, we can 3dnow this */
134
for (k = 1; k <= fftFrameSize2; k+=8) {
135
float *mb = &gAnaMagn[k];
142
ri[10] = ps_out[k+5];
143
ri[12] = ps_out[k+6];
144
ri[14] = ps_out[k+7];
146
ri[1] = ps_out[fftFrameLength - k];
147
ri[3] = ps_out[fftFrameLength - (k + 1)];
148
ri[5] = ps_out[fftFrameLength - (k + 2)];
149
ri[7] = ps_out[fftFrameLength - (k + 3)];
150
ri[9] = ps_out[fftFrameLength - (k + 4)];
151
ri[11] = ps_out[fftFrameLength - (k + 5)];
152
ri[13] = ps_out[fftFrameLength - (k + 6)];
153
ri[15] = ps_out[fftFrameLength - (k + 7)];
155
/* compute magnitude and phase. */
157
#warning Using processor specific 3DNow! accelerations
158
__asm__ __volatile__ (
161
movq (%%eax), %%mm0 \n\
162
movq 8(%%eax), %%mm1 \n\
163
movq 16(%%eax), %%mm2 \n\
164
movq 24(%%eax), %%mm3 \n\
165
movq 32(%%eax), %%mm4 \n\
166
movq 40(%%eax), %%mm5 \n\
167
movq 48(%%eax), %%mm6 \n\
168
movq 56(%%eax), %%mm7 \n\
169
# do the squares and add \n\
170
pfmul %%mm0, %%mm0 \n\
171
pfacc %%mm0, %%mm0 \n\
172
pfmul %%mm1, %%mm1 \n\
173
pfacc %%mm1, %%mm1 \n\
174
pfmul %%mm2, %%mm2 \n\
175
pfacc %%mm2, %%mm2 \n\
176
pfmul %%mm3, %%mm3 \n\
177
pfacc %%mm3, %%mm3 \n\
178
pfmul %%mm4, %%mm4 \n\
179
pfacc %%mm4, %%mm4 \n\
180
pfmul %%mm5, %%mm5 \n\
181
pfacc %%mm5, %%mm5 \n\
182
pfmul %%mm6, %%mm6 \n\
183
pfacc %%mm6, %%mm6 \n\
184
pfmul %%mm7, %%mm7 \n\
185
pfacc %%mm7, %%mm7 \n\
186
# Recip square roots. \n\
187
pfrsqrt %%mm0, %%mm0 \n\
188
pfrsqrt %%mm1, %%mm1 \n\
189
pfrsqrt %%mm2, %%mm2 \n\
190
pfrsqrt %%mm3, %%mm3 \n\
191
pfrsqrt %%mm4, %%mm4 \n\
192
pfrsqrt %%mm5, %%mm5 \n\
193
pfrsqrt %%mm6, %%mm6 \n\
194
pfrsqrt %%mm7, %%mm7 \n\
195
pfrcp %%mm0, %%mm0 \n\
196
pfrcp %%mm1, %%mm1 \n\
197
pfrcp %%mm2, %%mm2 \n\
198
pfrcp %%mm3, %%mm3 \n\
199
pfrcp %%mm4, %%mm4 \n\
200
pfrcp %%mm5, %%mm5 \n\
201
pfrcp %%mm6, %%mm6 \n\
202
pfrcp %%mm7, %%mm7 \n\
204
movd %%mm0, (%%edx) \n\
205
movd %%mm1, 4(%%edx) \n\
206
movd %%mm2, 8(%%edx) \n\
207
movd %%mm3, 12(%%edx) \n\
208
movd %%mm4, 16(%%edx) \n\
209
movd %%mm5, 20(%%edx) \n\
210
movd %%mm6, 24(%%edx) \n\
211
movd %%mm7, 28(%%edx) \n\
220
mb[0] = sqrt(ri[0]*ri[0]+ ri[1]*ri[1]);
221
mb[1] = sqrt(ri[2]*ri[2] + ri[3]*ri[3]);
222
mb[2] = sqrt(ri[4]*ri[4] + ri[5]*ri[5]);
223
mb[3] = sqrt(ri[6]*ri[6] + ri[7]*ri[7]);
226
phaseArr[k] = atan2(ri[1], ri[0]);
227
phaseArr[k+1] = atan2(ri[3], ri[2]);
228
phaseArr[k+2] = atan2(ri[5], ri[4]);
229
phaseArr[k+3] = atan2(ri[7], ri[6]);
230
phaseArr[k+4] = atan2(ri[9], ri[8]);
231
phaseArr[k+5] = atan2(ri[11], ri[10]);
232
phaseArr[k+6] = atan2(ri[13], ri[12]);
233
phaseArr[k+7] = atan2(ri[15], ri[14]);
236
for (k = 1; k <= fftFrameSize2; k++) {
238
/* compute phase difference */
239
tmp = phaseArr[k] - gLastPhase[k];
240
gLastPhase[k] = phaseArr[k];
242
/* subtract expected phase difference */
243
tmp -= (double)k*expct;
245
/* map delta phase into +/- Pi interval */
247
if (qpd >= 0) qpd += qpd&1;
249
tmp -= M_PI*(double)qpd;
251
/* get deviation from bin frequency from the +/- Pi interval */
252
tmp = osamp*tmp/(2.0f*M_PI);
254
/* compute the k-th partials' true frequency */
255
tmp = (double)k*freqPerBin + tmp*freqPerBin;
257
/* store magnitude and true frequency in analysis arrays */
262
/* ***************** PROCESSING ******************* */
263
/* this does the actual pitch scaling */
264
memset(gSynMagn, 0, fftFrameLength*sizeof(float));
265
memset(gSynFreq, 0, fftFrameLength*sizeof(float));
266
for (k = 0; k <= fftFrameSize2; k++) {
267
index = k/pitchScale;
268
if (index <= fftFrameSize2) {
269
/* new bin overrides existing if magnitude is higher */
270
if (gAnaMagn[index] > gSynMagn[k]) {
271
gSynMagn[k] = gAnaMagn[index];
272
gSynFreq[k] = gAnaFreq[index] * pitchScale;
275
/* fill empty bins with nearest neighbour */
277
if ((gSynFreq[k] == 0.) && (k > 0)) {
278
gSynFreq[k] = gSynFreq[k-1];
279
gSynMagn[k] = gSynMagn[k-1];
285
/* ***************** SYNTHESIS ******************* */
286
/* this is the synthesis step */
287
for (k = 1; k <= fftFrameSize2; k++) {
289
/* get magnitude and true frequency from synthesis arrays */
293
/* subtract bin mid frequency */
294
tmp -= (double)k*freqPerBin;
296
/* get bin deviation from freq deviation */
299
/* take osamp into account */
300
tmp = 2.*M_PI*tmp/osamp;
302
/* add the overlap phase advance back in */
303
tmp += (double)k*expct;
305
/* accumulate delta phase to get bin phase */
307
phase = gSumPhase[k];
309
ps_in[k] = magn*cosf(phase);
310
ps_in[fftFrameLength - k] = magn*sinf(phase);
313
/* do inverse transform */
315
fftwf_execute(splan);
317
rfftw_one(splan, ps_in, ps_out);
320
/* do windowing and add to output accumulator */
321
for(k=0; k < fftFrameLength; k++) {
322
gOutputAccum[k] += 2.0f*gWindow[k]*ps_out[k]/(fftFrameSize2*osamp);
324
for (k = 0; k < stepSize; k++) gOutFIFO[k] = gOutputAccum[k];
326
/* shift accumulator */
327
memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameLength*sizeof(float));
329
/* move input FIFO */
330
for (k = 0; k < inFifoLatency; k++) gInFIFO[k] = gInFIFO[k+stepSize];
334
buffers->gRover = gRover;