1
/* Copyright (C) 2003-2006 Jean-Marc Valin
4
Echo canceller based on the MDF algorithm (see below)
6
Redistribution and use in source and binary forms, with or without
7
modification, are permitted provided that the following conditions are
10
1. Redistributions of source code must retain the above copyright notice,
11
this list of conditions and the following disclaimer.
13
2. Redistributions in binary form must reproduce the above copyright
14
notice, this list of conditions and the following disclaimer in the
15
documentation and/or other materials provided with the distribution.
17
3. The name of the author may not be used to endorse or promote products
18
derived from this software without specific prior written permission.
20
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
POSSIBILITY OF SUCH DAMAGE.
34
The echo canceller is based on the MDF algorithm described in:
36
J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37
IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
40
We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41
double-talk is achieved using a variable learning rate as described in:
43
Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44
Cancellation With Double-Talk. IEEE Transactions on Audio,
45
Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46
http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
48
There is no explicit double-talk detection, but a continuous variation
49
in the learning rate based on residual echo, double-talk and background
52
About the fixed-point version:
53
All the signals are represented with 16-bit words. The filter weights
54
are represented with 32-bit words, but only the top 16 bits are used
55
in most cases. The lower 16 bits are completely unreliable (due to the
56
fact that the update is done only on the top bits), but help in the
57
adaptation -- probably by removing a "threshold effect" due to
58
quantization (rounding going to zero) when the gradient is small.
60
Another kludge that seems to work good: when performing the weight
61
update, we only move half the way toward the "goal" this seems to
62
reduce the effect of quantization noise in the update phase. This
63
can be seen as applying a gradient descent on a "soft constraint"
64
instead of having a hard constraint.
73
#include "speex/speex_echo.h"
75
#include "pseudofloat.h"
76
#include "math_approx.h"
77
#include "os_support.h"
80
#define M_PI 3.14159265358979323846
84
#define WEIGHT_SHIFT 11
85
#define NORMALIZE_SCALEDOWN 5
86
#define NORMALIZE_SCALEUP 3
88
#define WEIGHT_SHIFT 0
91
/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92
and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
96
static const spx_float_t MIN_LEAK = {20972, -22};
98
/* Constants for the two-path filter */
99
static const spx_float_t VAR1_SMOOTH = {23593, -16};
100
static const spx_float_t VAR2_SMOOTH = {23675, -15};
101
static const spx_float_t VAR1_UPDATE = {16384, -15};
102
static const spx_float_t VAR2_UPDATE = {16384, -16};
103
static const spx_float_t VAR_BACKTRACK = {16384, -12};
104
#define TOP16(x) ((x)>>16)
108
static const spx_float_t MIN_LEAK = .005f;
110
/* Constants for the two-path filter */
111
static const spx_float_t VAR1_SMOOTH = .36f;
112
static const spx_float_t VAR2_SMOOTH = .7225f;
113
static const spx_float_t VAR1_UPDATE = .5f;
114
static const spx_float_t VAR2_UPDATE = .25f;
115
static const spx_float_t VAR_BACKTRACK = 4.f;
120
#define PLAYBACK_DELAY 2
122
void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
125
/** Speex echo cancellation state. */
126
struct SpeexEchoState_ {
127
int frame_size; /**< Number of samples processed each time */
134
spx_int32_t sampling_rate;
135
spx_word16_t spec_average;
137
spx_word16_t beta_max;
138
spx_word32_t sum_adapt;
139
spx_word16_t leak_estimate;
141
spx_word16_t *e; /* scratch */
142
spx_word16_t *x; /* Far-end input buffer (2N) */
143
spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
144
spx_word16_t *input; /* scratch */
145
spx_word16_t *y; /* scratch */
146
spx_word16_t *last_y;
147
spx_word16_t *Y; /* scratch */
149
spx_word32_t *PHI; /* scratch */
150
spx_word32_t *W; /* (Background) filter weights */
152
spx_word16_t *foreground; /* Foreground filter weights */
153
spx_word32_t Davg1; /* 1st recursive average of the residual power difference */
154
spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */
155
spx_float_t Dvar1; /* Estimated variance of 1st estimator */
156
spx_float_t Dvar2; /* Estimated variance of 2nd estimator */
158
spx_word32_t *power; /* Power of the far-end signal */
159
spx_float_t *power_1;/* Inverse power of far-end */
160
spx_word16_t *wtmp; /* scratch */
162
spx_word16_t *wtmp2; /* scratch */
164
spx_word32_t *Rf; /* scratch */
165
spx_word32_t *Yf; /* scratch */
166
spx_word32_t *Xf; /* scratch */
171
spx_word16_t *window;
174
spx_word16_t memX, memD, memE;
175
spx_word16_t preemph;
176
spx_word16_t notch_radius;
177
spx_mem_t notch_mem[2];
179
/* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
180
spx_int16_t *play_buf;
182
int play_buf_started;
185
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
190
den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
192
den2 = radius*radius + .7*(1-radius)*(1-radius);
194
/*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
197
spx_word16_t vin = in[i];
198
spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
200
mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
202
mem[0] = mem[1] + 2*(-vin + radius*vout);
204
mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
205
out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
209
/* This inner product is slightly different from the codec version because of fixed-point */
210
static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
217
part = MAC16_16(part,*x++,*y++);
218
part = MAC16_16(part,*x++,*y++);
219
/* HINT: If you had a 40-bit accumulator, you could shift only at the end */
220
sum = ADD32(sum,SHR32(part,6));
225
/** Compute power spectrum of a half-complex (packed) vector */
226
static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
229
ps[0]=MULT16_16(X[0],X[0]);
230
for (i=1,j=1;i<N-1;i+=2,j++)
232
ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
234
ps[j]=MULT16_16(X[i],X[i]);
237
/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
239
static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
242
spx_word32_t tmp1=0,tmp2=0;
245
tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
247
acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
253
tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
254
tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
256
acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
257
acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
262
tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
264
acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
266
static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
269
spx_word32_t tmp1=0,tmp2=0;
272
tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
274
acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
280
tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
281
tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
283
acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
284
acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
289
tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
291
acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
295
static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
305
acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
306
acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
313
#define spectral_mul_accum16 spectral_mul_accum
316
/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
317
static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
321
W = FLOAT_AMULT(p, w[0]);
322
prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
323
for (i=1,j=1;i<N-1;i+=2,j++)
325
W = FLOAT_AMULT(p, w[j]);
326
prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
327
prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
329
W = FLOAT_AMULT(p, w[j]);
330
prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
333
static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)
336
spx_word16_t max_sum = 1;
337
spx_word32_t prop_sum = 1;
340
spx_word32_t tmp = 1;
342
tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18)));
344
/* Just a security in case an overflow were to occur */
345
tmp = MIN32(ABS32(tmp), 536870912);
347
prop[i] = spx_sqrt(tmp);
348
if (prop[i] > max_sum)
353
prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
354
prop_sum += EXTEND32(prop[i]);
358
prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
359
/*printf ("%f ", prop[i]);*/
364
#ifdef DUMP_ECHO_CANCEL_DATA
366
static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
368
static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
370
if (!(rFile && pFile && oFile))
372
speex_fatal("Dump files not open");
374
fwrite(rec, sizeof(spx_int16_t), len, rFile);
375
fwrite(play, sizeof(spx_int16_t), len, pFile);
376
fwrite(out, sizeof(spx_int16_t), len, oFile);
380
/** Creates a new echo canceller state */
381
SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
384
SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
386
#ifdef DUMP_ECHO_CANCEL_DATA
387
if (rFile || pFile || oFile)
388
speex_fatal("Opening dump files twice");
389
rFile = fopen("aec_rec.sw", "wb");
390
pFile = fopen("aec_play.sw", "wb");
391
oFile = fopen("aec_out.sw", "wb");
394
st->frame_size = frame_size;
395
st->window_size = 2*frame_size;
397
M = st->M = (filter_length+st->frame_size-1)/frame_size;
402
/* This is the default sampling rate */
403
st->sampling_rate = 8000;
404
st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
406
st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
407
st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
409
st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
410
st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
412
st->leak_estimate = 0;
414
st->fft_table = spx_fft_init(N);
416
st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
417
st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
418
st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t));
419
st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
420
st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
421
st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
422
st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
423
st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
424
st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
425
st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
427
st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t));
428
st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
429
st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
430
st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
432
st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
434
st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
435
st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
436
st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
437
st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
438
st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
439
st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
441
st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
444
st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
445
st->window[N-i-1] = st->window[i];
449
st->window[i] = .5-.5*cos(2*M_PI*i/N);
451
for (i=0;i<=st->frame_size;i++)
452
st->power_1[i] = FLOAT_ONE;
456
spx_word32_t sum = 0;
457
/* Ratio of ~10 between adaptation rate of first and last block */
458
spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
459
st->prop[0] = QCONST16(.7, 15);
460
sum = EXTEND32(st->prop[0]);
463
st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
464
sum = ADD32(sum, EXTEND32(st->prop[i]));
468
st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum);
472
st->memX=st->memD=st->memE=0;
473
st->preemph = QCONST16(.9,15);
474
if (st->sampling_rate<12000)
475
st->notch_radius = QCONST16(.9, 15);
476
else if (st->sampling_rate<24000)
477
st->notch_radius = QCONST16(.982, 15);
479
st->notch_radius = QCONST16(.992, 15);
481
st->notch_mem[0] = st->notch_mem[1] = 0;
483
st->Pey = st->Pyy = FLOAT_ONE;
486
st->Davg1 = st->Davg2 = 0;
487
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
490
st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
491
st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
492
st->play_buf_started = 0;
497
/** Resets echo canceller state */
498
void speex_echo_state_reset(SpeexEchoState *st)
509
st->foreground[i] = 0;
511
for (i=0;i<N*(M+1);i++)
513
for (i=0;i<=st->frame_size;i++)
516
st->power_1[i] = FLOAT_ONE;
520
for (i=0;i<st->frame_size;i++)
529
st->notch_mem[0] = st->notch_mem[1] = 0;
530
st->memX=st->memD=st->memE=0;
535
st->Pey = st->Pyy = FLOAT_ONE;
537
st->Davg1 = st->Davg2 = 0;
538
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
540
for (i=0;i<3*st->frame_size;i++)
542
st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
543
st->play_buf_started = 0;
547
/** Destroys an echo canceller state */
548
void speex_echo_state_destroy(SpeexEchoState *st)
550
spx_fft_destroy(st->fft_table);
554
speex_free(st->input);
556
speex_free(st->last_y);
568
speex_free(st->foreground);
571
speex_free(st->power);
572
speex_free(st->power_1);
573
speex_free(st->window);
574
speex_free(st->prop);
575
speex_free(st->wtmp);
577
speex_free(st->wtmp2);
579
speex_free(st->play_buf);
582
#ifdef DUMP_ECHO_CANCEL_DATA
586
rFile = pFile = oFile = NULL;
590
void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
593
/*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
594
st->play_buf_started = 1;
595
if (st->play_buf_pos>=st->frame_size)
597
speex_echo_cancellation(st, rec, st->play_buf, out);
598
st->play_buf_pos -= st->frame_size;
599
for (i=0;i<st->play_buf_pos;i++)
600
st->play_buf[i] = st->play_buf[i+st->frame_size];
602
speex_warning("No playback frame available (your application is buggy and/or got xruns)");
603
if (st->play_buf_pos!=0)
605
speex_warning("internal playback buffer corruption?");
606
st->play_buf_pos = 0;
608
for (i=0;i<st->frame_size;i++)
613
void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
615
/*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
616
if (!st->play_buf_started)
618
speex_warning("discarded first playback frame");
621
if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
624
for (i=0;i<st->frame_size;i++)
625
st->play_buf[st->play_buf_pos+i] = play[i];
626
st->play_buf_pos += st->frame_size;
627
if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
629
speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
630
for (i=0;i<st->frame_size;i++)
631
st->play_buf[st->play_buf_pos+i] = play[i];
632
st->play_buf_pos += st->frame_size;
635
speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
639
/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
640
void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
642
speex_echo_cancellation(st, in, far_end, out);
645
/** Performs echo cancellation on a frame */
646
void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
650
spx_word32_t Syy,See,Sxx,Sdd, Sff;
653
int update_foreground;
656
spx_word16_t ss, ss_1;
657
spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
658
spx_float_t alpha, alpha_1;
666
ss=DIV32_16(11469,M);
667
ss_1 = SUB16(32767,ss);
673
/* Apply a notch filter to make sure DC doesn't end up causing problems */
674
filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
675
/* Copy input data to buffer and apply pre-emphasis */
676
for (i=0;i<st->frame_size;i++)
679
tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
681
/* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
693
st->x[i+st->frame_size] = EXTRACT16(tmp32);
694
st->memX = far_end[i];
696
tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
701
if (st->saturated == 0)
707
if (st->saturated == 0)
711
st->memD = st->input[i];
712
st->input[i] = tmp32;
715
/* Shift memory: this could be optimized eventually*/
719
st->X[(j+1)*N+i] = st->X[j*N+i];
722
/* Convert x (far end) to frequency domain */
723
spx_fft(st->fft_table, st->x, &st->X[0]);
725
st->last_y[i] = st->x[i];
726
Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
727
for (i=0;i<st->frame_size;i++)
728
st->x[i] = st->x[i+st->frame_size];
729
/* From here on, the top part of x is used as scratch space */
732
/* Compute foreground filter */
733
spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);
734
spx_ifft(st->fft_table, st->Y, st->e);
735
for (i=0;i<st->frame_size;i++)
736
st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]);
737
Sff = mdf_inner_prod(st->e, st->e, st->frame_size);
740
/* Adjust proportional adaption rate */
741
mdf_adjust_prop (st->W, N, M, st->prop);
742
/* Compute weight gradient */
743
if (st->saturated == 0)
747
weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N);
749
st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]);
756
/* Update weight to prevent circular convolution (MDF / AUMDF) */
759
/* This is a variant of the Alternatively Updated MDF (AUMDF) */
760
/* Remove the "if" to make this an MDF filter */
761
if (j==0 || st->cancel_count%(M-1) == j-1)
765
st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
766
spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
767
for (i=0;i<st->frame_size;i++)
771
for (i=st->frame_size;i<N;i++)
773
st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
775
spx_fft(st->fft_table, st->wtmp, st->wtmp2);
776
/* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
778
st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
780
spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
781
for (i=st->frame_size;i<N;i++)
785
spx_fft(st->fft_table, st->wtmp, &st->W[j*N]);
790
/* Compute filter response Y */
791
spectral_mul_accum(st->X, st->W, st->Y, N, M);
792
spx_ifft(st->fft_table, st->Y, st->y);
795
/* Difference in response, this is used to estimate the variance of our residual power estimate */
796
for (i=0;i<st->frame_size;i++)
797
st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]);
798
Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size);
801
for (i=0;i<st->frame_size;i++)
802
st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
803
See = mdf_inner_prod(st->e, st->e, st->frame_size);
809
/* Logic for updating the foreground filter */
811
/* For two time windows, compute the mean of the energy difference, as well as the variance */
812
st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
813
st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
814
st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
815
st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
817
/* Equivalent float code:
818
st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
819
st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
820
st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
821
st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
824
update_foreground = 0;
825
/* Check if we have a statistically significant reduction in the residual echo */
826
/* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
827
if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
828
update_foreground = 1;
829
else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
830
update_foreground = 1;
831
else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
832
update_foreground = 1;
835
if (update_foreground)
837
st->Davg1 = st->Davg2 = 0;
838
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
839
/* Copy background filter to foreground filter */
841
st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
842
/* Apply a smooth transition so as to not introduce blocking artifacts */
843
for (i=0;i<st->frame_size;i++)
844
st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
846
int reset_background=0;
847
/* Otherwise, check if the background filter is significantly worse */
848
if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
849
reset_background = 1;
850
if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
851
reset_background = 1;
852
if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
853
reset_background = 1;
854
if (reset_background)
856
/* Copy foreground filter to background filter */
858
st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
859
/* We also need to copy the output so as to get correct adaptation */
860
for (i=0;i<st->frame_size;i++)
861
st->y[i+st->frame_size] = st->e[i+st->frame_size];
862
for (i=0;i<st->frame_size;i++)
863
st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
865
st->Davg1 = st->Davg2 = 0;
866
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
871
/* Compute error signal (for the output with de-emphasis) */
872
for (i=0;i<st->frame_size;i++)
874
spx_word32_t tmp_out;
876
tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size]));
878
tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size]));
883
else if (tmp_out<-32768)
885
tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
886
/* This is an arbitrary test for saturation in the microphone signal */
887
if (in[i] <= -32000 || in[i] >= 32000)
890
if (st->saturated == 0)
893
out[i] = (spx_int16_t)tmp_out;
897
#ifdef DUMP_ECHO_CANCEL_DATA
898
dump_audio(in, far_end, out, st->frame_size);
901
/* Compute error signal (filter update version) */
902
for (i=0;i<st->frame_size;i++)
904
st->e[i+st->frame_size] = st->e[i];
908
/* Compute a bunch of correlations */
909
Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
910
Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
911
Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
913
/*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
915
/* Do some sanity check */
916
if (!(Syy>=0 && Sxx>=0 && See >= 0)
918
|| !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
922
/* Things have gone really bad */
923
st->screwed_up += 50;
924
for (i=0;i<st->frame_size;i++)
926
} else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
928
/* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
931
/* Everything's fine */
934
if (st->screwed_up>=50)
936
speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
937
speex_echo_state_reset(st);
941
/* Add a small noise floor to make sure not to have problems when dividing */
942
See = MAX32(See, SHR32(MULT16_16(N, 100),6));
944
/* Convert error to frequency domain */
945
spx_fft(st->fft_table, st->e, st->E);
946
for (i=0;i<st->frame_size;i++)
948
spx_fft(st->fft_table, st->y, st->Y);
950
/* Compute power spectrum of far end (X), error (E) and filter response (Y) */
951
power_spectrum(st->E, st->Rf, N);
952
power_spectrum(st->Y, st->Yf, N);
953
power_spectrum(st->X, st->Xf, N);
955
/* Smooth far end energy estimate over time */
956
for (j=0;j<=st->frame_size;j++)
957
st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
959
/* Enable this to compute the power based only on the tail (would need to compute more
960
efficiently to make this really useful */
963
float scale2 = .5f/M;
964
for (j=0;j<=st->frame_size;j++)
968
power_spectrum(&st->X[i*N], st->Xf, N);
969
for (j=0;j<=st->frame_size;j++)
970
st->power[j] += scale2*st->Xf[j];
974
/* Compute filtered spectra and (cross-)correlations */
975
for (j=st->frame_size;j>=0;j--)
978
Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
979
Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
980
Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
981
Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
983
st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
984
st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
986
st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
987
st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
991
Pyy = FLOAT_SQRT(Pyy);
992
Pey = FLOAT_DIVU(Pey,Pyy);
994
/* Compute correlation updatete rate */
995
tmp32 = MULT16_32_Q15(st->beta0,Syy);
996
if (tmp32 > MULT16_32_Q15(st->beta_max,See))
997
tmp32 = MULT16_32_Q15(st->beta_max,See);
998
alpha = FLOAT_DIV32(tmp32, See);
999
alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1000
/* Update correlations (recursive average) */
1001
st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1002
st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1003
if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1004
st->Pyy = FLOAT_ONE;
1005
/* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1006
if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1007
st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1008
if (FLOAT_GT(st->Pey, st->Pyy))
1010
/* leak_estimate is the linear regression result */
1011
st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1012
/* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1013
if (st->leak_estimate > 16383)
1014
st->leak_estimate = 32767;
1016
st->leak_estimate = SHL16(st->leak_estimate,1);
1017
/*printf ("%f\n", st->leak_estimate);*/
1019
/* Compute Residual to Error Ratio */
1021
tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1022
tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1023
/* Check for y in e (lower bound on RER) */
1025
spx_float_t bound = PSEUDOFLOAT(Sey);
1026
bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1027
if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1029
else if (tmp32 < FLOAT_EXTRACT32(bound))
1030
tmp32 = FLOAT_EXTRACT32(bound);
1032
if (tmp32 > SHR32(See,1))
1033
tmp32 = SHR32(See,1);
1034
RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1036
RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1037
/* Check for y in e (lower bound on RER) */
1038
if (RER < Sey*Sey/(1+See*Syy))
1039
RER = Sey*Sey/(1+See*Syy);
1044
/* We consider that the filter has had minimal adaptation if the following is true*/
1045
if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1052
/* Normal learning rate calculation once we're past the minimal adaptation phase */
1053
for (i=0;i<=st->frame_size;i++)
1056
/* Compute frequency-domain adaptation mask */
1057
r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1058
e = SHL32(st->Rf[i],3)+1;
1066
r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1067
/*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1068
st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1071
/* Temporary adaption rate if filter is not yet adapted enough */
1072
spx_word16_t adapt_rate=0;
1074
if (Sxx > SHR32(MULT16_16(N, 1000),6))
1076
tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1078
if (tmp32 > SHR32(See,2))
1079
tmp32 = SHR32(See,2);
1081
if (tmp32 > .25*See)
1084
adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1086
for (i=0;i<=st->frame_size;i++)
1087
st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1090
/* How much have we adapted so far? */
1091
st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1094
/* Save residual echo so it can be used by the nonlinear processor */
1097
/* If the filter is adapted, take the filtered echo */
1098
for (i=0;i<st->frame_size;i++)
1099
st->last_y[i] = st->last_y[st->frame_size+i];
1100
for (i=0;i<st->frame_size;i++)
1101
st->last_y[st->frame_size+i] = in[i]-out[i];
1103
/* If filter isn't adapted yet, all we can do is take the far end signal directly */
1104
/* moved earlier: for (i=0;i<N;i++)
1105
st->last_y[i] = st->x[i];*/
1110
/* Compute spectrum of estimated echo for use in an echo post-filter */
1111
void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1117
N = st->window_size;
1119
/* Apply hanning window (should pre-compute it)*/
1121
st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1123
/* Compute power spectrum of the echo */
1124
spx_fft(st->fft_table, st->y, st->Y);
1125
power_spectrum(st->Y, residual_echo, N);
1128
if (st->leak_estimate > 16383)
1131
leak2 = SHL16(st->leak_estimate, 1);
1133
if (st->leak_estimate>.5)
1136
leak2 = 2*st->leak_estimate;
1138
/* Estimate residual echo */
1139
for (i=0;i<=st->frame_size;i++)
1140
residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1144
int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1149
case SPEEX_ECHO_GET_FRAME_SIZE:
1150
(*(int*)ptr) = st->frame_size;
1152
case SPEEX_ECHO_SET_SAMPLING_RATE:
1153
st->sampling_rate = (*(int*)ptr);
1154
st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1156
st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1157
st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1159
st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1160
st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1162
if (st->sampling_rate<12000)
1163
st->notch_radius = QCONST16(.9, 15);
1164
else if (st->sampling_rate<24000)
1165
st->notch_radius = QCONST16(.982, 15);
1167
st->notch_radius = QCONST16(.992, 15);
1169
case SPEEX_ECHO_GET_SAMPLING_RATE:
1170
(*(int*)ptr) = st->sampling_rate;
1173
speex_warning_int("Unknown speex_echo_ctl request: ", request);