1
/* Copyright (C) 2002-2006 Jean-Marc Valin
4
Redistribution and use in source and binary forms, with or without
5
modification, are permitted provided that the following conditions
8
- Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
11
- Redistributions in binary form must reproduce the above copyright
12
notice, this list of conditions and the following disclaimer in the
13
documentation and/or other materials provided with the distribution.
15
- Neither the name of the Xiph.org Foundation nor the names of its
16
contributors may be used to endorse or promote products derived from
17
this software without specific prior written permission.
19
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41
#include "quant_lsp.h"
42
#include "cb_search.h"
44
#include "stack_alloc.h"
46
#include <speex/speex_bits.h>
49
#include "math_approx.h"
50
#include "os_support.h"
51
#include <speex/speex_callbacks.h>
54
#include "vorbis_psy.h"
58
#define M_PI 3.14159265358979323846 /* pi */
65
/* Shorthand for field x of the currently selected sub-mode.
   The expansion needs a variable named `st` in scope and dereferences
   st->submodes[st->submodeID] without a NULL check; the table can contain
   NULL entries (nb_encode tests for one before coding a frame). */
#define SUBMODE(x) st->submodes[st->submodeID]->x
67
/* Default size for the encoder and decoder stack (can be changed at compile time).
68
This does not apply when using variable-size arrays or alloca. */
70
#define NB_ENC_STACK (8000*sizeof(spx_sig_t))
74
#define NB_DEC_STACK (4000*sizeof(spx_sig_t))
79
/* NOTE(review): from here on several names are defined twice (integer
   tables followed by float tables, two LSP_DELTA pairs, two EXTRA_BUFFER
   values). The preprocessor conditionals that presumably select one variant
   per build are not visible in this excerpt. */
/* Levels for the open-loop excitation gain: nb_encode searches this table
   with scal_quant32() and transmits the resulting index in 5 bits. */
const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927};
80
/* Sub-frame gain correction, 3-bit variant: nb_encode passes the _bound
   table to scal_quant() and scales ol_gain by the matching scal3 entry. */
const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560};
81
const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740};
82
/* Sub-frame gain correction, 1-bit variant (same usage pattern as above). */
const spx_word16_t exc_gain_quant_scal1_bound[1]={14385};
83
const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224};
86
/* LSP_DELTA1 is the step argument nb_encode hands to lpc_to_lsp();
   LSP_DELTA2 is not referenced in the visible part of this file. */
#define LSP_DELTA1 6553
87
#define LSP_DELTA2 1638
91
/* Float counterparts of the gain correction tables above. */
const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f};
92
const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f};
93
const float exc_gain_quant_scal1_bound[1]={0.87798f};
94
const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f};
96
/* Margin passed to lsp_enforce_margin() by nb_encode. Only this float
   definition is visible here. */
#define LSP_MARGIN .002f
97
#define LSP_DELTA1 .2f
98
#define LSP_DELTA2 .05f
103
/* EXTRA_BUFFER is not referenced in the visible part of this file. */
#define EXTRA_BUFFER 100
105
#define EXTRA_BUFFER 0
109
/* Square of x. The argument is expanded twice, so avoid side effects. */
#define sqr(x) ((x)*(x))
111
/* Window tables defined in another translation unit; nb_encoder_init
   stores them in st->lagWindow and st->window respectively. */
extern const spx_word16_t lag_window[];
112
extern const spx_word16_t lpc_window[];
114
/* Allocate and initialise a narrowband encoder state.
   m: mode descriptor; m->mode is read as a SpeexNBMode holding the frame,
      sub-frame, LPC and pitch parameters copied below.
   Every buffer allocated here is released by nb_encoder_destroy().
   NOTE(review): the braces, some declarations, several preprocessor lines and
   the return statement of this function are not visible in this excerpt;
   presumably it returns st. */
void *nb_encoder_init(const SpeexMode *m)
117
const SpeexNBMode *mode;
120
mode=(const SpeexNBMode *)m->mode;
121
st = (EncState*)speex_alloc(sizeof(EncState));
124
/* NOTE(review): the #else/#endif of this conditional are not visible.
   nb_encoder_destroy frees st->stack only when neither macro is defined,
   which suggests the scratch allocation below belongs to the #else side. */
#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
127
st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
132
/* Copy framing and analysis parameters from the mode description */
st->frameSize = mode->frameSize;
133
st->nbSubframes=mode->frameSize/mode->subframeSize;
134
st->subframeSize=mode->subframeSize;
135
/* The analysis window spans one frame plus one sub-frame */
st->windowSize = st->frameSize+st->subframeSize;
136
st->lpcSize = mode->lpcSize;
137
st->gamma1=mode->gamma1;
138
st->gamma2=mode->gamma2;
139
st->min_pitch=mode->pitchStart;
140
st->max_pitch=mode->pitchEnd;
141
st->lpc_floor = mode->lpc_floor;
143
st->submodes=mode->submodes;
144
/* Both the active and the user-selected sub-mode start at the mode default */
st->submodeID=st->submodeSelect=mode->defaultSubmode;
145
st->bounded_pitch = 1;
147
st->encode_submode = 1;
150
/* Psychoacoustic model state and its 128-point curves / 256-sample window.
   NOTE(review): presumably compiled only when the vorbis_psy support is
   enabled; the guarding conditional is not visible here. */
st->psy = vorbis_psy_init(8000, 256);
151
st->curve = (float*)speex_alloc(128*sizeof(float));
152
st->old_curve = (float*)speex_alloc(128*sizeof(float));
153
st->psy_window = (float*)speex_alloc(256*sizeof(float));
156
st->cumul_gain = 1024;
158
/* Allocating input buffer */
/* Holds the windowSize-frameSize samples carried over between frames */
st->winBuf = (spx_word16_t*)speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t));
159
/* Allocating excitation buffer */
/* st->exc points pitchEnd+2 samples into excBuf, leaving that much history
   in front of the current frame; swBuf/sw use the same layout. */
st->excBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
160
st->exc = st->excBuf + mode->pitchEnd + 2;
161
st->swBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
162
st->sw = st->swBuf + mode->pitchEnd + 2;
164
st->window= lpc_window;
166
/* Create the window for autocorrelation (lag-windowing) */
/* (the table itself is the extern lag_window; nothing is computed here) */
st->lagWindow = lag_window;
168
st->old_lsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
169
st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
171
/* Initial LSPs: (i+1)*pi/(lpcSize+1), with pi expressed via
   QCONST16(3.1415927f, LSP_SHIFT), i.e. evenly spaced values */
for (i=0;i<st->lpcSize;i++)
172
st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
174
/* Filter memories, one entry per LPC coefficient */
st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
175
st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
176
st->mem_sw_whole = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
177
st->mem_exc = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
178
st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
180
/* Per-sub-frame outputs filled in by nb_encode */
st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
181
/* nb_encode stores innovation RMS values only when this is non-NULL */
st->innov_rms_save = NULL;
183
st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int));
186
/* NOTE(review): the lines between this allocation and the #endif below
   (including the matching #ifndef) are not visible in this excerpt. */
st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
189
#endif /* #ifndef DISABLE_VBR */
200
st->sampling_rate=8000;
204
st->highpass_enabled = 1;
206
#ifdef ENABLE_VALGRIND
208
/* NOTE(review): the length passed is NB_ENC_STACK although the pointer is
   the state struct; confirm this is intended. */
VALGRIND_MAKE_READABLE(st, NB_ENC_STACK);
214
/* Release everything allocated by nb_encoder_init().
   state: encoder state previously returned by nb_encoder_init.
   NOTE(review): the braces, the preprocessor lines closing the conditionals
   and the final release of the state struct itself are not visible in this
   excerpt. */
void nb_encoder_destroy(void *state)
216
EncState *st=(EncState *)state;
217
/* Free all allocated memory */
/* The scratch stack exists only when neither VAR_ARRAYS nor USE_ALLOCA is defined */
#if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
218
speex_free_scratch(st->stack);
219
/* Signal buffers */
speex_free (st->winBuf);
222
speex_free (st->excBuf);
223
speex_free (st->old_qlsp);
224
speex_free (st->swBuf);
225
/* LSP history and filter memories */
speex_free (st->old_lsp);
227
speex_free (st->mem_sp);
228
speex_free (st->mem_sw);
229
speex_free (st->mem_sw_whole);
230
speex_free (st->mem_exc);
231
speex_free (st->mem_exc2);
232
speex_free (st->pi_gain);
233
speex_free (st->pitch);
234
/* VBR state: tear down its contents first, then release the struct */
vbr_destroy(st->vbr);
237
speex_free (st->vbr);
238
#endif /* #ifndef DISABLE_VBR */
239
/* Psychoacoustic model state and buffers (guarding conditional not visible) */
vorbis_psy_destroy(st->psy);
242
speex_free (st->curve);
243
speex_free (st->old_curve);
244
speex_free (st->psy_window);
245
/*Free state memory... should be last*/
252
/* Encode one narrowband frame.
   state: encoder state created by nb_encoder_init (cast to EncState).
   vin:   input samples, read as spx_word16_t; st->frameSize of them are used.
          When the high-pass filter is enabled the same pointer is passed as
          both its input and its output.
   bits:  bit packer that receives the coded frame.
   Visible steps: LPC/LSP analysis, whole-frame open-loop pitch and gain,
   optional rate control, frame-level parameter packing, then a per-sub-frame
   pitch and innovation search with filter-memory updates.
   NOTE(review): this excerpt omits the braces, many preprocessor conditionals,
   several local declarations (st, i, sub, stack, ...) and the return
   statements. Where two versions of the same statement follow each other they
   are presumably alternative build variants (fixed-point vs. float, or
   optional features) selected by conditionals that are not visible. */
int nb_encode(void *state, void *vin, SpeexBits *bits)
257
spx_word16_t ol_pitch_coef;
258
spx_word32_t ol_gain;
259
VARDECL(spx_word16_t *ringing);
260
VARDECL(spx_word16_t *target);
261
VARDECL(spx_sig_t *innov);
262
VARDECL(spx_word32_t *exc32);
263
VARDECL(spx_mem_t *mem);
264
VARDECL(spx_coef_t *bw_lpc1);
265
VARDECL(spx_coef_t *bw_lpc2);
266
VARDECL(spx_coef_t *lpc);
267
VARDECL(spx_lsp_t *lsp);
268
VARDECL(spx_lsp_t *qlsp);
269
VARDECL(spx_lsp_t *interp_lsp);
270
VARDECL(spx_lsp_t *interp_qlsp);
271
VARDECL(spx_coef_t *interp_lpc);
272
VARDECL(spx_coef_t *interp_qlpc);
274
VARDECL(spx_word16_t *syn_resp);
275
VARDECL(spx_word16_t *real_exc);
278
spx_word16_t fine_gain;
279
spx_word16_t *in = (spx_word16_t*)vin;
281
st=(EncState *)state;
284
/* Per-frame LPC/LSP work arrays, each st->lpcSize entries long */
ALLOC(lpc, st->lpcSize, spx_coef_t);
285
ALLOC(bw_lpc1, st->lpcSize, spx_coef_t);
286
ALLOC(bw_lpc2, st->lpcSize, spx_coef_t);
287
ALLOC(lsp, st->lpcSize, spx_lsp_t);
288
ALLOC(qlsp, st->lpcSize, spx_lsp_t);
289
ALLOC(interp_lsp, st->lpcSize, spx_lsp_t);
290
ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
291
ALLOC(interp_lpc, st->lpcSize, spx_coef_t);
292
ALLOC(interp_qlpc, st->lpcSize, spx_coef_t);
294
/* Move signals 1 frame towards the past */
295
SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, st->max_pitch+2);
296
SPEEX_MOVE(st->swBuf, st->swBuf+st->frameSize, st->max_pitch+2);
298
/* Optional input high-pass; `in` is passed as both source and destination */
if (st->highpass_enabled)
299
highpass(in, in, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);
302
VARDECL(spx_word16_t *w_sig);
303
VARDECL(spx_word16_t *autocorr);
304
ALLOC(w_sig, st->windowSize, spx_word16_t);
305
ALLOC(autocorr, st->lpcSize+1, spx_word16_t);
306
/* Window for analysis */
/* The first windowSize-frameSize samples come from st->winBuf (saved from
   the previous call), the rest from the start of the new input. */
307
for (i=0;i<st->windowSize-st->frameSize;i++)
308
w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT));
309
for (;i<st->windowSize;i++)
310
w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT));
311
/* Compute auto-correlation */
312
_spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize);
313
autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
315
/* Lag windowing: equivalent to filtering in the power-spectrum domain */
316
for (i=0;i<st->lpcSize+1;i++)
317
autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]);
319
/* Levinson-Durbin */
320
_spx_lpc(lpc, autocorr, st->lpcSize);
321
/* LPC to LSPs (x-domain) transform */
322
roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack);
323
/* Check if we found all the roots */
324
if (roots!=st->lpcSize)
326
/*If we can't find all LSP's, do some damage control and use previous filter*/
327
for (i=0;i<st->lpcSize;i++)
329
lsp[i]=st->old_lsp[i];
337
/* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
339
/* Number of samples taken from winBuf before the new input */
int diff = st->windowSize-st->frameSize;
341
/* NOTE(review): a plain copy followed by an interpolation; the condition
   choosing between them is not visible (presumably the first-frame test). */
for (i=0;i<st->lpcSize;i++)
342
interp_lsp[i] = lsp[i];
344
lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
346
lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
348
/* Compute interpolated LPCs (unquantized) for whole frame*/
349
lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
353
/* The open-loop pitch search runs for the null sub-mode, for sub-modes that
   force the pitch gain or use low-bit-rate pitch, at complexity above 2
   when the sub-frame gain field is below 3, and when VBR or VAD is on.
   The NULL test comes first so the SUBMODE() dereferences are skipped for
   the null sub-mode. */
if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
355
|| st->vbr_enabled || st->vad_enabled
360
spx_word16_t nol_pitch_coef[6];
362
bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
363
bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
365
/* Build the frame to analyse (carried-over samples, then new input) and
   filter it in place with the bandwidth-expanded LPCs */
SPEEX_COPY(st->sw, st->winBuf, diff);
366
SPEEX_COPY(st->sw+diff, in, st->frameSize-diff);
367
filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack);
369
/* Ask for 6 candidates; the first one is the initial estimate */
open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
370
nol_pitch, nol_pitch_coef, 6, stack);
371
ol_pitch=nol_pitch[0];
372
ol_pitch_coef = nol_pitch_coef[0];
373
/*Try to remove pitch multiples*/
/* A candidate replaces ol_pitch when its coefficient exceeds 0.85 of the
   best one (27853 in the integer form) and 2x..5x its lag lies within a
   small tolerance of ol_pitch. The loop header is not visible. */
377
if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
379
if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
381
(ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
382
ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
384
/*ol_pitch_coef=nol_pitch_coef[i];*/
385
ol_pitch = nol_pitch[i];
390
/*ol_pitch_coef = sqrt(ol_pitch_coef);*/
397
/*Compute "real" excitation*/
/* Same frame layout as above, filtered in place by fir_mem16 with the
   unquantized interpolated LPCs */
398
SPEEX_COPY(st->exc, st->winBuf, diff);
399
SPEEX_COPY(st->exc+diff, in, st->frameSize-diff);
400
fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack);
402
/* Compute open-loop excitation gain */
404
spx_word16_t g = compute_rms16(st->exc, st->frameSize);
405
/* Outside sub-mode 1 and with a positive pitch, the RMS is scaled by
   1.1*sqrt(1 - 0.8*ol_pitch_coef^2); otherwise it is only shifted to
   signal scale (the `else` line is not visible). */
if (st->submodeID!=1 && ol_pitch>0)
406
ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
407
spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
409
ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
414
/* Psychoacoustic curve update: slide the 256-sample window by one frame,
   append the new input and recompute st->curve (guard not visible) */
SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize);
415
SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize);
416
compute_curve(st->psy, st->psy_window, st->curve);
417
/*print_vec(st->curve, 128, "curve");*/
419
SPEEX_COPY(st->old_curve, st->curve, 128);
424
/* Rate control (VBR / ABR / VAD). Many lines of this section, including
   loop headers and assignments, are not visible in this excerpt. */
if (st->vbr && (st->vbr_enabled||st->vad_enabled))
427
/* Squared distance between the previous and current LSPs, normalised
   by LSP_SCALING squared */
for (i=0;i<st->lpcSize;i++)
428
lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
429
lsp_dist /= LSP_SCALING*LSP_SCALING;
434
if (st->abr_drift2 * st->abr_drift > 0)
436
/* Only adapt if long-term and short-term drift are the same sign */
437
qual_change = -.00001*st->abr_drift/(1+st->abr_count);
440
if (qual_change<-.05)
443
st->vbr_quality += qual_change;
444
if (st->vbr_quality>10)
446
if (st->vbr_quality<0)
450
st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
451
/*if (delta_qual<0)*/
452
/* delta_qual*=.1*(3+st->vbr_quality);*/
463
/* Threshold for the candidate mode: a table entry, or a linear blend of
   the entries at v1 and v1+1 weighted by the fractional quality */
v1=(int)floor(st->vbr_quality);
465
thresh = vbr_nb_thresh[mode][v1];
467
thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
468
if (st->relative_quality > thresh &&
469
st->relative_quality-thresh<min_diff)
472
min_diff = st->relative_quality-thresh;
479
if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
491
speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
495
speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
496
if (rate > st->vbr_max)
499
speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
506
/* ABR bookkeeping: st->abr_enabled is used as the bit-rate to compare
   against. abr_drift accumulates the error, abr_drift2 is a smoothed
   version of it (0.95/0.05 weights). */
speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
507
st->abr_drift+=(bitrate-st->abr_enabled);
508
st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
509
st->abr_count += 1.0;
515
if (st->relative_quality<2)
517
if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
527
mode=st->submodeSelect;
529
/*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
533
st->relative_quality = -1;
535
#endif /* #ifndef DISABLE_VBR */
537
/* Frame header, written only when st->encode_submode is set */
if (st->encode_submode)
539
/* First, transmit a zero for narrowband */
540
speex_bits_pack(bits, 0, 1);
542
/* Transmit the sub-mode we use for this frame */
543
speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
547
/* If null mode (no transmission), just set a couple things to zero*/
548
if (st->submodes[st->submodeID] == NULL)
550
for (i=0;i<st->frameSize;i++)
551
st->exc[i]=st->sw[i]=VERY_SMALL;
553
for (i=0;i<st->lpcSize;i++)
556
st->bounded_pitch = 1;
558
/* Keep the last windowSize-frameSize input samples for the next call */
SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
560
/* Clear memory (no need to really compute it) */
561
for (i=0;i<st->lpcSize;i++)
567
/* LSP Quantization */
570
for (i=0;i<st->lpcSize;i++)
571
st->old_lsp[i] = lsp[i];
576
#if 1 /*0 for unquantized*/
577
/* The sub-mode's quantizer fills qlsp and writes its indices to bits */
SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);
579
for (i=0;i<st->lpcSize;i++)
583
/*If we use low bit-rate pitch mode, transmit open-loop pitch*/
584
if (SUBMODE(lbr_pitch)!=-1)
586
/* Sent as an offset from min_pitch in 7 bits */
speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
589
if (SUBMODE(forced_pitch_gain))
592
/* This just damps the pitch a bit, because it tends to be too aggressive when forced */
593
ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
595
/* Quantize the pitch gain to a 4-bit index, then replace ol_pitch_coef
   with the dequantized value (quant * 0.066667) so the encoder uses
   the same value as the one transmitted */
quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
597
quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
603
speex_bits_pack(bits, quant, 4);
604
ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
608
/*Quantize and transmit open-loop excitation gain*/
/* Both variants send a 5-bit index and replace ol_gain with a value
   derived from that index */
611
int qe = scal_quant32(ol_gain, ol_gain_table, 32);
612
/*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
613
ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
614
speex_bits_pack(bits, qe, 5);
618
int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
623
ol_gain = exp(qe/3.5)*SIG_SCALING;
624
speex_bits_pack(bits, qe, 5);
630
/* Special case for first frame */
633
for (i=0;i<st->lpcSize;i++)
634
st->old_qlsp[i] = qlsp[i];
638
/* Per-sub-frame work buffers */
ALLOC(target, st->subframeSize, spx_word16_t);
639
ALLOC(innov, st->subframeSize, spx_sig_t);
640
ALLOC(exc32, st->subframeSize, spx_word32_t);
641
ALLOC(ringing, st->subframeSize, spx_word16_t);
642
ALLOC(syn_resp, st->subframeSize, spx_word16_t);
643
ALLOC(real_exc, st->subframeSize, spx_word16_t);
644
ALLOC(mem, st->lpcSize, spx_mem_t);
646
/* Loop on sub-frames */
647
for (sub=0;sub<st->nbSubframes;sub++)
653
/* Length of the impulse response actually computed; halved at complexity 0 */
int response_bound = st->subframeSize;
655
/* Offset relative to start of frame */
656
offset = st->subframeSize*sub;
659
/* Weighted signal */
662
/* LSP interpolation (quantized and unquantized) */
663
lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes);
664
lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
666
/* Make sure the filters are stable */
667
lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
668
lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
670
/* Compute interpolated LPCs (quantized and unquantized) */
671
lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
673
lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack);
675
/* Compute analysis filter gain at w=pi (for use in SB-CELP) */
/* LPC_SCALING plus the sum over coefficient pairs of
   interp_qlpc[i+1] - interp_qlpc[i] */
677
spx_word32_t pi_g=LPC_SCALING;
678
for (i=0;i<st->lpcSize;i+=2)
680
/*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
681
pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
683
st->pi_gain[sub] = pi_g;
688
/* Psychoacoustic variant: blend old and new curves by the sub-frame
   position and derive the weighting filters from the result
   (guarding conditional and loop header not visible) */
float curr_curve[128];
689
float fact = ((float)sub+1.0f)/st->nbSubframes;
691
curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
692
curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
695
/* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
696
bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
698
bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
701
for (i=0;i<st->lpcSize;i++)
704
/*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
707
/*FIXME: This will break if we change the window size */
708
speex_assert(st->windowSize-st->frameSize == st->subframeSize);
711
/* NOTE(review): the condition selecting between the next two loops is
   not visible. The second one indexes `in` at (sub-1)*subframeSize,
   which would be negative for sub==0, so the winBuf loop presumably
   serves the first sub-frame. */
for (i=0;i<st->subframeSize;i++)
712
real_exc[i] = sw[i] = st->winBuf[i];
714
for (i=0;i<st->subframeSize;i++)
715
real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)];
717
fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack);
719
if (st->complexity==0)
720
response_bound >>= 1;
721
compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
722
/* Fill the part of syn_resp beyond response_bound */
for (i=response_bound;i<st->subframeSize;i++)
723
syn_resp[i]=VERY_SMALL;
725
/* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
/* Works on copies of the filter memories (doubled with SHL32) so the
   state in st is left untouched */
726
for (i=0;i<st->lpcSize;i++)
727
mem[i]=SHL32(st->mem_sp[i],1);
728
for (i=0;i<st->subframeSize;i++)
729
ringing[i] = VERY_SMALL;
731
/* Variant limited to response_bound samples.
   NOTE(review): the filter_mem16 call below takes st->bw_lpc1 and
   st->bw_lpc2, whereas every other call in this function uses the local
   bw_lpc1 / bw_lpc2 arrays; check that EncState has these members in
   whichever build enables this variant. */
iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack);
732
for (i=0;i<st->lpcSize;i++)
733
mem[i]=SHL32(st->mem_sw[i],1);
734
filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack);
735
SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound);
737
/* Variant covering the whole sub-frame */
iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack);
738
for (i=0;i<st->lpcSize;i++)
739
mem[i]=SHL32(st->mem_sw[i],1);
740
filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);
743
/* Compute weighted signal */
744
for (i=0;i<st->lpcSize;i++)
745
mem[i]=st->mem_sw[i];
746
filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack);
748
/* At complexity 0 the re-weighting pass at the end of the loop is skipped,
   so the memory from this pass is kept instead */
if (st->complexity==0)
749
for (i=0;i<st->lpcSize;i++)
750
st->mem_sw[i]=mem[i];
752
/* Compute target signal (saturation prevents overflows on clipped input speech) */
/* target = sw - ringing/2 (rounded shift) */
753
for (i=0;i<st->subframeSize;i++)
754
target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767));
756
/* Reset excitation */
757
SPEEX_MEMSET(exc, 0, st->subframeSize);
759
/* If we have a long-term predictor (otherwise, something's wrong) */
760
speex_assert (SUBMODE(ltp_quant));
762
int pit_min, pit_max;
763
/* Long-term prediction */
764
if (SUBMODE(lbr_pitch) != -1)
766
/* Low bit-rate pitch handling */
768
margin = SUBMODE(lbr_pitch);
771
/* Clamp ol_pitch so the search range stays inside
   [min_pitch, max_pitch], then search ol_pitch-margin+1 .. ol_pitch+margin.
   The single-lag and full-range assignments further down belong to
   branches whose conditions are not visible. */
if (ol_pitch < st->min_pitch+margin-1)
772
ol_pitch=st->min_pitch+margin-1;
773
if (ol_pitch > st->max_pitch-margin)
774
ol_pitch=st->max_pitch-margin;
775
pit_min = ol_pitch-margin+1;
776
pit_max = ol_pitch+margin;
778
pit_min=pit_max=ol_pitch;
781
pit_min = st->min_pitch;
782
pit_max = st->max_pitch;
785
/* Force pitch to use only the current frame if needed */
/* (the statement controlled by this test is not visible) */
786
if (st->bounded_pitch && pit_max>offset)
789
/* Perform pitch search */
790
pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
791
exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
792
st->lpcSize, st->subframeSize, bits, stack,
793
exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);
795
st->pitch[sub]=pitch;
797
/* Quantization of innovation */
798
SPEEX_MEMSET(innov, 0, st->subframeSize);
800
/* FIXME: Make sure this is safe from overflows (so far so good) */
/* Subtract the adaptive-codebook part (exc32, rescaled) from the real
   excitation; the energy of what remains drives the gain below */
801
for (i=0;i<st->subframeSize;i++)
802
real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));
804
ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT);
806
/*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
/* Ratio of sub-frame energy to the open-loop gain (two variants) */
809
spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
816
fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
818
/* Calculate gain correction for the sub-frame (if any) */
819
if (SUBMODE(have_subframe_gain))
822
/* A value of 3 selects the 3-bit table; the 1-bit code below belongs
   to the other branch (its `else` line is not visible). In both
   cases ener is replaced by the quantized gain times ol_gain. */
if (SUBMODE(have_subframe_gain)==3)
824
qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
825
speex_bits_pack(bits, qe, 3);
826
ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
828
qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
829
speex_bits_pack(bits, qe, 1);
830
ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
836
/*printf ("%f %f\n", ener, ol_gain);*/
838
/* Normalize innovation */
839
signal_div(target, target, ener, st->subframeSize);
841
/* Quantize innovation */
842
speex_assert (SUBMODE(innovation_quant));
844
/* Codebook search */
845
SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
846
SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
847
innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
849
/* De-normalize innovation and update excitation */
850
signal_mul(innov, innov, ener, st->subframeSize);
852
/* exc = saturate((2*exc32 + innov) >> SIG_SHIFT, rounded) */
for (i=0;i<st->subframeSize;i++)
853
exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
855
/* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
856
if (SUBMODE(double_codebook)) {
857
char *tmp_stack=stack;
858
VARDECL(spx_sig_t *innov2);
859
ALLOC(innov2, st->subframeSize, spx_sig_t);
860
SPEEX_MEMSET(innov2, 0, st->subframeSize);
861
/* The target is scaled by 2.2 for the second search and the result is
   scaled back by 0.454545 (about 1/2.2) times ener */
for (i=0;i<st->subframeSize;i++)
862
target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
863
SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
864
SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
865
innov2, syn_resp, bits, stack, st->complexity, 0);
866
signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
867
for (i=0;i<st->subframeSize;i++)
868
innov[i] = ADD32(innov[i],innov2[i]);
871
/* Same excitation formula as above, applied again at this point */
for (i=0;i<st->subframeSize;i++)
872
exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
873
/* Optional per-sub-frame innovation RMS; the pointer is NULL after init */
if (st->innov_rms_save)
875
st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize);
879
/* Final signal synthesis from excitation */
880
iir_mem16(exc, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack);
882
/* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
883
if (st->complexity!=0)
884
filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack);
888
/* Store the LSPs for interpolation in the next frame */
889
if (st->submodeID>=1)
891
for (i=0;i<st->lpcSize;i++)
892
st->old_lsp[i] = lsp[i];
893
for (i=0;i<st->lpcSize;i++)
894
st->old_qlsp[i] = qlsp[i];
898
if (st->submodeID>=1)
899
SPEEX_COPY(st->old_curve, st->curve, 128);
902
/* Sub-mode 1 appends a 4-bit field holding 15 or 0; the condition that
   picks the value is not visible in this excerpt */
if (st->submodeID==1)
906
speex_bits_pack(bits, 15, 4);
909
speex_bits_pack(bits, 0, 4);
912
/* The next frame will not be the first (Duh!) */
914
/* Keep the last windowSize-frameSize input samples for the next call */
SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
916
/* After a noise-codebook frame or sub-mode 0 the next frame's pitch search
   is bounded; the second assignment presumably sits under an `else` that
   is not visible */
if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
917
st->bounded_pitch = 1;
919
st->bounded_pitch = 0;
924
/* Allocate and initialise a narrowband decoder state.
   m: mode descriptor; m->mode is read as a SpeexNBMode.
   Buffers allocated here are released by nb_decoder_destroy().
   NOTE(review): the braces, some declarations, preprocessor lines, a loop
   header and the return statement are not visible in this excerpt;
   presumably the function returns st. */
void *nb_decoder_init(const SpeexMode *m)
927
const SpeexNBMode *mode;
930
mode=(const SpeexNBMode*)m->mode;
931
st = (DecState *)speex_alloc(sizeof(DecState));
934
/* NOTE(review): the #else/#endif are not visible. nb_decoder_destroy frees
   st->stack only when neither macro is defined, which suggests the scratch
   allocation below belongs to the #else side. */
#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
937
st->stack = (char*)speex_alloc_scratch(NB_DEC_STACK);
943
st->encode_submode = 1;
946
/* Codec parameters, should eventually have several "modes"*/
947
st->frameSize = mode->frameSize;
948
st->nbSubframes=mode->frameSize/mode->subframeSize;
949
st->subframeSize=mode->subframeSize;
950
st->lpcSize = mode->lpcSize;
951
st->min_pitch=mode->pitchStart;
952
st->max_pitch=mode->pitchEnd;
954
st->submodes=mode->submodes;
955
st->submodeID=mode->defaultSubmode;
957
st->lpc_enh_enabled=1;
959
/* Excitation buffer: frameSize + 2*max_pitch + subframeSize + 12 samples.
   st->exc starts 2*max_pitch + subframeSize + 6 samples in, so that much
   history sits in front of the current frame. */
st->excBuf = (spx_word16_t*)speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
960
st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
961
/* NOTE(review): the count passed here (frameSize + max_pitch) is smaller
   than the allocation above */
SPEEX_MEMSET(st->excBuf, 0, st->frameSize + st->max_pitch);
963
st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t));
964
st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
965
st->mem_sp = (spx_mem_t*)speex_alloc(st->lpcSize*sizeof(spx_mem_t));
966
st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
969
/* Three-entry history of pitch gains; nb_decode_lost takes its median */
st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
970
st->pitch_gain_buf_idx = 0;
973
st->sampling_rate=8000;
974
st->last_ol_gain = 0;
976
/* In-band request handlers: default user handler, no per-request callbacks
   (the loop header around the last assignment is not visible) */
st->user_callback.func = &speex_default_user_handler;
977
st->user_callback.data = NULL;
979
st->speex_callbacks[i].func = NULL;
981
st->voc_m1=st->voc_m2=st->voc_mean=0;
985
st->highpass_enabled = 1;
987
#ifdef ENABLE_VALGRIND
988
/* NOTE(review): the length passed is NB_DEC_STACK although the pointer is
   the state struct; confirm this is intended. */
VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
993
/* Release everything allocated by nb_decoder_init().
   state: decoder state previously returned by nb_decoder_init.
   NOTE(review): the braces, the declaration/cast of st, the #endif and the
   final release of the state struct itself are not visible in this excerpt. */
void nb_decoder_destroy(void *state)
998
/* The scratch stack exists only when neither VAR_ARRAYS nor USE_ALLOCA is defined */
#if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
999
speex_free_scratch(st->stack);
1002
/* Buffers allocated by nb_decoder_init */
speex_free (st->excBuf);
1003
speex_free (st->interp_qlpc);
1004
speex_free (st->old_qlsp);
1005
speex_free (st->mem_sp);
1006
speex_free (st->pi_gain);
1011
/* Median of three values, using comparisons only.
   Arguments are expanded more than once, so they must be free of side
   effects. The comparisons and their order are significant only for values
   that compare unordered; for ordinary numbers the result is the middle
   value. */
#define median3(a, b, c) \
   ((a) < (b) \
      ? ((b) < (c) \
            ? (b)                         /* a < b < c            */ \
            : ((a) < (c) ? (c) : (a)))    /* b is largest         */ \
      : ((c) < (b) \
            ? (b)                         /* c < b <= a           */ \
            : ((c) < (a) ? (c) : (a))))   /* b is smallest        */
1014
/* Gain scaling used by nb_decode_lost(), indexed by st->count_lost while
   that count is below 10; the values decrease with the index.
   NOTE(review): two definitions appear (integer values up to 32767, then
   fractional values up to 1.); the conditional that presumably selects one
   per build is not visible in this excerpt. */
const spx_word16_t attenuation[10] = {32767, 31483, 27923, 22861, 17278, 12055, 7764, 4616, 2533, 1283};
1016
const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.237, 0.141, 0.077, 0.039};
1020
/* Synthesise a replacement frame when no packet is available.
   st:    decoder state; its excitation history, pitch-gain history,
          interp_qlpc and filter memories are read and updated.
   out:   receives st->frameSize output samples.
   stack: scratch stack forwarded to the filter routine.
   The frame is built by repeating past excitation at a jittered pitch lag,
   with an attenuated gain plus noise, and passing it through the last LPC
   filter.
   NOTE(review): the braces, some declarations (i, pitch_val, fact), several
   preprocessor lines and an `else` branch are not visible in this excerpt. */
static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack)
1024
spx_word16_t pitch_gain;
1026
spx_word16_t gain_med;
1027
spx_word16_t innov_gain;
1028
spx_word16_t noise_gain;
1030
/* Attenuation factor for the number of frames lost so far; the value used
   for counts of 10 or more is set on a line that is not visible */
if (st->count_lost<10)
1031
fact = attenuation[st->count_lost];
1035
/* Never use more than the median of the last three pitch gains */
gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
1036
if (gain_med < st->last_pitch_gain)
1037
st->last_pitch_gain = gain_med;
1040
/* Rescale the stored gain for the arithmetic below: a shift by 9 in one
   variant, a multiply by GAIN_SCALING_1 in the other (selecting
   conditionals not visible) */
pitch_gain = st->last_pitch_gain;
1043
pitch_gain = SHL16(pitch_gain, 9);
1045
pitch_gain = GAIN_SCALING_1*st->last_pitch_gain;
1049
pitch_gain = MULT16_16_Q15(fact,pitch_gain) + VERY_SMALL;
1050
/* FIXME: This was rms of innovation (not exc) */
1051
innov_gain = compute_rms16(st->exc, st->frameSize);
1052
/* noise_gain = innov_gain * fact * (1 - pitch_gain^2) */
noise_gain = MULT16_16_Q15(innov_gain, MULT16_16_Q15(fact, SUB16(Q15ONE,MULT16_16_Q15(pitch_gain,pitch_gain))));
1053
/* Shift all buffers by one frame */
1054
SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
1057
/* Last pitch plus a random offset whose amplitude argument grows with the
   loss count, clamped to [min_pitch, max_pitch] */
pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT);
1058
if (pitch_val > st->max_pitch)
1059
pitch_val = st->max_pitch;
1060
if (pitch_val < st->min_pitch)
1061
pitch_val = st->min_pitch;
1062
/* New excitation: scaled copy of the sample one pitch period back, plus noise */
for (i=0;i<st->frameSize;i++)
1064
st->exc[i]= MULT16_16_Q15(pitch_gain, (st->exc[i-pitch_val]+VERY_SMALL)) +
1065
speex_rand(noise_gain, &st->seed);
1068
/* The stored LPC filter is overwritten with its .98 bandwidth-expanded
   version, so the expansion accumulates over consecutive calls */
bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize);
1069
/* Synthesis reads from one sub-frame before st->exc */
iir_mem16(&st->exc[-st->subframeSize], st->interp_qlpc, out, st->frameSize,
1070
st->lpcSize, st->mem_sp, stack);
1071
highpass(out, out, st->frameSize, HIGHPASS_NARROWBAND|HIGHPASS_OUTPUT, st->mem_hp);
1075
/* Record the gain used (shifted back by 9) in the three-entry history */
st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9);
1076
if (st->pitch_gain_buf_idx > 2) /* rollover */
1077
st->pitch_gain_buf_idx = 0;
1080
/* Just so we don't need to carry the complete wideband mode information.
   nb_decode() indexes this table with the wideband sub-mode ID read from the
   stream, subtracts SB_SUBMODE_BITS+1 from the entry and advances the bit
   reader by the result in order to skip a wideband block. */
1081
static const int wb_skip_table[8] = {0, 36, 112, 192, 352, 0, 0, 0};
1083
int nb_decode(void *state, SpeexBits *bits, void *vout)
1088
spx_word16_t pitch_gain[3];
1089
spx_word32_t ol_gain=0;
1091
spx_word16_t ol_pitch_coef=0;
1093
spx_word16_t best_pitch_gain=0;
1097
VARDECL(spx_sig_t *innov);
1098
VARDECL(spx_word32_t *exc32);
1099
VARDECL(spx_coef_t *ak);
1100
VARDECL(spx_lsp_t *qlsp);
1101
spx_word16_t pitch_average=0;
1103
spx_word16_t *out = (spx_word16_t*)vout;
1104
VARDECL(spx_lsp_t *interp_qlsp);
1106
st=(DecState*)state;
1109
/* Check if we're in DTX mode*/
1110
if (!bits && st->dtx_enabled)
1115
/* If bits is NULL, consider the packet to be lost (what could we do anyway) */
1118
nb_decode_lost(st, out, stack);
1122
if (st->encode_submode)
1125
/* Search for next narrowband block (handle requests, skip wideband blocks) */
1127
if (speex_bits_remaining(bits)<5)
1129
wideband = speex_bits_unpack_unsigned(bits, 1);
1130
if (wideband) /* Skip wideband block (for compatibility) */
1134
advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1135
/*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1136
advance = wb_skip_table[submode];
1139
speex_notify("Invalid mode encountered. The stream is corrupted.");
1142
advance -= (SB_SUBMODE_BITS+1);
1143
speex_bits_advance(bits, advance);
1145
if (speex_bits_remaining(bits)<5)
1147
wideband = speex_bits_unpack_unsigned(bits, 1);
1150
advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1151
/*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1152
advance = wb_skip_table[submode];
1155
speex_notify("Invalid mode encountered. The stream is corrupted.");
1158
advance -= (SB_SUBMODE_BITS+1);
1159
speex_bits_advance(bits, advance);
1160
wideband = speex_bits_unpack_unsigned(bits, 1);
1163
speex_notify("More than two wideband layers found. The stream is corrupted.");
1169
if (speex_bits_remaining(bits)<4)
1171
/* FIXME: Check for overflow */
1172
m = speex_bits_unpack_unsigned(bits, 4);
1173
if (m==15) /* We found a terminator */
1176
} else if (m==14) /* Speex in-band request */
1178
int ret = speex_inband_handler(bits, st->speex_callbacks, state);
1181
} else if (m==13) /* User in-band request */
1183
int ret = st->user_callback.func(bits, state, st->user_callback.data);
1186
} else if (m>8) /* Invalid mode */
1188
speex_notify("Invalid mode encountered. The stream is corrupted.");
1194
/* Get the sub-mode that was used */
1200
/* Shift all buffers by one frame */
1201
SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
1203
/* If null mode (no transmission), just set a couple things to zero*/
1204
if (st->submodes[st->submodeID] == NULL)
1206
VARDECL(spx_coef_t *lpc);
1207
ALLOC(lpc, st->lpcSize, spx_coef_t);
1208
bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize);
1210
spx_word16_t innov_gain=0;
1211
/* FIXME: This was innov, not exc */
1212
innov_gain = compute_rms16(st->exc, st->frameSize);
1213
for (i=0;i<st->frameSize;i++)
1214
st->exc[i]=speex_rand(innov_gain, &st->seed);
1220
/* Final signal synthesis from excitation */
1221
iir_mem16(st->exc, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack);
1227
ALLOC(qlsp, st->lpcSize, spx_lsp_t);
1229
/* Unquantize LSPs */
1230
SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits);
1232
/*Damp memory if a frame was lost and the LSP changed too much*/
1236
spx_word32_t lsp_dist=0;
1237
for (i=0;i<st->lpcSize;i++)
1238
lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i])));
1240
fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2));
1242
fact = .6*exp(-.2*lsp_dist);
1244
for (i=0;i<st->lpcSize;i++)
1245
st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]);
1249
/* Handle first frame and lost-packet case */
1250
if (st->first || st->count_lost)
1252
for (i=0;i<st->lpcSize;i++)
1253
st->old_qlsp[i] = qlsp[i];
1256
/* Get open-loop pitch estimation for low bit-rate pitch coding */
1257
if (SUBMODE(lbr_pitch)!=-1)
1259
ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
1262
if (SUBMODE(forced_pitch_gain))
1265
quant = speex_bits_unpack_unsigned(bits, 4);
1266
ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
1269
/* Get global excitation gain */
1272
qe = speex_bits_unpack_unsigned(bits, 5);
1274
/* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */
1275
ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
1277
ol_gain = SIG_SCALING*exp(qe/3.5);
1281
ALLOC(ak, st->lpcSize, spx_coef_t);
1282
ALLOC(innov, st->subframeSize, spx_sig_t);
1283
ALLOC(exc32, st->subframeSize, spx_word32_t);
1285
if (st->submodeID==1)
1288
extra = speex_bits_unpack_unsigned(bits, 4);
1295
if (st->submodeID>1)
1298
/*Loop on subframes */
1299
for (sub=0;sub<st->nbSubframes;sub++)
1304
spx_word16_t *innov_save = NULL;
1307
/* Offset relative to start of frame */
1308
offset = st->subframeSize*sub;
1311
/* Original signal */
1314
innov_save = st->innov_save+offset;
1317
/* Reset excitation */
1318
SPEEX_MEMSET(exc, 0, st->subframeSize);
1320
/*Adaptive codebook contribution*/
1321
speex_assert (SUBMODE(ltp_unquant));
1323
int pit_min, pit_max;
1324
/* Handle pitch constraints if any */
1325
if (SUBMODE(lbr_pitch) != -1)
1328
margin = SUBMODE(lbr_pitch);
1331
/* GT - need optimization?
1332
if (ol_pitch < st->min_pitch+margin-1)
1333
ol_pitch=st->min_pitch+margin-1;
1334
if (ol_pitch > st->max_pitch-margin)
1335
ol_pitch=st->max_pitch-margin;
1336
pit_min = ol_pitch-margin+1;
1337
pit_max = ol_pitch+margin;
1339
pit_min = ol_pitch-margin+1;
1340
if (pit_min < st->min_pitch)
1341
pit_min = st->min_pitch;
1342
pit_max = ol_pitch+margin;
1343
if (pit_max > st->max_pitch)
1344
pit_max = st->max_pitch;
1346
pit_min = pit_max = ol_pitch;
1349
pit_min = st->min_pitch;
1350
pit_max = st->max_pitch;
1355
SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
1356
st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
1357
st->count_lost, offset, st->last_pitch_gain, 0);
1359
/* Ensuring that things aren't blowing up as would happen if e.g. an encoder is
1360
crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat).
1361
We can probably be even more aggressive and limit to 15000 or so. */
1362
sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize);
1364
tmp = gain_3tap_to_1tap(pitch_gain);
1366
pitch_average += tmp;
1367
if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5)
1368
|| (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5))
1369
|| (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) )
1372
if (tmp > best_pitch_gain)
1373
best_pitch_gain = tmp;
1377
/* Unquantize the innovation */
1382
SPEEX_MEMSET(innov, 0, st->subframeSize);
1384
/* Decode sub-frame gain correction */
1385
if (SUBMODE(have_subframe_gain)==3)
1387
q_energy = speex_bits_unpack_unsigned(bits, 3);
1388
ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain);
1389
} else if (SUBMODE(have_subframe_gain)==1)
1391
q_energy = speex_bits_unpack_unsigned(bits, 1);
1392
ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain);
1397
speex_assert (SUBMODE(innovation_unquant));
1399
/*Fixed codebook contribution*/
1400
SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1401
/* De-normalize innovation and update excitation */
1403
signal_mul(innov, innov, ener, st->subframeSize);
1405
/* Decode second codebook (only for some modes) */
1406
if (SUBMODE(double_codebook))
1408
char *tmp_stack=stack;
1409
VARDECL(spx_sig_t *innov2);
1410
ALLOC(innov2, st->subframeSize, spx_sig_t);
1411
SPEEX_MEMSET(innov2, 0, st->subframeSize);
1412
SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1413
signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
1414
for (i=0;i<st->subframeSize;i++)
1415
innov[i] = ADD32(innov[i], innov2[i]);
1418
for (i=0;i<st->subframeSize;i++)
1419
exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
1420
/*print_vec(exc, 40, "innov");*/
1423
for (i=0;i<st->subframeSize;i++)
1424
innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT));
1429
if (st->submodeID==1)
1431
spx_word16_t g=ol_pitch_coef;
1432
g=MULT16_16_P14(QCONST16(1.5f,14),(g-QCONST16(.2f,6)));
1438
SPEEX_MEMSET(exc, 0, st->subframeSize);
1439
while (st->voc_offset<st->subframeSize)
1441
/* exc[st->voc_offset]= g*sqrt(2*ol_pitch)*ol_gain;
1442
Not quite sure why we need the factor of two in the sqrt */
1443
if (st->voc_offset>=0)
1444
exc[st->voc_offset]=MULT16_16(spx_sqrt(MULT16_16_16(2,ol_pitch)),EXTRACT16(PSHR32(MULT16_16(g,PSHR32(ol_gain,SIG_SHIFT)),6)));
1445
st->voc_offset+=ol_pitch;
1447
st->voc_offset -= st->subframeSize;
1449
for (i=0;i<st->subframeSize;i++)
1451
spx_word16_t exci=exc[i];
1452
exc[i]= ADD16(ADD16(MULT16_16_Q15(QCONST16(.7f,15),exc[i]) , MULT16_16_Q15(QCONST16(.3f,15),st->voc_m1)),
1453
SUB16(MULT16_16_Q15(Q15_ONE-MULT16_16_16(QCONST16(.85f,9),g),EXTRACT16(PSHR32(innov[i],SIG_SHIFT))),
1454
MULT16_16_Q15(MULT16_16_16(QCONST16(.15f,9),g),EXTRACT16(PSHR32(st->voc_m2,SIG_SHIFT)))
1457
st->voc_m2=innov[i];
1458
st->voc_mean = EXTRACT16(PSHR32(ADD32(MULT16_16(QCONST16(.8f,15),st->voc_mean), MULT16_16(QCONST16(.2f,15),exc[i])), 15));
1459
exc[i]-=st->voc_mean;
1466
ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
1468
if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost)
1470
multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1471
multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1473
SPEEX_COPY(out, &st->exc[-st->subframeSize], st->frameSize);
1476
/* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
1479
spx_word16_t exc_ener;
1480
spx_word32_t gain32;
1482
exc_ener = compute_rms16 (st->exc, st->frameSize);
1483
gain32 = PDIV32(ol_gain, ADD16(exc_ener,1));
1487
gain = EXTRACT16(gain32);
1493
for (i=0;i<st->frameSize;i++)
1495
st->exc[i] = MULT16_16_Q14(gain, st->exc[i]);
1496
out[i]=st->exc[i-st->subframeSize];
1500
/*Loop on subframes */
1501
for (sub=0;sub<st->nbSubframes;sub++)
1506
/* Offset relative to start of frame */
1507
offset = st->subframeSize*sub;
1508
/* Original signal */
1513
/* LSP interpolation (quantized and unquantized) */
1514
lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
1516
/* Make sure the LSP's are stable */
1517
lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
1519
/* Compute interpolated LPCs (unquantized) */
1520
lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack);
1522
/* Compute analysis filter at w=pi */
1524
spx_word32_t pi_g=LPC_SCALING;
1525
for (i=0;i<st->lpcSize;i+=2)
1527
/*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
1528
pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i])));
1530
st->pi_gain[sub] = pi_g;
1533
iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1536
for (i=0;i<st->lpcSize;i++)
1537
st->interp_qlpc[i] = ak[i];
1541
if (st->highpass_enabled)
1542
highpass(out, out, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_OUTPUT, st->mem_hp);
1543
/*for (i=0;i<st->frameSize;i++)
1544
printf ("%d\n", (int)st->frame[i]);*/
1546
/* Tracking output level */
1547
st->level = 1+PSHR32(ol_gain,SIG_SHIFT);
1548
st->max_level = MAX16(MULT16_16_Q15(QCONST16(.99f,15), st->max_level), st->level);
1549
st->min_level = MIN16(ADD16(1,MULT16_16_Q14(QCONST16(1.01f,14), st->min_level)), st->level);
1550
if (st->max_level < st->min_level+1)
1551
st->max_level = st->min_level+1;
1552
/*printf ("%f %f %f %d\n", og, st->min_level, st->max_level, update);*/
1554
/* Store the LSPs for interpolation in the next frame */
1555
for (i=0;i<st->lpcSize;i++)
1556
st->old_qlsp[i] = qlsp[i];
1558
/* The next frame will not be the first (Duh!) */
1561
st->last_pitch = best_pitch;
1563
st->last_pitch_gain = PSHR16(pitch_average,2);
1565
st->last_pitch_gain = .25*pitch_average;
1567
st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
1568
if (st->pitch_gain_buf_idx > 2) /* rollover */
1569
st->pitch_gain_buf_idx = 0;
1571
st->last_ol_gain = ol_gain;
1576
/* Control entry point for the encoder state: reads or updates a single
   setting, selected by `request`.

   state   - opaque pointer; it is cast to EncState* before use.
   request - one of the SPEEX_* request codes listed in the case labels below.
   ptr     - in/out argument whose pointee type depends on the request:
             spx_int32_t for most requests; float for SPEEX_SET_VBR_QUALITY,
             SPEEX_GET_VBR_QUALITY and SPEEX_GET_RELATIVE_QUALITY; an array of
             st->nbSubframes spx_word32_t values for SPEEX_GET_PI_GAIN; a
             char* slot for SPEEX_GET_STACK. For SPEEX_SET_INNOVATION_SAVE the
             pointer itself is stored in the state.

   NOTE(review): in this copy of the file the bare numeric lines are not code,
   and several structural lines (braces, the switch header, break statements,
   some case labels, local declarations and the return statements) are not
   visible. The return value therefore cannot be documented from here --
   confirm against the complete source. */
int nb_encoder_ctl(void *state, int request, void *ptr)
1579
st=(EncState*)state;
1582
case SPEEX_GET_FRAME_SIZE:
1583
(*(spx_int32_t*)ptr) = st->frameSize;
1585
case SPEEX_SET_LOW_MODE:
1586
case SPEEX_SET_MODE:
1587
/* The caller's value is stored in both submodeSelect and submodeID */
st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
1589
case SPEEX_GET_LOW_MODE:
1590
case SPEEX_GET_MODE:
1591
(*(spx_int32_t*)ptr) = st->submodeID;
1595
/* The next six statements write/read vbr_enabled, vad_enabled and dtx_enabled
   as spx_int32_t. NOTE(review): their case labels are not visible here. */
st->vbr_enabled = (*(spx_int32_t*)ptr);
1598
(*(spx_int32_t*)ptr) = st->vbr_enabled;
1601
st->vad_enabled = (*(spx_int32_t*)ptr);
1604
(*(spx_int32_t*)ptr) = st->vad_enabled;
1607
st->dtx_enabled = (*(spx_int32_t*)ptr);
1610
(*(spx_int32_t*)ptr) = st->dtx_enabled;
1613
st->abr_enabled = (*(spx_int32_t*)ptr);
1614
/* Any non-zero ABR value also switches VBR on */
st->vbr_enabled = st->abr_enabled!=0;
1615
if (st->vbr_enabled)
1618
spx_int32_t rate, target;
1620
target = (*(spx_int32_t*)ptr);
1623
/* Re-enters the generic ctl to set a quality value i and read back the
   resulting bitrate. NOTE(review): the loop and the comparison against
   `target` are not visible here -- presumably a search over quality levels;
   confirm. */
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1624
speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
1632
speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
1640
(*(spx_int32_t*)ptr) = st->abr_enabled;
1642
#endif /* #ifndef DISABLE_VBR */
1643
#if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
1644
case SPEEX_SET_VBR_QUALITY:
1645
/* VBR quality is exchanged as a float, unlike most other requests */
st->vbr_quality = (*(float*)ptr);
1647
case SPEEX_GET_VBR_QUALITY:
1648
(*(float*)ptr) = st->vbr_quality;
1650
#endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
1651
case SPEEX_SET_QUALITY:
1653
int quality = (*(spx_int32_t*)ptr);
1658
/* quality is used as an index into the mode's quality_map table.
   NOTE(review): no range check on quality is visible in this copy --
   confirm it is clamped before this lookup. */
st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
1661
case SPEEX_SET_COMPLEXITY:
1662
st->complexity = (*(spx_int32_t*)ptr);
1663
/* NOTE(review): the statement guarded by this test is not visible here */
if (st->complexity<0)
1666
case SPEEX_GET_COMPLEXITY:
1667
(*(spx_int32_t*)ptr) = st->complexity;
1669
case SPEEX_SET_BITRATE:
1672
spx_int32_t rate, target;
1673
target = (*(spx_int32_t*)ptr);
1676
/* Same set-quality / read-bitrate pair as in the ABR handling above;
   NOTE(review): the surrounding loop is not visible here. */
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1677
speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
1684
case SPEEX_GET_BITRATE:
1685
if (st->submodes[st->submodeID])
1686
/* bits_per_frame of the current sub-mode, scaled by sampling_rate/frameSize */
(*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1688
/* Same formula with NB_SUBMODE_BITS+1 in place of bits_per_frame; presumably
   the branch taken when the sub-mode entry is NULL (the `else` is not
   visible here). */
(*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1690
case SPEEX_SET_SAMPLING_RATE:
1691
st->sampling_rate = (*(spx_int32_t*)ptr);
1693
case SPEEX_GET_SAMPLING_RATE:
1694
(*(spx_int32_t*)ptr)=st->sampling_rate;
1696
case SPEEX_RESET_STATE:
1699
st->bounded_pitch = 1;
1701
for (i=0;i<st->lpcSize;i++)
1702
/* old_lsp[i] = 3.1415927*(i+1)/(lpcSize+1), in the QCONST16/LSP_SHIFT representation */
st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
1703
for (i=0;i<st->lpcSize;i++)
1704
/* Zero the four filter memories, lpcSize entries each */
st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
1705
for (i=0;i<st->frameSize+st->max_pitch+1;i++)
1706
st->excBuf[i]=st->swBuf[i]=0;
1707
/* NOTE(review): the body of this loop is not visible here */
for (i=0;i<st->windowSize-st->frameSize;i++)
1711
case SPEEX_SET_SUBMODE_ENCODING:
1712
st->encode_submode = (*(spx_int32_t*)ptr);
1714
case SPEEX_GET_SUBMODE_ENCODING:
1715
(*(spx_int32_t*)ptr) = st->encode_submode;
1717
case SPEEX_GET_LOOKAHEAD:
1718
/* Reported lookahead is the window length minus the frame length */
(*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize);
1720
case SPEEX_SET_PLC_TUNING:
1721
st->plc_tuning = (*(spx_int32_t*)ptr);
1722
/* NOTE(review): the statement guarded by this test is not visible here */
if (st->plc_tuning>100)
1725
case SPEEX_GET_PLC_TUNING:
1726
(*(spx_int32_t*)ptr)=(st->plc_tuning);
1729
case SPEEX_SET_VBR_MAX_BITRATE:
1730
st->vbr_max = (*(spx_int32_t*)ptr);
1732
case SPEEX_GET_VBR_MAX_BITRATE:
1733
(*(spx_int32_t*)ptr) = st->vbr_max;
1735
#endif /* #ifndef DISABLE_VBR */
1736
case SPEEX_SET_HIGHPASS:
1737
st->highpass_enabled = (*(spx_int32_t*)ptr);
1739
case SPEEX_GET_HIGHPASS:
1740
(*(spx_int32_t*)ptr) = st->highpass_enabled;
1743
/* This is all internal stuff past this point */
1744
case SPEEX_GET_PI_GAIN:
1747
/* ptr receives st->nbSubframes values copied from st->pi_gain */
spx_word32_t *g = (spx_word32_t*)ptr;
1748
for (i=0;i<st->nbSubframes;i++)
1749
g[i]=st->pi_gain[i];
1755
/* Writes one compute_rms16() result per sub-frame of st->exc into ptr,
   viewed as a spx_word16_t array. NOTE(review): the case label for this
   request is not visible here. */
for (i=0;i<st->nbSubframes;i++)
1756
((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
1760
case SPEEX_GET_RELATIVE_QUALITY:
1761
(*(float*)ptr)=st->relative_quality;
1763
#endif /* #ifndef DISABLE_VBR */
1764
case SPEEX_SET_INNOVATION_SAVE:
1765
/* Only the pointer is stored; nothing is copied at this point */
st->innov_rms_save = (spx_word16_t*)ptr;
1767
case SPEEX_SET_WIDEBAND:
1768
st->isWideband = *((spx_int32_t*)ptr);
1770
case SPEEX_GET_STACK:
1771
*((char**)ptr) = st->stack;
1774
/* Presumably the default case (label not visible): reports the unrecognised request code */
speex_warning_int("Unknown nb_ctl request: ", request);
1780
/* Control entry point for the decoder state: reads or updates a single
   setting, selected by `request`.

   state   - opaque pointer; it is cast to DecState* before use.
   request - one of the SPEEX_* request codes listed in the case labels below.
   ptr     - in/out argument whose pointee type depends on the request:
             spx_int32_t for most requests; a SpeexCallback for
             SPEEX_SET_HANDLER and SPEEX_SET_USER_HANDLER; an array of
             st->nbSubframes spx_word32_t values for SPEEX_GET_PI_GAIN; a
             char* slot for SPEEX_GET_STACK. For SPEEX_SET_INNOVATION_SAVE the
             pointer itself is stored in the state.

   NOTE(review): in this copy of the file the bare numeric lines are not code,
   and several structural lines (braces, the switch header, break statements,
   some case labels, local declarations and the return statements) are not
   visible. The return value therefore cannot be documented from here --
   confirm against the complete source. */
int nb_decoder_ctl(void *state, int request, void *ptr)
1783
st=(DecState*)state;
1786
case SPEEX_SET_LOW_MODE:
1787
case SPEEX_SET_MODE:
1788
/* NOTE(review): the value is stored as submodeID with no visible range
   check, and submodeID is later used to index st->submodes -- confirm. */
st->submodeID = (*(spx_int32_t*)ptr);
1790
case SPEEX_GET_LOW_MODE:
1791
case SPEEX_GET_MODE:
1792
(*(spx_int32_t*)ptr) = st->submodeID;
1795
/* Write/read of lpc_enh_enabled as spx_int32_t.
   NOTE(review): the case labels for these two statements are not visible here. */
st->lpc_enh_enabled = *((spx_int32_t*)ptr);
1798
*((spx_int32_t*)ptr) = st->lpc_enh_enabled;
1800
case SPEEX_GET_FRAME_SIZE:
1801
(*(spx_int32_t*)ptr) = st->frameSize;
1803
case SPEEX_GET_BITRATE:
1804
if (st->submodes[st->submodeID])
1805
/* bits_per_frame of the current sub-mode, scaled by sampling_rate/frameSize */
(*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1807
/* Same formula with NB_SUBMODE_BITS+1 in place of bits_per_frame; presumably
   the branch taken when the sub-mode entry is NULL (the `else` is not
   visible here). */
(*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1809
case SPEEX_SET_SAMPLING_RATE:
1810
st->sampling_rate = (*(spx_int32_t*)ptr);
1812
case SPEEX_GET_SAMPLING_RATE:
1813
(*(spx_int32_t*)ptr)=st->sampling_rate;
1815
case SPEEX_SET_HANDLER:
1817
SpeexCallback *c = (SpeexCallback*)ptr;
1818
/* The caller's func, data and callback_id are copied into the
   speex_callbacks slot selected by c->callback_id.
   NOTE(review): callback_id is used as an array index with no visible bound
   check -- confirm it cannot exceed the size of speex_callbacks. */
st->speex_callbacks[c->callback_id].func=c->func;
1819
st->speex_callbacks[c->callback_id].data=c->data;
1820
st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
1823
case SPEEX_SET_USER_HANDLER:
1825
SpeexCallback *c = (SpeexCallback*)ptr;
1826
/* A single user callback is kept: its three fields are overwritten */
st->user_callback.func=c->func;
1827
st->user_callback.data=c->data;
1828
st->user_callback.callback_id=c->callback_id;
1831
case SPEEX_RESET_STATE:
1834
/* NOTE(review): the bodies of the two loops below are not visible here */
for (i=0;i<st->lpcSize;i++)
1836
for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
1840
case SPEEX_SET_SUBMODE_ENCODING:
1841
st->encode_submode = (*(spx_int32_t*)ptr);
1843
case SPEEX_GET_SUBMODE_ENCODING:
1844
(*(spx_int32_t*)ptr) = st->encode_submode;
1846
case SPEEX_GET_LOOKAHEAD:
1847
/* The decoder reports one sub-frame length as its lookahead */
(*(spx_int32_t*)ptr)=st->subframeSize;
1849
case SPEEX_SET_HIGHPASS:
1850
st->highpass_enabled = (*(spx_int32_t*)ptr);
1852
case SPEEX_GET_HIGHPASS:
1853
(*(spx_int32_t*)ptr) = st->highpass_enabled;
1855
/* FIXME: Convert to fixed-point and re-enable even when float API is disabled */
1856
#ifndef DISABLE_FLOAT_API
1857
case SPEEX_GET_ACTIVITY:
1860
/* ret = log(level/min_level) / log(max_level/min_level), built from the
   tracked level fields of the state */
ret = log(st->level/st->min_level)/log(st->max_level/st->min_level);
1863
/* Done in a strange way to catch NaNs as well */
1866
/*printf ("%f %f %f %f\n", st->level, st->min_level, st->max_level, ret);*/
1867
/* Result is returned as 100*ret converted to int.
   NOTE(review): the range checks on ret announced by the comment above are
   not visible in this copy. */
(*(spx_int32_t*)ptr) = (int)(100*ret);
1871
case SPEEX_GET_PI_GAIN:
1874
/* ptr receives st->nbSubframes values copied from st->pi_gain */
spx_word32_t *g = (spx_word32_t*)ptr;
1875
for (i=0;i<st->nbSubframes;i++)
1876
g[i]=st->pi_gain[i];
1882
/* Writes one compute_rms16() result per sub-frame of st->exc into ptr,
   viewed as a spx_word16_t array. NOTE(review): the case label for this
   request is not visible here. */
for (i=0;i<st->nbSubframes;i++)
1883
((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
1886
case SPEEX_GET_DTX_STATUS:
1887
*((spx_int32_t*)ptr) = st->dtx_enabled;
1889
case SPEEX_SET_INNOVATION_SAVE:
1890
/* Only the pointer is stored; nothing is copied at this point */
st->innov_save = (spx_word16_t*)ptr;
1892
case SPEEX_SET_WIDEBAND:
1893
st->isWideband = *((spx_int32_t*)ptr);
1895
case SPEEX_GET_STACK:
1896
*((char**)ptr) = st->stack;
1899
/* Presumably the default case (label not visible): reports the unrecognised request code */
speex_warning_int("Unknown nb_ctl request: ", request);