1
/* Copyright (C) 2002-2006 Jean-Marc Valin
3
Long-Term Prediction functions
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions
9
- Redistributions of source code must retain the above copyright
10
notice, this list of conditions and the following disclaimer.
12
- Redistributions in binary form must reproduce the above copyright
13
notice, this list of conditions and the following disclaimer in the
14
documentation and/or other materials provided with the distribution.
16
- Neither the name of the Xiph.org Foundation nor the names of its
17
contributors may be used to endorse or promote products derived from
18
this software without specific prior written permission.
20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
#include "stack_alloc.h"
41
#include <speex/speex_bits.h>
42
#include "math_approx.h"
43
#include "os_support.h"
52
#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
54
#elif defined (BFIN_ASM)
58
#ifndef OVERRIDE_INNER_PROD
59
spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
66
part = MAC16_16(part,*x++,*y++);
67
part = MAC16_16(part,*x++,*y++);
68
part = MAC16_16(part,*x++,*y++);
69
part = MAC16_16(part,*x++,*y++);
70
/* HINT: If you had a 40-bit accumulator, you could shift only at the end */
71
sum = ADD32(sum,SHR32(part,6));
77
#ifndef OVERRIDE_PITCH_XCORR
78
#if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
79
void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
82
for (i=0;i<nb_pitch;i+=4)
84
/* Compute correlation*/
85
/*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
90
const spx_word16_t *y = _y+i;
91
const spx_word16_t *x = _x;
92
spx_word16_t y0, y1, y2, y3;
93
/*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/
104
part1 = MULT16_16(*x,y0);
105
part2 = MULT16_16(*x,y1);
106
part3 = MULT16_16(*x,y2);
107
part4 = MULT16_16(*x,y3);
110
part1 = MAC16_16(part1,*x,y1);
111
part2 = MAC16_16(part2,*x,y2);
112
part3 = MAC16_16(part3,*x,y3);
113
part4 = MAC16_16(part4,*x,y0);
116
part1 = MAC16_16(part1,*x,y2);
117
part2 = MAC16_16(part2,*x,y3);
118
part3 = MAC16_16(part3,*x,y0);
119
part4 = MAC16_16(part4,*x,y1);
122
part1 = MAC16_16(part1,*x,y3);
123
part2 = MAC16_16(part2,*x,y0);
124
part3 = MAC16_16(part3,*x,y1);
125
part4 = MAC16_16(part4,*x,y2);
129
sum1 = ADD32(sum1,SHR32(part1,6));
130
sum2 = ADD32(sum2,SHR32(part2,6));
131
sum3 = ADD32(sum3,SHR32(part3,6));
132
sum4 = ADD32(sum4,SHR32(part4,6));
134
corr[nb_pitch-1-i]=sum1;
135
corr[nb_pitch-2-i]=sum2;
136
corr[nb_pitch-3-i]=sum3;
137
corr[nb_pitch-4-i]=sum4;
142
void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
145
for (i=0;i<nb_pitch;i++)
147
/* Compute correlation*/
148
corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len);
155
#ifndef OVERRIDE_COMPUTE_PITCH_ERROR
156
static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
158
spx_word32_t sum = 0;
159
sum = ADD32(sum,MULT16_16(MULT16_16_16(g[0],pitch_control),C[0]));
160
sum = ADD32(sum,MULT16_16(MULT16_16_16(g[1],pitch_control),C[1]));
161
sum = ADD32(sum,MULT16_16(MULT16_16_16(g[2],pitch_control),C[2]));
162
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[1]),C[3]));
163
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[1]),C[4]));
164
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[0]),C[5]));
165
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[0]),C[6]));
166
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[1],g[1]),C[7]));
167
sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[2]),C[8]));
172
#ifndef OVERRIDE_OPEN_LOOP_NBEST_PITCH
173
void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
176
VARDECL(spx_word32_t *best_score);
177
VARDECL(spx_word32_t *best_ener);
179
VARDECL(spx_word32_t *corr);
181
/* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16)
182
arrays for (normalized) 16-bit values */
183
VARDECL(spx_word16_t *corr16);
184
VARDECL(spx_word16_t *ener16);
185
spx_word32_t *energy;
186
int cshift=0, eshift=0;
188
ALLOC(corr16, end-start+1, spx_word16_t);
189
ALLOC(ener16, end-start+1, spx_word16_t);
190
ALLOC(corr, end-start+1, spx_word32_t);
193
/* In floating-point, we need to float arrays and no normalized copies */
194
VARDECL(spx_word32_t *energy);
195
spx_word16_t *corr16;
196
spx_word16_t *ener16;
197
ALLOC(energy, end-start+2, spx_word32_t);
198
ALLOC(corr, end-start+1, spx_word32_t);
203
ALLOC(best_score, N, spx_word32_t);
204
ALLOC(best_ener, N, spx_word32_t);
213
for (i=-end;i<len;i++)
215
if (ABS16(sw[i])>16383)
221
/* If the weighted input is close to saturation, then we scale it down */
224
for (i=-end;i<len;i++)
226
sw[i]=SHR16(sw[i],1);
230
energy[0]=inner_prod(sw-start, sw-start, len);
231
e0=inner_prod(sw, sw, len);
232
for (i=start;i<end;i++)
234
/* Update energy for next pitch*/
235
energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
236
if (energy[i-start+1] < 0)
237
energy[i-start+1] = 0;
241
eshift = normalize16(energy, ener16, 32766, end-start+1);
244
/* In fixed-point, this actually overrites the energy array (aliased to corr) */
245
pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
248
/* Normalize to 180 so we can square it and it still fits in 16 bits */
249
cshift = normalize16(corr, corr16, 180, end-start+1);
250
/* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
253
for (i=-end;i<len;i++)
255
sw[i]=SHL16(sw[i],1);
260
/* Search for the best pitch prediction gain */
261
for (i=start;i<=end;i++)
263
spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
264
/* Instead of dividing the tmp by the energy, we multiply on the other side */
265
if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
267
/* We can safely put it last and then check */
269
best_ener[N-1]=ener16[i-start]+1;
271
/* Check if it comes in front of others */
274
if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
278
best_score[k]=best_score[k-1];
279
best_ener[k]=best_ener[k-1];
283
best_ener[j]=ener16[i-start]+1;
291
/* Compute open-loop gain if necessary */
298
g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
299
/* FIXME: g = max(g,corr/energy) */
310
#ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
311
static int pitch_gain_search_3tap_vq(
312
const signed char *gain_cdbk,
315
spx_word16_t max_gain
318
const signed char *ptr=gain_cdbk;
320
spx_word32_t best_sum=-VERY_LARGE32;
323
spx_word16_t pitch_control=64;
324
spx_word16_t gain_sum;
327
for (i=0;i<gain_cdbk_size;i++) {
330
g[0]=ADD16((spx_word16_t)ptr[0],32);
331
g[1]=ADD16((spx_word16_t)ptr[1],32);
332
g[2]=ADD16((spx_word16_t)ptr[2],32);
333
gain_sum = (spx_word16_t)ptr[3];
335
sum = compute_pitch_error(C16, g, pitch_control);
337
if (sum>best_sum && gain_sum<=max_gain) {
347
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
348
static spx_word32_t pitch_gain_search_3tap(
349
const spx_word16_t target[], /* Target vector */
350
const spx_coef_t ak[], /* LPCs for this subframe */
351
const spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
352
const spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
353
spx_sig_t exc[], /* Excitation */
354
const signed char *gain_cdbk,
356
int pitch, /* Pitch value */
357
int p, /* Number of LPC coeffs */
358
int nsf, /* Number of samples in subframe */
361
const spx_word16_t *exc2,
362
const spx_word16_t *r,
363
spx_word16_t *new_target,
366
spx_word32_t cumul_gain,
371
VARDECL(spx_word16_t *tmp1);
372
VARDECL(spx_word16_t *e);
374
spx_word32_t corr[3];
375
spx_word32_t A[3][3];
376
spx_word16_t gain[3];
378
spx_word16_t max_gain=128;
381
ALLOC(tmp1, 3*nsf, spx_word16_t);
382
ALLOC(e, nsf, spx_word16_t);
384
if (cumul_gain > 262144)
392
new_target[j] = target[j];
395
VARDECL(spx_mem_t *mm);
397
ALLOC(mm, p, spx_mem_t);
402
else if (j-pp-pitch<0)
403
e[j]=exc2[j-pp-pitch];
408
/* Scale target and excitation down if needed (avoiding overflow) */
412
e[j] = SHR16(e[j],1);
414
new_target[j] = SHR16(new_target[j],1);
419
iir_mem16(e, ak, e, nsf, p, mm, stack);
422
filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack);
428
spx_word16_t e0=exc2[-pitch-1+i];
430
/* Scale excitation down if needed (avoiding overflow) */
434
x[i][0]=MULT16_16_Q14(r[0], e0);
435
for (j=0;j<nsf-1;j++)
436
x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0));
440
corr[i]=inner_prod(x[i],new_target,nsf);
443
A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
468
C[0] = SHL32(C[0],1);
469
C[1] = SHL32(C[1],1);
470
C[2] = SHL32(C[2],1);
471
C[3] = SHL32(C[3],1);
472
C[4] = SHL32(C[4],1);
473
C[5] = SHL32(C[5],1);
474
C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]);
475
C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]);
476
C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]);
477
normalize16(C, C16, 32767, 9);
479
C[6]*=.5*(1+.02*plc_tuning);
480
C[7]*=.5*(1+.02*plc_tuning);
481
C[8]*=.5*(1+.02*plc_tuning);
484
best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);
487
gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]);
488
gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]);
489
gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]);
490
/*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
492
gain[0] = 0.015625*gain_cdbk[best_cdbk*4] + .5;
493
gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5;
494
gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5;
496
*cdbk_index=best_cdbk;
499
SPEEX_MEMSET(exc, 0, nsf);
509
exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp]);
513
for (j=tmp1;j<tmp3;j++)
514
exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp-pitch]);
518
spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])),
519
MULT16_16(gain[2],x[0][i]));
520
new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp,6)));
522
err = inner_prod(new_target, new_target, nsf);
527
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
528
int pitch_search_3tap(
529
spx_word16_t target[], /* Target vector */
531
spx_coef_t ak[], /* LPCs for this subframe */
532
spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
533
spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
534
spx_sig_t exc[], /* Excitation */
536
int start, /* Smallest pitch value allowed */
537
int end, /* Largest pitch value allowed */
538
spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
539
int p, /* Number of LPC coeffs */
540
int nsf, /* Number of samples in subframe */
548
spx_word32_t *cumul_gain
552
int cdbk_index, pitch=0, best_gain_index=0;
553
VARDECL(spx_sig_t *best_exc);
554
VARDECL(spx_word16_t *new_target);
555
VARDECL(spx_word16_t *best_target);
557
spx_word32_t err, best_err=-1;
559
const ltp_params *params;
560
const signed char *gain_cdbk;
566
params = (const ltp_params*) par;
567
gain_cdbk_size = 1<<params->gain_bits;
568
gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
576
ALLOC(nbest, N, int);
577
params = (const ltp_params*) par;
581
speex_bits_pack(bits, 0, params->pitch_bits);
582
speex_bits_pack(bits, 0, params->gain_bits);
583
SPEEX_MEMSET(exc, 0, nsf);
588
/* Check if we need to scale everything down in the pitch search to avoid overflows */
591
if (ABS16(target[i])>16383)
597
for (i=-end;i<nsf;i++)
599
if (ABS16(exc2[i])>16383)
609
open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack);
613
ALLOC(best_exc, nsf, spx_sig_t);
614
ALLOC(new_target, nsf, spx_word16_t);
615
ALLOC(best_target, nsf, spx_word16_t);
620
SPEEX_MEMSET(exc, 0, nsf);
621
err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf,
622
bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown);
623
if (err<best_err || best_err<0)
625
SPEEX_COPY(best_exc, exc, nsf);
626
SPEEX_COPY(best_target, new_target, nsf);
629
best_gain_index=cdbk_index;
632
/*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/
633
speex_bits_pack(bits, best_pitch-start, params->pitch_bits);
634
speex_bits_pack(bits, best_gain_index, params->gain_bits);
636
*cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain));
638
*cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3];
640
/*printf ("%f\n", cumul_gain);*/
641
/*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/
642
SPEEX_COPY(exc, best_exc, nsf);
643
SPEEX_COPY(target, best_target, nsf);
645
/* Scale target back up if needed */
649
target[i]=SHL16(target[i],1);
655
void pitch_unquant_3tap(
656
spx_word16_t exc[], /* Input excitation */
657
spx_word32_t exc_out[], /* Output excitation */
658
int start, /* Smallest pitch value allowed */
659
int end, /* Largest pitch value allowed */
660
spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
662
int nsf, /* Number of samples in subframe */
664
spx_word16_t *gain_val,
669
spx_word16_t last_pitch_gain,
676
spx_word16_t gain[3];
677
const signed char *gain_cdbk;
679
const ltp_params *params;
681
params = (const ltp_params*) par;
682
gain_cdbk_size = 1<<params->gain_bits;
683
gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
685
pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
687
gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);
688
/*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
690
gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]);
691
gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]);
692
gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]);
694
gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5;
695
gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5;
696
gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5;
699
if (count_lost && pitch > subframe_offset)
701
spx_word16_t gain_sum;
704
spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain,1);
708
spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
712
gain_sum = gain_3tap_to_1tap(gain);
716
spx_word16_t fact = DIV32_16(SHL32(EXTEND32(tmp),14),gain_sum);
718
gain[i]=MULT16_16_Q14(fact,gain[i]);
729
gain[0] = SHL16(gain[0],7);
730
gain[1] = SHL16(gain[1],7);
731
gain[2] = SHL16(gain[2],7);
732
SPEEX_MEMSET(exc_out, 0, nsf);
742
exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp]);
746
for (j=tmp1;j<tmp3;j++)
747
exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp-pitch]);
749
/*for (i=0;i<nsf;i++)
750
exc[i]=PSHR32(exc32[i],13);*/
754
/** Forced pitch delay and gain */
755
int forced_pitch_quant(
756
spx_word16_t target[], /* Target vector */
758
spx_coef_t ak[], /* LPCs for this subframe */
759
spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
760
spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
761
spx_sig_t exc[], /* Excitation */
763
int start, /* Smallest pitch value allowed */
764
int end, /* Largest pitch value allowed */
765
spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
766
int p, /* Number of LPC coeffs */
767
int nsf, /* Number of samples in subframe */
775
spx_word32_t *cumul_gain
779
VARDECL(spx_word16_t *res);
780
ALLOC(res, nsf, spx_word16_t);
788
for (i=0;i<nsf&&i<start;i++)
790
exc[i]=MULT16_16(SHL16(pitch_coef, 7),exc2[i-start]);
794
exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]);
797
res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT-1));
798
syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack);
800
target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),EXTEND32(res[i])),32700));
804
/** Unquantize forced pitch delay and gain */
805
void forced_pitch_unquant(
806
spx_word16_t exc[], /* Input excitation */
807
spx_word32_t exc_out[], /* Output excitation */
808
int start, /* Smallest pitch value allowed */
809
int end, /* Largest pitch value allowed */
810
spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
812
int nsf, /* Number of samples in subframe */
814
spx_word16_t *gain_val,
819
spx_word16_t last_pitch_gain,
833
exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7));
834
exc[i] = EXTRACT16(PSHR32(exc_out[i],13));
837
gain_val[0]=gain_val[2]=0;
838
gain_val[1] = pitch_coef;