1
/* Copyright (C) 2002-2006 Jean-Marc Valin
4
Redistribution and use in source and binary forms, with or without
5
modification, are permitted provided that the following conditions
8
- Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
11
- Redistributions in binary form must reproduce the above copyright
12
notice, this list of conditions and the following disclaimer in the
13
documentation and/or other materials provided with the distribution.
15
- Neither the name of the Xiph.org Foundation nor the names of its
16
contributors may be used to endorse or promote products derived from
17
this software without specific prior written permission.
19
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
#include "cb_search.h"
38
#include "stack_alloc.h"
41
#include "math_approx.h"
42
#include "os_support.h"
45
#include "cb_search_sse.h"
46
#elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47
#include "cb_search_arm4.h"
48
#elif defined(BFIN_ASM)
49
#include "cb_search_bfin.h"
52
#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
53
/* Pre-computes the response and the energy of every shape codeword.
 *
 * For each of the shape_cb_size codewords (subvect_size entries each, read
 * from shape_cb) the codeword is convolved with r, and the squared scaled
 * result is accumulated into E[i].  res is pointed at the slot of resp that
 * belongs to codeword i.
 *
 * NOTE(review): the statements that initialise resj and E[i] and that store
 * res16 are not visible in this excerpt -- confirm against the full file.
 * NOTE(review): resp2 is not referenced by any visible line of this body.
 * NOTE(review): stack is presumably the scratch area consumed by ALLOC; the
 * macro definition is not visible here.
 */
static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
56
/* Scratch buffer holding one codeword converted to spx_word16_t */
VARDECL(spx_word16_t *shape);
57
ALLOC(shape, subvect_size, spx_word16_t);
58
for (i=0;i<shape_cb_size;i++)
62
/* res addresses the subvect_size-long slot of resp used for codeword i */
res = resp+i*subvect_size;
63
for (k=0;k<subvect_size;k++)
64
/* Copy codeword i out of the signed-char table, casting each entry */
shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
67
/* Compute codeword response using convolution with impulse response */
68
for(j=0;j<subvect_size;j++)
73
/* Accumulates shape[k]*r[j-k] into resj (sample j of the convolution);
   the loop over k that encloses this line is not visible here */
resj = MAC16_16(resj,shape[k],r[j-k]);
75
/* Two alternative scalings of resj follow: a right shift by 13, and a
   multiplication by 0.03125 (1/32).  The preprocessor guard selecting one
   of them is not visible -- presumably fixed-point vs. floating-point. */
res16 = EXTRACT16(SHR32(resj, 13));
77
res16 = 0.03125f*resj;
79
/* Compute codeword energy */
80
E[i]=MAC16_16(E[i],res16,res16);
82
/*printf ("%d\n", (int)res[j]);*/
89
#ifndef OVERRIDE_TARGET_UPDATE
90
/* Subtracts the contribution of g*r[n] from the target vector t.
 * The product MULT16_16(g,r[n]) is passed through PSHR32(...,13) before
 * being subtracted from t[n] with SUB16.
 * NOTE(review): the loop header over n is not visible in this excerpt;
 * presumably n runs over the len entries of t -- confirm.
 */
static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
94
t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
100
/* Split-codebook shape(/sign) search that keeps a single candidate per
 * sub-vector: every vq_nbest / vq_nbest_sign call below requests 1 result.
 *
 * Visible flow:
 *  - copy target into the scratch vector t;
 *  - pre-compute codeword responses and energies with
 *    compute_weighted_codebook();
 *  - for each of the nb_subvect sub-vectors: search the codebook, pack the
 *    chosen index into bits using shape_bits+have_sign bits, remove (SUB16)
 *    or add (ADD16) the codeword response on the current sub-vector of t,
 *    write the scaled codeword into e, and call target_update() on the part
 *    of t that follows the current sub-vector;
 *  - add e to exc;
 *  - in the final section, filter a scaled copy of e with
 *    syn_percep_zero16() and subtract the result from target.
 *
 * NOTE(review): the rest of the parameter list (exc, r, bits, stack, ...),
 * several local declarations, the braces and all preprocessor guards are not
 * visible in this excerpt; conditions described below as "not visible" must
 * be confirmed against the full file.
 */
static void split_cb_search_shape_sign_N1(
101
spx_word16_t target[], /* target vector */
102
spx_coef_t ak[], /* LPCs for this subframe */
103
spx_coef_t awk1[], /* Weighted LPCs for this subframe */
104
spx_coef_t awk2[], /* Weighted LPCs for this subframe */
105
const void *par, /* Codebook/search parameters*/
106
int p, /* number of LPC coeffs */
107
int nsf, /* number of samples in subframe */
116
VARDECL(spx_word16_t *resp);
118
/* __m128-typed buffer; presumably only used in the SSE build (the guard is
   not visible here) */
VARDECL(__m128 *resp2);
122
VARDECL(spx_word32_t *E);
124
VARDECL(spx_word16_t *t);
125
VARDECL(spx_sig_t *e);
126
const signed char *shape_cb;
127
int shape_cb_size, subvect_size, nb_subvect;
128
const split_cb_params *params;
130
spx_word32_t best_dist;
133
/* par is an opaque pointer to a split_cb_params structure */
params = (const split_cb_params *) par;
134
subvect_size = params->subvect_size;
135
nb_subvect = params->nb_subvect;
136
/* The codebook holds 2^shape_bits codewords */
shape_cb_size = 1<<params->shape_bits;
137
shape_cb = params->shape_cb;
138
have_sign = params->have_sign;
139
/* One response of subvect_size samples per codeword */
ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
141
/* The element counts below are divided by 4 for the __m128 buffers */
ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
142
/* E is allocated twice in the visible lines (as __m128 and as
   spx_word32_t); presumably the two lines sit in different preprocessor
   branches whose guards are not visible */
ALLOC(E, shape_cb_size>>2, __m128);
145
ALLOC(E, shape_cb_size, spx_word32_t);
147
ALLOC(t, nsf, spx_word16_t);
148
ALLOC(e, nsf, spx_sig_t);
150
/* FIXME: Do we still need to copy the target? */
151
SPEEX_COPY(t, target, nsf);
153
compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
155
for (i=0;i<nb_subvect;i++)
157
/* x is the part of the working target covered by sub-vector i */
spx_word16_t *x=t+subvect_size*i;
158
/* Find the single best codeword for this sub-vector.  Two search calls are
   visible; the condition choosing between them is not (presumably
   have_sign). */
160
vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
162
vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
164
/* The index is written with have_sign extra bit(s) on top of shape_bits */
speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
171
/* Index values at or above shape_cb_size are singled out here; the body of
   this test is not visible (presumably it derives sign and folds rind back
   into the codebook range) */
if (rind>=shape_cb_size)
176
res = resp+rind*subvect_size;
178
/* Two alternative updates of the current sub-vector follow: subtract the
   codeword response, or add it.  The selecting condition is not visible. */
for (m=0;m<subvect_size;m++)
179
t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
181
for (m=0;m<subvect_size;m++)
182
t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
187
/* Three alternative ways of writing the chosen codeword into e follow:
   shifted left by SIG_SHIFT-5, the negation of that, and a variant scaled by
   sign*0.03125.  The conditions/guards selecting among them are not
   visible. */
for (j=0;j<subvect_size;j++)
188
e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
190
for (j=0;j<subvect_size;j++)
191
e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
194
for (j=0;j<subvect_size;j++)
195
e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
200
for (m=0;m<subvect_size;m++)
206
if (rind>=shape_cb_size)
214
/* g is the signed codeword entry m; two alternative scalings are visible
   (unscaled and multiplied by 0.03125), guard not visible */
g=sign*shape_cb[rind*subvect_size+m];
216
g=sign*0.03125*shape_cb[rind*subvect_size+m];
218
/* Propagate entry m onto the samples after sub-vector i; the computation of
   q is not visible in this excerpt */
target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
222
/* Update excitation */
223
/* FIXME: We could update the excitation directly above */
225
exc[j]=ADD32(exc[j],e[j]);
227
/* Update target: only update target if necessary */
230
/* NOTE(review): the condition guarding this section is not visible here */
VARDECL(spx_word16_t *r2);
231
ALLOC(r2, nsf, spx_word16_t);
233
/* r2 receives e passed through PSHR32(.,6) */
r2[j] = EXTRACT16(PSHR32(e[j] ,6));
234
/* r2 is passed as both the input and the output argument */
syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
236
target[j]=SUB16(target[j],PSHR16(r2[j],2));
242
/* Split-codebook shape(/sign) search that tracks N candidate paths across
 * the sub-vectors (an "n-best" search, per the comments in the body).
 *
 * Visible flow:
 *  - a call to split_cb_search_shape_sign_N1() with the same arguments
 *    (its guarding condition is not visible);
 *  - allocation of per-candidate target vectors (ot/nt), index histories
 *    (oind/nind) and distance lists (odist/ndist);
 *  - pre-computation of codeword responses and energies;
 *  - for each sub-vector: reset the new n-best list, search the codebook
 *    from each previous candidate, merge the results into the new list by
 *    total distance, rebuild the candidate targets and index histories, then
 *    make the new data the old data for the next sub-vector;
 *  - pack ind[i] for every sub-vector into bits, rebuild e from the chosen
 *    codewords, add e to exc and, in the final section, update target.
 *
 * NOTE(review): the rest of the parameter list, several local declarations
 * (N, ind, itmp, oind, nind, ...), the braces, many loop headers and all
 * preprocessor guards are not visible in this excerpt; anything described
 * below as "not visible" must be confirmed against the full file.
 */
void split_cb_search_shape_sign(
243
spx_word16_t target[], /* target vector */
244
spx_coef_t ak[], /* LPCs for this subframe */
245
spx_coef_t awk1[], /* Weighted LPCs for this subframe */
246
spx_coef_t awk2[], /* Weighted LPCs for this subframe */
247
const void *par, /* Codebook/search parameters*/
248
int p, /* number of LPC coeffs */
249
int nsf, /* number of samples in subframe */
259
VARDECL(spx_word16_t *resp);
261
/* __m128-typed buffer; presumably only used in the SSE build (the guard is
   not visible here) */
VARDECL(__m128 *resp2);
265
VARDECL(spx_word32_t *E);
267
VARDECL(spx_word16_t *t);
268
VARDECL(spx_sig_t *e);
269
VARDECL(spx_word16_t *tmp);
270
/* ndist / odist: distances of the new and of the previous candidate lists */
VARDECL(spx_word32_t *ndist);
271
VARDECL(spx_word32_t *odist);
273
VARDECL(spx_word16_t **ot2);
274
VARDECL(spx_word16_t **nt2);
275
/* ot / nt: per-candidate target vectors read from (ot) and written to (nt)
   while processing a sub-vector */
spx_word16_t **ot, **nt;
279
const signed char *shape_cb;
280
int shape_cb_size, subvect_size, nb_subvect;
281
const split_cb_params *params;
283
VARDECL(int *best_index);
284
VARDECL(spx_word32_t *best_dist);
285
VARDECL(int *best_nind);
286
VARDECL(int *best_ntarget);
291
/* Complexity isn't as important for the codebooks as it is for the pitch */
297
/* Hand the whole search over to the single-candidate variant.
   NOTE(review): the condition guarding this call is not visible here --
   presumably it is taken when N is 1. */
split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
300
ALLOC(ot2, N, spx_word16_t*);
301
ALLOC(nt2, N, spx_word16_t*);
302
ALLOC(oind, N, int*);
303
ALLOC(nind, N, int*);
305
/* par is an opaque pointer to a split_cb_params structure */
params = (const split_cb_params *) par;
306
subvect_size = params->subvect_size;
307
nb_subvect = params->nb_subvect;
308
/* The codebook holds 2^shape_bits codewords */
shape_cb_size = 1<<params->shape_bits;
309
shape_cb = params->shape_cb;
310
have_sign = params->have_sign;
311
ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
313
ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
314
/* E is allocated twice in the visible lines (as __m128 and as
   spx_word32_t); presumably the two lines sit in different preprocessor
   branches whose guards are not visible */
ALLOC(E, shape_cb_size>>2, __m128);
317
ALLOC(E, shape_cb_size, spx_word32_t);
319
ALLOC(t, nsf, spx_word16_t);
320
ALLOC(e, nsf, spx_sig_t);
321
ALLOC(ind, nb_subvect, int);
323
/* tmp holds 2*N rows of nsf samples; the visible line gives nt2[i] the
   odd-numbered rows (the matching ot2 assignment is not visible) */
ALLOC(tmp, 2*N*nsf, spx_word16_t);
327
nt2[i]=tmp+(2*i+1)*nsf;
331
ALLOC(best_index, N, int);
332
ALLOC(best_dist, N, spx_word32_t);
333
ALLOC(best_nind, N, int);
334
ALLOC(best_ntarget, N, int);
335
ALLOC(ndist, N, spx_word32_t);
336
ALLOC(odist, N, spx_word32_t);
338
/* itmp holds 2*N rows of nb_subvect indices: nind[i] takes the
   even-numbered rows and oind[i] the odd-numbered rows */
ALLOC(itmp, 2*N*nb_subvect, int);
341
nind[i]=itmp+2*i*nb_subvect;
342
oind[i]=itmp+(2*i+1)*nb_subvect;
345
SPEEX_COPY(t, target, nsf);
348
/* Candidate j starts from a copy of the target (loop over j not visible) */
SPEEX_COPY(&ot[j][0], t, nsf);
350
/* Pre-compute codewords response and energy */
351
compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
356
/*For all subvectors*/
357
for (i=0;i<nb_subvect;i++)
359
/*"erase" nbest list*/
361
ndist[j]=VERY_LARGE32;
362
/* NOTE(review): the terminator of the comment that starts on the next line
   is not visible in this excerpt -- confirm against the full file. */
/* This is not strictly necessary, but it provides an additional safety
363
to prevent crashes in case something goes wrong in the previous
366
best_nind[j] = best_ntarget[j] = 0;
367
/*For all n-bests of previous subvector*/
370
/* x is sub-vector i of previous candidate j's target */
spx_word16_t *x=ot[j]+subvect_size*i;
371
/* tener accumulates x[m]*x[m] over the sub-vector and is then passed
   through SHR32(.,1) */
spx_word32_t tener = 0;
372
for (m=0;m<subvect_size;m++)
373
tener = MAC16_16(tener, x[m],x[m]);
375
tener = SHR32(tener,1);
379
/*Find new n-best based on previous n-best j*/
381
/* Two search calls are visible; the condition choosing between them is not
   (presumably have_sign).  Each fills best_index / best_dist with N
   results. */
vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
383
vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
385
/*For all new n-bests*/
388
/* Compute total distance (including previous sub-vectors) */
389
spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
391
/*update n-best list*/
400
/* Move entry n-1 into slot n in all three parallel lists (the enclosing
   loop and the insertion test are not visible) */
ndist[n] = ndist[n-1];
401
best_nind[n] = best_nind[n-1];
402
best_ntarget[n] = best_ntarget[n-1];
404
/* n is equal to m here, so they're interchangeable */
406
best_nind[n] = best_index[k];
418
/*previous target (we don't care what happened before)*/
419
/* Copy the samples from sub-vector i+1 onwards out of the target of the
   previous candidate selected by best_ntarget[j] */
for (m=(i+1)*subvect_size;m<nsf;m++)
420
nt[j][m]=ot[best_ntarget[j]][m];
422
/* New code: update the rest of the target only if it's worth it */
423
for (m=0;m<subvect_size;m++)
429
/* Index values at or above shape_cb_size are singled out here; the body of
   this test is not visible (presumably it derives sign and folds rind back
   into the codebook range) */
if (rind>=shape_cb_size)
437
/* g is the signed codeword entry m; two alternative scalings are visible
   (unscaled and multiplied by 0.03125), guard not visible */
g=sign*shape_cb[rind*subvect_size+m];
439
g=sign*0.03125*shape_cb[rind*subvect_size+m];
441
/* Propagate entry m onto the samples after sub-vector i of candidate j;
   the computation of q is not visible in this excerpt */
target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
444
/* Inherit the index history of the selected previous candidate, then
   record the index chosen for sub-vector i */
for (q=0;q<nb_subvect;q++)
445
nind[j][q]=oind[best_ntarget[j]][q];
446
nind[j][i]=best_nind[j];
449
/*update old-new data*/
450
/* just swap pointers instead of a long copy */
458
/* The index histories, in contrast, are copied element by element */
for (m=0;m<nb_subvect;m++)
459
oind[j][m]=nind[j][m];
465
for (i=0;i<nb_subvect;i++)
468
/* Each index is written with have_sign extra bit(s) on top of shape_bits;
   the line that fills ind[i] is not visible in this excerpt */
speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
471
/* Put everything back together */
472
for (i=0;i<nb_subvect;i++)
477
if (rind>=shape_cb_size)
485
/* Three alternative ways of writing the chosen codeword into e follow:
   shifted left by SIG_SHIFT-5, the negation of that, and a variant scaled by
   sign*0.03125.  The conditions/guards selecting among them are not
   visible. */
for (j=0;j<subvect_size;j++)
486
e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
488
for (j=0;j<subvect_size;j++)
489
e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
492
for (j=0;j<subvect_size;j++)
493
e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
496
/* Update excitation */
498
exc[j]=ADD32(exc[j],e[j]);
500
/* Update target: only update target if necessary */
503
/* NOTE(review): the condition guarding this section is not visible here
   (presumably update_target, which is forwarded to the N1 variant above) */
VARDECL(spx_word16_t *r2);
504
ALLOC(r2, nsf, spx_word16_t);
506
/* r2 receives e passed through PSHR32(.,6) */
r2[j] = EXTRACT16(PSHR32(e[j] ,6));
507
/* r2 is passed as both the input and the output argument */
syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
509
target[j]=SUB16(target[j],PSHR16(r2[j],2));
514
/* Decoder counterpart of the split shape/sign codebook search.
 *
 * Reads, for each of the nb_subvect sub-vectors, a 1-bit sign and a
 * shape_bits-wide codeword index from bits, then writes the selected
 * codewords from shape_cb into exc, one sub-vector of subvect_size samples
 * at a time.
 *
 * NOTE(review): the rest of the parameter list (exc, bits, stack, ...), some
 * local declarations, the braces and all preprocessor guards are not visible
 * in this excerpt.
 */
void split_cb_shape_sign_unquant(
516
const void *par, /* non-overlapping codebook */
517
int nsf, /* number of samples in subframe */
526
const signed char *shape_cb;
527
int shape_cb_size, subvect_size, nb_subvect;
528
const split_cb_params *params;
531
/* par is an opaque pointer to a split_cb_params structure */
params = (const split_cb_params *) par;
532
subvect_size = params->subvect_size;
533
nb_subvect = params->nb_subvect;
534
/* The codebook holds 2^shape_bits codewords */
shape_cb_size = 1<<params->shape_bits;
535
shape_cb = params->shape_cb;
536
have_sign = params->have_sign;
538
ALLOC(ind, nb_subvect, int);
539
ALLOC(signs, nb_subvect, int);
541
/* Decode codeword signs and indices */
542
for (i=0;i<nb_subvect;i++)
545
/* NOTE(review): the condition guarding the sign read is not visible here
   (presumably have_sign) */
signs[i] = speex_bits_unpack_unsigned(bits, 1);
548
ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
550
/* Compute decoded excitation */
551
for (i=0;i<nb_subvect;i++)
559
/* Three alternative ways of producing the excitation follow: the codeword
   shifted left by SIG_SHIFT-5, the negation of that, and a variant scaled by
   s*0.03125.  The conditions/guards selecting among them are not visible. */
for (j=0;j<subvect_size;j++)
560
exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
562
for (j=0;j<subvect_size;j++)
563
exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
566
for (j=0;j<subvect_size;j++)
567
/* NOTE(review): this variant accumulates into exc (+=) whereas the two
   variants above overwrite it (=) -- confirm that the difference is
   intended. */
exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
572
/* "Noise codebook" quantiser entry point.
 *
 * Visible behaviour: runs residue_percep_zero16() on target (with ak, awk1
 * and awk2) into the scratch vector tmp, adds tmp[i] shifted left by 8 to
 * exc[i], and finally clears target with SPEEX_MEMSET.  No use of par is
 * visible in this excerpt.
 *
 * NOTE(review): the rest of the parameter list (exc, stack, ...), the loop
 * header over i and the braces are not visible in this excerpt.
 */
void noise_codebook_quant(
573
spx_word16_t target[], /* target vector */
574
spx_coef_t ak[], /* LPCs for this subframe */
575
spx_coef_t awk1[], /* Weighted LPCs for this subframe */
576
spx_coef_t awk2[], /* Weighted LPCs for this subframe */
577
const void *par, /* Codebook/search parameters*/
578
int p, /* number of LPC coeffs */
579
int nsf, /* number of samples in subframe */
589
VARDECL(spx_word16_t *tmp);
590
ALLOC(tmp, nsf, spx_word16_t);
591
residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
594
/* Accumulate the filtered target into the excitation, scaled by 2^8 */
exc[i]+=SHL32(EXTEND32(tmp[i]),8);
595
/* NOTE(review): the third argument is a count of nsf; whether SPEEX_MEMSET
   treats it as elements or bytes is not visible here -- confirm. */
SPEEX_MEMSET(target, 0, nsf);
599
/* "Noise codebook" decoder entry point.
 *
 * Visible behaviour: exc[i] is overwritten with the value returned by
 * speex_rand(1, seed), shifted left by SIG_SHIFT.  No use of par is visible
 * in this excerpt.
 *
 * NOTE(review): the rest of the parameter list (exc, seed, ...), the loop
 * header over i (presumably 0..nsf-1) and the braces are not visible in this
 * excerpt.
 */
void noise_codebook_unquant(
601
const void *par, /* non-overlapping codebook */
602
int nsf, /* number of samples in subframe */
609
/* FIXME: This is bad, but I don't think the function ever gets called anyway */
611
exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);