1
/* Copyright (C) 2005 Analog Devices */
4
@author Jean-Marc Valin
5
@brief Long-Term Prediction functions (Blackfin version)
8
Redistribution and use in source and binary forms, with or without
9
modification, are permitted provided that the following conditions
12
- Redistributions of source code must retain the above copyright
13
notice, this list of conditions and the following disclaimer.
15
- Redistributions in binary form must reproduce the above copyright
16
notice, this list of conditions and the following disclaimer in the
17
documentation and/or other materials provided with the distribution.
19
- Neither the name of the Xiph.org Foundation nor the names of its
20
contributors may be used to endorse or promote products derived from
21
this software without specific prior written permission.
23
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
27
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
/* Announces that this header provides inner_prod() itself (presumably so a
   generic version is compiled out -- confirm where the macro is tested). */
#define OVERRIDE_INNER_PROD
37
/* Blackfin inline-assembly inner product.
   Multiplies pairs of 16-bit words fetched through I0 and I1 and sums the
   products in accumulator A0.
   x, y : 16-bit input vectors, handed to the asm as memory operands.
   len  : vector length; the asm is given len-1 in a data register.
   NOTE(review): the braces, the set-up of I0/I1/P0/A0, the read-back of the
   result and the return statement are not visible in this excerpt. */
spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
40
__asm__ __volatile__ (
49
/* Preload the first pair of words so the loop body can multiply the current
   pair while fetching the next one in the same instruction. */
"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
50
/* Hardware loop counted by P0 (presumably loaded from the len-1 operand on a
   line not shown). GCC replaces %= with a number unique to this asm instance,
   which keeps the loop name unique if the asm is emitted more than once. */
"LOOP inner%= LC0 = P0;\n\t"
51
"LOOP_BEGIN inner%=;\n\t"
52
/* Signed-integer multiply-accumulate of the current pair, next pair fetched
   in parallel. */
"A0 += R0.L*R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
53
"LOOP_END inner%=;\n\t"
54
/* Tail: consume the pair fetched by the final loop iteration; no further load. */
"A0 += R0.L*R1.L (IS);\n\t"
59
/* Inputs: x and y as memory operands, len-1 in a data register. */
: "m" (x), "m" (y), "d" (len-1)
60
/* Registers the asm declares as clobbered. */
: "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3"
65
/* Announces that this header provides pitch_xcorr() itself (presumably so a
   generic version is compiled out -- confirm where the macro is tested). */
#define OVERRIDE_PITCH_XCORR
66
/* Blackfin inline-assembly cross-correlation used for pitch search.
   _x, _y   : 16-bit input vectors (asm operands %0 and %1, in memory).
   corr     : output array of 32-bit results (operand %2).
   len      : number of samples per correlation (operand %3).
   nb_pitch : number of correlations requested (operand %4).
   stack    : not referenced by any line visible here.
   The outer loop runs nb_pitch>>1 times and each pass fills two accumulators
   (A0 and A1), so two results appear to be produced per pass.
   NOTE(review): with a count of nb_pitch>>1, an odd nb_pitch would seem to
   leave one result uncomputed -- confirm against callers.
   NOTE(review): braces, several set-up instructions and the stores of R2/R3
   to corr are not visible in this excerpt. */
void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
69
__asm__ __volatile__ (
71
"I0 = P2;\n\t" /* x in I0 */
72
"B0 = P2;\n\t" /* x in B0 */
73
"R0 = %3;\n\t" /* len in R0 */
75
"P3 += -2;\n\t" /* P3 -= 2 (P3 presumably holds len; its load is not shown) */
76
"P4 = %4;\n\t" /* nb_pitch in P4 */
77
"R1 = R0 << 1;\n\t" /* number of bytes in x */
83
"L1 = 0;\n\t" /*Disable looping on I1*/
86
/* Outer hardware loop: nb_pitch/2 passes. */
"LOOP pitch%= LC0 = P4 >> 1;\n\t"
87
"LOOP_BEGIN pitch%=;\n\t"
91
/* Inner hardware loop: P3/2 passes, two samples of x consumed per pass.
   R0 holds two consecutive 16-bit words read through I0 and R1 two words read
   through I1. A0 pairs them at equal positions (L*L, H*H); A1 pairs each R0
   word with the following R1 word (L*H, then H*new L), i.e. one sample later. */
"LOOP inner_prod%= LC1 = P3 >> 1;\n\t"
92
"LOOP_BEGIN inner_prod%=;\n\t"
93
"A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t"
94
"A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R1.H = W[I1++] || R0 = [I0++];\n\t"
95
"LOOP_END inner_prod%=;\n\t"
/* Tail: one more pair of multiply-accumulates outside the loop; the second
   instruction omits the R1.H reload. */
96
"A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t"
97
"A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R0 = [I0++];\n\t"
100
/* Move both accumulated sums into data registers. */
"R2 = A0, R3 = A1;\n\t"
104
"LOOP_END pitch%=;\n\t"
106
/* No outputs; every argument is passed as a memory operand. */
: : "m" (_x), "m" (_y), "m" (corr), "m" (len), "m" (nb_pitch)
107
/* Clobbers, including "memory" because results are written through pointers. */
: "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
111
/* Announces that this header provides compute_pitch_error() itself (presumably
   so a generic version is compiled out -- confirm where the macro is tested). */
#define OVERRIDE_COMPUTE_PITCH_ERROR
112
/* Blackfin inline-assembly evaluation of the pitch-gain error sum.
   Operand map: %0 = sum (output), %1 = C (address register, tied to the
   input copy of C), %2..%4 = g[0]..g[2], %5 = pitch_control.
   Each step forms a 16-bit product in R1.L, then adds to or subtracts from A0
   that product times the word of C currently in R0.L, while fetching the next
   word of C in parallel. Visible terms, in order:
     + g[0]*pitch_control, + g[1]*pitch_control, + g[2]*pitch_control,
     - g[0]*g[1], - g[2]*g[1], - g[2]*g[0],
     - g[0]*g[0], - g[1]*g[1], - g[2]*g[2]
   Assuming the first word of C was loaded on a line not shown, these pair
   with C[0]..C[8] respectively -- TODO confirm.
   NOTE(review): braces, the declaration of sum, the clearing of A0, the first
   load of C, the read-back of A0 and the return are not visible here. */
static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
120
/* Terms added to the accumulator: gain times pitch_control. */
"R1.L = %2.L*%5.L (IS);\n\t"
121
"A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
123
"R1.L = %3.L*%5.L (IS);\n\t"
124
"A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
126
"R1.L = %4.L*%5.L (IS);\n\t"
127
"A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
129
/* Terms subtracted: cross products between two different gains. */
"R1.L = %2.L*%3.L (IS);\n\t"
130
"A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
132
"R1.L = %4.L*%3.L (IS);\n\t"
133
"A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
135
"R1.L = %4.L*%2.L (IS);\n\t"
136
"A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
138
/* Terms subtracted: squares of each gain. */
"R1.L = %2.L*%2.L (IS);\n\t"
139
"A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
141
"R1.L = %3.L*%3.L (IS);\n\t"
142
"A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
144
"R1.L = %4.L*%4.L (IS);\n\t"
145
/* Last term: no further word of C is fetched. */
"A0 -= R1.L*R0.L (IS);\n\t"
148
/* Outputs: sum is early-clobber; C is written back because the asm
   post-increments it. */
: "=&D" (sum), "=a" (C)
149
/* Inputs: the three gains and pitch_control in data registers; "1" ties the
   incoming C to output operand %1. */
: "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C)
150
: "R0", "R1", "R2", "A0"
155
/* Announces that this header provides open_loop_nbest_pitch() itself
   (presumably so a generic version is compiled out -- confirm where the macro
   is tested). */
#define OVERRIDE_OPEN_LOOP_NBEST_PITCH
156
#ifdef OVERRIDE_OPEN_LOOP_NBEST_PITCH
157
/* Open-loop search for the N best pitch lags in the range start..end.
   sw         : 16-bit signal; the code indexes it at negative offsets
                (sw-start, sw-end), so history before sw must be valid.
   start, end : inclusive lag range (see the for loop over i).
   len        : number of samples used for each energy/correlation.
   pitch, gain: result arrays (their writes are on lines not visible here --
                presumably one entry per retained candidate).
   N          : number of candidates to keep.
   stack      : scratch space handed to ALLOC and pitch_xcorr.
   NOTE(review): braces, several declarations (e0, i, j, k, g), parts of both
   asm statements and the code that stores the results are not visible in
   this excerpt; the matching #endif is not visible either. */
void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
160
/* Running best scores and their energies, N entries each. */
VARDECL(spx_word32_t *best_score);
161
VARDECL(spx_word32_t *best_ener);
163
/* Per-lag correlation and energy, 32-bit. */
VARDECL(spx_word32_t *corr);
164
VARDECL(spx_word32_t *energy);
166
ALLOC(best_score, N, spx_word32_t);
167
ALLOC(best_ener, N, spx_word32_t);
168
ALLOC(corr, end-start+1, spx_word32_t);
169
/* energy is allocated one element longer than corr. */
ALLOC(energy, end-start+2, spx_word32_t);
178
/* Energy of the len samples beginning start samples before sw. */
energy[0]=inner_prod(sw-start, sw-start, len);
179
/* Energy of the len samples beginning at sw itself. */
e0=inner_prod(sw, sw, len);
181
/* energy update -------------------------------------*/
192
/* Hardware loop counted by operand %3 (that operand is on a line not shown).
   Each pass reads one sample through I1 and one through I2, both stepping
   backwards, squares each, and stores R2 through P0. The arithmetic that
   combines the two squares with the previous energy is not visible here. */
" LSETUP (eu1, eu2) LC1 = %3;\n\t"
193
"eu1: R1.L = W [I1--] || R0.L = W [I2--] ;\n\t"
194
" R1 = R1.L * R1.L (IS);\n\t"
195
" R0 = R0.L * R0.L (IS);\n\t"
200
/* Keep the larger of R1 and R3 (R3 is presumably a lower bound set on a line
   not shown -- TODO confirm). */
" R2 = MAX(R1,R3);\n\t"
201
"eu2: [P0++] = R2;\n\t"
202
/* Inputs: the energy array and the addresses of the samples just before each
   end of the initial window; the operand list continues on a line not shown. */
: : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]),
204
: "P0", "I1", "I2", "R0", "R1", "R2", "R3"
210
/* Correlations for all end-start+1 lags. */
pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
212
/* FIXME: Fixed-point and floating-point code should be merged */
214
/* 16-bit normalised copies of corr and energy used for the comparisons below. */
VARDECL(spx_word16_t *corr16);
215
VARDECL(spx_word16_t *ener16);
216
ALLOC(corr16, end-start+1, spx_word16_t);
217
ALLOC(ener16, end-start+1, spx_word16_t);
218
/* Normalize to 180 so we can square it and it still fits in 16 bits */
219
normalize16(corr, corr16, 180, end-start+1);
220
normalize16(energy, ener16, 180, end-start+1);
223
/* optimised asm to handle N==1 case */
226
" I0 = %1;\n\t" /* I0: corr16[] */
228
" I1 = %2;\n\t" /* I1: ener16[] */
230
" R2 = -1;\n\t" /* R2: best score */
231
" R3 = 0;\n\t" /* R3: best energy */
232
" P0 = %4;\n\t" /* P0: best pitch */
233
" P1 = %4;\n\t" /* P1: counter */
234
/* One pass per lag (operand %3 is end+1-start). */
" LSETUP (sl1, sl2) LC1 = %3;\n\t"
235
"sl1: R0.L = W [I0++] || R1.L = W [I1++];\n\t"
236
/* Square the normalised correlation. */
" R0 = R0.L * R0.L (IS);\n\t"
238
/* Cross-multiplied comparison, avoiding a division:
   R4 = candidate score * best energy, R5 = best score * candidate energy.
   The instruction that sets cc from R4/R5 is on a line not shown. */
" R4 = R0.L * R3.L;\n\t"
239
" R5 = R2.L * R1.L;\n\t"
241
/* When cc is set, the candidate becomes the new best score/energy/pitch. */
" if cc R2 = R0;\n\t"
242
" if cc R3 = R1;\n\t"
243
" if cc P0 = P1;\n\t"
247
/* Inputs %1..%4; the output operand %0 is on a line not shown. */
: "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start)
248
: "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5"
256
/* Generic N-best search over every lag. */
for (i=start;i<=end;i++)
258
spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
259
/* Instead of dividing the tmp by the energy, we multiply on the other side */
260
if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
262
/* We can safely put it last and then check */
264
/* Energies are stored with +1 added, matching the ADD16(1, ...) used in the
   comparisons. */
best_ener[N-1]=ener16[i-start]+1;
266
/* Check if it comes in front of others */
269
if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
273
/* Shift lower-ranked entries down one slot to make room at position j. */
best_score[k]=best_score[k-1];
274
best_ener[k]=best_ener[k-1];
278
best_ener[j]=ener16[i-start]+1;
288
/* Compute open-loop gain */
295
/* Correlation divided by (10 + (sqrt(e0)*sqrt(energy)) >> 6). */
g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6));
296
/* FIXME: g = max(g,corr/energy) */
305
/* Announces that this header provides pitch_gain_search_3tap_vq() itself
   (presumably so a generic version is compiled out -- confirm where the macro
   is tested). */
#define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
306
#ifdef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
/* Blackfin inline-assembly search of the 3-tap pitch-gain codebook.
   The asm walks gain_cdbk four signed bytes at a time (g[0], g[1], g[2] and a
   gain_sum byte), evaluates the same sequence of products as
   compute_pitch_error() against C16, and records the largest sum in best_sum
   and the matching loop counter in best_cdbk.
   NOTE(review): the asm sets best_sum to 0 and declares it as an output-only
   operand, so the C initialiser -VERY_LARGE32 does not reach the asm; confirm
   this starting value is intended.
   NOTE(review): pitch_control is hard-coded as 64 inside the asm, matching the
   C initialiser; the two would need changing together.
   NOTE(review): some parameters, braces, several asm instructions, the end of
   the operand list, the return statement and the matching #endif are not
   visible in this excerpt. */
307
static int pitch_gain_search_3tap_vq(
308
const signed char *gain_cdbk,
311
spx_word16_t max_gain
314
const signed char *ptr=gain_cdbk;
316
spx_word32_t best_sum=-VERY_LARGE32;
319
spx_word16_t pitch_control=64;
320
spx_word16_t gain_sum;
323
/* fast asm version of VQ codebook search */
328
" P0 = %2;\n\t" /* P0: ptr to gain_cdbk */
329
" L1 = 0;\n\t" /* L1 = 0: no circular addressing on I1 */
330
" %0 = 0;\n\t" /* %0: best_sum */
331
" %1 = 0;\n\t" /* %1: best_cdbk */
332
" P1 = 0;\n\t" /* P1: loop counter */
334
/* One pass per codebook entry (operand %4 is gain_cdbk_size). */
" LSETUP (pgs1, pgs2) LC1 = %4;\n\t"
335
"pgs1: R2 = B [P0++] (X);\n\t" /* R2: g[0] */
336
" R3 = B [P0++] (X);\n\t" /* R3: g[1] */
337
" R4 = B [P0++] (X);\n\t" /* R4: g[2] */
341
" R4.H = 64;\n\t" /* R4.H: pitch_control */
343
" R0 = B [P0++] (X);\n\t"
344
" B0 = R0;\n\t" /* B0: gain_sum */
346
/* compute_pitch_error() -------------------------------*/
348
" I1 = %3;\n\t" /* I1: ptr to C */
351
" R0.L = W[I1++];\n\t"
352
/* Terms added: gain times pitch_control. */
" R1.L = R2.L*R4.H (IS);\n\t"
353
" A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
355
" R1.L = R3.L*R4.H (IS);\n\t"
356
" A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
358
" R1.L = R4.L*R4.H (IS);\n\t"
359
" A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
361
/* Terms subtracted: cross products between two different gains. */
" R1.L = R2.L*R3.L (IS);\n\t"
362
" A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
364
" R1.L = R4.L*R3.L (IS);\n\t"
365
" A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
367
" R1.L = R4.L*R2.L (IS);\n\t"
368
" A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
370
/* Terms subtracted: squares of each gain. */
" R1.L = R2.L*R2.L (IS);\n\t"
371
" A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
373
" R1.L = R3.L*R3.L (IS);\n\t"
374
" A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
376
" R1.L = R4.L*R4.L (IS);\n\t"
377
" R0 = (A0 -= R1.L*R0.L) (IS);\n\t" /* last term; the finished sum is also copied to R0 */
380
Re-arrange the if-then to code efficiently on the Blackfin:
382
if (sum>best_sum && gain_sum<=max_gain) ------ (1)
384
if (sum>best_sum && !(gain_sum>max_gain)) ------ (2)
386
if (max_gain<=gain_sum) { ------ (3)
391
The blackin cc instructions are all of the form:
393
cc = x < y (or cc = x <= y)
398
" cc = R2 <= R1;\n\t" /* presumably max_gain <= gain_sum as in form (3); the loads of R1/R2/R3 are not shown */
399
" if cc R0 = R3;\n\t" /* then replace the sum with R3 (presumably a value that cannot win -- TODO confirm) */
400
" cc = %0 <= R0;\n\t" /* best_sum <= sum ? */
401
" if cc %0 = R0;\n\t" /* record the new best sum */
402
" if cc %1 = P1;\n\t" /* record the loop counter as best_cdbk */
406
: "=&d" (best_sum), "=&d" (best_cdbk) /* outputs %0, %1 (early-clobber) */
407
: "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain), /* inputs %2..%5; list continues on a line not shown */
409
: "R0", "R1", "R2", "R3", "R4", "P0",
410
"P1", "I1", "L1", "A0", "B0"