/* Copyright (C) 2002 Jean-Marc Valin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Square of x. The argument is expanded twice, so only pass expressions
   without side effects (no i++, no function calls with state). */
#define sqr(x) ((x)*(x))

/* Energy floor used throughout this file: it is added to the frame energy
   before taking the logarithm, seeds the log-energy history and the noise
   accumulator, and gates the noise-estimate updates (ener > MIN_ENERGY).
   NOTE(review): NOISE_POW is used alongside this constant but its
   definition is not present in this copy of the file -- confirm where it
   is defined. */
#define MIN_ENERGY 6000
/* Threshold table with one row per mode (each row is labelled with its
   nominal bit-rate) and 11 columns per row.
   NOTE(review): the code that reads this table is not in this file; the
   columns presumably correspond to quality settings 0..10 -- confirm
   against the caller. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
/* Threshold table with one row per mode (each row is labelled with its
   nominal bit-rate, or "silence") and 11 columns per row.
   NOTE(review): the code that reads this table is not in this file; the
   columns presumably correspond to quality settings 0..10 -- confirm
   against the caller. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};
/* Threshold table with two rows ("silence" and the 2 kbps mode) and 11
   columns per row.
   NOTE(review): the code that reads this table is not in this file; the
   columns presumably correspond to quality settings 0..10 -- confirm
   against the caller. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};
76
void vbr_init(VBRState *vbr)
80
vbr->average_energy=0;
85
vbr->last_pitch_coef=0;
88
vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
89
vbr->noise_accum_count=.05;
90
vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
94
for (i=0;i<VBR_MEMORY_SIZE;i++)
95
vbr->last_log_energy[i] = log(MIN_ENERGY);
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  - Attacks (positive energy derivative) should be coded with more bits

  - Stationary voiced segments should receive more bits

  - Segments with (very) low absolute energy should receive fewer bits
    (maybe [the remainder of this remark is missing from this copy])

  - DTX for near-zero energy?

  - Stationary fricative segments should have fewer bits

  - Temporal masking: when energy slope is decreasing, decrease the bit-rate

  - Decrease bit-rate for males (low pitch)?

  - (wideband only) fewer bits in the high-band when signal is very
    non-stationary (harder to notice high-frequency noise)???
*/
/*
 * Per-frame VBR analysis. From the samples sig[0..len-1] and pitch_coef it
 * measures the energy of each half of the frame, a non-stationarity figure
 * against the stored log-energy history, and a voicing figure; it updates
 * the running statistics kept in *vbr (average energy, noise accumulator,
 * soft pitch, history) and adjusts a quality value `qual`, which is also
 * stored in vbr->last_quality.
 *
 * The `pitch` parameter is not referenced by any line visible here.
 *
 * NOTE(review): this copy of the function is damaged. The bare integers on
 * their own lines are line numbers left behind by an extraction tool, and
 * the gaps between them show that statements were dropped: the braces, the
 * declarations of i/qual/va/log_energy/non_st/voicing/pow_ener/tmp, several
 * branch bodies and clamps, and the return statement. Restore the function
 * from a pristine copy before changing any logic.
 */
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
127
float ener=0, ener1=0, ener2=0;
135
/* Sum of squares over the first half of the frame. */
for (i=0;i<len>>1;i++)
136
ener1 += ((float)sig[i])*sig[i];
138
/* Sum of squares over the second half of the frame. */
for (i=len>>1;i<len;i++)
139
ener2 += ((float)sig[i])*sig[i];
142
/* NOTE(review): in the visible lines `ener` is never assigned after its
   initialisation to 0; the statement combining ener1 and ener2 appears to
   be among the missing lines. */
log_energy = log(ener+MIN_ENERGY);
143
/* Non-stationarity: summed squared distance between this frame's log
   energy and each stored past value, then scaled by 1/(30*VBR_MEMORY_SIZE). */
for (i=0;i<VBR_MEMORY_SIZE;i++)
144
non_st += sqr(log_energy-vbr->last_log_energy[i]);
145
non_st = non_st/(30*VBR_MEMORY_SIZE);
149
/* Signed square of (pitch_coef - .4), scaled by 3: negative below .4,
   positive above. */
voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
150
/* Exponential moving average of the frame energy, weight energy_alpha. */
vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
151
vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
152
pow_ener = pow(ener,NOISE_POW);
153
/* While the accumulator count is still near its initial .05, re-seed the
   accumulator from the current frame if it is above the energy floor. */
if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
154
vbr->noise_accum = .05*pow_ener;
156
/* Low voicing combined with low non-stationarity and/or energy close to
   the current noise level.
   NOTE(review): the opening of the guarded block and some of its
   statements are missing from this copy. */
if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
157
|| (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
158
|| (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
159
|| (voicing<0 && non_st < .05))
164
/* Limit the value fed to the accumulator to 3x the current noise level.
   NOTE(review): the alternative assignment of tmp is not visible. */
if (pow_ener > 3*vbr->noise_level)
165
tmp = 3*vbr->noise_level;
168
/* Leaky update (0.95 old / 0.05 new) of the accumulator and its count. */
if (vbr->consec_noise>=4)
170
vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
171
vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
178
/* Frames below the current noise level (but above the energy floor) also
   feed the accumulator, with the same leaky update. */
if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
180
vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
181
vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
184
/* Checking for very low absolute energy */
193
/* Log ratios of this frame's energy to the previous frame's energy and to
   the running average energy.
   NOTE(review): the conditions selecting between the qual adjustments
   below are missing from this copy. */
float short_diff, long_diff;
194
short_diff = log((ener+1)/(1+vbr->last_energy));
195
long_diff = log((ener+1)/(1+vbr->average_energy));
196
/*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
204
qual += .6*long_diff;
206
qual += .5*long_diff;
211
qual += .5*short_diff;
213
/* Checking for energy increases */
214
/* NOTE(review): the statement guarded by this `if` is not present. */
if (ener2 > 1.6*ener1)
217
vbr->last_energy = ener;
218
/* Smoothed pitch coefficient: 0.6 old / 0.4 new. */
vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
219
qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));
221
/* When quality drops, move only halfway from the previous value. */
if (qual < vbr->last_quality)
222
qual = .5*qual + .5*vbr->last_quality;
229
/* NOTE(review): the bodies of the next four `if` lines are not present in
   this copy. */
if (vbr->consec_noise>=2)
231
if (vbr->consec_noise>=5)
233
if (vbr->consec_noise>=12)
236
if (vbr->consec_noise>=3)
239
/* Penalty that grows logarithmically with consec_noise. */
if (vbr->consec_noise)
240
qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
246
if (vbr->consec_noise>2)
247
qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
248
if (ener<10000&&vbr->consec_noise>2)
249
qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
252
qual += .3*log(.0001+ener/60000.0);
257
/*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
259
vbr->last_pitch_coef = pitch_coef;
260
vbr->last_quality = qual;
262
/* Shift the log-energy history up by one slot and store the newest value
   at index 0. */
for (i=VBR_MEMORY_SIZE-1;i>0;i--)
263
vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
264
vbr->last_log_energy[0] = log_energy;
266
/*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/
271
void vbr_destroy(VBRState *vbr)
275
#endif /* #ifndef DISABLE_VBR */