1
/* Copyright (C) 2005 Jean-Marc Valin, CSIRO, Christopher Montgomery
4
Redistribution and use in source and binary forms, with or without
5
modification, are permitted provided that the following conditions
8
- Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
11
- Redistributions in binary form must reproduce the above copyright
12
notice, this list of conditions and the following disclaimer in the
13
documentation and/or other materials provided with the distribution.
15
- Neither the name of the Xiph.org Foundation nor the names of its
16
contributors may be used to endorse or promote products derived from
17
this software without specific prior written permission.
19
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41
#include "vorbis_psy.h"
47
/* psychoacoustic setup ********************************************/
49
static VorbisPsyInfo example_tuning = {
54
/*63 125 250 500 1k 2k 4k 8k 16k*/
55
// vorbis mode 4 style
56
//{-32,-32,-32,-32,-28,-24,-22,-20,-20, -20, -20, -8, -6, -6, -6, -6, -6},
57
{ -4, -6, -6, -6, -6, -6, -6, -6, -8, -8,-10,-10, -8, -6, -4, -4, -2},
60
0, 1, 2, 3, 4, 5, 5, 5, /* 7dB */
61
6, 6, 6, 5, 4, 4, 4, 4, /* 15dB */
62
4, 4, 5, 5, 5, 6, 6, 6, /* 23dB */
63
7, 7, 7, 8, 8, 8, 9, 10, /* 31dB */
64
11,12,13,14,15,16,17, 18, /* 39dB */
71
/* there was no great place to put this.... */
73
static void _analysis_output(char *base,int i,float *v,int n,int bark,int dB){
78
sprintf(buffer,"%s_%d.m",base,i);
81
if(!of)perror("failed to open data dump file");
85
float b=toBARK((4000.f*j/n)+.25);
88
fprintf(of,"%f ",(double)j);
96
fprintf(of,"%f\n",val);
98
fprintf(of,"%f\n",v[j]);
104
static void bark_noise_hybridmp(int n,const long *b,
110
float *N=alloca(n*sizeof(*N));
111
float *X=alloca(n*sizeof(*N));
112
float *XX=alloca(n*sizeof(*N));
113
float *Y=alloca(n*sizeof(*N));
114
float *XY=alloca(n*sizeof(*N));
116
float tN, tX, tXX, tY, tXY;
123
tN = tX = tXX = tY = tXY = 0.f;
126
if (y < 1.f) y = 1.f;
140
for (i = 1, x = 1.f; i < n; i++, x += 1.f) {
143
if (y < 1.f) y = 1.f;
160
for (i = 0, x = 0.f;; i++, x += 1.f) {
168
tXX = XX[hi] + XX[-lo];
170
tXY = XY[hi] - XY[-lo];
172
A = tY * tXX - tX * tXY;
173
B = tN * tXY - tX * tY;
174
D = tN * tXX - tX * tX;
179
noise[i] = R - offset;
182
for ( ;; i++, x += 1.f) {
190
tXX = XX[hi] - XX[lo];
192
tXY = XY[hi] - XY[lo];
194
A = tY * tXX - tX * tXY;
195
B = tN * tXY - tX * tY;
196
D = tN * tXX - tX * tX;
198
if (R < 0.f) R = 0.f;
200
noise[i] = R - offset;
202
for ( ; i < n; i++, x += 1.f) {
205
if (R < 0.f) R = 0.f;
207
noise[i] = R - offset;
210
if (fixed <= 0) return;
212
for (i = 0, x = 0.f;; i++, x += 1.f) {
219
tXX = XX[hi] + XX[-lo];
221
tXY = XY[hi] - XY[-lo];
224
A = tY * tXX - tX * tXY;
225
B = tN * tXY - tX * tY;
226
D = tN * tXX - tX * tX;
229
if (R - offset < noise[i]) noise[i] = R - offset;
231
for ( ;; i++, x += 1.f) {
239
tXX = XX[hi] - XX[lo];
241
tXY = XY[hi] - XY[lo];
243
A = tY * tXX - tX * tXY;
244
B = tN * tXY - tX * tY;
245
D = tN * tXX - tX * tX;
248
if (R - offset < noise[i]) noise[i] = R - offset;
250
for ( ; i < n; i++, x += 1.f) {
252
if (R - offset < noise[i]) noise[i] = R - offset;
256
static void _vp_noisemask(VorbisPsy *p,
261
float *work=alloca(n*sizeof(*work));
263
bark_noise_hybridmp(n,p->bark,logfreq,logmask,
266
for(i=0;i<n;i++)work[i]=logfreq[i]-logmask[i];
268
bark_noise_hybridmp(n,p->bark,work,logmask,0.,
269
p->vi->noisewindowfixed);
271
for(i=0;i<n;i++)work[i]=logfreq[i]-work[i];
278
work2[i]=logmask[i]+work[i];
281
//_analysis_output("logfreq",seq,logfreq,n,0,0);
282
//_analysis_output("median",seq,work,n,0,0);
283
//_analysis_output("envelope",seq,work2,n,0,0);
288
int dB=logmask[i]+.5;
289
if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
291
logmask[i]= work[i]+p->vi->noisecompand[dB]+p->noiseoffset[i];
296
VorbisPsy *vorbis_psy_init(int rate, int n)
298
long i,j,lo=-99,hi=1;
299
VorbisPsy *p = speex_alloc(sizeof(VorbisPsy));
300
memset(p,0,sizeof(*p));
303
spx_drft_init(&p->lookup, n);
304
p->bark = speex_alloc(n*sizeof(*p->bark));
306
p->vi = &example_tuning;
309
p->window = speex_alloc(sizeof(*p->window)*n);
315
p->window[i] = //a0 - a1*cos(2.*M_PI/n*(i+.5)) + a2*cos(4.*M_PI/n*(i+.5)) - a3*cos(6.*M_PI/n*(i+.5));
316
sin((i+.5)/n * M_PI)*sin((i+.5)/n * M_PI);
317
/* bark scale lookups */
319
float bark=toBARK(rate/(2*n)*i);
321
for(;lo+p->vi->noisewindowlomin<i &&
322
toBARK(rate/(2*n)*lo)<(bark-p->vi->noisewindowlo);lo++);
324
for(;hi<=n && (hi<i+p->vi->noisewindowhimin ||
325
toBARK(rate/(2*n)*hi)<(bark+p->vi->noisewindowhi));hi++);
327
p->bark[i]=((lo-1)<<16)+(hi-1);
331
/* set up rolling noise median */
332
p->noiseoffset=speex_alloc(n*sizeof(*p->noiseoffset));
335
float halfoc=toOC((i+.5)*rate/(2.*n))*2.;
339
if(halfoc<0)halfoc=0;
340
if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
341
inthalfoc=(int)halfoc;
342
del=halfoc-inthalfoc;
345
p->vi->noiseoff[inthalfoc]*(1.-del) +
346
p->vi->noiseoff[inthalfoc+1]*del;
350
_analysis_output_always("noiseoff0",ls,p->noiseoffset,n,1,0,0);
356
void vorbis_psy_destroy(VorbisPsy *p)
359
spx_drft_clear(&p->lookup);
363
speex_free(p->noiseoffset);
365
speex_free(p->window);
366
memset(p,0,sizeof(*p));
371
void compute_curve(VorbisPsy *psy, float *audio, float *curve)
376
float scale=4.f/psy->n;
379
scale_dB=todB(scale);
381
/* window the PCM data; use a BH4 window, not vorbis */
382
for(i=0;i<psy->n;i++)
383
work[i]=audio[i] * psy->window[i];
388
//_analysis_output("win",seq,work,psy->n,0,0);
393
/* FFT yields more accurate tonal estimation (not phase sensitive) */
394
spx_drft_forward(&psy->lookup,work);
397
work[0]=scale_dB+todB(work[0]);
398
for(i=1;i<psy->n-1;i+=2){
399
float temp = work[i]*work[i] + work[i+1]*work[i+1];
400
work[(i+1)>>1] = scale_dB+.5f * todB(temp);
403
/* derive a noise curve */
404
_vp_noisemask(psy,work,curve);
406
for (i=0;i<SIDEL;i++)
408
curve[i]=curve[SIDEL];
411
for (i=0;i<SIDEH;i++)
413
curve[(psy->n>>1)-i-1]=curve[(psy->n>>1)-SIDEH];
415
for(i=0;i<((psy->n)>>1);i++)
416
curve[i] = fromdB(1.2*curve[i]+.2*i);
417
//curve[i] = fromdB(0.8*curve[i]+.35*i);
418
//curve[i] = fromdB(0.9*curve[i])*pow(1.0*i+45,1.3);
421
/* Transform a masking curve (power spectrum) into a pole-zero filter */
422
void curve_to_lpc(VorbisPsy *psy, float *curve, float *awk1, float *awk2, int ord)
427
int len = psy->n >> 1;
428
for (i=0;i<2*len;i++)
431
ac[2*i-1] = curve[i];
433
ac[2*len-1] = curve[len-1];
435
spx_drft_backward(&psy->lookup, ac);
436
_spx_lpc(awk1, ac, ord);
447
/* Use the second (awk2) filter to correct the first one */
448
for (i=0;i<2*len;i++)
453
spx_drft_forward(&psy->lookup, ac);
454
/* Compute (power) response of awk1 (all zero) */
457
ac[i] = ac[2*i-1]*ac[2*i-1] + ac[2*i]*ac[2*i];
458
ac[len] = ac[2*len-1]*ac[2*len-1];
459
/* Compute correction required */
461
curve[i] = 1. / (1e-6f+curve[i]*ac[i]);
463
for (i=0;i<2*len;i++)
466
ac[2*i-1] = curve[i];
468
ac[2*len-1] = curve[len-1];
470
spx_drft_backward(&psy->lookup, ac);
471
_spx_lpc(awk2, ac, ord);
486
#define CURVE_SIZE 24
491
float curve[CURVE_SIZE];
492
float awk1[ORDER], awk2[ORDER];
493
for (i=0;i<CURVE_SIZE;i++)
494
scanf("%f ", &curve[i]);
495
for (i=0;i<CURVE_SIZE;i++)
496
curve[i] = pow(10.f, .1*curve[i]);
497
curve_to_lpc(curve, CURVE_SIZE, awk1, awk2, ORDER);
498
for (i=0;i<ORDER;i++)
499
printf("%f ", awk1[i]);
501
for (i=0;i<ORDER;i++)
502
printf("%f ", awk2[i]);