2
** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3
** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
5
** This program is free software; you can redistribute it and/or modify
6
** it under the terms of the GNU General Public License as published by
7
** the Free Software Foundation; either version 2 of the License, or
8
** (at your option) any later version.
10
** This program is distributed in the hope that it will be useful,
11
** but WITHOUT ANY WARRANTY; without even the implied warranty of
12
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
** GNU General Public License for more details.
15
** You should have received a copy of the GNU General Public License
16
** along with this program; if not, write to the Free Software
17
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
** Any non-GPL usage of this software or parts of this software is strictly
** forbidden.
22
** Commercial non-GPL licensing of this software is possible.
23
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
25
** $Id: filtbank.c,v 1.38 2004/06/30 12:45:56 menno Exp $
47
/*
 * Build a filter bank context for the given frame length.
 *
 * From the statements visible here: an fb_info is obtained from faad_malloc
 * and cleared with memset, MDCT contexts are created for transform sizes
 * 2*(frame_len/8), 2*frame_len and 2*(frame_len/2), and the sine/KBD window
 * table pointers are chosen for frame_len == 1024 or, otherwise, the
 * 960-sample tables.
 *
 * NOTE(review): this listing carries stray number-only lines and the
 * numbering has gaps, so braces, some preprocessor guards and the return
 * statement are presumably missing from view -- compare with the pristine
 * file before relying on the exact control flow.
 */
fb_info *filter_bank_init(uint16_t frame_len)
49
uint16_t nshort = frame_len/8;
51
uint16_t frame_len_ld = frame_len/2;
54
fb_info *fb = (fb_info*)faad_malloc(sizeof(fb_info));
55
/* NOTE(review): no NULL check on the faad_malloc result is visible before it
 * is cleared -- confirm whether faad_malloc can return NULL. */
memset(fb, 0, sizeof(fb_info));
58
/* one MDCT context per transform size: short blocks, long blocks ... */
fb->mdct256 = faad_mdct_init(2*nshort);
59
fb->mdct2048 = faad_mdct_init(2*frame_len);
62
/* ... and a half-frame-length transform; the "ld" naming suggests the
 * low-delay path -- TODO confirm */
fb->mdct1024 = faad_mdct_init(2*frame_len_ld);
65
#ifdef ALLOW_SMALL_FRAMELENGTH
66
if (frame_len == 1024)
69
/* index 0 = sine tables, index 1 = KBD tables (going by the table names) */
fb->long_window[0] = sine_long_1024;
70
fb->short_window[0] = sine_short_128;
71
fb->long_window[1] = kbd_long_1024;
72
fb->short_window[1] = kbd_short_128;
74
fb->ld_window[0] = sine_mid_512;
75
fb->ld_window[1] = ld_mid_512;
77
#ifdef ALLOW_SMALL_FRAMELENGTH
78
} else /* (frame_len == 960) */ {
79
fb->long_window[0] = sine_long_960;
80
fb->short_window[0] = sine_short_120;
81
fb->long_window[1] = kbd_long_960;
82
fb->short_window[1] = kbd_short_120;
84
fb->ld_window[0] = sine_mid_480;
85
fb->ld_window[1] = ld_mid_480;
93
/* Two different assignments to if_func follow; the condition that chooses
 * between them is not visible in this listing. */
fb->if_func = ifilter_bank_sse;
95
fb->if_func = ifilter_bank;
102
/*
 * Tear down a filter bank: prints the cycle counter held in fb->cycles and
 * releases the three MDCT contexts with faad_mdct_end.
 *
 * NOTE(review): neither a NULL guard nor a release of fb itself is visible
 * here; the numbering gaps suggest such lines may simply be missing from this
 * listing -- verify against the pristine file.
 */
void filter_bank_end(fb_info *fb)
107
/* NOTE(review): "%I64d" is the Microsoft-specific 64-bit length modifier, so
 * this printf is presumably confined to a profiling build on that toolchain
 * -- confirm. */
printf("FB: %I64d cycles\n", fb->cycles);
110
faad_mdct_end(fb->mdct256);
111
faad_mdct_end(fb->mdct2048);
113
faad_mdct_end(fb->mdct1024);
120
/*
 * Runs faad_imdct on in_data, writing the result to out_data.
 *
 * Two call forms are visible: one through a local mdct_info pointer
 * (initialised to NULL here) and one directly through fb->mdct2048. The code
 * that points the local at a context -- presumably a choice keyed on len --
 * is not visible in this listing; TODO confirm against the full file.
 */
static INLINE void imdct_long(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
123
mdct_info *mdct = NULL;
137
faad_imdct(mdct, in_data, out_data);
139
faad_imdct(fb->mdct2048, in_data, out_data);
144
/*
 * SSE counterpart of imdct_long: runs faad_imdct_sse on in_data, writing the
 * result to out_data.
 *
 * As in imdct_long, one call goes through a local mdct_info pointer
 * (initialised to NULL here) and one through fb->mdct2048; the statements
 * that select between them are not visible in this listing -- TODO confirm.
 */
static INLINE void imdct_long_sse(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
147
mdct_info *mdct = NULL;
161
faad_imdct_sse(mdct, in_data, out_data);
163
faad_imdct_sse(fb->mdct2048, in_data, out_data);
169
/*
 * Forward transform helper: passes in_data to faad_mdct, which writes
 * out_data.
 *
 * NOTE(review): the local context pointer is initialised to NULL and the
 * statements that assign it (presumably from fb, based on len) are not
 * visible in this listing -- confirm before relying on this block. The local
 * also shares its name with the function itself.
 */
static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
171
mdct_info *mdct = NULL;
191
faad_mdct(mdct, in_data, out_data);
195
/*
 * Inverse filter bank, scalar version: transforms freq_in with the inverse
 * MDCT, applies the selected windows and combines the result with the saved
 * overlap buffer.
 *
 *   fb                - context providing the MDCT handles and window tables
 *   window_sequence   - selects one of the four cases of the switch below
 *   window_shape      - window table index for the current frame
 *   window_shape_prev - window table index for the previous frame
 *   freq_in           - spectral input
 *   time_out          - receives the output samples of this frame
 *   overlap           - read as the previous frame's tail, then rewritten
 *                       with this frame's tail for the next call
 *   object_type       - LD selects fb->ld_window for the long windows
 *   frame_len         - samples per frame; transf_buf holds 2*1024 values and
 *                       is indexed up to 2*frame_len-1, so frame_len must not
 *                       exceed 1024
 *
 * NOTE(review): this listing carries stray number-only lines and the
 * numbering has gaps; the declaration of i, braces, else branches and the
 * break statements between cases are not visible -- compare with the
 * pristine file before relying on the exact control flow.
 */
void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
196
uint8_t window_shape_prev, real_t *freq_in,
197
real_t *time_out, real_t *overlap,
198
uint8_t object_type, uint16_t frame_len)
201
ALIGN real_t transf_buf[2*1024] = {0};
203
const real_t *window_long = NULL;
204
const real_t *window_long_prev = NULL;
205
const real_t *window_short = NULL;
206
const real_t *window_short_prev = NULL;
208
uint16_t nlong = frame_len;
209
uint16_t nshort = frame_len/8;
210
uint16_t trans = nshort/2;
212
/* length of each flat (unwindowed) region around the short window inside a
 * long block */
uint16_t nflat_ls = (nlong-nshort)/2;
215
int64_t count = faad_get_ts();
218
/* select windows of current frame and previous frame (Sine or KBD) */
220
if (object_type == LD)
222
window_long = fb->ld_window[window_shape];
223
window_long_prev = fb->ld_window[window_shape_prev];
226
window_long = fb->long_window[window_shape];
227
window_long_prev = fb->long_window[window_shape_prev];
228
window_short = fb->short_window[window_shape];
229
window_short_prev = fb->short_window[window_shape_prev];
235
/* Dumps freq_in and the window parameters with printf.
 * NOTE(review): "%d" is applied to real_t elements, which only matches if
 * real_t is an int-sized integer type; presumably this is disabled debug
 * code -- confirm. */
for (i = 0; i < 1024; i++)
237
printf("%d\n", freq_in[i]);
242
printf("%d %d\n", window_sequence, window_shape);
245
switch (window_sequence)
247
case ONLY_LONG_SEQUENCE:
249
imdct_long(fb, freq_in, transf_buf, 2*nlong);
251
/* add second half output of previous frame to windowed output of current frame */
252
/* unrolled by four, so nlong is assumed to be a multiple of 4 */
for (i = 0; i < nlong; i+=4)
254
time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]);
255
time_out[i+1] = overlap[i+1] + MUL_F(transf_buf[i+1],window_long_prev[i+1]);
256
time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]);
257
time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]);
260
/* window the second half and save as overlap for next frame */
261
/* the window table is read back to front here */
for (i = 0; i < nlong; i+=4)
263
overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]);
264
overlap[i+1] = MUL_F(transf_buf[nlong+i+1],window_long[nlong-2-i]);
265
overlap[i+2] = MUL_F(transf_buf[nlong+i+2],window_long[nlong-3-i]);
266
overlap[i+3] = MUL_F(transf_buf[nlong+i+3],window_long[nlong-4-i]);
270
case LONG_START_SEQUENCE:
272
imdct_long(fb, freq_in, transf_buf, 2*nlong);
274
/* add second half output of previous frame to windowed output of current frame */
275
for (i = 0; i < nlong; i+=4)
277
time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]);
278
time_out[i+1] = overlap[i+1] + MUL_F(transf_buf[i+1],window_long_prev[i+1]);
279
time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]);
280
time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]);
283
/* window the second half and save as overlap for next frame */
284
/* construct second half window using padding with 1's and 0's */
285
/* flat part copied unchanged, then the reversed short window, then zeros */
for (i = 0; i < nflat_ls; i++)
286
overlap[i] = transf_buf[nlong+i];
287
for (i = 0; i < nshort; i++)
288
overlap[nflat_ls+i] = MUL_F(transf_buf[nlong+nflat_ls+i],window_short[nshort-i-1]);
289
for (i = 0; i < nflat_ls; i++)
290
overlap[nflat_ls+nshort+i] = 0;
293
case EIGHT_SHORT_SEQUENCE:
294
/* perform iMDCT for each short block */
295
/* each short transform reads nshort coefficients and is placed 2*nshort
 * apart in transf_buf */
faad_imdct(fb->mdct256, freq_in+0*nshort, transf_buf+2*nshort*0);
296
faad_imdct(fb->mdct256, freq_in+1*nshort, transf_buf+2*nshort*1);
297
faad_imdct(fb->mdct256, freq_in+2*nshort, transf_buf+2*nshort*2);
298
faad_imdct(fb->mdct256, freq_in+3*nshort, transf_buf+2*nshort*3);
299
faad_imdct(fb->mdct256, freq_in+4*nshort, transf_buf+2*nshort*4);
300
faad_imdct(fb->mdct256, freq_in+5*nshort, transf_buf+2*nshort*5);
301
faad_imdct(fb->mdct256, freq_in+6*nshort, transf_buf+2*nshort*6);
302
faad_imdct(fb->mdct256, freq_in+7*nshort, transf_buf+2*nshort*7);
304
/* add second half output of previous frame to windowed output of current frame */
305
for (i = 0; i < nflat_ls; i++)
306
time_out[i] = overlap[i];
307
for(i = 0; i < nshort; i++)
309
/* first short block uses the previous frame's short window; later rows add
 * the falling half of one block to the rising half of the next */
time_out[nflat_ls+ i] = overlap[nflat_ls+ i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]);
310
time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]);
311
time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]);
312
time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]);
314
/* NOTE(review): with nlong = 8*nshort, nflat_ls+4*nshort+i reaches nlong once
 * i >= trans, so a guard on i is presumably on the line missing from the
 * numbering just before this statement -- confirm. */
time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
317
/* window the second half and save as overlap for next frame */
318
for(i = 0; i < nshort; i++)
321
/* NOTE(review): nflat_ls+4*nshort is smaller than nlong, so this index is
 * negative for small i; a guard on i is presumably among the lines missing
 * from the numbering just before this statement -- confirm. */
overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
322
overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]);
323
overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]);
324
overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]);
325
overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]);
327
/* the remainder of the overlap buffer is cleared */
for (i = 0; i < nflat_ls; i++)
328
overlap[nflat_ls+nshort+i] = 0;
331
case LONG_STOP_SEQUENCE:
333
imdct_long(fb, freq_in, transf_buf, 2*nlong);
335
/* add second half output of previous frame to windowed output of current frame */
336
/* construct first half window using padding with 1's and 0's */
337
/* leading flat part: overlap only; middle: previous short window; trailing
 * flat part: unwindowed transform output added to the overlap */
for (i = 0; i < nflat_ls; i++)
338
time_out[i] = overlap[i];
339
for (i = 0; i < nshort; i++)
340
time_out[nflat_ls+i] = overlap[nflat_ls+i] + MUL_F(transf_buf[nflat_ls+i],window_short_prev[i]);
341
for (i = 0; i < nflat_ls; i++)
342
time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i];
344
/* window the second half and save as overlap for next frame */
345
for (i = 0; i < nlong; i++)
346
overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]);
351
/* Dumps time_out with printf; presumably disabled debug code like the dump
 * of freq_in above -- confirm. */
for (i = 0; i < 1024; i++)
353
//printf("%d\n", time_out[i]);
354
printf("0x%.8X\n", time_out[i]);
360
/* timestamp difference for this call; where it is accumulated is not visible
 * in this listing */
count = faad_get_ts() - count;
366
/*
 * Inverse filter bank, SSE version: same four window_sequence cases as
 * ifilter_bank, processed four floats at a time with SSE intrinsics.
 *
 * Unlike ifilter_bank there is no separate overlap parameter: the saved tail
 * of the previous frame is read from time_out[nlong ..] and this frame's tail
 * is written back to the same region, so time_out must hold 2*frame_len
 * values.
 *
 * All loads and stores use _mm_load_ps/_mm_store_ps, which require 16-byte
 * aligned addresses; every offset used below must therefore keep that
 * alignment for freq_in-independent buffers time_out, transf_buf and the
 * window tables.
 *
 * A window read "back to front" is done by loading four values at
 * [n-4-i] and reversing the lanes with _MM_SHUFFLE(0, 1, 2, 3).
 *
 * NOTE(review): the signature differs from ifilter_bank (no overlap
 * parameter) although filter_bank_init assigns both to fb->if_func --
 * confirm the function-pointer type. This listing also carries stray
 * number-only lines and numbering gaps; the declaration of i, braces, break
 * statements and a few __m128 declarations are not visible.
 */
void ifilter_bank_sse(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
367
uint8_t window_shape_prev, real_t *freq_in,
368
real_t *time_out, uint8_t object_type, uint16_t frame_len)
371
ALIGN real_t transf_buf[2*1024] = {0};
373
const real_t *window_long = NULL;
374
const real_t *window_long_prev = NULL;
375
const real_t *window_short = NULL;
376
const real_t *window_short_prev = NULL;
378
uint16_t nlong = frame_len;
379
uint16_t nshort = frame_len/8;
380
uint16_t trans = nshort/2;
382
uint16_t nflat_ls = (nlong-nshort)/2;
385
int64_t count = faad_get_ts();
389
/* window tables for the current and the previous frame */
if (object_type == LD)
391
window_long = fb->ld_window[window_shape];
392
window_long_prev = fb->ld_window[window_shape_prev];
395
window_long = fb->long_window[window_shape];
396
window_long_prev = fb->long_window[window_shape_prev];
397
window_short = fb->short_window[window_shape];
398
window_short_prev = fb->short_window[window_shape_prev];
403
switch (window_sequence)
405
case ONLY_LONG_SEQUENCE:
406
imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
407
/* one pass does both halves: output = first half * previous window + saved
 * tail, new tail = second half * reversed current window */
for (i = 0; i < nlong; i+=4)
409
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
411
m1 = _mm_load_ps(&transf_buf[i]);
412
m2 = _mm_load_ps(&window_long_prev[i]);
413
m6 = _mm_load_ps(&window_long[nlong-4-i]);
414
m3 = _mm_load_ps(&time_out[nlong+i]);
415
m5 = _mm_load_ps(&transf_buf[nlong+i]);
417
m4 = _mm_mul_ps(m1, m2);
418
m7 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 1, 2, 3));
420
m4 = _mm_add_ps(m4, m3);
421
m8 = _mm_mul_ps(m5, m7);
423
_mm_store_ps(&time_out[i], m4);
424
_mm_store_ps(&time_out[nlong+i], m8);
428
case LONG_START_SEQUENCE:
429
imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
430
/* output = first half * previous long window + saved tail */
for (i = 0; i < nlong; i+=4)
432
__m128 m1 = _mm_load_ps(&transf_buf[i]);
433
__m128 m2 = _mm_load_ps(&window_long_prev[i]);
434
__m128 m3 = _mm_load_ps(&time_out[nlong+i]);
436
__m128 m4 = _mm_mul_ps(m1, m2);
437
m4 = _mm_add_ps(m4, m3);
439
_mm_store_ps(&time_out[i], m4);
441
/* new tail: flat part copied unchanged ... */
for (i = 0; i < nflat_ls; i+=4)
443
__m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
444
_mm_store_ps(&time_out[nlong+i], m1);
446
/* ... then the reversed short window ...
 * NOTE(review): m3 and m4 are used below without a declaration visible in
 * this loop; the numbering gap suggests the declaration line is missing from
 * this listing. */
for (i = 0; i < nshort; i+=4)
448
__m128 m1 = _mm_load_ps(&transf_buf[nlong+nflat_ls+i]);
449
__m128 m2 = _mm_load_ps(&window_short[nshort-4-i]);
452
m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
454
m4 = _mm_mul_ps(m1, m3);
456
_mm_store_ps(&time_out[nlong+nflat_ls+i], m4);
458
/* ... then zeros */
for (i = 0; i < nflat_ls; i+=4)
460
__m128 m1 = _mm_setzero_ps();
461
_mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
465
case EIGHT_SHORT_SEQUENCE:
466
/* eight short inverse transforms, placed 2*nshort apart in transf_buf */
faad_imdct_sse(fb->mdct256, &freq_in[0*nshort], &transf_buf[2*nshort*0]);
467
faad_imdct_sse(fb->mdct256, &freq_in[1*nshort], &transf_buf[2*nshort*1]);
468
faad_imdct_sse(fb->mdct256, &freq_in[2*nshort], &transf_buf[2*nshort*2]);
469
faad_imdct_sse(fb->mdct256, &freq_in[3*nshort], &transf_buf[2*nshort*3]);
470
faad_imdct_sse(fb->mdct256, &freq_in[4*nshort], &transf_buf[2*nshort*4]);
471
faad_imdct_sse(fb->mdct256, &freq_in[5*nshort], &transf_buf[2*nshort*5]);
472
faad_imdct_sse(fb->mdct256, &freq_in[6*nshort], &transf_buf[2*nshort*6]);
473
faad_imdct_sse(fb->mdct256, &freq_in[7*nshort], &transf_buf[2*nshort*7]);
474
/* leading flat part: saved tail is copied straight to the output */
for (i = 0; i < nflat_ls; i+=4)
476
__m128 m1 = _mm_load_ps(&time_out[nlong+i]);
477
_mm_store_ps(&time_out[i], m1);
479
/* first short block: rising half with the previous frame's short window */
for (i = 0; i < nshort; i+=4)
481
__m128 m1 = _mm_load_ps(&transf_buf[nshort*0+i]);
482
__m128 m2 = _mm_load_ps(&window_short_prev[i]);
483
__m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
485
__m128 m4 = _mm_mul_ps(m1, m2);
486
m4 = _mm_add_ps(m4, m3);
488
_mm_store_ps(&time_out[nflat_ls+i], m4);
490
/* junction of short blocks 0 and 1: falling half (reversed window) plus
 * rising half, plus the saved tail */
for (i = 0; i < nshort; i+=4)
492
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
493
m1 = _mm_load_ps(&transf_buf[nshort*1+i]);
494
m2 = _mm_load_ps(&window_short[nshort-4-i]);
495
m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*1+i]);
496
m6 = _mm_load_ps(&transf_buf[nshort*2+i]);
497
m7 = _mm_load_ps(&window_short[i]);
499
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
501
m4 = _mm_mul_ps(m1, m5);
502
m8 = _mm_mul_ps(m6, m7);
503
m4 = _mm_add_ps(m4, m3);
504
m4 = _mm_add_ps(m4, m8);
506
_mm_store_ps(&time_out[nflat_ls+1*nshort+i], m4);
508
/* junction of short blocks 1 and 2 */
for (i = 0; i < nshort; i+=4)
510
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
511
m1 = _mm_load_ps(&transf_buf[nshort*3+i]);
512
m2 = _mm_load_ps(&window_short[nshort-4-i]);
513
m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*2+i]);
514
m6 = _mm_load_ps(&transf_buf[nshort*4+i]);
515
m7 = _mm_load_ps(&window_short[i]);
517
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
519
m4 = _mm_mul_ps(m1, m5);
520
m8 = _mm_mul_ps(m6, m7);
521
m4 = _mm_add_ps(m4, m3);
522
m4 = _mm_add_ps(m4, m8);
524
_mm_store_ps(&time_out[nflat_ls+2*nshort+i], m4);
526
/* junction of short blocks 2 and 3 */
for (i = 0; i < nshort; i+=4)
528
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
529
m1 = _mm_load_ps(&transf_buf[nshort*5+i]);
530
m2 = _mm_load_ps(&window_short[nshort-4-i]);
531
m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*3+i]);
532
m6 = _mm_load_ps(&transf_buf[nshort*6+i]);
533
m7 = _mm_load_ps(&window_short[i]);
535
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
537
m4 = _mm_mul_ps(m1, m5);
538
m8 = _mm_mul_ps(m6, m7);
539
m4 = _mm_add_ps(m4, m3);
540
m4 = _mm_add_ps(m4, m8);
542
_mm_store_ps(&time_out[nflat_ls+3*nshort+i], m4);
544
/* junction of short blocks 3 and 4, first half (i < trans): still inside
 * the output region, so the saved tail is added */
for(i = 0; i < trans; i+=4)
546
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
547
m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
548
m2 = _mm_load_ps(&window_short[nshort-4-i]);
549
m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*4+i]);
550
m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
551
m7 = _mm_load_ps(&window_short[i]);
553
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
555
m4 = _mm_mul_ps(m1, m5);
556
m8 = _mm_mul_ps(m6, m7);
557
m4 = _mm_add_ps(m4, m3);
558
m4 = _mm_add_ps(m4, m8);
560
_mm_store_ps(&time_out[nflat_ls+4*nshort+i], m4);
562
/* second half (i >= trans): with nlong = 8*nshort the store index
 * nflat_ls+4*nshort+i is now >= nlong, i.e. inside the tail region, so
 * nothing is added and the result becomes the new saved tail */
for (i = trans; i < nshort; i+=4)
564
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
565
m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
566
m2 = _mm_load_ps(&window_short[nshort-4-i]);
567
m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
568
m7 = _mm_load_ps(&window_short[i]);
570
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
572
m4 = _mm_mul_ps(m1, m5);
573
m8 = _mm_mul_ps(m6, m7);
574
m3 = _mm_add_ps(m4, m8);
576
_mm_store_ps(&time_out[nflat_ls+4*nshort+i], m3);
578
/* junction of short blocks 4 and 5 (tail region) */
for (i = 0; i < nshort; i+=4)
580
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
581
m1 = _mm_load_ps(&transf_buf[nshort*9+i]);
582
m2 = _mm_load_ps(&window_short[nshort-4-i]);
583
m6 = _mm_load_ps(&transf_buf[nshort*10+i]);
584
m7 = _mm_load_ps(&window_short[i]);
586
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
588
m4 = _mm_mul_ps(m1, m5);
589
m8 = _mm_mul_ps(m6, m7);
590
m3 = _mm_add_ps(m4, m8);
592
_mm_store_ps(&time_out[nflat_ls+5*nshort+i], m3);
594
/* junction of short blocks 5 and 6 (tail region) */
for (i = 0; i < nshort; i+=4)
596
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
597
m1 = _mm_load_ps(&transf_buf[nshort*11+i]);
598
m2 = _mm_load_ps(&window_short[nshort-4-i]);
599
m6 = _mm_load_ps(&transf_buf[nshort*12+i]);
600
m7 = _mm_load_ps(&window_short[i]);
602
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
604
m4 = _mm_mul_ps(m1, m5);
605
m8 = _mm_mul_ps(m6, m7);
606
m3 = _mm_add_ps(m4, m8);
608
_mm_store_ps(&time_out[nflat_ls+6*nshort+i], m3);
610
/* junction of short blocks 6 and 7 (tail region) */
for (i = 0; i < nshort; i+=4)
612
__m128 m1, m2, m3, m4, m5, m6, m7, m8;
613
m1 = _mm_load_ps(&transf_buf[nshort*13+i]);
614
m2 = _mm_load_ps(&window_short[nshort-4-i]);
615
m6 = _mm_load_ps(&transf_buf[nshort*14+i]);
616
m7 = _mm_load_ps(&window_short[i]);
618
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
620
m4 = _mm_mul_ps(m1, m5);
621
m8 = _mm_mul_ps(m6, m7);
622
m3 = _mm_add_ps(m4, m8);
624
_mm_store_ps(&time_out[nflat_ls+7*nshort+i], m3);
626
/* falling half of the last short block only */
for (i = 0; i < nshort; i+=4)
628
__m128 m1, m2, m3, m5;
629
m1 = _mm_load_ps(&transf_buf[nshort*15+i]);
630
m2 = _mm_load_ps(&window_short[nshort-4-i]);
632
m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
634
m3 = _mm_mul_ps(m1, m5);
636
_mm_store_ps(&time_out[nflat_ls+8*nshort+i], m3);
638
/* rest of the tail region is cleared */
for (i = 0; i < nflat_ls; i+=4)
640
__m128 m1 = _mm_setzero_ps();
641
_mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
645
case LONG_STOP_SEQUENCE:
646
imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
647
/* leading flat part: saved tail only */
for (i = 0; i < nflat_ls; i+=4)
649
__m128 m1 = _mm_load_ps(&time_out[nlong+i]);
650
_mm_store_ps(&time_out[i], m1);
652
/* middle: previous frame's short window applied, saved tail added */
for (i = 0; i < nshort; i+=4)
654
__m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+i]);
655
__m128 m2 = _mm_load_ps(&window_short_prev[i]);
656
__m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
658
__m128 m4 = _mm_mul_ps(m1, m2);
659
m4 = _mm_add_ps(m4, m3);
661
_mm_store_ps(&time_out[nflat_ls+i], m4);
663
/* trailing flat part: unwindowed transform output plus saved tail */
for (i = 0; i < nflat_ls; i+=4)
665
__m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+nshort+i]);
666
__m128 m2 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort+i]);
668
__m128 m3 = _mm_add_ps(m1, m2);
670
_mm_store_ps(&time_out[nflat_ls+nshort+i], m3);
672
/* new tail: second half times the reversed long window.
 * NOTE(review): m3 and m4 are used below without a declaration visible in
 * this loop; the numbering gap suggests the declaration line is missing from
 * this listing. */
for (i = 0; i < nlong; i+=4)
674
__m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
675
__m128 m2 = _mm_load_ps(&window_long[nlong-4-i]);
678
m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
680
m4 = _mm_mul_ps(m1, m3);
682
_mm_store_ps(&time_out[nlong+i], m4);
688
/* timestamp difference for this call; where it is accumulated is not visible
 * in this listing */
count = faad_get_ts() - count;
695
/* only works for LTP -> no overlapping, no short blocks */
/*
 * Forward filter bank: windows 2*frame_len samples from in_data into a local
 * buffer and passes that buffer to mdct(), which writes out_mdct.
 *
 *   fb                - context providing the MDCT handles and window tables
 *   window_sequence   - ONLY_LONG, LONG_START or LONG_STOP; EIGHT_SHORT is
 *                       rejected by the assert below
 *   window_shape      - window table index for the current frame
 *   window_shape_prev - window table index for the previous frame
 *   in_data           - 2*frame_len input samples
 *   out_mdct          - destination handed to mdct()
 *   object_type       - LD selects fb->ld_window for the long windows
 *   frame_len         - samples per frame; windowed_buf holds 2*1024 values,
 *                       so frame_len must not exceed 1024
 *
 * NOTE(review): this listing carries stray number-only lines and numbering
 * gaps; the declaration of i, braces, else branches and the break statements
 * between cases are not visible -- compare with the pristine file.
 */
696
void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
697
uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct,
698
uint8_t object_type, uint16_t frame_len)
701
ALIGN real_t windowed_buf[2*1024] = {0};
703
const real_t *window_long = NULL;
704
const real_t *window_long_prev = NULL;
705
const real_t *window_short = NULL;
706
const real_t *window_short_prev = NULL;
708
uint16_t nlong = frame_len;
709
uint16_t nshort = frame_len/8;
710
uint16_t nflat_ls = (nlong-nshort)/2;
712
assert(window_sequence != EIGHT_SHORT_SEQUENCE);
715
/* window tables for the current and the previous frame */
if (object_type == LD)
717
window_long = fb->ld_window[window_shape];
718
window_long_prev = fb->ld_window[window_shape_prev];
721
window_long = fb->long_window[window_shape];
722
window_long_prev = fb->long_window[window_shape_prev];
723
window_short = fb->short_window[window_shape];
724
window_short_prev = fb->short_window[window_shape_prev];
729
switch(window_sequence)
731
case ONLY_LONG_SEQUENCE:
732
/* counts down to 0, so i must be a signed type for the loop to end; its
 * declaration is not visible in this listing -- confirm */
for (i = nlong-1; i >= 0; i--)
734
windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
735
windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
737
mdct(fb, windowed_buf, out_mdct, 2*nlong);
740
case LONG_START_SEQUENCE:
741
/* first half: previous long window; second half: flat copy, reversed short
 * window, then zeros */
for (i = 0; i < nlong; i++)
742
windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
743
for (i = 0; i < nflat_ls; i++)
744
windowed_buf[i+nlong] = in_data[i+nlong];
745
for (i = 0; i < nshort; i++)
746
windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]);
747
for (i = 0; i < nflat_ls; i++)
748
windowed_buf[i+nlong+nflat_ls+nshort] = 0;
749
mdct(fb, windowed_buf, out_mdct, 2*nlong);
752
case LONG_STOP_SEQUENCE:
753
/* NOTE(review): no body is visible for this loop -- the numbering skips a
 * line here, and as written the next for statement would become its body.
 * By symmetry with LONG_START the missing statement presumably clears the
 * leading flat part of windowed_buf -- confirm against the pristine file. */
for (i = 0; i < nflat_ls; i++)
755
for (i = 0; i < nshort; i++)
756
windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]);
757
for (i = 0; i < nflat_ls; i++)
758
windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort];
759
for (i = 0; i < nlong; i++)
760
windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
761
mdct(fb, windowed_buf, out_mdct, 2*nlong);