2
* MMX optimized LPC DSP utils
3
* Copyright (c) 2007 Loren Merritt
5
* This file is part of FFmpeg.
7
* FFmpeg is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public
9
* License as published by the Free Software Foundation; either
10
* version 2.1 of the License, or (at your option) any later version.
12
* FFmpeg is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
* Lesser General Public License for more details.
17
* You should have received a copy of the GNU Lesser General Public
18
* License along with FFmpeg; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
#include "libavutil/x86_cpu.h"
23
#include "dsputil_mmx.h"
25
static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
27
double c = 2.0 / (len-1.0);
29
x86_reg i = -n2*sizeof(int32_t);
30
x86_reg j = n2*sizeof(int32_t);
32
"movsd %0, %%xmm7 \n\t"
33
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"
34
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"
35
"movlhps %%xmm7, %%xmm7 \n\t"
36
"subpd %%xmm5, %%xmm7 \n\t"
37
"addsd %%xmm6, %%xmm7 \n\t"
40
#define WELCH(MOVPD, offset)\
43
"movapd %%xmm7, %%xmm1 \n\t"\
44
"mulpd %%xmm1, %%xmm1 \n\t"\
45
"movapd %%xmm6, %%xmm0 \n\t"\
46
"subpd %%xmm1, %%xmm0 \n\t"\
47
"pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
48
"cvtpi2pd (%3,%0), %%xmm2 \n\t"\
49
"cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\
50
"mulpd %%xmm0, %%xmm2 \n\t"\
51
"mulpd %%xmm1, %%xmm3 \n\t"\
52
"movapd %%xmm2, (%2,%0,2) \n\t"\
53
MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
54
"subpd %%xmm5, %%xmm7 \n\t"\
59
:"r"(w_data+n2), "r"(data+n2)\
68
void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
71
double tmp[len + lag + 2];
72
double *data1 = tmp + lag;
75
if((x86_reg)data1 & 15)
78
apply_welch_window_sse2(data, len, data1);
84
for(j=0; j<lag; j+=2){
85
x86_reg i = -len*sizeof(double);
88
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
89
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
90
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
92
"movapd (%2,%0), %%xmm3 \n\t"
93
"movupd -8(%3,%0), %%xmm4 \n\t"
94
"movapd (%3,%0), %%xmm5 \n\t"
95
"mulpd %%xmm3, %%xmm4 \n\t"
96
"mulpd %%xmm3, %%xmm5 \n\t"
97
"mulpd -16(%3,%0), %%xmm3 \n\t"
98
"addpd %%xmm4, %%xmm1 \n\t"
99
"addpd %%xmm5, %%xmm0 \n\t"
100
"addpd %%xmm3, %%xmm2 \n\t"
103
"movhlps %%xmm0, %%xmm3 \n\t"
104
"movhlps %%xmm1, %%xmm4 \n\t"
105
"movhlps %%xmm2, %%xmm5 \n\t"
106
"addsd %%xmm3, %%xmm0 \n\t"
107
"addsd %%xmm4, %%xmm1 \n\t"
108
"addsd %%xmm5, %%xmm2 \n\t"
109
"movsd %%xmm0, (%1) \n\t"
110
"movsd %%xmm1, 8(%1) \n\t"
111
"movsd %%xmm2, 16(%1) \n\t"
113
:"r"(autoc+j), "r"(data1+len), "r"(data1+len-j)
118
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
119
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
121
"movapd (%3,%0), %%xmm3 \n\t"
122
"movupd -8(%4,%0), %%xmm4 \n\t"
123
"mulpd %%xmm3, %%xmm4 \n\t"
124
"mulpd (%4,%0), %%xmm3 \n\t"
125
"addpd %%xmm4, %%xmm1 \n\t"
126
"addpd %%xmm3, %%xmm0 \n\t"
129
"movhlps %%xmm0, %%xmm3 \n\t"
130
"movhlps %%xmm1, %%xmm4 \n\t"
131
"addsd %%xmm3, %%xmm0 \n\t"
132
"addsd %%xmm4, %%xmm1 \n\t"
133
"movsd %%xmm0, %1 \n\t"
134
"movsd %%xmm1, %2 \n\t"
135
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
136
:"r"(data1+len), "r"(data1+len-j)