19
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
#include "libavutil/x86_cpu.h"
22
23
#include "dsputil_mmx.h"
24
25
static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
26
27
double c = 2.0 / (len-1.0);
28
long i = -n2*sizeof(int32_t);
29
long j = n2*sizeof(int32_t);
31
"movsd %0, %%xmm7 \n\t"
32
"movapd %1, %%xmm6 \n\t"
33
"movapd %2, %%xmm5 \n\t"
34
"movlhps %%xmm7, %%xmm7 \n\t"
35
"subpd %%xmm5, %%xmm7 \n\t"
36
"addsd %%xmm6, %%xmm7 \n\t"
37
::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2)
29
x86_reg i = -n2*sizeof(int32_t);
30
x86_reg j = n2*sizeof(int32_t);
32
"movsd %0, %%xmm7 \n\t"
33
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"
34
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"
35
"movlhps %%xmm7, %%xmm7 \n\t"
36
"subpd %%xmm5, %%xmm7 \n\t"
37
"addsd %%xmm6, %%xmm7 \n\t"
39
40
#define WELCH(MOVPD, offset)\
42
"movapd %%xmm7, %%xmm1 \n\t"\
43
"mulpd %%xmm1, %%xmm1 \n\t"\
44
"movapd %%xmm6, %%xmm0 \n\t"\
45
"subpd %%xmm1, %%xmm0 \n\t"\
46
"pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
47
"cvtpi2pd (%3,%0), %%xmm2 \n\t"\
48
"cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\
49
"mulpd %%xmm0, %%xmm2 \n\t"\
50
"mulpd %%xmm1, %%xmm3 \n\t"\
51
"movapd %%xmm2, (%2,%0,2) \n\t"\
43
"movapd %%xmm7, %%xmm1 \n\t"\
44
"mulpd %%xmm1, %%xmm1 \n\t"\
45
"movapd %%xmm6, %%xmm0 \n\t"\
46
"subpd %%xmm1, %%xmm0 \n\t"\
47
"pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
48
"cvtpi2pd (%3,%0), %%xmm2 \n\t"\
49
"cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\
50
"mulpd %%xmm0, %%xmm2 \n\t"\
51
"mulpd %%xmm1, %%xmm3 \n\t"\
52
"movapd %%xmm2, (%2,%0,2) \n\t"\
52
53
MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
53
"subpd %%xmm5, %%xmm7 \n\t"\
54
"subpd %%xmm5, %%xmm7 \n\t"\
57
58
:"+&r"(i), "+&r"(j)\
58
59
:"r"(w_data+n2), "r"(data+n2)\
83
84
for(j=0; j<lag; j+=2){
84
long i = -len*sizeof(double);
85
x86_reg i = -len*sizeof(double);
87
"movsd %6, %%xmm0 \n\t"
88
"movsd %6, %%xmm1 \n\t"
89
"movsd %6, %%xmm2 \n\t"
91
"movapd (%4,%0), %%xmm3 \n\t"
92
"movupd -8(%5,%0), %%xmm4 \n\t"
93
"movapd (%5,%0), %%xmm5 \n\t"
94
"mulpd %%xmm3, %%xmm4 \n\t"
95
"mulpd %%xmm3, %%xmm5 \n\t"
96
"mulpd -16(%5,%0), %%xmm3 \n\t"
97
"addpd %%xmm4, %%xmm1 \n\t"
98
"addpd %%xmm5, %%xmm0 \n\t"
99
"addpd %%xmm3, %%xmm2 \n\t"
102
"movhlps %%xmm0, %%xmm3 \n\t"
103
"movhlps %%xmm1, %%xmm4 \n\t"
104
"movhlps %%xmm2, %%xmm5 \n\t"
105
"addsd %%xmm3, %%xmm0 \n\t"
106
"addsd %%xmm4, %%xmm1 \n\t"
107
"addsd %%xmm5, %%xmm2 \n\t"
108
"movsd %%xmm0, %1 \n\t"
109
"movsd %%xmm1, %2 \n\t"
110
"movsd %%xmm2, %3 \n\t"
88
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
89
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
90
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
92
"movapd (%4,%0), %%xmm3 \n\t"
93
"movupd -8(%5,%0), %%xmm4 \n\t"
94
"movapd (%5,%0), %%xmm5 \n\t"
95
"mulpd %%xmm3, %%xmm4 \n\t"
96
"mulpd %%xmm3, %%xmm5 \n\t"
97
"mulpd -16(%5,%0), %%xmm3 \n\t"
98
"addpd %%xmm4, %%xmm1 \n\t"
99
"addpd %%xmm5, %%xmm0 \n\t"
100
"addpd %%xmm3, %%xmm2 \n\t"
103
"movhlps %%xmm0, %%xmm3 \n\t"
104
"movhlps %%xmm1, %%xmm4 \n\t"
105
"movhlps %%xmm2, %%xmm5 \n\t"
106
"addsd %%xmm3, %%xmm0 \n\t"
107
"addsd %%xmm4, %%xmm1 \n\t"
108
"addsd %%xmm5, %%xmm2 \n\t"
109
"movsd %%xmm0, %1 \n\t"
110
"movsd %%xmm1, %2 \n\t"
111
"movsd %%xmm2, %3 \n\t"
111
112
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
112
:"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
113
:"r"(data1+len), "r"(data1+len-j)
116
"movsd %5, %%xmm0 \n\t"
117
"movsd %5, %%xmm1 \n\t"
119
"movapd (%3,%0), %%xmm3 \n\t"
120
"movupd -8(%4,%0), %%xmm4 \n\t"
121
"mulpd %%xmm3, %%xmm4 \n\t"
122
"mulpd (%4,%0), %%xmm3 \n\t"
123
"addpd %%xmm4, %%xmm1 \n\t"
124
"addpd %%xmm3, %%xmm0 \n\t"
127
"movhlps %%xmm0, %%xmm3 \n\t"
128
"movhlps %%xmm1, %%xmm4 \n\t"
129
"addsd %%xmm3, %%xmm0 \n\t"
130
"addsd %%xmm4, %%xmm1 \n\t"
131
"movsd %%xmm0, %1 \n\t"
132
"movsd %%xmm1, %2 \n\t"
117
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
118
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
120
"movapd (%3,%0), %%xmm3 \n\t"
121
"movupd -8(%4,%0), %%xmm4 \n\t"
122
"mulpd %%xmm3, %%xmm4 \n\t"
123
"mulpd (%4,%0), %%xmm3 \n\t"
124
"addpd %%xmm4, %%xmm1 \n\t"
125
"addpd %%xmm3, %%xmm0 \n\t"
128
"movhlps %%xmm0, %%xmm3 \n\t"
129
"movhlps %%xmm1, %%xmm4 \n\t"
130
"addsd %%xmm3, %%xmm0 \n\t"
131
"addsd %%xmm4, %%xmm1 \n\t"
132
"movsd %%xmm0, %1 \n\t"
133
"movsd %%xmm1, %2 \n\t"
133
134
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
134
:"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
135
:"r"(data1+len), "r"(data1+len-j)