63
63
while (loop < bufsize)
66
* pre : mm6 = [buffratio<<16|buffratio<<16]
67
* post : mm0 = S + ((D-S)*buffratio)>>16 format [X|Y]
66
* pre : mm6 = [rat16|rat16]
67
* post : mm0 = S + ((D-S)*rat16)>>16 format [X|Y]
68
68
* modified = mm0,mm1,mm2
71
__asm__ __volatile__ (
74
: :"X"(brutS[loop]),"X"(brutD[loop])
77
psubd_r2r (mm0,mm1); /* mm1 = D - S */
78
movq_r2r (mm1, mm2); /* mm2 = D - S */
81
mmx_r2r (pmulhuw, mm6, mm1); /* mm1 = ?? */
82
pmullw_r2r (mm6, mm2);
84
paddd_r2r (mm2, mm1); /* mm1 = (D - S) * buffratio >> 16 */
87
paddd_r2r (mm1, mm0); /* mm0 = S + mm1 */
72
("#1 \n\t movq %0, %%mm0"
73
"#1 \n\t movq %1, %%mm1"
74
"#1 \n\t psubd %%mm0, %%mm1" /* mm1 = D - S */
75
"#1 \n\t movq %%mm1, %%mm2" /* mm2 = D - S */
76
"#1 \n\t pslld $16, %%mm1"
77
"#1 \n\t pmullw %%mm6, %%mm2"
78
"#1 \n\t pmulhuw %%mm6, %%mm1"
80
"#1 \n\t pslld $16, %%mm0"
81
"#1 \n\t paddd %%mm2, %%mm1" /* mm1 = (D - S) * buffratio >> 16 */
83
"#1 \n\t paddd %%mm1, %%mm0" /* mm0 = S + mm1 */
84
"#1 \n\t psrld $16, %%mm0"
91
91
* pre : mm0 : position vector on screen
93
93
* post : clipped mm0
94
94
* modified : mm0,mm1,mm2
96
movq_m2r (prevXY,mm1);
97
pcmpgtd_r2r (mm0, mm1); /* mm0 en X contient :
97
("#1 \n\t movq %0, %%mm1"
98
"#1 \n\t pcmpgtd %%mm0, %%mm1"
99
/* mm0 en X contient (idem pour Y) :
100
* 1111 si prevXY > px
101
* 0000 si prevXY <= px */
101
102
#ifdef STRICT_COMPAT
103
punpckhdq_r2r (mm2,mm2);
104
punpckldq_r2r (mm1,mm1);
103
"#1 \n\t movq %%mm1, %%mm2"
104
"#1 \n\t punpckhdq %%mm2, %%mm2"
105
"#1 \n\t punpckldq %%mm1, %%mm1"
106
"#1 \n\t pand %%mm2, %%mm0"
107
pand_r2r (mm1, mm0); /* on met a zero la partie qui deborde */
110
* pre : mm0 : clipped position on screen
112
* post : mm3 & mm4 : coefs for this position
113
* mm1 : X vector [0|X]
109
"#1 \n\t pand %%mm1, %%mm0" /* on met a zero la partie qui deborde */
113
* pre : mm0 : clipped position on screen
115
* post : mm3 : coefs for this position
116
* mm1 : X vector [0|X]
117
120
__asm__ __volatile__ (
130
"movd (%%ebx,%%eax,4),%%mm3\n"
131
::"g"(precalCoef):"eax","ebx");
121
"#2 \n\t movd %%mm0,%%esi"
122
"#2 \n\t movq %%mm0,%%mm1"
124
"#2 \n\t andl $15,%%esi"
125
"#2 \n\t psrlq $32,%%mm1"
127
"#2 \n\t shll $6,%%esi"
128
"#2 \n\t movd %%mm1,%%eax"
130
"#2 \n\t addl %0,%%esi"
131
"#2 \n\t andl $15,%%eax"
133
"#2 \n\t movd (%%esi,%%eax,4),%%mm3"
134
::"g"(precalCoef):"eax","esi");
134
* extraction des coefficients...
137
* extraction des coefficients... (Thread #3)
136
139
* pre : coef dans mm3
144
/* entrelace avec portion d'apres (cf les '^')
145
movq_r2r (mm3, mm5); / * ??-??-??-??-c4-c3-c2-c1 * /
146
punpcklbw_r2r (mm5, mm3); / * c4-c4-c3-c3-c2-c2-c1-c1 * /
147
movq_r2r (mm3, mm4); / * c4-c4-c3-c3-c2-c2-c1-c1 * /
148
movq_r2r (mm3, mm5); / * c4-c4-c3-c3-c2-c2-c1-c1 * /
150
punpcklbw_r2r (mm5, mm3); / * c2-c2-c2-c2-c1-c1-c1-c1 * /
151
punpckhbw_r2r (mm5, mm4); / * c4-c4-c4-c4-c3-c3-c3-c3 * /
155
148
* pre : mm0 : Y pos [*|Y]
156
149
* mm1 : X pos [*|X]
158
151
* post : mm0 : expix1[position]
159
152
* mm2 : expix1[position+largeur]
163
psrld_i2r (PERTEDEC,mm0);
164
psrld_i2r (PERTEDEC,mm1);
165
156
__asm__ __volatile__ (
167
/*^*/ "movq %%mm3,%%mm5\n" /*^*/
171
/*^*/ "punpcklbw %%mm5, %%mm3\n" /*^*/
174
/*^*/ "movq %%mm3,%%mm4\n" /*^*/
175
/*^*/ "movq %%mm3,%%mm5\n" /*^*/
178
/*^*/ "punpcklbw %%mm5,%%mm3\n" /*^*/
180
"movq (%%ebx,%%eax,4),%%mm0\n"
181
/*^*/ "punpckhbw %%mm5,%%mm4\n" /*^*/
184
"movq (%%ebx,%%eax,4),%%mm2\n"
186
: : "X"(expix1), "X"(prevX):"eax","ebx"
157
"#2 \n\t psrld $4, %%mm0"
158
"#2 \n\t psrld $4, %%mm1" /* PERTEDEC = $4 */
160
"#4 \n\t movd %%mm1,%%eax"
161
"#3 \n\t movq %%mm3,%%mm5"
164
"#4 \n\t movd %%mm0,%%esi"
166
"#3 \n\t punpcklbw %%mm5, %%mm3"
167
"#4 \n\t addl %%esi, %%eax"
169
"#3 \n\t movq %%mm3, %%mm4"
170
"#3 \n\t movq %%mm3, %%mm5"
172
"#4 \n\t movl %0, %%esi"
173
"#3 \n\t punpcklbw %%mm5, %%mm3"
175
"#4 \n\t movq (%%esi,%%eax,4),%%mm0"
176
"#3 \n\t punpckhbw %%mm5, %%mm4"
178
"#4 \n\t addl %1,%%eax"
179
"#4 \n\t movq (%%esi,%%eax,4),%%mm2"