" punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \
" pcmpgtw "#v", %%mm4 \n\t" /* .. | 0 | s(vl) | */ \
" pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \
" movq "#s", %%mm5 \n\t" \
" pmulhw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \
" paddw %%mm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \
" psrld $16, "#v" \n\t" /* .. | 0 | vh | */ \
" pmaddwd %%mm5, "#v" \n\t" /* .. | p0 * vh | */ \
" paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
" packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
/* approximately advances %3 = (%3 + a) % b. This function requires that
__asm__ __volatile__ (
" xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
" test $1, %2 \n\t" /* check for odd samples */