4
* Extended MMX prediction composition
5
* routines handling the four different interpolation cases...
7
* Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10
* This program is free software; you can redistribute it and/or
11
* modify it under the terms of the GNU General Public License
12
* as published by the Free Software Foundation; either version 2
13
* of the License, or (at your option) any later version.
15
* This program is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU General Public License for more details.
20
* You should have received a copy of the GNU General Public License
21
* along with this program; if not, write to the Free Software
22
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27
#if defined( ARCH_X86) || defined(ARCH_X86_64)
28
#include "mjpeg_types.h"
32
* void predcomp_<ix><iy>_mmxe(char *src,char *dst,int lx, int w, int h, int mask);
34
* ix - Interpolation in x iy - Interpolation in y
39
/*
 * The no interpolation case (ix = 0, iy = 0).
 *
 * The statements visible here load 8-byte groups from src and src+8,
 * load dst+8 into mm2 and store mm4 back to dst+8.
 * NOTE(review): how mask, w, h and lx steer the routine is not evident
 * from these statements -- confirm against the complete body.
 */
41
void predcomp_00_mmxe(char *src,char *dst,int lx, int w, int h, int mask)
45
/* Replicate the low dword of mm0 into both halves of the register. */
punpckldq_r2r(mm0, mm0);
46
/* Per-dword equality test of mm0 against mm2; the result lands in mm0. */
pcmpeqd_r2r(mm2, mm0);
48
/* NOTE(review): the same compare is issued again -- presumably mm0 is
   copied to another register in between; confirm. */
pcmpeqd_r2r(mm2, mm0);
51
movq_m2r(*src, mm4); /* first 8 bytes of row */
62
movq_m2r(*(src+8), mm4); /* second 8 bytes of row (offset 8) */
63
movq_m2r(*(dst+8), mm2); /* the 8 bytes currently held at dst+8 */
69
movq_r2m(mm4, *(dst+8)); /* write mm4 back to dst+8 */
82
/*
 * The x-axis interpolation case (ix = 1, iy = 0).
 *
 * Each 8-byte group of the source row is averaged (pavgb, rounded byte
 * average) with the group one byte further along, and the result is then
 * averaged with mm2 before being stored to dst.
 * NOTE(review): how mask, w, h and lx steer the routine is not evident
 * from the statements shown -- confirm against the complete body.
 */
84
void predcomp_10_mmxe(char *src,char *dst,int lx, int w, int h, int mask)
88
/* Replicate the low dword of mm0 into both halves of the register. */
punpckldq_r2r(mm0, mm0);
89
/* Per-dword equality test of mm0 against mm2; the result lands in mm0. */
pcmpeqd_r2r(mm2, mm0);
91
/* NOTE(review): the same compare is issued again -- presumably mm0 is
   copied to another register in between; confirm. */
pcmpeqd_r2r(mm2, mm0);
94
movq_m2r(*src, mm4); /* first 8 bytes row: avg src in x */
95
pavgb_m2r(*(src+1), mm4); /* average with the bytes one position to the right */
101
pavgb_r2r(mm2, mm4); /* combine */
106
movq_m2r(*(src+8), mm4); /* second 8 bytes row: avg src in x */
107
pavgb_m2r(*(src+9), mm4); /* average with the bytes one position to the right */
108
movq_m2r(*(dst+8), mm2); /* the 8 bytes currently held at dst+8 */
113
pavgb_r2r(mm2, mm4); /* combine */
114
movq_r2m(mm4, *(dst+8)); /* write the combined bytes back to dst+8 */
128
/*
 * The x-axis and y-axis interpolation case (ix = 1, iy = 1).
 *
 * Source bytes are widened to 16-bit words (unpacked against the zero
 * register mm2) so that several neighbouring samples can be summed,
 * then the word sums are packed back to bytes with unsigned saturation.
 * NOTE(review): the additions, rounding and shift that sit between the
 * unpack and pack steps, and the use of mask, w and h, are not evident
 * from the statements shown -- confirm against the complete body.
 */
130
void predcomp_11_mmxe(char *src,char *dst,int lx, int w, int h, int mask)
133
/* mm2 = [0,0,0,0]W */
134
/* mm3 = [2,2,2,2]W */
136
movd_g2r(0x00020002, mm3); /* low dword = two words of value 2 */
137
punpckldq_r2r(mm3, mm3); /* replicate low dword -> four words of value 2 */
141
/* Replicate the low dword of mm0 into both halves of the register. */
punpckldq_r2r(mm0, mm0);
142
/* Per-dword equality test of mm0 against mm2 (zero); result lands in mm0. */
pcmpeqd_r2r(mm2, mm0);
144
/* NOTE(review): the same compare is issued again -- presumably mm0 is
   copied to another register in between; confirm. */
pcmpeqd_r2r(mm2, mm0);
147
movq_m2r(*src, mm4); /* mm4 and mm6 accumulate partial sums for interp. */
149
punpcklbw_r2r(mm2, mm4); /* low 4 bytes -> words (interleaved with zero) */
150
punpckhbw_r2r(mm2, mm6); /* high 4 bytes -> words; presumably mm6 holds a copy of the same load */
152
movq_m2r(*(src+1), mm5); /* same row, one byte to the right */
154
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
156
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words; presumably mm7 holds a copy of mm5 */
161
/* NOTE(review): the "src -= lx" further down suggests src has been
   advanced to the following row by this point -- confirm. */
movq_m2r(*src, mm5); /* first 8 bytes of the row src points at */
163
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
165
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words */
168
movq_m2r(*(src+1), mm5); /* same row, one byte to the right */
170
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
172
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words */
175
/* Now round and repack... */
180
packuswb_r2r(mm6, mm4); /* words in mm4 (low) and mm6 (high) -> 8 saturated bytes in mm4 */
192
src -= lx; /* Back to 1st row */
194
movq_m2r(*(src+8), mm4); /* mm4 and mm6 accumulate partial sums for interp. */
196
punpcklbw_r2r(mm2, mm4); /* low 4 bytes -> words (interleaved with zero) */
197
punpckhbw_r2r(mm2, mm6); /* high 4 bytes -> words; presumably mm6 holds a copy of the same load */
199
movq_m2r(*(src+9), mm5); /* same row, one byte to the right */
201
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
203
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words; presumably mm7 holds a copy of mm5 */
208
/* NOTE(review): as for the first half, src has presumably been advanced
   to the following row by this point -- confirm. */
movq_m2r(*(src+8), mm5); /* second 8 bytes of the row src points at */
210
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
212
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words */
215
movq_m2r(*(src+9), mm5); /* same row, one byte to the right */
217
punpcklbw_r2r(mm2, mm5); /* low 4 bytes -> words */
219
punpckhbw_r2r(mm2, mm7); /* high 4 bytes -> words */
222
/* Now round and repack... */
227
packuswb_r2r(mm6, mm4); /* words in mm4 (low) and mm6 (high) -> 8 saturated bytes in mm4 */
229
movq_m2r(*(dst+8), mm7); /* the 8 bytes currently held at dst+8 */
235
movq_r2m(mm4, *(dst+8)); /* write mm4 back to dst+8 */
247
/*
 * The y-axis interpolation case (ix = 0, iy = 1).
 *
 * Each 8-byte group of a source row is averaged (pavgb, rounded byte
 * average) with the group at the same offset one row further down
 * (src + lx), so lx acts as the distance in bytes between rows.
 * NOTE(review): the use of mask, w and h, and the step that merges the
 * averaged bytes with dst, are not evident from the statements shown --
 * confirm against the complete body.
 */
249
void predcomp_01_mmxe(char *src,char *dst,int lx, int w, int h, int mask)
253
/* Replicate the low dword of mm0 into both halves of the register. */
punpckldq_r2r(mm0, mm0);
254
/* Per-dword equality test of mm0 against mm2; the result lands in mm0. */
pcmpeqd_r2r(mm2, mm0);
256
/* NOTE(review): the same compare is issued again -- presumably mm0 is
   copied to another register in between; confirm. */
pcmpeqd_r2r(mm2, mm0);
259
movq_m2r(*src, mm4); /* first 8 bytes row */
260
src+=lx; /* update pointer to next row */
261
pavgb_m2r(*src, mm4); /* Average in y */
273
src-=lx; /* Back to prev row */
274
movq_m2r(*(src+8), mm4); /* second 8 bytes row */
275
src+=lx; /* update pointer to next row */
276
pavgb_m2r(*(src+8), mm4); /* Average in y */
278
movq_m2r(*(dst+8), mm2); /* the 8 bytes currently held at dst+8 */
284
movq_r2m(mm4, *(dst+8)); /* write mm4 back to dst+8 */
287
dst+=lx; /* update pointer to next row */