1
/////////////////////////////////////////////////////////////////////////////
3
/////////////////////////////////////////////////////////////////////////////
4
// Copyright (c) 2000 Tom Barry All rights reserved.
5
/////////////////////////////////////////////////////////////////////////////
7
// This file is subject to the terms of the GNU General Public License as
8
// published by the Free Software Foundation. A copy of this license is
9
// included with this software distribution in the file COPYING. If you
10
// do not have a copy, you may obtain a copy by writing to the Free
11
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
13
// This software is distributed in the hope that it will be useful,
14
// but WITHOUT ANY WARRANTY; without even the implied warranty of
15
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
// GNU General Public License for more details
17
/////////////////////////////////////////////////////////////////////////////
20
#define MAINLOOP_LABEL DoNext8Bytes_SSE
21
#elif defined(IS_3DNOW)
22
#define MAINLOOP_LABEL DoNext8Bytes_3DNow
24
#define MAINLOOP_LABEL DoNext8Bytes_MMX
28
// This is a simple lightweight DeInterlace method that uses little CPU time
29
// but gives very good results for low or intermediate motion.
30
// It defers frames by one field, but that does not seem to produce noticeable
33
// The method used is to take either the older or newer weave pixel depending
34
// upon which gives the smaller comb factor, and then clip to avoid large damage
37
// I'd intended this to be part of a larger more elaborate method added to
38
// Blended Clip, but this gives results too good for its CPU cost to ignore here.
41
BOOL DeinterlaceGreedyH_SSE(DEINTERLACE_INFO *info)
42
#elif defined(IS_3DNOW)
43
BOOL DeinterlaceGreedyH_3DNOW(DEINTERLACE_INFO *info)
45
BOOL DeinterlaceGreedyH_MMX(DEINTERLACE_INFO *info)
50
short* L1; // ptr to Line1, of 3
51
short* L2; // ptr to Line2, the weave line
52
short* L3; // ptr to Line3
53
short* LP2; // ptr to prev Line2
55
BYTE *lpCurOverlay = info->Overlay;
56
short **pOddLines = info->OddLines[0];
57
short **pEvenLines = info->EvenLines[0];
58
short **pPrevLines = info->IsOdd ? info->OddLines[1] : info->EvenLines[1];
60
const __int64 ShiftMask = 0xfefffefffefffeffLL; // to avoid shifting chroma to luma
65
i = GreedyMaxComb; // How badly do we let it weave? 0-255
66
MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
69
if (pOddLines == NULL || pEvenLines == NULL || pPrevLines == NULL)
72
// copy first even line no matter what, and the first odd line if we're
73
// processing an EVEN field. (Note: this differs from the other deinterlace routines.)
74
info->pMemcpy(lpCurOverlay, pEvenLines[0], info->LineLength); // DL0
76
info->pMemcpy(lpCurOverlay + info->OverlayPitch, pOddLines[0], info->LineLength); // DL1
77
for (Line = 0; Line < (info->FieldHeight - 1); ++Line)
80
LoopCtr = info->LineLength / 8; // there are LineLength / 8 qwords per line
84
L1 = pEvenLines[Line];
86
L3 = pEvenLines[Line + 1];
87
LP2 = pPrevLines[Line]; // prev Odd lines
88
Dest = lpCurOverlay + (Line * 2 + 1) * info->OverlayPitch; // DL1
92
L1 = pOddLines[Line] ;
93
L2 = pEvenLines[Line + 1];
94
L3 = pOddLines[Line + 1];
95
LP2 = pPrevLines[Line + 1]; // prev even lines
96
Dest = lpCurOverlay + (Line * 2 + 2) * info->OverlayPitch; // DL2
98
info->pMemcpy(Dest + info->OverlayPitch, L3, info->LineLength);
100
// For ease of reading, the comments below assume that we're operating on an odd
101
// field (i.e., that info->IsOdd is true). Assume the obvious for even lines.
107
" mov esi, %[LP2] \n"
108
" mov edi, %[Dest] ## DL1 if Odd or DL2 if Even \n"
111
_strf(MAINLOOP_LABEL) ": \n"
112
" movq mm1, qword ptr[eax] ## L1\n"
113
" movq mm2, qword ptr[ebx] ## L2\n"
114
" movq mm3, qword ptr[edx] ## L3\n"
115
" movq mm0, qword ptr[esi] ## LP2\n"
117
" ## average L1 and L3 leave result in mm4\n"
118
" movq mm4, mm1 ## L1\n"
121
#elif defined(IS_3DNOW)
122
" pavgusb mm4, mm3\n"
124
" pand mm4, %[ShiftMask] ## "\n"
126
" movq mm5, mm3 ## L3\n"
127
" pand mm5, %[ShiftMask] ## "\n"
129
" paddb mm4, mm5 ## the average, for computing comb\n"
132
"## get abs value of possible L2 comb\n"
133
" movq mm7, mm2 ## L2\n"
134
" psubusb mm7, mm4 ## L2 - avg\n"
135
" movq mm5, mm4 ## avg\n"
136
" psubusb mm5, mm2 ## avg - L2\n"
137
" por mm5, mm7 ## abs(avg-L2)\n"
138
" movq mm6, mm4 ## copy of avg for later\n"
140
"## get abs value of possible LP2 comb\n"
141
" movq mm7, mm0 ## LP2\n"
142
" psubusb mm7, mm4 ## LP2 - avg\n"
143
" psubusb mm4, mm0 ## avg - LP2\n"
144
" por mm4, mm7 ## abs(avg-LP2)\n"
146
"## use L2 or LP2 depending upon which makes smaller comb\n"
147
" psubusb mm4, mm5 ## see if it goes to zero\n"
148
" psubusb mm5, mm5 ## 0\n"
149
" pcmpeqb mm4, mm5 ## if (mm4=0) then FF else 0\n"
150
" pcmpeqb mm5, mm4 ## opposite of mm4\n"
152
"## if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55\n"
153
" pand mm5, mm2 ## use L2 if mm5 == ff, else 0\n"
154
" pand mm4, mm0 ## use LP2 if mm4 = ff, else 0\n"
155
" por mm4, mm5 ## may the best win\n"
157
"## Now lets clip our chosen value to be not outside of the range\n"
158
"## of the high/low range L1-L3 by more than abs(L1-L3)\n"
159
"## This allows some comb but limits the damages and also allows more\n"
160
"## detail than a boring oversmoothed clip.\n"
162
" movq mm2, mm1 ## copy L1\n"
163
" psubusb mm2, mm3 ## - L3, with saturation\n"
164
" paddusb mm2, mm3 ## now = Max(L1,L3)\n"
166
" pcmpeqb mm7, mm7 ## all ffffffff\n"
167
" psubusb mm7, mm1 ## - L1 \n"
168
" paddusb mm3, mm7 ## add, may sat at fff..\n"
169
" psubusb mm3, mm7 ## now = Min(L1,L3)\n"
171
"## allow the value to be above the high or below the low by amt of MaxComb\n"
172
" paddusb mm2, %[MaxComb] ## increase max by diff\n"
173
" psubusb mm3, %[MaxComb] ## lower min by diff\n"
175
" psubusb mm4, mm3 ## best - Min\n"
176
" paddusb mm4, mm3 ## now = Max(best,Min(L1,L3)\n"
178
" pcmpeqb mm7, mm7 ## all ffffffff\n"
179
" psubusb mm7, mm4 ## - Max(best,Min(best,L3) \n"
180
" paddusb mm2, mm7 ## add may sat at FFF..\n"
181
" psubusb mm2, mm7 ## now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped\n"
184
" movntq qword ptr[edi], mm2 ## move in our clipped best\n"
186
" movq qword ptr[edi], mm2 ## move in our clipped best\n"
189
"## bump ptrs and loop\n"
190
" lea eax,[eax+8] \n"
193
" lea edi,[edi+8] \n"
196
" jnz " _strf(MAINLOOP_LABEL) "\n"
198
_m(L1), _m(L2), _m(L3), _m(LP2), _m(Dest), _m(ShiftMask), _m(MaxComb), _m(LoopCtr)
199
: "eax", "edx", "esi", "edi");
201
// Copy last odd line if we're processing an Odd field.
204
info->pMemcpy(lpCurOverlay + (info->FrameHeight - 1) * info->OverlayPitch,
205
pOddLines[info->FieldHeight - 1],
209
// clear out the MMX registers ready for doing floating point
218
// #undef MAINLOOP_LABEL