1
/////////////////////////////////////////////////////////////////////////////
2
// $Id: DI_GreedyHF.asm,v 1.2 2005/01/20 01:38:33 mschimek Exp $
3
/////////////////////////////////////////////////////////////////////////////
4
// Copyright (c) 2001 Tom Barry. All rights reserved.
5
/////////////////////////////////////////////////////////////////////////////
7
// This file is subject to the terms of the GNU General Public License as
8
// published by the Free Software Foundation. A copy of this license is
9
// included with this software distribution in the file COPYING. If you
10
// do not have a copy, you may obtain a copy by writing to the Free
11
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
13
// This software is distributed in the hope that it will be useful,
14
// but WITHOUT ANY WARRANTY; without even the implied warranty of
15
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
// GNU General Public License for more details.
18
/////////////////////////////////////////////////////////////////////////////
21
// Date Developer Changes
23
// 01 Feb 2001 Tom Barry New Greedy (High Motion) Deinterlace method
24
// 29 Jul 2001 Tom Barry Add 3DNOW, MMX support, create .asm mem
25
/////////////////////////////////////////////////////////////////////////////
28
// $Log: DI_GreedyHF.asm,v $
29
// Revision 1.2 2005/01/20 01:38:33 mschimek
30
// *** empty log message ***
32
// Revision 1.1 2005/01/08 14:54:23 mschimek
33
// *** empty log message ***
35
// Revision 1.3 2001/11/25 04:33:37 trbarry
36
// Fix for TDeinterlace_Info. Also release UN-Filter code, 5-tap V & H sharp/soft filters optimized to reverse excessive filtering (or EE?)
38
// Revision 1.2 2001/07/31 13:34:46 trbarry
39
// Add missing end of line range check
41
// Revision 1.1 2001/07/30 21:50:32 trbarry
42
// Use weave chroma for reduced chroma jitter. Fix DJR bug again.
43
// Turn off Greedy Pulldown default.
45
// Revision 1.5 2001/07/30 18:18:59 trbarry
48
// Revision 1.3 2001/07/28 18:47:24 trbarry
49
// Fix Sharpness with Median Filter
50
// Increase Sharpness default to make obvious
51
// Adjust deinterlace defaults for less jitter
53
// Revision 1.2 2001/07/25 12:04:31 adcockj
54
// Moved Control stuff into DS_Control.h
55
// Added $Id and $Log to comment blocks as per standards
57
/////////////////////////////////////////////////////////////////////////////
59
// FUNCT_NAME must be defined before include
60
BOOL FUNCT_NAME(TDeinterlaceInfo* pInfo)
62
#include "DI_GreedyHM2.h"
65
DWORD Pitch = pInfo->InputPitch;
67
/*>>>> short* L1; // ptr to Line1, of 3
68
short* L2; // ptr to Line2, the weave line
69
short* L3; // ptr to Line3
70
short* L2P; // ptr to prev Line2
72
BYTE* L1; // ptr to Line1, of 3
73
BYTE* L2; // ptr to Line2, the weave line
74
BYTE* L3; // ptr to Line3
76
BYTE* L2P; // ptr to prev Line2
77
BYTE* Dest = pInfo->Overlay;
80
__int64 LastAvg=0; //interp value from left qword
84
QW256B = i << 48 | i << 32 | i << 16 | i; // save a couple instr on PMINSW instruct.
86
//>>> if (pOddLines == NULL || pEvenLines == NULL || pPrevLines == NULL)
89
// copy first even line no matter what, and the first odd line if we're
90
// processing an EVEN field. (Note: this differs from the other deinterlace routines.)
92
/* >>> pMemcpy(lpCurOverlay, pEvenLines[0], LineLength); // DL0
94
pMemcpy(lpCurOverlay + OverlayPitch, pOddLines[0], LineLength); // DL1
95
for (Line = 0; Line < (FieldHeight - 1); ++Line)
97
LoopCtr = LineLength / 8 - 1; // there are LineLength / 8 qwords per line
98
// but do 1 less, adj at end of loop
101
L1 = pEvenLines[Line];
102
L2 = pOddLines[Line];
103
L3 = pEvenLines[Line + 1];
104
L2P = pPrevLines[Line]; // prev Odd lines
105
Dest = lpCurOverlay + (Line * 2 + 1) * OverlayPitch; // DL1
109
L1 = pOddLines[Line] ;
110
L2 = pEvenLines[Line + 1];
111
L3 = pOddLines[Line + 1];
112
L2P = pPrevLines[Line + 1]; // prev even lines
113
Dest = lpCurOverlay + (Line * 2 + 2) * OverlayPitch; // DL2
115
pMemcpy(Dest + OverlayPitch, L3, LineLength);
118
//>>> if(pInfo->PictureHistory[0]->Flags | PICTURE_INTERLACED_ODD)
121
L1 = pInfo->PictureHistory[1]->pData;
122
L2 = pInfo->PictureHistory[0]->pData;
124
L2P = pInfo->PictureHistory[2]->pData;
126
// copy first even line
127
pInfo->pMemcpy(Dest, L1, pInfo->LineLength);
128
Dest += pInfo->OverlayPitch;
132
L1 = pInfo->PictureHistory[1]->pData;
133
L2 = pInfo->PictureHistory[0]->pData + Pitch;
135
L2P = pInfo->PictureHistory[2]->pData + Pitch;
137
// copy first even line
138
pInfo->pMemcpy(Dest, pInfo->PictureHistory[0]->pData, pInfo->LineLength);
139
Dest += pInfo->OverlayPitch;
140
// then first odd line
141
pInfo->pMemcpy(Dest, L1, pInfo->LineLength);
142
Dest += pInfo->OverlayPitch;
145
for (Line = 0; Line < (pInfo->FieldHeight - 1); ++Line)
148
LoopCtr = LineLength / 8 - 1; // there are LineLength / 8 qwords per line
149
// but do 1 less here; the last qword is adjusted for at the end of the loop
153
// For ease of reading, the comments below assume that we're operating on an odd
154
// field (i.e., that InfoIsOdd is true). For even fields, swap the roles of odd and even lines.
162
" mov %[Int_LastAvg],0 ## init easy way\n"
164
" lea ebx, [eax+8] ## next qword needed by DJR\n"
166
" sub ecx, eax ## carry L3 addr as an offset\n"
167
" mov edx, %[L2P] \n"
169
" mov edi, %[Dest] ## DL1 if Odd or DL2 if Even \n"
172
"1: # DoNext8Bytes: \n"
174
" movq mm0, qword ptr[esi] ## L2 - the newest weave pixel value \n"
175
" movq mm1, qword ptr[eax] ## L1 - the top pixel\n"
176
" movq mm2, qword ptr[edx] ## L2P - the prev weave pixel \n"
177
" movq mm3, qword ptr[eax+ecx] ## L3, next odd row\n"
178
" movq mm6, mm1 ## L1 - get simple single pixel interp\n"
179
"## pavgb mm6, mm3 ## use macro below\n"
180
V_PAVGB (mm6, mm3, mm4, ShiftMask)
182
"## DJR - Diagonal Jaggie Reduction\n"
183
"## In the event that we are going to use an average (Bob) pixel we do not want a jagged\n"
184
"## stair step effect. To combat this we avg in the 2 horizontally adjacen pixels into the\n"
185
"## interpolated Bob mix. This will do horizontal smoothing for only the Bob'd pixels.\n"
187
" movq mm4, %[LastAvg] ## the bob value from prev qword in row\n"
188
" movq %[LastAvg], mm6 ## save for next pass\n"
189
" psrlq mm4, 48 ## right justify 1 pixel\n"
190
" movq mm7, mm6 ## copy of simple bob pixel\n"
191
" psllq mm7, 16 ## left justify 3 pixels\n"
192
" por mm4, mm7 ## and combine\n"
194
" movq mm5, qword ptr[ebx] ## next horiz qword from L1\n"
195
"## pavgb mm5, qword ptr[ebx+ecx] ## next horiz qword from L3, use macro below\n"
196
V_PAVGB (mm5, qword ptr[ebx+ecx], mm7, ShiftMask)
197
" psllq mm5, 48 ## left just 1 pixel\n"
198
" movq mm7, mm6 ## another copy of simple bob pixel\n"
199
" psrlq mm7, 16 ## right just 3 pixels\n"
200
" por mm5, mm7 ## combine\n"
201
"## pavgb mm4, mm5 ## avg of forward and prev by 1 pixel, use macro\n"
202
V_PAVGB (mm4, mm5, mm5, ShiftMask) // mm5 gets modified if MMX
203
"## pavgb mm6, mm4 ## avg of center and surround interp vals, use macro\n"
204
V_PAVGB (mm6, mm4, mm7, ShiftMask)
206
"## Don't do any more averaging than needed for mmx. It hurts performance and causes rounding errors.\n"
208
"## pavgb mm4, mm6 ## 1/4 center, 3/4 adjacent\n"
209
V_PAVGB (mm4, mm6, mm7, ShiftMask)
210
"## pavgb mm6, mm4 ## 3/8 center, 5/8 adjacent\n"
211
V_PAVGB (mm6, mm4, mm7, ShiftMask)
214
"## get abs value of possible L2 comb\n"
215
" movq mm4, mm6 ## work copy of interp val\n"
216
" movq mm7, mm2 ## L2\n"
217
" psubusb mm7, mm4 ## L2 - avg\n"
218
" movq mm5, mm4 ## avg\n"
219
" psubusb mm5, mm2 ## avg - L2\n"
220
" por mm5, mm7 ## abs(avg-L2)\n"
222
"## get abs value of possible L2P comb\n"
223
" movq mm7, mm0 ## L2P\n"
224
" psubusb mm7, mm4 ## L2P - avg\n"
225
" psubusb mm4, mm0 ## avg - L2P\n"
226
" por mm4, mm7 ## abs(avg-L2P)\n"
228
"## use L2 or L2P depending upon which makes smaller comb\n"
229
" psubusb mm4, mm5 ## see if it goes to zero\n"
230
" psubusb mm5, mm5 ## 0\n"
231
" pcmpeqb mm4, mm5 ## if (mm4=0) then FF else 0\n"
232
" pcmpeqb mm5, mm4 ## opposite of mm4\n"
234
"## if Comb(L2P) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55\n"
235
" pand mm5, mm2 ## use L2 if mm5 == ff, else 0\n"
236
" pand mm4, mm0 ## use L2P if mm4 = ff, else 0\n"
237
" por mm4, mm5 ## may the best win\n"
239
"## Inventory: at this point we have the following values:\n"
240
"## mm0 = L2P (or L2)\n"
242
"## mm2 = L2 (or L2P)\n"
244
"## mm4 = the best of L2,L2P weave pixel, base upon comb \n"
245
"## mm6 = the avg interpolated value, if we need to use it\n"
247
"## Let's measure movement, as how much the weave pixel has changed\n"
249
" psubusb mm2, mm0\n"
250
" psubusb mm0, mm7\n"
251
" por mm0, mm2 ## abs value of change, used later\n"
253
"## Now lets clip our chosen value to be not outside of the range\n"
254
"## of the high/low range L1-L3 by more than MaxComb.\n"
255
"## This allows some comb but limits the damages and also allows more\n"
256
"## detail than a boring oversmoothed clip.\n"
257
" movq mm2, mm1 ## copy L1\n"
258
"## pmaxub mm2, mm3 ## use macro\n"
259
V_PMAXUB (mm2, mm3) // now = Max(L1,L3)
260
" movq mm5, mm1 ## copy L1\n"
261
"## pminub mm5, mm3 ## now = Min(L1,L3), use macro\n"
262
V_PMINUB (mm5, mm3, mm7)
263
"## allow the value to be above the high or below the low by amt of MaxComb\n"
264
" psubusb mm5, %[MaxCombW] ## lower min by diff\n"
265
" paddusb mm2, %[MaxCombW] ## increase max by diff\n"
266
"## pmaxub mm4, mm5 ## now = Max(best,Min(L1,L3) use macro\n"
268
"## pminub mm4, mm2 ## now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped\n"
269
V_PMINUB (mm4, mm2, mm7)
271
"## Blend weave pixel with bob pixel, depending on motion val in mm0 \n"
272
" psubusb mm0, %[MotionThresholdW] ## test Threshold, clear chroma change >>>??\n"
273
" pmullw mm0, %[MotionSenseW] ## mul by user factor, keep low 16 bits\n"
274
" movq mm7, %[QW256]\n"
276
" pminsw mm0, mm7 ## max = 256 \n"
278
" paddusw mm0, %[QW256B] ## add, may sat at fff..\n"
279
" psubusw mm0, %[QW256B] ## now = Min(L1,256)\n"
281
" psubusw mm7, mm0 ## so the 2 sum to 256, weighted avg\n"
282
" movq mm2, mm4 ## save weave chroma info before trashing\n"
283
" pand mm4, %[YMask] ## keep only luma from calc'd value\n"
284
" pmullw mm4, mm7 ## use more weave for less motion\n"
285
" pand mm6, %[YMask] ## keep only luma from calc'd value\n"
286
" pmullw mm6, mm0 ## use more bob for large motion\n"
287
" paddusw mm4, mm6 ## combine\n"
288
" psrlw mm4, 8 ## div by 256 to get weighted avg \n"
290
"## chroma comes from weave pixel\n"
291
" pand mm2, %[UVMask] ## keep chroma\n"
292
" por mm2, mm4 ## and combine\n"
294
V_MOVNTQ (qword ptr[edi], mm2) // move in our clipped best, use macro
296
"## bump ptrs and loop\n"
297
" lea eax,[eax+8] \n"
298
" lea ebx,[ebx+8] \n"
300
" lea edi,[edi+8] \n"
303
" jg 1b # DoNext8Bytes ## loop if not to last line\n"
304
" ## note P-III default assumes backward branches taken\n"
305
" jl 2f # LoopDone ## done\n"
306
" mov ebx, eax ## sharpness lookahead 1 byte only, be wrong on 1\n"
307
" jmp 1b # DoNext8Bytes\n"
316
_m_nth(LastAvg, 6), _m(L1), _m(L2), _m(L3), _m(L2P),
317
_m(Dest), _m(ShiftMask), _m(LastAvg), _m_int(LastAvg), _m(MaxCombW),
318
_m(MotionThresholdW), _m(MotionSenseW), _m(QW256), _m(QW256B),
319
_m(YMask), _m(UVMask), _m(LoopCtr)) ;
321
Dest += pInfo->OverlayPitch;
322
pInfo->pMemcpy(Dest, L3, pInfo->LineLength);
323
Dest += pInfo->OverlayPitch;
333
// Copy last odd line if we're processing an Odd field.
336
pMemcpy(lpCurOverlay + (FrameHeight - 1) * OverlayPitch,
337
pOddLines[FieldHeight - 1],
341
// Copy last odd line if we're processing an Odd field.
342
//>>> if(pInfo->PictureHistory[0]->Flags | PICTURE_INTERLACED_ODD)
350
// clear out the MMX registers ready for doing floating point