1
/////////////////////////////////////////////////////////////////////////////
2
// $Id: DI_GreedyDeLoop.asm,v 1.1 2005/01/08 14:54:23 mschimek Exp $
3
/////////////////////////////////////////////////////////////////////////////
4
// Copyright (c) 2001 Tom Barry All rights reserved.
5
/////////////////////////////////////////////////////////////////////////////
7
// This file is subject to the terms of the GNU General Public License as
8
// published by the Free Software Foundation. A copy of this license is
9
// included with this software distribution in the file COPYING. If you
10
// do not have a copy, you may obtain a copy by writing to the Free
11
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
13
// This software is distributed in the hope that it will be useful,
14
// but WITHOUT ANY WARRANTY; without even the implied warranty of
15
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
// GNU General Public License for more details
17
/////////////////////////////////////////////////////////////////////////////
20
// $Log: DI_GreedyDeLoop.asm,v $
21
// Revision 1.1 2005/01/08 14:54:23 mschimek
22
// *** empty log message ***
24
// Revision 1.3 2001/08/01 00:37:41 trbarry
25
// More chroma jitter fixes, tweak defaults
27
// Revision 1.2 2001/07/25 12:04:31 adcockj
28
// Moved Control stuff into DS_Control.h
29
// Added $Id and $Log to comment blocks as per standards
31
/////////////////////////////////////////////////////////////////////////////
33
// This file contains the body of the Greedy High Motion Deinterlace loop
34
// It may be included in the body of the real loop multiple times for performance
35
// as many graphics cards still like at least 32 bytes written at a time.
37
// The following 4 values should be defined before using this include but may be removed for debugging:
38
// #define USE_JAGGIE_REDUCTION
39
// #define USE_GREEDY_CHOICE
// #define USE_CLIP
41
// #define USE_BOB_BLEND
43
// The values FSOFFS and FSCOLSIZE must be defined before including this header
// (FSCOLSIZE is only referenced by the USE_JAGGIE_REDUCTION code).
45
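// On exit mm4 will contain the calculated output value (blended luma combined with the
// chroma of the clipped weave value), not yet stored; mm2 is left holding only that chroma part.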
46
// It is also expected that mm1 and mm3 will still contain the vertically adjacent pixels
47
// which may be needed for the vertical filter.
48
// Load the four 8-byte inputs and build the plain vertical interpolation.
//   mm0 = weave candidate at [esi+16+FSOFFS] (referred to as LP2 further down)
//   mm1 = L1, addressed via eax
//   mm2 = weave candidate at [esi+FSOFFS]    (L2)
//   mm3 = L3, addressed via ebx
//   mm6 = pavgb(L1, L3): the simple interpolated value used by every later stage
" movq mm0, qword ptr[esi+16+" _strf(FSOFFS) "] ## LP2 - the other weave candidate\n"
" movq mm1, qword ptr[esi+eax+" _strf(FSOFFS) "] ## L1\n"
" movq mm2, qword ptr[esi+" _strf(FSOFFS) "] ## L2\n"
" movq mm3, qword ptr[esi+ebx+" _strf(FSOFFS) "] ## L3\n"

"## calc simple interp value in case we need it\n"
" movq mm6, mm1 ## L1 - get simple single pixel interp\n"
" pavgb mm6, mm3 ## average with L3: mm6 = avg(L1,L3)\n"
57
"## Diagonal Jaggie Reduction (DJR)\n"
"## An averaged (Bob) pixel used as-is can produce a jagged stair step effect.\n"
"## To soften it, the 2 horizontally adjacent pixels are averaged into the\n"
"## interpolated value held in mm6.\n"

#ifdef USE_JAGGIE_REDUCTION // normally enabled; leave undefined only for testing
"## neighbour 1: mm6 shifted up by 16 bits, gap filled from the previous qword\n"
"    movq    mm7, mm6                ## working copy of the interp value\n"
"    movq    mm4, %[LastAvg]         ## interp value saved by the previous pass\n"
"    psllq   mm7, 16                 ## left justify 3 pixels\n"
"    movq    %[LastAvg], mm6         ## save current value for the next pass\n"
"    psrlq   mm4, 48                 ## right justify 1 pixel of the previous value\n"
"    por     mm4, mm7                ## merge both parts\n"

"## neighbour 2: mm6 shifted down by 16 bits, gap filled from the next qword\n"
"    movq    mm7, mm6                ## fresh copy of the interp value\n"
"    movq    mm5, qword ptr[esi+eax+" _strf(FSCOLSIZE+FSOFFS) "] ## next horiz qword from L1\n"
"    psrlq   mm7, 16                 ## right justify 3 pixels\n"
"    pavgb   mm5, qword ptr[esi+ebx+" _strf(FSCOLSIZE+FSOFFS) "] ## averaged with next horiz qword from L3\n"
"    psllq   mm5, 48                 ## left justify 1 pixel of it\n"
"    por     mm5, mm7                ## merge both parts\n"

"## fold the two neighbours into the centre value\n"
"    pavgb   mm4, mm5                ## mm4 = avg of the two neighbours (adjacent)\n"
"    pavgb   mm6, mm4                ## mm6 = 1/2 center, 1/2 adjacent\n"
"    pavgb   mm4, mm6                ## mm4 = 1/4 center, 3/4 adjacent\n"
"    pavgb   mm6, mm4                ## mm6 = 3/8 center, 5/8 adjacent\n"

#endif // USE_JAGGIE_REDUCTION
84
"## For a weave pixel candidate we choose whichever of the two weave inputs\n"
"## (mm2 = L2 or mm0 = LP2) would yield the lowest comb factor against the\n"
"## interpolated value in mm6. This allows the possibility of selecting choice\n"
"## pixels from 2 different fields. The result is left in mm4.\n"
#ifdef USE_GREEDY_CHOICE // always use but can turn off for test
"## get abs value of possible L2 comb\n"
" movq mm4, mm6 ## work copy of interp val\n"
" movq mm7, mm2 ## L2\n"
" psubusb mm7, mm4 ## L2 - avg (saturated)\n"
" movq mm5, mm4 ## avg\n"
" psubusb mm5, mm2 ## avg - L2 (saturated)\n"
" por mm5, mm7 ## abs(avg-L2)\n"

"## get abs value of possible LP2 comb\n"
" movq mm7, mm0 ## LP2\n"
" psubusb mm7, mm4 ## LP2 - avg (saturated)\n"
" psubusb mm4, mm0 ## avg - LP2 (saturated)\n"
" por mm4, mm7 ## abs(avg-LP2)\n"

"## use L2 or LP2 depending upon which makes smaller comb\n"
" psubusb mm4, mm5 ## goes to zero where Comb(LP2) <= Comb(L2)\n"
" pxor mm5, mm5 ## 0\n"
" pcmpeqb mm4, mm5 ## if (mm4=0) then FF else 0\n"
" pcmpeqb mm5, mm4 ## opposite of mm4\n"

"## if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff\n"
" pand mm5, mm2 ## use L2 if mm5 == ff, else 0\n"
" pand mm4, mm0 ## use LP2 if mm4 == ff, else 0\n"
" por mm4, mm5 ## may the best win\n"
#else // greedy choice disabled: no comparison, take one input unconditionally
" movq mm4, mm2 ## just use mm2 (L2) for the weave pixel\n"
#endif // end of Greedy choice code
117
"## Inventory: at this point we have the following values:\n"
"## mm0 = LP2, the weave candidate loaded from [esi+16+FSOFFS]\n"
"## mm1 = L1\n"
"## mm2 = L2, the weave candidate loaded from [esi+FSOFFS]\n"
"## mm3 = L3\n"
"## mm4 = the best of L2,LP2 weave pixel, based upon comb\n"
"## mm6 = the avg interpolated value, if we need to use it\n"

"## Let's measure movement, as how much the weave pixel has changed.\n"
"## psubusb saturates at zero, so OR-ing both subtraction orders gives abs(L2-LP2).\n"
" movq mm7, mm2 ## keep a copy of L2, the next instruction overwrites mm2\n"
" psubusb mm2, mm0 ## L2 - LP2 (saturated)\n"
" psubusb mm0, mm7 ## LP2 - L2 (saturated)\n"
" por mm0, mm2 ## abs value of change, used later\n"
131
#ifdef USE_CLIP // always use but can turn off for test
132
"## Now lets clip our chosen weave pixel value to be not outside of the range\n"
133
"## of the high/low range L1-L3 by more than MaxComb.\n"
134
"## This allows some comb but limits the damages and also allows more\n"
135
"## detail than a boring oversmoothed clip.\n"
136
" movq mm2, mm1 ## copy L1\n"
137
" pmaxub mm2, mm3 ## now = Max(L1,L3)\n"
138
" movq mm5, mm1 ## copy L1\n"
139
" pminub mm5, mm3 ## now = Min(L1,L3)\n"
140
"## allow the value to be above the high or below the low by amt of MaxComb\n"
141
" psubusb mm5, %[MaxCombW] ## lower min by diff\n"
142
" paddusb mm2, %[MaxCombW] ## increase max by diff\n"
143
" pmaxub mm4, mm5 ## now = Max(best,Min(L1,L3)\n"
144
" pminub mm4, mm2 ## now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped\n"
145
#endif // end of clip code
146
" movq mm2, mm4 ## save copy of clipped val for luma\n"
148
#ifdef USE_BOB_BLEND // always use but can turn off for test
149
"## the ratio of bob/weave will be dependend upon apparent damage we expect\n"
150
"## from seeing large motion. \n"
151
" psubusb mm0, %[MotionThresholdW] ## test motion Threshold, clear chroma\n"
152
" pmullw mm0, %[MotionSenseW] ## mul by user factor, keep low 16 bits\n"
153
" movq mm7, %[QW256]\n"
154
" pminsw mm0, mm7 ## max = 256\n"
155
" psubusw mm7, mm0 ## so the 2 sum to 256, weighted avg\n"
156
" pand mm4, %[YMaskW] ## keep only luma from clipped weave value\n"
157
" pmullw mm4, mm7 ## use more weave for less motion\n"
158
" pand mm6, %[YMaskW] ## keep only luma from interp bob DJR value\n"
159
" pmullw mm6, mm0 ## use more bob for large motion\n"
160
" paddusw mm4, mm6 ## combine\n"
161
" psrlw mm4, 8 ## div by 256 to get weighted avg \n"
162
#endif // end of motion sensitive bob blend
164
"## chroma comes from our clipped weave value - gives more chroma res & lower chroma jitter\n"
165
" pand mm2, %[UVMask] ## get only chroma\n"
166
" por mm4, mm2 ## and combine\n"