~ubuntu-branches/ubuntu/maverick/zapping/maverick

« back to all changes in this revision

Viewing changes to plugins/deinterlace/DI_GreedyH/DI_GreedyH.asm

  • Committer: Bazaar Package Importer
  • Author(s): Daniel T Chen
  • Date: 2005-11-08 11:07:34 UTC
  • mfrom: (1.1.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20051108110734-ygvf6uljvgcjmca7
Tags: 0.9.6-1ubuntu1
* Resynchronise with Debian (Closes: #4022):
  - Fix desktop file to not use absolute path.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/////////////////////////////////////////////////////////////////////////////
2
 
// DI_GreedyH.asm
3
 
/////////////////////////////////////////////////////////////////////////////
4
 
// Copyright (c) 2000 Tom Barry  All rights reserved.
5
 
/////////////////////////////////////////////////////////////////////////////
6
 
//
7
 
//      This file is subject to the terms of the GNU General Public License as
8
 
//      published by the Free Software Foundation.  A copy of this license is
9
 
//      included with this software distribution in the file COPYING.  If you
10
 
//      do not have a copy, you may obtain a copy by writing to the Free
11
 
//      Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
12
 
//
13
 
//      This software is distributed in the hope that it will be useful,
14
 
//      but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 
//      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 
//      GNU General Public License for more details
17
 
/////////////////////////////////////////////////////////////////////////////
18
 
/*
19
 
#if defined(IS_SSE)
20
 
#define MAINLOOP_LABEL DoNext8Bytes_SSE
21
 
#elif defined(IS_3DNOW)
22
 
#define MAINLOOP_LABEL DoNext8Bytes_3DNow
23
 
#else
24
 
#define MAINLOOP_LABEL DoNext8Bytes_MMX
25
 
#endif
26
 
*/
27
 
 
28
 
// This is a simple lightweight DeInterlace method that uses little CPU time
29
 
// but gives very good results for low or intermediate motion.
30
 
// It defers frames by one field, but that does not seem to produce noticeable
31
 
// lip sync problems.
32
 
//
33
 
// The method used is to take either the older or newer weave pixel depending
34
 
// upon which gives the smaller comb factor, and then clip to avoid large damage
35
 
// when wrong.
36
 
//
37
 
// I'd intended this to be part of a larger more elaborate method added to 
38
 
// Blended Clip but this gives results too good for the CPU cost to ignore here.
39
 
 
40
 
#if defined(IS_SSE)
41
 
BOOL DeinterlaceGreedyH_SSE(DEINTERLACE_INFO *info)
42
 
#elif defined(IS_3DNOW)
43
 
BOOL DeinterlaceGreedyH_3DNOW(DEINTERLACE_INFO *info)
44
 
#else
45
 
BOOL DeinterlaceGreedyH_MMX(DEINTERLACE_INFO *info)
46
 
#endif
47
 
{
48
 
        int Line;
49
 
        int     LoopCtr;
50
 
        short* L1;                                      // ptr to Line1, of 3
51
 
        short* L2;                                      // ptr to Line2, the weave line
52
 
        short* L3;                                      // ptr to Line3
53
 
        short* LP2;                                     // ptr to prev Line2
54
 
        BYTE* Dest;
55
 
        BYTE *lpCurOverlay = info->Overlay;
56
 
        short **pOddLines = info->OddLines[0];
57
 
        short **pEvenLines = info->EvenLines[0];
58
 
        short **pPrevLines = info->IsOdd ? info->OddLines[1] : info->EvenLines[1];
59
 
#ifdef IS_MMX
60
 
        const __int64 ShiftMask = 0xfefffefffefffeffLL; // to avoid shifting chroma to luma
61
 
#endif
62
 
        __int64 MaxComb;
63
 
        __int64 i;
64
 
 
65
 
        i = GreedyMaxComb;                      // How badly do we let it weave? 0-255
66
 
        MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;    
67
 
        
68
 
 
69
 
        if (pOddLines == NULL || pEvenLines == NULL || pPrevLines == NULL)
70
 
                return FALSE;
71
 
 
72
 
        // copy first even line no matter what, and the first odd line if we're
73
 
        // processing an EVEN field. (note diff from other deint rtns.)
74
 
        info->pMemcpy(lpCurOverlay, pEvenLines[0], info->LineLength);   // DL0
75
 
        if (!info->IsOdd)
76
 
                info->pMemcpy(lpCurOverlay + info->OverlayPitch, pOddLines[0], info->LineLength);  // DL1
77
 
        for (Line = 0; Line < (info->FieldHeight - 1); ++Line)
78
 
        {
79
 
_saved_regs;
80
 
                LoopCtr = info->LineLength / 8;                         // there are LineLength / 8 qwords per line
81
 
 
82
 
                if (info->IsOdd)
83
 
                {
84
 
                        L1 = pEvenLines[Line];          
85
 
                        L2 = pOddLines[Line];   
86
 
                        L3 = pEvenLines[Line + 1];      
87
 
                        LP2 = pPrevLines[Line];                 // prev Odd lines
88
 
                        Dest = lpCurOverlay + (Line * 2 + 1) * info->OverlayPitch;      // DL1
89
 
                }
90
 
                else
91
 
                {
92
 
                        L1 = pOddLines[Line] ;          
93
 
                        L2 = pEvenLines[Line + 1];              
94
 
                        L3 = pOddLines[Line + 1];   
95
 
                        LP2 = pPrevLines[Line + 1];                     // prev even lines
96
 
                        Dest = lpCurOverlay + (Line * 2 + 2) * info->OverlayPitch;      // DL2
97
 
                }
98
 
                info->pMemcpy(Dest + info->OverlayPitch, L3, info->LineLength);
99
 
 
100
 
// For ease of reading, the comments below assume that we're operating on an odd
101
 
// field (i.e., that info->IsOdd is true).  Assume the obvious for even lines..
102
 
 
103
 
_asm_begin
104
 
"                       mov eax, %[L1]          \n"
105
 
"                       mov ebx, %[L2]          \n"
106
 
"                       mov edx, %[L3]          \n"
107
 
"                       mov esi, %[LP2]         \n"
108
 
"                       mov edi, %[Dest]       ## DL1 if Odd or DL2 if Even \n"
109
 
"                       \n"
110
 
".align 8\n"
111
 
_strf(MAINLOOP_LABEL) ":                        \n"
112
 
"                       movq    mm1, qword ptr[eax]             ## L1\n"
113
 
"                       movq    mm2, qword ptr[ebx]             ## L2\n"
114
 
"                       movq    mm3, qword ptr[edx]             ## L3\n"
115
 
"                       movq    mm0, qword ptr[esi]     ## LP2\n"
116
 
"\n"
117
 
"            ## average L1 and L3 leave result in mm4\n"
118
 
"            movq       mm4, mm1                                ## L1\n"
119
 
#if defined(IS_SSE)
120
 
"                   pavgb mm4, mm3\n"
121
 
#elif defined(IS_3DNOW)
122
 
"                   pavgusb mm4, mm3\n"
123
 
#else
124
 
"                       pand    mm4, %[ShiftMask]                       ## "\n"
125
 
"                       psrlw   mm4, 1\n"
126
 
"                       movq    mm5, mm3                                ## L3\n"
127
 
"                       pand    mm5, %[ShiftMask]                       ## "\n"
128
 
"                       psrlw   mm5, 1\n"
129
 
"                       paddb   mm4, mm5                                ## the average, for computing comb\n"
130
 
#endif
131
 
"\n"
132
 
"## get abs value of possible L2 comb\n"
133
 
"                       movq    mm7, mm2                                ## L2\n"
134
 
"                       psubusb mm7, mm4                                ## L2 - avg\n"
135
 
"                       movq    mm5, mm4                                ## avg\n"
136
 
"                       psubusb mm5, mm2                                ## avg - L2\n"
137
 
"                       por             mm5, mm7                                ## abs(avg-L2)\n"
138
 
"                       movq    mm6, mm4                ## copy of avg for later\n"
139
 
"\n"
140
 
"## get abs value of possible LP2 comb\n"
141
 
"                       movq    mm7, mm0                                ## LP2\n"
142
 
"                       psubusb mm7, mm4                                ## LP2 - avg\n"
143
 
"                       psubusb mm4, mm0                                ## avg - LP2\n"
144
 
"                       por             mm4, mm7                                ## abs(avg-LP2)\n"
145
 
"\n"
146
 
"## use L2 or LP2 depending upon which makes smaller comb\n"
147
 
"                       psubusb mm4, mm5                                ## see if it goes to zero\n"
148
 
"                       psubusb mm5, mm5                                ## 0\n"
149
 
"                       pcmpeqb mm4, mm5                                ## if (mm4=0) then FF else 0\n"
150
 
"                       pcmpeqb mm5, mm4                                ## opposite of mm4\n"
151
 
"\n"
152
 
"## if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55\n"
153
 
"                       pand    mm5, mm2                                ## use L2 if mm5 == ff, else 0\n"
154
 
"                       pand    mm4, mm0                                ## use LP2 if mm4 = ff, else 0\n"
155
 
"                       por             mm4, mm5                                ## may the best win\n"
156
 
"\n"
157
 
"## Now lets clip our chosen value to be not outside of the range\n"
158
 
"## of the high/low range L1-L3 by more than abs(L1-L3)\n"
159
 
"## This allows some comb but limits the damages and also allows more\n"
160
 
"## detail than a boring oversmoothed clip.\n"
161
 
"\n"
162
 
"                       movq    mm2, mm1                                ## copy L1\n"
163
 
"                       psubusb mm2, mm3                                ## - L3, with saturation\n"
164
 
"                       paddusb mm2, mm3                ## now = Max(L1,L3)\n"
165
 
"\n"
166
 
"                       pcmpeqb mm7, mm7                                ## all ffffffff\n"
167
 
"                       psubusb mm7, mm1                                ## - L1 \n"
168
 
"                       paddusb mm3, mm7                                ## add, may sat at fff..\n"
169
 
"                       psubusb mm3, mm7                                ## now = Min(L1,L3)\n"
170
 
"\n"
171
 
"## allow the value to be above the high or below the low by amt of MaxComb\n"
172
 
"                       paddusb mm2, %[MaxComb]                 ## increase max by diff\n"
173
 
"                       psubusb mm3, %[MaxComb]                 ## lower min by diff\n"
174
 
"\n"
175
 
"                       psubusb mm4, mm3                                ## best - Min\n"
176
 
"                       paddusb mm4, mm3                                ## now = Max(best,Min(L1,L3)\n"
177
 
"\n"
178
 
"                       pcmpeqb mm7, mm7                                ## all ffffffff\n"
179
 
"                       psubusb mm7, mm4                                ## - Max(best,Min(best,L3) \n"
180
 
"                       paddusb mm2, mm7                                ## add may sat at FFF..\n"
181
 
"                       psubusb mm2, mm7                                ## now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped\n"
182
 
"\n"
183
 
#ifdef IS_SSE
184
 
"                   movntq qword ptr[edi], mm2      ## move in our clipped best\n"
185
 
#else
186
 
"                   movq qword ptr[edi], mm2        ## move in our clipped best\n"
187
 
#endif
188
 
"\n"
189
 
"## bump ptrs and loop\n"
190
 
"                       lea             eax,[eax+8]                             \n"
191
 
"                       lea             ebx,[ebx+8]\n"
192
 
"                       lea             edx,[edx+8]\n"
193
 
"                       lea             edi,[edi+8]                     \n"
194
 
"                       lea             esi,[esi+8]\n"
195
 
"                       dec             %[LoopCtr]\n"
196
 
"                       jnz             " _strf(MAINLOOP_LABEL) "\n"
197
 
_asm_end,
198
 
_m(L1), _m(L2), _m(L3), _m(LP2), _m(Dest), _m(ShiftMask), _m(MaxComb), _m(LoopCtr)
199
 
: "eax", "edx", "esi", "edi");
200
 
 
201
 
        // Copy last odd line if we're processing an Odd field.
202
 
        if (info->IsOdd)
203
 
        {
204
 
                info->pMemcpy(lpCurOverlay + (info->FrameHeight - 1) * info->OverlayPitch,
205
 
                                  pOddLines[info->FieldHeight - 1],
206
 
                                  info->LineLength);
207
 
        }
208
 
 
209
 
    // clear out the MMX registers ready for doing floating point
210
 
    // again
211
 
    _asm
212
 
    {
213
 
        emms
214
 
    }
215
 
        return TRUE;
216
 
}
217
 
 
218
 
// #undef MAINLOOP_LABEL