//Z #include <malloc.h>

// Win32-style convenience aliases used throughout this deinterlacer source.
#define BOOL unsigned int
#define BYTE unsigned char
10
#ifdef USE_FOR_DSCALER
11
//#define USE_VERTICAL_FILTER
13
BOOL DeinterlaceTomsMoComp_SSE(TDeinterlaceInfo* pInfo);
14
//BOOL DeinterlaceTomsMoComp_SSE2(TDeinterlaceInfo* pInfo);
15
BOOL DeinterlaceTomsMoComp_MMX(TDeinterlaceInfo* pInfo);
16
BOOL DeinterlaceTomsMoComp_3DNOW(TDeinterlaceInfo* pInfo);
17
#define MyMemCopy pMyMemcpy
18
extern long SearchEffort;
19
extern BOOL UseStrangeBob;
22
//#define USE_VERTICAL_FILTER
24
#define MyMemCopy memcpy
//static int Do_Dbl_Resize();
// int __stdcall Search_Effort_21();

static int Do_Dbl_Resize();

// Copies 'count' bytes per row, for 'rows' rows, between two buffers with
// independent pitches (strides).
static int Fieldcopy(void *dest, const void *src, size_t count,
                     int rows, int dst_pitch, int src_pitch);
// Forward declarations of the motion-search kernels. The number in each
// name is presumably the search-effort level (see the SearchEffort global
// declared above); _Max tries the most candidates. The _SB variants
// presumably correspond to the UseStrangeBob path -- confirm against the
// implementation file.
static int Search_Effort_0();
static int Search_Effort_1();
static int Search_Effort_3();
static int Search_Effort_5();
static int Search_Effort_9();
static int Search_Effort_11();
static int Search_Effort_13();
static int Search_Effort_15();
static int Search_Effort_19();
static int Search_Effort_21();
static int Search_Effort_Max();

static int Search_Effort_0_SB();
static int Search_Effort_1_SB();
static int Search_Effort_3_SB();
static int Search_Effort_5_SB();
static int Search_Effort_9_SB();
static int Search_Effort_11_SB();
static int Search_Effort_13_SB();
static int Search_Effort_15_SB();
static int Search_Effort_19_SB();
static int Search_Effort_21_SB();
static int Search_Effort_Max_SB();
61
__declspec(align(128))
62
static __int64 BobDiffW[2];
65
static __int64 BobVal[2];
67
//Z __declspec(align(16))
68
//Z static __int64 Min_Vals[2];
70
//Z __declspec(align(16))
71
//Z static __int64 Max_Vals[2];
74
static const __int64 Max_Mov[2] = {0x0404040404040404LL,0x0404040404040404LL};
75
//static const __int64 Max_Mov[2] = {0x0f0f0f0f0f0f0f0fLL,0x0f0f0f0f0f0f0f0fLL};
76
//static const __int64 Max_Mov[2] = {0x0808080808080808LL,0x0808080808080808LL};
79
static const __int64 DiffThres[2] = {0x0f0f0f0f0f0f0f0fLL,0x0f0f0f0f0f0f0f0fLL};
82
static const __int64 YMask[2] = {0x00ff00ff00ff00ffLL,0x00ff00ff00ff00ffLL}; // keeps only luma
85
static const __int64 UVMask[2] = {0xff00ff00ff00ff00LL,0xff00ff00ff00ff00LL}; // keeps only chroma
88
static const __int64 TENS[2] = {0x0a0a0a0a0a0a0a0aLL,0x0a0a0a0a0a0a0a0aLL};
91
static const __int64 FOURS[2] = {0x0404040404040404LL,0x0404040404040404LL};
94
static const __int64 ONES[2] = {0x0101010101010101LL,0x0101010101010101LL};
97
static const __int64 Max_Comb[2] = {0x0202020202020202LL,0x0202020202020202LL};
100
static const __int64 WHITE[2] = {0x7fff0fff7fff0fffLL,0x7fff0fff7fff0fffLL};
102
static const __int64 ShiftMask = 0xfefffefffefffeffLL; // to avoid shifting chroma to luma
104
static __int64 swork = 0;
107
// A bunch of things that may need 16 byte alignment
109
__declspec(align(16))
110
static const __int64 FIFTEENS[2] = {0x0F0F0F0F0F0F0F0FLL,0x0F0F0F0F0F0F0F0FLL};
112
__declspec(align(16))
113
static const __int64 TWENTIES[2] = {0x1414141414141414LL,0x1414141414141414LL};
115
__declspec(align(16))
116
static const __int64 SIXES[2] = {0x0606060606060606LL,0x0606060606060606LL};
118
__declspec(align(16))
119
static const __int64 FIVES[2] = {0x0505050505050505LL,0x0505050505050505LL};
121
__declspec(align(16))
122
static const __int64 THREES[2] = {0x0303030303030303LL,0x0303030303030303LL};
124
__declspec(align(16))
125
static const __int64 TWOS[2] = {0x0202020202020202LL,0x0202020202020202LL};
// Define a few macros for CPU dependent instructions.
// I suspect I don't really understand how the C macro preprocessor works but
// this seems to get the job done. // TRB 7/01

// BEFORE USING THESE YOU MUST SET:

// #define SSE_TYPE SSE (or MMX or 3DNOW)

// some macros for pavgb instruction
// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
//
// MMX has no pavgb, so emulate it: mask off each byte's low bit (smask),
// shift both operands right one, then add. Note this truncating average
// differs from real pavgb, which rounds up.
#define V_PAVGB_MMX(mmr1,mmr2,mmrw,smask) \
    " movq " #mmrw "," #mmr2 "\n" \
    " pand " #mmrw "," #smask "\n" \
    " psrlw " #mmrw ",1\n" \
    " pand " #mmr1 "," #smask "\n" \
    " psrlw " #mmr1 ",1\n" \
    " paddusb " #mmr1 "," #mmrw "\n"

#define V_PAVGB_SSE(mmr1,mmr2,mmrw,smask) " pavgb " #mmr1 "," #mmr2 "\n"
#define V_PAVGB_3DNOW(mmr1,mmr2,mmrw,smask) " pavgusb " #mmr1 "," #mmr2 "\n"

// Two-level expansion so SSE_TYPE is macro-expanded before ## pasting.
#define V_PAVGB(mmr1,mmr2,mmrw,smask) V_PAVGB2(mmr1,mmr2,mmrw,smask,SSE_TYPE)
#define V_PAVGB2(mmr1,mmr2,mmrw,smask,ssetyp) V_PAVGB3(mmr1,mmr2,mmrw,smask,ssetyp)
#define V_PAVGB3(mmr1,mmr2,mmrw,smask,ssetyp) V_PAVGB_##ssetyp(mmr1,mmr2,mmrw,smask)
// some macros for pmaxub instruction
// V_PMAXUB(mmr1, mmr2)
//
// MMX emulation: unsigned-saturating subtract then add leaves
// mmr1 = max(mmr1, mmr2) per byte.
#define V_PMAXUB_MMX(mmr1,mmr2) \
    " psubusb " #mmr1 "," #mmr2 "\n" \
    " paddusb " #mmr1 "," #mmr2 "\n"

#define V_PMAXUB_SSE(mmr1,mmr2) " pmaxub " #mmr1 "," #mmr2 "\n"
#define V_PMAXUB_3DNOW(mmr1,mmr2) V_PMAXUB_MMX(mmr1,mmr2) // use MMX version
// Two-level expansion so SSE_TYPE is macro-expanded before ## pasting.
#define V_PMAXUB(mmr1,mmr2) V_PMAXUB2(mmr1,mmr2,SSE_TYPE)
#define V_PMAXUB2(mmr1,mmr2,ssetyp) V_PMAXUB3(mmr1,mmr2,ssetyp)
#define V_PMAXUB3(mmr1,mmr2,ssetyp) V_PMAXUB_##ssetyp(mmr1,mmr2)
// some macros for pminub instruction
// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
//
// MMX emulation via complement: mmrw = ~mmr2 (0xff - mmr2), then a
// saturating add/subtract pair leaves mmr1 = min(mmr1, mmr2) per byte.
#define V_PMINUB_MMX(mmr1,mmr2,mmrw) \
    " pcmpeqb " #mmrw "," #mmrw "\n" \
    " psubusb " #mmrw "," #mmr2 "\n" \
    " paddusb " #mmr1 "," #mmrw "\n" \
    " psubusb " #mmr1 "," #mmrw "\n"

#define V_PMINUB_SSE(mmr1,mmr2,mmrw) " pminub " #mmr1 "," #mmr2 "\n"
#define V_PMINUB_3DNOW(mmr1,mmr2,mmrw) V_PMINUB_MMX(mmr1,mmr2,mmrw) // use MMX version
// Two-level expansion so SSE_TYPE is macro-expanded before ## pasting.
#define V_PMINUB(mmr1,mmr2,mmrw) V_PMINUB2(mmr1,mmr2,mmrw,SSE_TYPE)
#define V_PMINUB2(mmr1,mmr2,mmrw,ssetyp) V_PMINUB3(mmr1,mmr2,mmrw,ssetyp)
#define V_PMINUB3(mmr1,mmr2,mmrw,ssetyp) V_PMINUB_##ssetyp(mmr1,mmr2,mmrw)
// some macros for movntq instruction
// V_MOVNTQ(mmr1, mmr2)
// Only SSE has the non-temporal store; MMX and 3DNOW fall back to movq.
#define V_MOVNTQ_MMX(mmr1,mmr2) " movq " #mmr1 "," #mmr2 "\n"
#define V_MOVNTQ_3DNOW(mmr1,mmr2) " movq " #mmr1 "," #mmr2 "\n"
#define V_MOVNTQ_SSE(mmr1,mmr2) " movntq " #mmr1 "," #mmr2 "\n"
// Two-level expansion so SSE_TYPE is macro-expanded before ## pasting.
#define V_MOVNTQ(mmr1,mmr2) V_MOVNTQ2(mmr1,mmr2,SSE_TYPE)
#define V_MOVNTQ2(mmr1,mmr2,ssetyp) V_MOVNTQ3(mmr1,mmr2,ssetyp)
#define V_MOVNTQ3(mmr1,mmr2,ssetyp) V_MOVNTQ_##ssetyp(mmr1,mmr2)
// macro load a field from this object
// NOTE(review): MSVC inline-asm, C++ context ('this'). Loads the object
// pointer into 'reg', then overwrites 'reg' with the named 32-bit member.
// Adjacent original lines appear to be missing from this copy (possibly
// braces or further asm) -- verify against the upstream source. The
// dangling trailing line-continuation has been terminated here.
#define thisLoad(reg, intfield) \
    __asm mov reg, this \
    __asm mov reg, dword ptr [reg].intfield
// SSE2: compare the 4-pixel groups at PADDR1/PADDR2; where their
// |difference| (the "weight") beats the current best weights in xmm7,
// replace the corresponding best pixels in xmm5 with the pair's average.
// NOTE(review): the final "por xmm7, xmm2" line was truncated in this copy
// (the macro ended in a dangling line continuation); restored to mirror
// MERGE4PIXavgH below -- verify against the upstream source.
#define MERGE4PIXavg(PADDR1, PADDR2) \
    " movdqu xmm0, [" _strf(PADDR1) "] #/* our 4 pixels */ \n" \
    " movdqu xmm1, [" _strf(PADDR2) "] #/* our pixel2 value */ \n" \
    " movdqa xmm2, xmm0 #/* another copy of our pixel1 value */ \n" \
    " movdqa xmm3, xmm1 #/* another copy of our pixel1 value */ \n" \
    " psubusb xmm2, xmm1 \n" \
    " psubusb xmm3, xmm0 \n" \
    " por xmm2,xmm3 \n" \
    " pavgb xmm0, xmm1 #/* avg of 2 pixels */ \n" \
    " movdqa xmm3, xmm2 #/* another copy of our our weights */ \n" \
    " pxor xmm1, xmm1 \n" \
    " psubusb xmm3, xmm7 #/* nonzero where old weights lower, else 0 */ \n" \
    " pcmpeqb xmm3, xmm1 #/* now ff where new better, else 00 */ \n" \
    " pcmpeqb xmm1, xmm3 #/* here ff where old better, else 00 */ \n" \
    " pand xmm0, xmm3 #/* keep only better new pixels */ \n" \
    " pand xmm2, xmm3 #/* and weights */ \n" \
    " pand xmm5, xmm1 #/* keep only better old pixels */ \n" \
    " pand xmm7, xmm1 \n" \
    " por xmm5, xmm0 #/* and merge new & old vals */ \n" \
    " por xmm7, xmm2 #/* and weights */ \n"
// SSE2: like MERGE4PIXavg, but each candidate is first formed by averaging
// a horizontal pair of addresses (A averaged with B) before the
// weight-compare-and-merge into xmm5/xmm7.
#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
    " movdqu xmm0, [" _strf(PADDR1A) "] #/* our 4 pixels */ \n" \
    " movdqu xmm1, [" _strf(PADDR2A) "] #/* our pixel2 value */ \n" \
    " movdqu xmm2, [" _strf(PADDR1B) "] #/* our 4 pixels */ \n" \
    " movdqu xmm3, [" _strf(PADDR2B) "] #/* our pixel2 value */ \n" \
    " pavgb xmm0, xmm2 \n" \
    " pavgb xmm1, xmm3 \n" \
    " movdqa xmm2, xmm0 #/* another copy of our pixel1 value */ \n" \
    " movdqa xmm3, xmm1 #/* another copy of our pixel1 value */ \n" \
    " psubusb xmm2, xmm1 \n" \
    " psubusb xmm3, xmm0 \n" \
    " por xmm2,xmm3 \n" \
    " pavgb xmm0, xmm1 #/* avg of 2 pixels */ \n" \
    " movdqa xmm3, xmm2 #/* another copy of our our weights */ \n" \
    " pxor xmm1, xmm1 \n" \
    " psubusb xmm3, xmm7 #/* nonzero where old weights lower, else 0 */ \n" \
    " pcmpeqb xmm3, xmm1 #/* now ff where new better, else 00 */ \n" \
    " pcmpeqb xmm1, xmm3 #/* here ff where old better, else 00 */ \n" \
    " pand xmm0, xmm3 #/* keep only better new pixels */ \n" \
    " pand xmm2, xmm3 #/* and weights */ \n" \
    " pand xmm5, xmm1 #/* keep only better old pixels */ \n" \
    " pand xmm7, xmm1 \n" \
    " por xmm5, xmm0 #/* and merge new & old vals */ \n" \
    " por xmm7, xmm2 \n"
// Sets the chroma bytes of the weight register xmm7 to 0xff (worst weight),
// so chroma is never carried over between merges.
// NOTE(review): this SSE2 variant is presumably inside a conditional whose
// directives were lost from this copy; an MMX RESET_CHROMA is redefined
// further down -- verify the guards before building.
#define RESET_CHROMA " por xmm7, UVMask \n"
// MMX/3DNOW version of MERGE4PIXavg (same algorithm as the SSE2 variant
// above, using V_PAVGB for the CPU-specific byte average).
// NOTE(review): this redefinition is presumably the #else branch of a
// conditional (e.g. SSE2 vs MMX) whose directives were lost from this
// copy. Gaps in the original numbering show two lines were dropped as
// well -- the "por mm2,mm3" weight combine and the final "por mm7, mm2";
// both restored here to mirror the SSE2 variant. Verify against upstream.
#define MERGE4PIXavg(PADDR1, PADDR2) \
    " movq mm0, [" _strf(PADDR1) "] #/* our 4 pixels */ \n" \
    " movq mm1, [" _strf(PADDR2) "] #/* our pixel2 value */ \n" \
    " movq mm2, mm0 #/* another copy of our pixel1 value */ \n" \
    " movq mm3, mm1 #/* another copy of our pixel1 value */ \n" \
    " psubusb mm2, mm1 \n" \
    " psubusb mm3, mm0 \n" \
    " por mm2,mm3 \n" \
    V_PAVGB (mm0, mm1, mm3, ShiftMask) /* avg of 2 pixels */ \
    " movq mm3, mm2 #/* another copy of our our weights */ \n" \
    " pxor mm1, mm1 \n" \
    " psubusb mm3, mm7 #/* nonzero where old weights lower, else 0 */ \n" \
    " pcmpeqb mm3, mm1 #/* now ff where new better, else 00 */ \n" \
    " pcmpeqb mm1, mm3 #/* here ff where old better, else 00 */ \n" \
    " pand mm0, mm3 #/* keep only better new pixels */ \n" \
    " pand mm2, mm3 #/* and weights */ \n" \
    " pand mm5, mm1 #/* keep only better old pixels */ \n" \
    " pand mm7, mm1 \n" \
    " por mm5, mm0 #/* and merge new & old vals */ \n" \
    " por mm7, mm2 #/* and weights */ \n"
// MMX/3DNOW version of MERGE4PIXavgH: average the horizontal address pairs
// first (via V_PAVGB), then weight-compare-and-merge into mm5/mm7.
// NOTE(review): gaps in the original numbering show two dropped lines --
// the "por mm2,mm3" weight combine and the final "por mm7, mm2"; both
// restored here to mirror the SSE2 MERGE4PIXavgH. Verify against upstream.
#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
    " movq mm0, [" _strf(PADDR1A) "] #/* our 4 pixels */ \n" \
    " movq mm1, [" _strf(PADDR2A) "] #/* our pixel2 value */ \n" \
    " movq mm2, [" _strf(PADDR1B) "] #/* our 4 pixels */ \n" \
    " movq mm3, [" _strf(PADDR2B) "] #/* our pixel2 value */ \n" \
    V_PAVGB(mm0, mm2, mm2, ShiftMask) \
    V_PAVGB(mm1, mm3, mm3, ShiftMask) \
    " movq mm2, mm0 #/* another copy of our pixel1 value */ \n" \
    " movq mm3, mm1 #/* another copy of our pixel1 value */ \n" \
    " psubusb mm2, mm1 \n" \
    " psubusb mm3, mm0 \n" \
    " por mm2,mm3 \n" \
    V_PAVGB(mm0, mm1, mm3, ShiftMask) /* avg of 2 pixels */ \
    " movq mm3, mm2 #/* another copy of our our weights */ \n" \
    " pxor mm1, mm1 \n" \
    " psubusb mm3, mm7 #/* nonzero where old weights lower, else 0 */ \n" \
    " pcmpeqb mm3, mm1 #/* now ff where new better, else 00 */ \n" \
    " pcmpeqb mm1, mm3 #/* here ff where old better, else 00 */ \n" \
    " pand mm0, mm3 #/* keep only better new pixels */ \n" \
    " pand mm2, mm3 #/* and weights */ \n" \
    " pand mm5, mm1 #/* keep only better old pixels */ \n" \
    " pand mm7, mm1 \n" \
    " por mm5, mm0 #/* and merge new & old vals */ \n" \
    " por mm7, mm2 #/* and weights */ \n"
// MMX variant: sets the chroma bytes of the weight register mm7 to 0xff
// (worst weight) so chroma is never carried over between merges.
// NOTE(review): redefinition of RESET_CHROMA -- presumably the #else
// branch of a conditional whose directives were lost from this copy.
#define RESET_CHROMA " por mm7, UVMask \n"