4
// Working pointers for the weave/bob search loop. pSrc/pSrcP are aimed at the
// current and previous fields' weave source lines (set from pWeaveSrc /
// pWeaveSrcP below); pBob/pBobP at the corresponding copy-source lines.
const unsigned char* pSrcP;
5
const unsigned char* pSrc;
6
const unsigned char* pBob;
7
const unsigned char* pBobP;
9
// Packed byte constants for the MMX code below: each 64-bit value repeats a
// one- or two-byte pattern across all eight lanes so a single movq loads the
// per-byte operand for the whole qword.
int64_t Max_Mov = 0x0404040404040404ull;   // max per-byte motion allowance — TODO confirm units
10
int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; // per-byte difference threshold
11
int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
12
int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
13
int64_t TENS = 0x0a0a0a0a0a0a0a0aull;  // constant 10 in every byte lane
14
int64_t FOURS = 0x0404040404040404ull; // constant 4 in every byte lane
15
int64_t ONES = 0x0101010101010101ull;  // constant 1 in every byte lane
16
int64_t Min_Vals = 0x0000000000000000ull; // scratch: per-byte clamp bounds, filled in by the asm
17
int64_t Max_Vals = 0x0000000000000000ull;
18
int64_t ShiftMask = 0xfefffefffefffeffull; // clears low bit per 16-bit lane for the pavgb emulation
20
// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
21
// saves a lot of xor's to delete 64bit garbage.
23
#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
24
long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
26
long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
29
long dst_pitch2 = 2 * dst_pitch;
33
long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2
35
long Last8 = (rowsize-8); // ofs to last 8 bytes in row
38
long dst_pitchw = dst_pitch; // local stor so asm can ref
39
pSrc = pWeaveSrc; // points 1 weave line above
40
pSrcP = pWeaveSrcP; // "
44
#ifdef USE_VERTICAL_FILTER
45
pDest = pWeaveDest + dst_pitch2;
47
pDest = pWeaveDest + 3*dst_pitch;
52
#ifdef USE_VERTICAL_FILTER
53
pDest = pWeaveDest + dst_pitch;
55
pDest = pWeaveDest + dst_pitch2;
62
pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
63
pBobP = pCopySrcP + src_pitch2;
73
#define _src_pitch2 "%1"
74
#define _ShiftMask "%2"
76
#define _dst_pitchw "%4"
81
#define _DiffThres "%9"
82
#define _Min_Vals "%10"
83
#define _Max_Vals "%11"
88
#define _Max_Mov "%16"
95
for (y=1; y < FldHeight-1; y++)
97
// pretend it's indented -->>
100
// Loop general reg usage
102
// XAX - pBobP, then pDest
105
// XDX - current offset
106
// XDI - prev weave pixels, 1 line up
107
// XSI - next weave pixels, 1 line up
109
// Save "XBX" (-fPIC)
110
MOVX" %%"XBX", "_oldbx"\n\t"
114
// sse2 code deleted for now
117
// simple bob first 8 bytes
118
MOVX" "_pBob", %%"XBX"\n\t"
119
MOVX" "_src_pitch2", %%"XCX"\n\t"
121
#ifdef USE_VERTICAL_FILTER
122
"movq (%%"XBX"), %%mm0\n\t"
123
"movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
124
"movq %%mm0, %%mm2\n\t"
125
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
126
V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
127
V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
128
MOVX" "_pDest", %%"XDI"\n\t"
129
MOVX" "_dst_pitchw", %%"XAX"\n\t"
130
V_MOVNTQ ("(%%"XDI")", "%%mm0")
131
V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
133
// simple bob last 8 bytes
134
MOVX" "_Last8", %%"XDX"\n\t"
135
LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"]
136
"movq (%%"XSI"), %%mm0\n\t"
137
"movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"]
138
"movq %%mm0, %%mm2\n\t"
139
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
140
V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
141
V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
142
ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest
143
V_MOVNTQ ("%%"XDI"", "%%mm0")
144
V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
147
"movq (%%"XBX"), %%mm0\n\t"
148
// pavgb mm0, qword ptr["XBX"+"XCX"]
149
V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
150
MOVX" "_pDest", %%"XDI"\n\t"
151
V_MOVNTQ ("(%%"XDI")", "%%mm0")
153
// simple bob last 8 bytes
154
MOVX" "_Last8", %%"XDX"\n\t"
155
LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
156
"movq (%%"XSI"), %%mm0\n\t"
157
// pavgb mm0, qword ptr["XSI"+"XCX"]
158
V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
159
V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
161
// now loop and get the middle qwords
162
MOVX" "_pSrc", %%"XSI"\n\t"
163
MOVX" "_pSrcP", %%"XDI"\n\t"
164
MOVX" $8, %%"XDX"\n\t" // curr offset into all lines
167
MOVX" "_pBobP", %%"XAX"\n\t"
168
ADDX" $8, %%"XDI"\n\t"
169
ADDX" $8, %%"XSI"\n\t"
170
ADDX" $8, %%"XBX"\n\t"
171
ADDX" %%"XDX", %%"XAX"\n\t"
173
#ifdef USE_STRANGE_BOB
174
#include "StrangeBob.inc"
176
#include "WierdBob.inc"
180
// throughout most of the rest of this loop we will maintain
181
// mm4 our min bob value
182
// mm5 best weave pixels so far
183
// mm6 our max Bob value
184
// mm7 best weighted pixel ratings so far
186
// We will keep a slight bias to using the weave pixels
187
// from the current location, by rating them by the min distance
188
// from the Bob value instead of the avg distance from that value.
189
// our best and only rating so far
190
"pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet