// Build for the DScaler variant of this code.
#define USE_FOR_DSCALER

// Route MyMemCopy through pMyMemcpy (defined elsewhere — presumably a
// CPU-specific memcpy routine selected at runtime; confirm at its declaration).
#define MyMemCopy pMyMemcpy
// Define a few macros for CPU dependent instructions.
// I suspect I don't really understand how the C macro preprocessor works but
// this seems to get the job done. // TRB 7/01

// BEFORE USING THESE YOU MUST SET:

// #define SSE_TYPE SSE (or MMX or 3DNOW)

// some macros for the pavgb instruction
// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
// MMX has no pavgb, so emulate a byte average: mask off the low bit of each
// byte in both operands (so the word-wide psrlw cannot leak bits between
// bytes), shift each right one, and add.  This yields the truncated average
// (a>>1)+(b>>1).  smask is expected to hold 0xfe in every byte —
// NOTE(review): confirm against the _ShiftMask constant used by callers.
#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
"movq "mmr2", "mmrw"\n\t" \
"pand "smask", "mmrw"\n\t" \
"psrlw $1, "mmrw"\n\t" \
"pand "smask", "mmr1"\n\t" \
"psrlw $1, "mmr1"\n\t" \
"paddusb "mmrw", "mmr1"\n\t"
// SSE (MMXEXT) has a real packed byte-average instruction.
#define V_PAVGB_SSE(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
// 3DNow! spells the same operation pavgusb.
#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
// Dispatch on SSE_TYPE.  The intermediate V_PAVGB2 forces SSE_TYPE to be
// macro-expanded before the ## paste in V_PAVGB3 (standard double-expansion
// trick; pasting directly would glue the literal token "SSE_TYPE").
#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SSE_TYPE)
#define V_PAVGB2(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp)
#define V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB_##ssetyp(mmr1, mmr2, mmrw, smask)
// some macros for the pmaxub instruction (per-byte unsigned max)
// MMX lacks pmaxub; use saturating arithmetic: (a -us b) +us b == max(a, b)
// for unsigned bytes (the subtract clamps to 0 wherever a <= b).
#define V_PMAXUB_MMX(mmr1, mmr2) \
"psubusb "mmr2", "mmr1"\n\t" \
"paddusb "mmr2", "mmr1"\n\t"
#define V_PMAXUB_SSE(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
// SSE_TYPE dispatch; same double-expansion trick as V_PAVGB.
#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SSE_TYPE)
#define V_PMAXUB2(mmr1, mmr2, ssetyp) V_PMAXUB3(mmr1, mmr2, ssetyp)
#define V_PMAXUB3(mmr1, mmr2, ssetyp) V_PMAXUB_##ssetyp(mmr1, mmr2)
// some macros for the pminub instruction (per-byte unsigned min)
// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
// MMX emulation: mmrw = 0xff.. then mmrw -us mmr2 leaves the byte-wise
// complement (255 - b); (a +us (255-b)) -us (255-b) == min(a, b): the add
// saturates at 255 exactly where a > b, and the subtract then recovers b.
#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
"pcmpeqb "mmrw", "mmrw"\n\t" \
"psubusb "mmr2", "mmrw"\n\t" \
"paddusb "mmrw", "mmr1"\n\t" \
"psubusb "mmrw", "mmr1"\n\t"
#define V_PMINUB_SSE(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
// SSE_TYPE dispatch; same double-expansion trick as V_PAVGB.
#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SSE_TYPE)
#define V_PMINUB2(mmr1, mmr2, mmrw, ssetyp) V_PMINUB3(mmr1, mmr2, mmrw, ssetyp)
#define V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) V_PMINUB_##ssetyp(mmr1, mmr2, mmrw)
// some macros for the movntq instruction
// V_MOVNTQ(mmr1, mmr2): store mmr2 to the destination operand mmr1.
// SSE uses the non-temporal store (bypasses the cache); plain MMX and
// 3DNow! fall back to an ordinary movq store.
#define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
#define V_MOVNTQ_SSE(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
// SSE_TYPE dispatch; same double-expansion trick as V_PAVGB.
#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SSE_TYPE)
#define V_MOVNTQ2(mmr1, mmr2, ssetyp) V_MOVNTQ3(mmr1, mmr2, ssetyp)
#define V_MOVNTQ3(mmr1, mmr2, ssetyp) V_MOVNTQ_##ssetyp(mmr1, mmr2)
// SSE2 merge candidate: load 16 bytes from PADDR1 and PADDR2, compute their
// per-byte absolute difference ("weight"), and wherever that weight is no
// worse than the best weight so far in xmm7, replace the best-so-far pixels
// in xmm5 with avg(p1, p2) and the weights in xmm7 with the new weight.
// Scratch: xmm0-xmm3.  xmm5 (best pixels) and xmm7 (best weights) are
// carried across invocations by the surrounding inline-asm block —
// NOTE(review): confirm register roles at the call sites.
#define MERGE4PIXavg(PADDR1, PADDR2) \
"movdqu "PADDR1", %%xmm0\n\t" /* p1 */ \
"movdqu "PADDR2", %%xmm1\n\t" /* p2 */ \
"movdqa %%xmm0, %%xmm2\n\t" /* copy of p1 */ \
"movdqa %%xmm1, %%xmm3\n\t" /* copy of p2 */ \
"psubusb %%xmm1, %%xmm2\n\t" /* sat(p1 - p2) */ \
"psubusb %%xmm0, %%xmm3\n\t" /* sat(p2 - p1) */ \
"por %%xmm3, %%xmm2\n\t" /* xmm2 = |p1 - p2|, the new weights */ \
"pavgb %%xmm1, %%xmm0\n\t" /* xmm0 = avg of the 2 pixels */ \
"movdqa %%xmm2, %%xmm3\n\t" /* another copy of our weights */ \
"pxor %%xmm1, %%xmm1\n\t" \
"psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
"pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
"pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
"pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
"pand %%xmm3, %%xmm2\n\t" /* and weights */ \
"pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
"pand %%xmm1, %%xmm7\n\t" /* and weights */ \
"por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
"por %%xmm2, %%xmm7\n\t"
// SSE2 horizontal variant of MERGE4PIXavg: each candidate pixel group is
// first formed by averaging two source addresses (A and B halves) with
// pavgb, then the averaged p1/p2 go through the identical weight-compare
// merge against the running best in xmm5/xmm7.
// Scratch: xmm0-xmm3; updates xmm5 (best pixels) and xmm7 (best weights).
#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
"movdqu "PADDR1A", %%xmm0\n\t" /* p1a */ \
"movdqu "PADDR2A", %%xmm1\n\t" /* p2a */ \
"movdqu "PADDR1B", %%xmm2\n\t" /* p1b */ \
"movdqu "PADDR2B", %%xmm3\n\t" /* p2b */ \
"pavgb %%xmm2, %%xmm0\n\t" /* xmm0 = avg(p1a, p1b) */ \
"pavgb %%xmm3, %%xmm1\n\t" /* xmm1 = avg(p2a, p2b) */ \
"movdqa %%xmm0, %%xmm2\n\t" /* copy of p1 */ \
"movdqa %%xmm1, %%xmm3\n\t" /* copy of p2 */ \
"psubusb %%xmm1, %%xmm2\n\t" /* sat(p1 - p2) */ \
"psubusb %%xmm0, %%xmm3\n\t" /* sat(p2 - p1) */ \
"por %%xmm3, %%xmm2\n\t" /* xmm2 = |p1 - p2|, the new weights */ \
"pavgb %%xmm1, %%xmm0\n\t" /* xmm0 = avg of the 2 pixels */ \
"movdqa %%xmm2, %%xmm3\n\t" /* another copy of our weights */ \
"pxor %%xmm1, %%xmm1\n\t" \
"psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
"pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
"pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
"pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
"pand %%xmm3, %%xmm2\n\t" /* and weights */ \
"pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
"pand %%xmm1, %%xmm7\n\t" /* and weights */ \
"por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
"por %%xmm2, %%xmm7\n\t"
// OR the running weights in xmm7 with _UVMask.  _UVMask is defined
// elsewhere — presumably all-ones at the chroma (U/V) byte positions, so
// those weight bytes become maximal and chroma never wins the "best match"
// comparison; confirm at _UVMask's definition.
#define RESET_CHROMA "por "_UVMask", %%xmm7\n\t"

#else // ifdef IS_SSE2
// MMX/3DNow!/SSE(MMXEXT) merge candidate: 8-byte version of the SSE2
// MERGE4PIXavg above.  Loads p1/p2, computes per-byte absolute difference
// ("weight"), and wherever that weight is no worse than the best-so-far in
// mm7, replaces the best pixels in mm5 with avg(p1, p2) and the weights in
// mm7 with the new weight.  The average goes through V_PAVGB so it works on
// plain MMX too; _ShiftMask is defined elsewhere (see V_PAVGB_MMX).
// Scratch: mm0-mm3; updates mm5 (best pixels) and mm7 (best weights).
#define MERGE4PIXavg(PADDR1, PADDR2) \
"movq "PADDR1", %%mm0\n\t" /* p1 */ \
"movq "PADDR2", %%mm1\n\t" /* p2 */ \
"movq %%mm0, %%mm2\n\t" /* copy of p1 */ \
"movq %%mm1, %%mm3\n\t" /* copy of p2 */ \
"psubusb %%mm1, %%mm2\n\t" /* sat(p1 - p2) */ \
"psubusb %%mm0, %%mm3\n\t" /* sat(p2 - p1) */ \
"por %%mm3, %%mm2\n\t" /* mm2 = |p1 - p2|, the new weights */ \
V_PAVGB ("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
"movq %%mm2, %%mm3\n\t" /* another copy of our weights */ \
"pxor %%mm1, %%mm1\n\t" \
"psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
"pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
"pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
"pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
"pand %%mm3, %%mm2\n\t" /* and weights */ \
"pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
"pand %%mm1, %%mm7\n\t" /* and weights */ \
"por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
"por %%mm2, %%mm7\n\t"
// MMX horizontal variant: each candidate is first formed by V_PAVGB-averaging
// two source addresses (A and B halves); the averaged p1/p2 then go through
// the identical weight-compare merge against the running best in mm5/mm7.
// V_PAVGB's work register may alias its second operand here because the B
// halves (mm2/mm3) are dead after the average.
// Scratch: mm0-mm3; updates mm5 (best pixels) and mm7 (best weights).
#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
"movq "PADDR1A", %%mm0\n\t" /* p1a */ \
"movq "PADDR2A", %%mm1\n\t" /* p2a */ \
"movq "PADDR1B", %%mm2\n\t" /* p1b */ \
"movq "PADDR2B", %%mm3\n\t" /* p2b */ \
V_PAVGB("%%mm0", "%%mm2", "%%mm2", _ShiftMask) /* mm0 = avg(p1a, p1b) */ \
V_PAVGB("%%mm1", "%%mm3", "%%mm3", _ShiftMask) /* mm1 = avg(p2a, p2b) */ \
"movq %%mm0, %%mm2\n\t" /* copy of p1 */ \
"movq %%mm1, %%mm3\n\t" /* copy of p2 */ \
"psubusb %%mm1, %%mm2\n\t" /* sat(p1 - p2) */ \
"psubusb %%mm0, %%mm3\n\t" /* sat(p2 - p1) */ \
"por %%mm3, %%mm2\n\t" /* mm2 = |p1 - p2|, the new weights */ \
V_PAVGB("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
"movq %%mm2, %%mm3\n\t" /* another copy of our weights */ \
"pxor %%mm1, %%mm1\n\t" \
"psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
"pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
"pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
"pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
"pand %%mm3, %%mm2\n\t" /* and weights */ \
"pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
"pand %%mm1, %%mm7\n\t" /* and weights */ \
"por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
"por %%mm2, %%mm7\n\t"
// MMX variant of RESET_CHROMA: OR the running weights in mm7 with _UVMask
// (defined elsewhere — presumably all-ones at the chroma byte positions, so
// chroma never wins the "best match" comparison; confirm at its definition).
#define RESET_CHROMA "por "_UVMask", %%mm7\n\t"