/*
 * quarterpel DSP function templates
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * quarterpel DSP function templates
 */

/**
 * PIXOP2(OPNAME, OP) - template that emits a family of static inline pixel
 * helpers whose names are prefixed with OPNAME.
 *
 * OP(lvalue, value) is the store step applied to every 32-bit destination
 * word; the destination is addressed through a (uint32_t *) cast of dst.
 *
 * Helpers whose definitions start in this macro:
 *   OPNAME_no_rnd_pixels8_l2_8   for each of h rows, feeds
 *                                no_rnd_avg32(src1 word, src2 word) to OP,
 *                                once at byte offset 0 and once at offset 4.
 *   OPNAME_no_rnd_pixels16_l2_8  calls the 8-wide l2 helper twice, the second
 *                                time on dst + 8.
 *   OPNAME_pixels8_l4_8,
 *   OPNAME_no_rnd_pixels8_l4_8   combine four source words (a, b, c, d) per
 *                                destination word, again at offsets 0 and 4.
 *   OPNAME_pixels16_l4_8,
 *   OPNAME_no_rnd_pixels16_l4_8  call the matching 8-wide l4 helper twice,
 *                                the second time with dst and all four
 *                                sources advanced by 8.
 *
 * l4 arithmetic as written: every source word is split with the masks
 * 0x03030303 (low two bits of each byte) and 0xFCFCFCFC (upper six bits,
 * shifted right by 2). The low parts are accumulated in l0/l1 and the high
 * parts in h0/h1, and the value handed to OP is
 *     h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0F).
 *
 * AV_RN32 and no_rnd_avg32 are defined outside this file.
 * NOTE(review): AV_RN32 is presumably a 32-bit load from a byte pointer and
 * no_rnd_avg32 a packed per-byte average without rounding - confirm.
 *
 * NOTE(review): this copy of the macro is damaged and will not compile as
 * it stands:
 *   - bare numeric lines are interleaved with the code and break the
 *     backslash continuations;
 *   - every parameter list stops after the source pointers, and the braces
 *     and the declaration of i are absent;
 *   - each l0 sum ends in a dangling '+' (its last addend is missing), and
 *     the first l1 sum in OPNAME_pixels8_l4_8 has also lost its second
 *     operand;
 *   - the visible lines of the rounding and no_rnd l4 helpers are the same,
 *     so whatever distinguishes them is presumably in the missing lines.
 * Restore the macro from a pristine copy before relying on it.
 */
#define PIXOP2(OPNAME, OP) \
27
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
28
const uint8_t *src1, \
29
const uint8_t *src2, \
37
for (i = 0; i < h; i++) { \
39
a = AV_RN32(&src1[i * src_stride1]); \
40
b = AV_RN32(&src2[i * src_stride2]); \
41
OP(*((uint32_t *) &dst[i * dst_stride]), \
42
no_rnd_avg32(a, b)); \
43
a = AV_RN32(&src1[i * src_stride1 + 4]); \
44
b = AV_RN32(&src2[i * src_stride2 + 4]); \
45
OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
46
no_rnd_avg32(a, b)); \
50
static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \
51
const uint8_t *src1, \
52
const uint8_t *src2, \
58
OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \
59
src_stride1, src_stride2, h); \
60
OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, \
63
dst_stride, src_stride1, \
67
static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \
68
const uint8_t *src1, \
69
const uint8_t *src2, \
70
const uint8_t *src3, \
71
const uint8_t *src4, \
79
/* FIXME HIGH BIT DEPTH */ \
82
for (i = 0; i < h; i++) { \
83
uint32_t a, b, c, d, l0, l1, h0, h1; \
84
a = AV_RN32(&src1[i * src_stride1]); \
85
b = AV_RN32(&src2[i * src_stride2]); \
86
c = AV_RN32(&src3[i * src_stride3]); \
87
d = AV_RN32(&src4[i * src_stride4]); \
88
l0 = (a & 0x03030303UL) + \
89
(b & 0x03030303UL) + \
91
h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
92
((b & 0xFCFCFCFCUL) >> 2); \
93
l1 = (c & 0x03030303UL) + \
95
h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
96
((d & 0xFCFCFCFCUL) >> 2); \
97
OP(*((uint32_t *) &dst[i * dst_stride]), \
98
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
99
a = AV_RN32(&src1[i * src_stride1 + 4]); \
100
b = AV_RN32(&src2[i * src_stride2 + 4]); \
101
c = AV_RN32(&src3[i * src_stride3 + 4]); \
102
d = AV_RN32(&src4[i * src_stride4 + 4]); \
103
l0 = (a & 0x03030303UL) + \
104
(b & 0x03030303UL) + \
106
h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
107
((b & 0xFCFCFCFCUL) >> 2); \
108
l1 = (c & 0x03030303UL) + \
109
(d & 0x03030303UL); \
110
h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
111
((d & 0xFCFCFCFCUL) >> 2); \
112
OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
113
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
117
static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \
118
const uint8_t *src1, \
119
const uint8_t *src2, \
120
const uint8_t *src3, \
121
const uint8_t *src4, \
129
/* FIXME HIGH BIT DEPTH */ \
132
for (i = 0; i < h; i++) { \
133
uint32_t a, b, c, d, l0, l1, h0, h1; \
134
a = AV_RN32(&src1[i * src_stride1]); \
135
b = AV_RN32(&src2[i * src_stride2]); \
136
c = AV_RN32(&src3[i * src_stride3]); \
137
d = AV_RN32(&src4[i * src_stride4]); \
138
l0 = (a & 0x03030303UL) + \
139
(b & 0x03030303UL) + \
141
h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
142
((b & 0xFCFCFCFCUL) >> 2); \
143
l1 = (c & 0x03030303UL) + \
144
(d & 0x03030303UL); \
145
h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
146
((d & 0xFCFCFCFCUL) >> 2); \
147
OP(*((uint32_t *) &dst[i * dst_stride]), \
148
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
149
a = AV_RN32(&src1[i * src_stride1 + 4]); \
150
b = AV_RN32(&src2[i * src_stride2 + 4]); \
151
c = AV_RN32(&src3[i * src_stride3 + 4]); \
152
d = AV_RN32(&src4[i * src_stride4 + 4]); \
153
l0 = (a & 0x03030303UL) + \
154
(b & 0x03030303UL) + \
156
h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
157
((b & 0xFCFCFCFCUL) >> 2); \
158
l1 = (c & 0x03030303UL) + \
159
(d & 0x03030303UL); \
160
h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
161
((d & 0xFCFCFCFCUL) >> 2); \
162
OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
163
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
167
static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \
168
const uint8_t *src1, \
169
const uint8_t *src2, \
170
const uint8_t *src3, \
171
const uint8_t *src4, \
179
OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \
180
src_stride1, src_stride2, src_stride3, \
182
OPNAME ## _pixels8_l4_8(dst + 8, \
183
src1 + 8, src2 + 8, \
184
src3 + 8, src4 + 8, \
185
dst_stride, src_stride1, src_stride2, \
186
src_stride3, src_stride4, h); \
189
static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \
190
const uint8_t *src1, \
191
const uint8_t *src2, \
192
const uint8_t *src3, \
193
const uint8_t *src4, \
201
OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \
202
dst_stride, src_stride1, \
203
src_stride2, src_stride3, \
205
OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \
206
src1 + 8, src2 + 8, \
207
src3 + 8, src4 + 8, \
208
dst_stride, src_stride1, \
209
src_stride2, src_stride3, \
213
/*
 * Store operation with the OP(lvalue, value) shape that PIXOP2 expects:
 * replaces lvalue a with rnd_avg32(a, b), i.e. combines the new value b with
 * what a already holds. rnd_avg32 is defined outside this file.
 * NOTE(review): by its name rnd_avg32 is a rounding packed average - confirm.
 *
 * Arguments and the whole expansion are parenthesized so that the macro
 * behaves as a single expression whatever is passed in. a is expanded twice
 * and therefore must not have side effects.
 */
#define op_avg(a, b) ((a) = rnd_avg32((a), (b)))
214
/*
 * Store operation with the OP(lvalue, value) shape that PIXOP2 expects:
 * plain assignment of b to lvalue a, discarding whatever a held before.
 *
 * Arguments and the whole expansion are parenthesized so that the macro
 * behaves as a single expression whatever is passed in.
 */
#define op_put(a, b) ((a) = (b))
215
/*
 * Maps the name put_no_rnd_pixels8_8_c onto put_pixels8_8_c (defined outside
 * this file), so code that refers to the no-rounding 8-wide put resolves to
 * the ordinary one.
 * NOTE(review): presumably valid because a plain put involves no averaging
 * and so has nothing to round - confirm against put_pixels8_8_c.
 */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c