2
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4
* Use of this source code is governed by a BSD-style license
5
* that can be found in the LICENSE file in the root of the source
6
* tree. An additional intellectual property rights grant can be found
7
* in the file PATENTS. All contributing project authors may
8
* be found in the AUTHORS file in the root of the source tree.
14
/*
 * Bilinear (2-tap) filter table: 8 rows of 2 taps each. A row is selected
 * with the xoffset / yoffset index in the vp8_bilinear_predict*_c entry
 * points, so those offsets must lie in 0..7.
 * NOTE(review): the initializer rows are not visible in this view; the
 * bilinear pass headers state that the two taps should sum to
 * VP8_FILTER_WEIGHT -- confirm against the full table.
 */
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
26
/*
 * Six-tap sub-pixel filter table: 8 rows of 6 taps each. A row is selected
 * with the xoffset / yoffset index in the vp8_sixtap_predict*_c entry
 * points, so those offsets must lie in 0..7.
 *
 * Every row below sums to 128. Row 0 is a pass-through (only tap [2], the
 * one applied to the current sample, is non-zero), and rows 5, 6 and 7 are
 * rows 3, 2 and 1 with the tap order reversed.
 * NOTE(review): 128 is presumably VP8_FILTER_WEIGHT -- confirm in the header
 * that defines it.
 */
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
29
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
30
{ 0, -6, 123, 12, -1, 0 },
31
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
32
{ 0, -9, 93, 50, -6, 0 },
33
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
34
{ 0, -6, 50, 93, -9, 0 },
35
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
36
{ 0, -1, 12, 123, -6, 0 },
39
/*
 * First pass of the separable 6-tap filter.
 *
 * For each of output_height x output_width positions, weights the six
 * samples at offsets -2..+3 (in units of pixel_step) around src_ptr with
 * vp8_filter[0..5], adds the rounding term and shifts right by
 * VP8_FILTER_SHIFT. The source must therefore be readable 2 steps before
 * and 3 steps after every filtered position.
 *
 * src_ptr             : source samples (8-bit).
 * src_pixels_per_line : source stride, used for the end-of-row advance.
 * pixel_step          : distance between neighbouring taps; every caller in
 *                       this file passes 1 (horizontal filtering).
 * output_height/width : size of the block written to output_ptr.
 * vp8_filter          : six filter taps.
 *
 * The output rows are packed: output_ptr advances by output_width per row.
 * NOTE(review): the output_ptr parameter declaration, the locals, the
 * per-sample store and the per-sample src_ptr advance are not visible in
 * this view. Callers pass an int buffer (FData) as the output.
 */
static void filter_block2d_first_pass
41
unsigned char *src_ptr,
43
unsigned int src_pixels_per_line,
44
unsigned int pixel_step,
45
unsigned int output_height,
46
unsigned int output_width,
47
const short *vp8_filter
53
for (i = 0; i < output_height; i++)
55
for (j = 0; j < output_width; j++)
57
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
58
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
59
((int)src_ptr[0] * vp8_filter[2]) +
60
((int)src_ptr[pixel_step] * vp8_filter[3]) +
61
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
62
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
63
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
65
/* Scale the weighted sum back down by the filter weight. */
/* NOTE(review): the taps can be negative, so the shift alone does not bound
 * the result to 0-255; a clamp presumably follows but is not visible here. */
66
Temp = Temp >> VP8_FILTER_SHIFT;
78
/* Skip the rest of the source row; assumes src_ptr moved forward by
 * output_width samples inside the inner loop (not visible here). */
src_ptr += src_pixels_per_line - output_width;
79
output_ptr += output_width;
83
/*
 * Second pass of the separable 6-tap filter.
 *
 * Same six-tap weighted sum as the first pass (offsets -2..+3 in units of
 * pixel_step), but the result is narrowed to unsigned char and written to
 * output_ptr, which advances by output_pitch per row.
 *
 * output_ptr          : destination block (8-bit).
 * src_pixels_per_line : stride of the intermediate buffer.
 * pixel_step          : distance between neighbouring taps; callers in this
 *                       file pass the intermediate row width, so the taps
 *                       run vertically.
 * output_height/width : size of the block written.
 * vp8_filter          : six filter taps.
 *
 * NOTE(review): the src_ptr and output_pitch parameter declarations and the
 * locals are not visible in this view. Judging by the call sites the
 * argument order is (src_ptr, output_ptr, output_pitch, src_pixels_per_line,
 * pixel_step, output_height, output_width, vp8_filter) -- confirm.
 */
static void filter_block2d_second_pass
86
unsigned char *output_ptr,
88
unsigned int src_pixels_per_line,
89
unsigned int pixel_step,
90
unsigned int output_height,
91
unsigned int output_width,
92
const short *vp8_filter
98
for (i = 0; i < output_height; i++)
100
for (j = 0; j < output_width; j++)
103
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
104
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
105
((int)src_ptr[0] * vp8_filter[2]) +
106
((int)src_ptr[pixel_step] * vp8_filter[3]) +
107
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
108
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
109
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
111
/* Scale the weighted sum back down by the filter weight. */
/* NOTE(review): the shift alone does not bound the result to 0-255; a clamp
 * presumably sits between here and the store but is not visible. */
112
Temp = Temp >> VP8_FILTER_SHIFT;
119
output_ptr[j] = (unsigned char)Temp;
124
/* Skip the rest of the intermediate row; assumes src_ptr moved forward by
 * output_width samples inside the inner loop (not visible here). */
src_ptr += src_pixels_per_line - output_width;
125
output_ptr += output_pitch;
130
/*
 * 4x4 two-dimensional 6-tap filter: horizontal pass into a temporary
 * buffer, then vertical pass into output_ptr.
 *
 * The horizontal pass starts two source rows above src_ptr and produces
 * 9 rows of 4 samples (4 output rows plus the 2 rows above and 3 rows below
 * that the vertical taps reach). The vertical pass starts at FData + 8,
 * i.e. two 4-sample rows in, which is the row aligned with src_ptr.
 *
 * NOTE(review): the output_pitch and VFilter parameter declarations are not
 * visible in this view, although both are used below.
 */
static void filter_block2d
132
unsigned char *src_ptr,
133
unsigned char *output_ptr,
134
unsigned int src_pixels_per_line,
136
const short *HFilter,
140
int FData[9*4]; /* Temp data buffer used in filtering */
142
/* First filter 1-D horizontally... */
143
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
145
/* then filter vertically... */
146
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
150
/*
 * 4x4 six-tap sub-pixel prediction (C implementation).
 *
 * Picks the horizontal and vertical tap sets from vp8_sub_pel_filters using
 * xoffset and yoffset, then runs the 2-D filter on the block at src_ptr,
 * writing to dst_ptr with stride dst_pitch.
 *
 * xoffset / yoffset index an 8-row table and are not range-checked here, so
 * callers must pass values in 0..7.
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_sixtap_predict4x4_c
152
unsigned char *src_ptr,
153
int src_pixels_per_line,
156
unsigned char *dst_ptr,
160
const short *HFilter;
161
const short *VFilter;
163
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
164
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
166
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
168
/*
 * 8x8 six-tap sub-pixel prediction (C implementation).
 *
 * Picks the tap sets from vp8_sub_pel_filters using xoffset / yoffset
 * (must be 0..7; not range-checked here). The horizontal pass starts two
 * source rows above src_ptr and produces 13 rows of 8 samples (8 output
 * rows plus 2 above and 3 below). The vertical pass starts at FData + 16,
 * i.e. two 8-sample rows in, and writes 8x8 samples to dst_ptr.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_sixtap_predict8x8_c
170
unsigned char *src_ptr,
171
int src_pixels_per_line,
174
unsigned char *dst_ptr,
178
const short *HFilter;
179
const short *VFilter;
180
/* NOTE(review): the first pass below writes 13 * 8 entries; the buffer is
 * declared twice that size. */
int FData[13*16]; /* Temp data buffer used in filtering */
182
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
183
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
185
/* First filter 1-D horizontally... */
186
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
189
/* then filter vertically... */
190
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
194
/*
 * 8-wide by 4-high six-tap sub-pixel prediction (C implementation).
 *
 * Picks the tap sets from vp8_sub_pel_filters using xoffset / yoffset
 * (must be 0..7; not range-checked here). The horizontal pass starts two
 * source rows above src_ptr and produces 9 rows of 8 samples (4 output rows
 * plus 2 above and 3 below). The vertical pass starts at FData + 16, i.e.
 * two 8-sample rows in, and writes 4 rows of 8 samples to dst_ptr.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_sixtap_predict8x4_c
196
unsigned char *src_ptr,
197
int src_pixels_per_line,
200
unsigned char *dst_ptr,
204
const short *HFilter;
205
const short *VFilter;
206
/* NOTE(review): the first pass below writes only 9 * 8 entries; the buffer
 * size matches the 8x8 variant and is larger than needed here. */
int FData[13*16]; /* Temp data buffer used in filtering */
208
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
209
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
211
/* First filter 1-D horizontally... */
212
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
215
/* then filter vertically... */
216
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
220
/*
 * 16x16 six-tap sub-pixel prediction (C implementation).
 *
 * Picks the tap sets from vp8_sub_pel_filters using xoffset / yoffset
 * (must be 0..7; not range-checked here). The horizontal pass starts two
 * source rows above src_ptr and produces 21 rows of 16 samples (16 output
 * rows plus 2 above and 3 below). The vertical pass starts at FData + 32,
 * i.e. two 16-sample rows in, and writes 16x16 samples to dst_ptr.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_sixtap_predict16x16_c
222
unsigned char *src_ptr,
223
int src_pixels_per_line,
226
unsigned char *dst_ptr,
230
const short *HFilter;
231
const short *VFilter;
232
/* NOTE(review): the first pass below writes 21 * 16 entries; the buffer is
 * declared with 24 columns per row. */
int FData[21*24]; /* Temp data buffer used in filtering */
235
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
236
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
238
/* First filter 1-D horizontally... */
239
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
241
/* then filter vertically... */
242
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
247
/****************************************************************************
249
* ROUTINE : filter_block2d_bil_first_pass
251
* INPUTS : unsigned char *src_ptr : Pointer to source block.
252
* unsigned int src_stride : Stride of source block.
253
* height : Number of rows to filter.
254
* width : Number of samples per row.
255
* const short *vp8_filter : Array of 2 bi-linear filter taps.
257
* OUTPUTS : unsigned short *dst_ptr : Pointer to filtered block.
261
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
262
* in the horizontal direction to produce the filtered output
263
* block. Used to implement first-pass of 2-D separable filter.
265
* SPECIAL NOTES : Output is stored as unsigned short (see dst_ptr), after
* rounding and shifting right by VP8_FILTER_SHIFT.
* Each output reads src_ptr[0] and src_ptr[1], so one sample
* past the end of every row must be readable.
266
* Two filter taps should sum to VP8_FILTER_WEIGHT.
* NOTE(review): the height / width declarations and the
* per-sample pointer advances are not visible in this view.
268
****************************************************************************/
269
static void filter_block2d_bil_first_pass
271
unsigned char *src_ptr,
272
unsigned short *dst_ptr,
273
unsigned int src_stride,
276
const short *vp8_filter
281
for (i = 0; i < height; i++)
283
for (j = 0; j < width; j++)
285
/* Apply bilinear filter */
286
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
287
((int)src_ptr[1] * vp8_filter[1]) +
288
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
293
/* Skip the rest of the source row; assumes src_ptr moved forward by width
 * samples inside the inner loop (not visible here). */
src_ptr += src_stride - width;
298
/****************************************************************************
300
* ROUTINE : filter_block2d_bil_second_pass
302
* INPUTS : unsigned short *src_ptr : Pointer to source block.
303
* dst_pitch : Destination block pitch.
304
* height : Block height.
305
* width : Block width.
306
* const short *vp8_filter : Array of 2 bi-linear filter taps.
308
* OUTPUTS : unsigned char *dst_ptr : Pointer to filtered block.
312
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
313
* in the vertical direction to produce the filtered output
314
* block. Used to implement second-pass of 2-D separable filter.
316
* SPECIAL NOTES : Takes the unsigned short intermediate produced by
* filter_block2d_bil_first_pass. The second tap reads
* src_ptr[width], so the intermediate rows must be packed
* with a stride of width and contain height + 1 rows.
317
* Two filter taps should sum to VP8_FILTER_WEIGHT.
* NOTE(review): the dst_pitch / height / width declarations
* and the src_ptr advance are not visible in this view.
319
****************************************************************************/
320
static void filter_block2d_bil_second_pass
322
unsigned short *src_ptr,
323
unsigned char *dst_ptr,
327
const short *vp8_filter
333
for (i = 0; i < height; i++)
335
for (j = 0; j < width; j++)
338
/* Blend the current sample with the one directly below it. */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
339
((int)src_ptr[width] * vp8_filter[1]) +
340
(VP8_FILTER_WEIGHT / 2);
341
/* NOTE(review): the cast is to unsigned int, but dst_ptr is unsigned char,
 * so the value is narrowed again on assignment. */
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
346
dst_ptr += dst_pitch;
351
/****************************************************************************
353
* ROUTINE : filter_block2d_bil
355
* INPUTS : unsigned char *src_ptr : Pointer to source block.
356
* unsigned int src_pitch : Stride of source block.
357
* unsigned int dst_pitch : Stride of destination block.
358
* const short *HFilter : Array of 2 horizontal filter taps.
359
* const short *VFilter : Array of 2 vertical filter taps.
360
* Width : Block width
361
* Height : Block height
363
* OUTPUTS : unsigned char *dst_ptr : Pointer to filtered block.
367
* FUNCTION : 2-D filters an input block by applying a 2-tap
368
* bi-linear filter horizontally followed by a 2-tap
369
* bi-linear filter vertically on the result.
371
* SPECIAL NOTES : The largest block size that can be handled here is 16x16:
* the intermediate buffer holds (Height + 1) rows of Width
* samples and is sized 17 * 16. Width and Height are not
* range-checked in the code visible here.
* NOTE(review): the Width / Height parameter declarations
* are not visible in this view.
373
****************************************************************************/
374
static void filter_block2d_bil
376
unsigned char *src_ptr,
377
unsigned char *dst_ptr,
378
unsigned int src_pitch,
379
unsigned int dst_pitch,
380
const short *HFilter,
381
const short *VFilter,
387
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
389
/* First filter 1-D horizontally... */
/* One extra row is produced because the vertical pass reads the row below
 * each output row. */
390
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
392
/* then 1-D vertically... */
393
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
397
/*
 * 4x4 bilinear sub-pixel prediction (C implementation).
 *
 * Picks the horizontal and vertical tap pairs from vp8_bilinear_filters
 * using xoffset / yoffset (must be 0..7; not range-checked here) and runs
 * the 2-D bilinear filter from src_ptr into dst_ptr.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * and the declaration of i are not visible in this view.
 */
void vp8_bilinear_predict4x4_c
399
unsigned char *src_ptr,
400
int src_pixels_per_line,
403
unsigned char *dst_ptr,
407
const short *HFilter;
408
const short *VFilter;
410
HFilter = vp8_bilinear_filters[xoffset];
411
VFilter = vp8_bilinear_filters[yoffset];
415
/* NOTE(review): the statements from here down to the second pair of calls
 * compare bilinear_predict4x4_mmx against the C filter using two scratch
 * 4x4 buffers, and re-run both on a mismatch. This looks like debug
 * scaffolding; confirm it is compiled out (any preprocessor guard is not
 * visible in this view). */
unsigned char temp1[16];
416
unsigned char temp2[16];
418
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
419
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
421
for (i = 0; i < 16; i++)
423
if (temp1[i] != temp2[i])
425
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
426
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
431
/* The actual prediction written to the caller's destination. */
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
435
/*
 * 8x8 bilinear sub-pixel prediction (C implementation).
 *
 * Picks the horizontal and vertical tap pairs from vp8_bilinear_filters
 * using xoffset / yoffset (must be 0..7; not range-checked here) and runs
 * the 2-D bilinear filter with Width = 8, Height = 8.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_bilinear_predict8x8_c
437
unsigned char *src_ptr,
438
int src_pixels_per_line,
441
unsigned char *dst_ptr,
445
const short *HFilter;
446
const short *VFilter;
448
HFilter = vp8_bilinear_filters[xoffset];
449
VFilter = vp8_bilinear_filters[yoffset];
451
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
455
/*
 * 8-wide by 4-high bilinear sub-pixel prediction (C implementation).
 *
 * Picks the horizontal and vertical tap pairs from vp8_bilinear_filters
 * using xoffset / yoffset (must be 0..7; not range-checked here) and runs
 * the 2-D bilinear filter with Width = 8, Height = 4.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_bilinear_predict8x4_c
457
unsigned char *src_ptr,
458
int src_pixels_per_line,
461
unsigned char *dst_ptr,
465
const short *HFilter;
466
const short *VFilter;
468
HFilter = vp8_bilinear_filters[xoffset];
469
VFilter = vp8_bilinear_filters[yoffset];
471
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
475
/*
 * 16x16 bilinear sub-pixel prediction (C implementation).
 *
 * Picks the horizontal and vertical tap pairs from vp8_bilinear_filters
 * using xoffset / yoffset (must be 0..7; not range-checked here) and runs
 * the 2-D bilinear filter with Width = 16, Height = 16, which is the
 * largest size the 17 * 16 intermediate buffer in filter_block2d_bil holds.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter declarations
 * are not visible in this view.
 */
void vp8_bilinear_predict16x16_c
477
unsigned char *src_ptr,
478
int src_pixels_per_line,
481
unsigned char *dst_ptr,
485
const short *HFilter;
486
const short *VFilter;
488
HFilter = vp8_bilinear_filters[xoffset];
489
VFilter = vp8_bilinear_filters[yoffset];
491
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);