/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11
#include "vpx_config.h"
12
#include "vp8/common/variance.h"
13
#include "vp8/common/pragmas.h"
14
#include "vpx_ports/mem.h"
15
#include "vp8/common/x86/filter_x86.h"
17
/*
 * Externally implemented filter routine (no definition in this file).
 * Reads 8-bit samples from src_ptr and writes 16-bit values to output_ptr;
 * `filter` points at the 16-bit coefficient table to apply.
 * NOTE(review): the "h6"/"mmx" naming suggests a horizontal 6-tap MMX pass --
 * confirm against the assembly implementation.
 */
extern void filter_block1d_h6_mmx(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter);
18
/*
 * Externally implemented filter routine (no definition in this file).
 * Reads 16-bit intermediate values from src_ptr and writes 8-bit samples to
 * output_ptr; `filter` points at the 16-bit coefficient table to apply.
 * NOTE(review): the "v6"/"mmx" naming suggests a vertical 6-tap MMX pass --
 * confirm against the assembly implementation.
 */
extern void filter_block1d_v6_mmx(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter);
19
/*
 * Externally implemented filter routine (no definition in this file).
 * Reads 8-bit samples from src_ptr and writes 16-bit values to output_ptr;
 * `filter` points at the 16-bit coefficient table to apply.
 * NOTE(review): the "1d8_h6"/"sse2" naming suggests an 8-wide horizontal
 * 6-tap SSE2 pass -- confirm against the assembly implementation.
 */
extern void filter_block1d8_h6_sse2(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter);
20
/*
 * Externally implemented filter routine (no definition in this file).
 * Reads 16-bit intermediate values from src_ptr and writes 8-bit samples to
 * output_ptr; `filter` points at the 16-bit coefficient table to apply.
 * NOTE(review): the "1d8_v6"/"sse2" naming suggests an 8-wide vertical
 * 6-tap SSE2 pass -- confirm against the assembly implementation.
 */
extern void filter_block1d8_v6_sse2(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter);
22
/*
 * External 4x4 kernel called by vp8_sub_pixel_variance4x4_wmt(); by its name
 * it is a bilinear-filter variance routine in its MMX variant.
 * NOTE(review): this declaration is truncated in this view -- the parentheses
 * and every parameter between src_pixels_per_line and sumsquared are missing.
 * The visible call site passes two pointer/stride pairs followed by two
 * vp8_bilinear_filters_x86_4[] entries; confirm the full parameter list
 * against the complete file.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx
24
const unsigned char *ref_ptr,
25
int ref_pixels_per_line,
26
const unsigned char *src_ptr,
27
int src_pixels_per_line,
31
unsigned int *sumsquared
34
/*
 * External 4x4 kernel used by vp8_variance4x4_wmt(), which calls it as
 * (src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) and combines
 * the two outputs into a variance.
 * NOTE(review): the stride and output parameters are not visible in this
 * truncated declaration; confirm against the complete file.
 */
extern unsigned int vp8_get4x4var_mmx
36
const unsigned char *src_ptr,
38
const unsigned char *ref_ptr,
44
/*
 * NOTE(review): only the return type and name of this declaration are visible
 * here; its parameter list is missing from this view and no call to it
 * appears in this excerpt.
 */
unsigned int vp8_get_mb_ss_sse2
48
/*
 * External 16x16 kernel used by vp8_variance16x16_wmt() and
 * vp8_mse16x16_wmt(), which call it as
 * (src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0).
 * NOTE(review): the stride and output parameters are not visible in this
 * truncated declaration; confirm against the complete file.
 */
unsigned int vp8_get16x16var_sse2
50
const unsigned char *src_ptr,
52
const unsigned char *ref_ptr,
57
/*
 * External 8x8 kernel used by the 8x8, 16x8 and 8x16 variance wrappers in
 * this file; each call passes a source pointer/stride pair, a reference
 * pointer/stride pair and two output addresses.
 * NOTE(review): the stride and output parameters are not visible in this
 * truncated declaration; confirm against the complete file.
 */
unsigned int vp8_get8x8var_sse2
59
const unsigned char *src_ptr,
61
const unsigned char *ref_ptr,
66
/*
 * External kernel used by the sub-pixel variance wrappers when none of the
 * half-pixel special cases is taken. Visible call sites pass two
 * pointer/stride pairs followed by a row count of 8 or 16.
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view; confirm the
 * full parameter list against the complete file.
 */
void vp8_filter_block2d_bil_var_sse2
68
const unsigned char *ref_ptr,
69
int ref_pixels_per_line,
70
const unsigned char *src_ptr,
71
int src_pixels_per_line,
76
unsigned int *sumsquared
78
/*
 * External kernel selected by the 8-wide sub-pixel wrappers when
 * xoffset == 4 and yoffset == 4. Call sites pass two pointer/stride pairs
 * followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_horiz_vert_variance8x_h_sse2
80
const unsigned char *ref_ptr,
81
int ref_pixels_per_line,
82
const unsigned char *src_ptr,
83
int src_pixels_per_line,
86
unsigned int *sumsquared
88
/*
 * External kernel selected by the 16-wide sub-pixel wrappers when
 * xoffset == 4 and yoffset == 4, and called directly by
 * vp8_variance_halfpixvar16x16_hv_wmt(). Call sites pass two pointer/stride
 * pairs followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_horiz_vert_variance16x_h_sse2
90
const unsigned char *ref_ptr,
91
int ref_pixels_per_line,
92
const unsigned char *src_ptr,
93
int src_pixels_per_line,
96
unsigned int *sumsquared
98
/*
 * External kernel selected by the 8-wide sub-pixel wrappers when
 * xoffset == 4 and yoffset == 0. Call sites pass two pointer/stride pairs
 * followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_horiz_variance8x_h_sse2
100
const unsigned char *ref_ptr,
101
int ref_pixels_per_line,
102
const unsigned char *src_ptr,
103
int src_pixels_per_line,
106
unsigned int *sumsquared
108
/*
 * External kernel selected by the 16-wide sub-pixel wrappers when
 * xoffset == 4 and yoffset == 0, and called directly by
 * vp8_variance_halfpixvar16x16_h_wmt(). Call sites pass two pointer/stride
 * pairs followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_horiz_variance16x_h_sse2
110
const unsigned char *ref_ptr,
111
int ref_pixels_per_line,
112
const unsigned char *src_ptr,
113
int src_pixels_per_line,
116
unsigned int *sumsquared
118
/*
 * External kernel selected by the 8-wide sub-pixel wrappers when
 * xoffset == 0 and yoffset == 4. Call sites pass two pointer/stride pairs
 * followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_vert_variance8x_h_sse2
120
const unsigned char *ref_ptr,
121
int ref_pixels_per_line,
122
const unsigned char *src_ptr,
123
int src_pixels_per_line,
126
unsigned int *sumsquared
128
/*
 * External kernel selected by the 16-wide sub-pixel wrappers when
 * xoffset == 0 and yoffset == 4, and called directly by
 * vp8_variance_halfpixvar16x16_v_wmt(). Call sites pass two pointer/stride
 * pairs followed by a row count (8 or 16).
 * NOTE(review): the parentheses and the parameters between
 * src_pixels_per_line and sumsquared are missing from this view.
 */
void vp8_half_vert_variance16x_h_sse2
130
const unsigned char *ref_ptr,
131
int ref_pixels_per_line,
132
const unsigned char *src_ptr,
133
int src_pixels_per_line,
136
unsigned int *sumsquared
139
/*
 * Variance of a 4x4 block. Delegates to vp8_get4x4var_mmx(), which fills
 * `var` and `avg`, and returns var - (avg * avg) / 16.
 * NOTE(review): the stride and sse parameters, the braces and the local
 * declarations are missing from this view; confirm against the full file.
 */
unsigned int vp8_variance4x4_wmt(
140
const unsigned char *src_ptr,
142
const unsigned char *ref_ptr,
149
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
151
/* The cast makes the multiplication unsigned (assuming avg is an int -- its
 * declaration is not visible); >> 4 divides by the 16 samples of a 4x4 block. */
return (var - (((unsigned int)avg * avg) >> 4));
155
/*
 * Variance of an 8x8 block. Delegates to vp8_get8x8var_sse2(), which fills
 * `var` and `avg`, and returns var - (avg * avg) / 64 (>> 6, one per sample
 * of an 8x8 block).
 * NOTE(review): the stride and sse parameters, the braces and the local
 * declarations are missing from this view; confirm against the full file.
 */
unsigned int vp8_variance8x8_wmt
157
const unsigned char *src_ptr,
159
const unsigned char *ref_ptr,
166
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
168
return (var - (((unsigned int)avg * avg) >> 6));
173
/*
 * Variance of a 16x16 block. Delegates to vp8_get16x16var_sse2(), which fills
 * `sse0` and `sum0`, and returns sse0 - (sum0 * sum0) / 256 (>> 8, one per
 * sample of a 16x16 block).
 * NOTE(review): the stride and sse parameters, the braces and the local
 * declarations are missing from this view; confirm against the full file.
 */
unsigned int vp8_variance16x16_wmt
175
const unsigned char *src_ptr,
177
const unsigned char *ref_ptr,
185
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
187
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
189
/*
 * 16x16 wrapper around vp8_get16x16var_sse2(), which fills `sse0` and `sum0`.
 * NOTE(review): the remaining parameters, the local declarations and the
 * return statement are missing from this view, so what is returned cannot be
 * confirmed here (presumably the squared-error output, given the "mse" name).
 */
unsigned int vp8_mse16x16_wmt(
190
const unsigned char *src_ptr,
192
const unsigned char *ref_ptr,
199
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
206
/*
 * Variance of a 16-wide, 8-high block, built from two 8x8 kernel calls: one
 * on the left half and one on the half starting 8 samples to the right.
 * Returns var - (avg * avg) / 128 (>> 7, one per sample of a 16x8 block).
 * NOTE(review): the lines that combine sse0/sse1 and sum0/sum1 into var and
 * avg are missing from this view, as are the stride/sse parameters.
 */
unsigned int vp8_variance16x8_wmt
208
const unsigned char *src_ptr,
210
const unsigned char *ref_ptr,
214
unsigned int sse0, sse1, var;
217
/* Left 8 columns. */
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
218
/* Right 8 columns. */
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
223
return (var - (((unsigned int)avg * avg) >> 7));
227
/*
 * Variance of an 8-wide, 16-high block, built from two 8x8 kernel calls: one
 * on the top half and one on the half starting 8 rows further down.
 * Returns var - (avg * avg) / 128 (>> 7, one per sample of an 8x16 block).
 * NOTE(review): the lines that combine sse0/sse1 and sum0/sum1 into var and
 * avg are missing from this view, as are the stride/sse parameters.
 */
unsigned int vp8_variance8x16_wmt
229
const unsigned char *src_ptr,
231
const unsigned char *ref_ptr,
235
unsigned int sse0, sse1, var;
238
/* Top 8 rows. */
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
239
/* Bottom 8 rows: advance both pointers by 8 rows of their own stride. */
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
244
return (var - (((unsigned int)avg * avg) >> 7));
248
/*
 * Sub-pixel variance of a 4x4 block. Passes the vp8_bilinear_filters_x86_4
 * entries selected by xoffset and yoffset to
 * vp8_filter_block2d_bil4x4_var_mmx(), then returns
 * xxsum - (xsum * xsum) / 16.
 * NOTE(review): the xoffset/yoffset/sse parameters, the local declarations
 * and the trailing call arguments are missing from this view.
 */
unsigned int vp8_sub_pixel_variance4x4_wmt
250
const unsigned char *src_ptr,
251
int src_pixels_per_line,
254
const unsigned char *dst_ptr,
255
int dst_pixels_per_line,
261
vp8_filter_block2d_bil4x4_var_mmx(
262
src_ptr, src_pixels_per_line,
263
dst_ptr, dst_pixels_per_line,
264
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
268
return (xxsum - (((unsigned int)xsum * xsum) >> 4));
272
/*
 * Sub-pixel variance of an 8x8 block. Dispatches on (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 * and otherwise (presumably -- the `else` line is not visible) calls
 * vp8_filter_block2d_bil_var_sse2. Every call passes a row count of 8.
 * Returns xxsum - (xsum * xsum) / 64.
 * NOTE(review): the xoffset/yoffset/sse parameters, braces, local
 * declarations and trailing call arguments are missing from this view.
 */
unsigned int vp8_sub_pixel_variance8x8_wmt
274
const unsigned char *src_ptr,
275
int src_pixels_per_line,
278
const unsigned char *dst_ptr,
279
int dst_pixels_per_line,
286
if (xoffset == 4 && yoffset == 0)
288
vp8_half_horiz_variance8x_h_sse2(
289
src_ptr, src_pixels_per_line,
290
dst_ptr, dst_pixels_per_line, 8,
293
else if (xoffset == 0 && yoffset == 4)
295
vp8_half_vert_variance8x_h_sse2(
296
src_ptr, src_pixels_per_line,
297
dst_ptr, dst_pixels_per_line, 8,
300
else if (xoffset == 4 && yoffset == 4)
302
vp8_half_horiz_vert_variance8x_h_sse2(
303
src_ptr, src_pixels_per_line,
304
dst_ptr, dst_pixels_per_line, 8,
309
vp8_filter_block2d_bil_var_sse2(
310
src_ptr, src_pixels_per_line,
311
dst_ptr, dst_pixels_per_line, 8,
317
return (xxsum - (((unsigned int)xsum * xsum) >> 6));
320
/*
 * Sub-pixel variance of a 16x16 block. Dispatches on (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 * and otherwise (presumably -- the `else` line is not visible) calls
 * vp8_filter_block2d_bil_var_sse2 twice, once at src_ptr/dst_ptr and once
 * 8 samples to the right. Every call passes a row count of 16.
 * Returns xxsum0 - (xsum0 * xsum0) / 256.
 * NOTE(review): the xoffset/yoffset/sse parameters, braces, most local
 * declarations, the trailing call arguments and the code merging the two
 * fallback results are missing from this view. The original comment inside
 * the body has also lost its closing delimiter in this excerpt.
 */
unsigned int vp8_sub_pixel_variance16x16_wmt
322
const unsigned char *src_ptr,
323
int src_pixels_per_line,
326
const unsigned char *dst_ptr,
327
int dst_pixels_per_line,
332
unsigned int xxsum0, xxsum1;
335
/* note we could avoid these if statements if the calling function
336
* just called the appropriate functions inside.
338
if (xoffset == 4 && yoffset == 0)
340
vp8_half_horiz_variance16x_h_sse2(
341
src_ptr, src_pixels_per_line,
342
dst_ptr, dst_pixels_per_line, 16,
345
else if (xoffset == 0 && yoffset == 4)
347
vp8_half_vert_variance16x_h_sse2(
348
src_ptr, src_pixels_per_line,
349
dst_ptr, dst_pixels_per_line, 16,
352
else if (xoffset == 4 && yoffset == 4)
354
vp8_half_horiz_vert_variance16x_h_sse2(
355
src_ptr, src_pixels_per_line,
356
dst_ptr, dst_pixels_per_line, 16,
361
vp8_filter_block2d_bil_var_sse2(
362
src_ptr, src_pixels_per_line,
363
dst_ptr, dst_pixels_per_line, 16,
368
vp8_filter_block2d_bil_var_sse2(
369
src_ptr + 8, src_pixels_per_line,
370
dst_ptr + 8, dst_pixels_per_line, 16,
379
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
382
/*
 * 16x16 sub-pixel wrapper that forwards all of its arguments to
 * vp8_sub_pixel_variance16x16_wmt(), including the caller's `sse` pointer.
 * NOTE(review): the xoffset/yoffset/sse parameters and the return statement
 * are missing from this view, so what is returned cannot be confirmed here
 * (presumably the value written through `sse`, given the "mse" name).
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(
383
const unsigned char *src_ptr,
384
int src_pixels_per_line,
387
const unsigned char *dst_ptr,
388
int dst_pixels_per_line,
392
vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
396
/*
 * Sub-pixel variance of a 16-wide, 8-high block. Dispatches on
 * (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 * and otherwise (presumably -- the `else` line is not visible) calls
 * vp8_filter_block2d_bil_var_sse2 twice, once at src_ptr/dst_ptr and once
 * 8 samples to the right. Every call passes a row count of 8.
 * Returns xxsum0 - (xsum0 * xsum0) / 128.
 * NOTE(review): the xoffset/yoffset/sse parameters, braces, most local
 * declarations, the trailing call arguments and the code merging the two
 * fallback results are missing from this view.
 */
unsigned int vp8_sub_pixel_variance16x8_wmt
398
const unsigned char *src_ptr,
399
int src_pixels_per_line,
402
const unsigned char *dst_ptr,
403
int dst_pixels_per_line,
409
unsigned int xxsum0, xxsum1;
411
if (xoffset == 4 && yoffset == 0)
413
vp8_half_horiz_variance16x_h_sse2(
414
src_ptr, src_pixels_per_line,
415
dst_ptr, dst_pixels_per_line, 8,
418
else if (xoffset == 0 && yoffset == 4)
420
vp8_half_vert_variance16x_h_sse2(
421
src_ptr, src_pixels_per_line,
422
dst_ptr, dst_pixels_per_line, 8,
425
else if (xoffset == 4 && yoffset == 4)
427
vp8_half_horiz_vert_variance16x_h_sse2(
428
src_ptr, src_pixels_per_line,
429
dst_ptr, dst_pixels_per_line, 8,
434
vp8_filter_block2d_bil_var_sse2(
435
src_ptr, src_pixels_per_line,
436
dst_ptr, dst_pixels_per_line, 8,
440
vp8_filter_block2d_bil_var_sse2(
441
src_ptr + 8, src_pixels_per_line,
442
dst_ptr + 8, dst_pixels_per_line, 8,
450
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
453
/*
 * Sub-pixel variance of an 8-wide, 16-high block. Dispatches on
 * (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 * and otherwise (presumably -- the `else` line is not visible) calls
 * vp8_filter_block2d_bil_var_sse2. Every call passes a row count of 16.
 * Returns xxsum - (xsum * xsum) / 128.
 * NOTE(review): the xoffset/yoffset/sse parameters, braces, local
 * declarations and trailing call arguments are missing from this view.
 */
unsigned int vp8_sub_pixel_variance8x16_wmt
455
const unsigned char *src_ptr,
456
int src_pixels_per_line,
459
const unsigned char *dst_ptr,
460
int dst_pixels_per_line,
467
if (xoffset == 4 && yoffset == 0)
469
vp8_half_horiz_variance8x_h_sse2(
470
src_ptr, src_pixels_per_line,
471
dst_ptr, dst_pixels_per_line, 16,
474
else if (xoffset == 0 && yoffset == 4)
476
vp8_half_vert_variance8x_h_sse2(
477
src_ptr, src_pixels_per_line,
478
dst_ptr, dst_pixels_per_line, 16,
481
else if (xoffset == 4 && yoffset == 4)
483
vp8_half_horiz_vert_variance8x_h_sse2(
484
src_ptr, src_pixels_per_line,
485
dst_ptr, dst_pixels_per_line, 16,
490
vp8_filter_block2d_bil_var_sse2(
491
src_ptr, src_pixels_per_line,
492
dst_ptr, dst_pixels_per_line, 16,
498
return (xxsum - (((unsigned int)xsum * xsum) >> 7));
502
/*
 * 16x16 variance via vp8_half_horiz_variance16x_h_sse2() with a row count of
 * 16, with no offset dispatch. Returns xxsum0 - (xsum0 * xsum0) / 256.
 * NOTE(review): the sse parameter, braces, local declarations and the
 * trailing call arguments are missing from this view.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
503
const unsigned char *src_ptr,
504
int src_pixels_per_line,
505
const unsigned char *dst_ptr,
506
int dst_pixels_per_line,
512
vp8_half_horiz_variance16x_h_sse2(
513
src_ptr, src_pixels_per_line,
514
dst_ptr, dst_pixels_per_line, 16,
518
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
522
/*
 * 16x16 variance via vp8_half_vert_variance16x_h_sse2() with a row count of
 * 16, with no offset dispatch. Returns xxsum0 - (xsum0 * xsum0) / 256.
 * NOTE(review): the sse parameter, braces, local declarations and the
 * trailing call arguments are missing from this view.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
523
const unsigned char *src_ptr,
524
int src_pixels_per_line,
525
const unsigned char *dst_ptr,
526
int dst_pixels_per_line,
531
vp8_half_vert_variance16x_h_sse2(
532
src_ptr, src_pixels_per_line,
533
dst_ptr, dst_pixels_per_line, 16,
537
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
541
/*
 * 16x16 variance via vp8_half_horiz_vert_variance16x_h_sse2() with a row
 * count of 16, with no offset dispatch. Returns
 * xxsum0 - (xsum0 * xsum0) / 256.
 * NOTE(review): the sse parameter, braces, local declarations and the
 * trailing call arguments are missing from this view.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
542
const unsigned char *src_ptr,
543
int src_pixels_per_line,
544
const unsigned char *dst_ptr,
545
int dst_pixels_per_line,
551
vp8_half_horiz_vert_variance16x_h_sse2(
552
src_ptr, src_pixels_per_line,
553
dst_ptr, dst_pixels_per_line, 16,
557
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));