1
;******************************************************************************
2
;* MMX/SSE2-optimized functions for the VP6 decoder
3
;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
4
;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
6
;* This file is part of Libav.
8
;* Libav is free software; you can redistribute it and/or
9
;* modify it under the terms of the GNU Lesser General Public
10
;* License as published by the Free Software Foundation; either
11
;* version 2.1 of the License, or (at your option) any later version.
13
;* Libav is distributed in the hope that it will be useful,
14
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
;* Lesser General Public License for more details.
18
;* You should have received a copy of the GNU Lesser General Public
19
;* License along with Libav; if not, write to the Free Software
20
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
;******************************************************************************
23
%include "libavutil/x86/x86util.asm"
39
pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
40
pmullw m1, [rsp+8*12] ; src[x ] * biweight [1]
41
pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
42
pmullw m4, [rsp+8*12] ; src[x ] * biweight [1]
53
pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
54
pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3]
55
pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
56
pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3]
61
paddsw m0, m6 ; Add 64
62
paddsw m3, m6 ; Add 64
72
pmullw m0, m4 ; src[x-8 ] * biweight [0]
73
pmullw m1, m5 ; src[x ] * biweight [1]
79
pmullw m1, m6 ; src[x+8 ] * biweight [2]
80
pmullw m2, m3 ; src[x+16] * biweight [3]
83
paddsw m0, [pw_64] ; Add 64
87
%endif ; mmsize == 8/16
114
%endif ; mmsize == 8/16
117
%macro vp6_filter_diag4 0
118
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
119
; const int16_t h_weights[4], const int16_t v_weights[4])
120
cglobal vp6_filter_diag4, 5, 7, 8
121
mov r5, rsp ; backup stack pointer
122
and rsp, ~(mmsize-1) ; align stack
142
DIAG4 r1, -1, 0, 1, 2, r3
154
DIAG4 r3, -8, 0, 8, 16, r0
160
mov rsp, r5 ; restore stack pointer