1
;*****************************************************************************
2
;* MMX/SSE2/SSSE3-optimized H.264 QPEL code
3
;*****************************************************************************
4
;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
5
;* Copyright (C) 2012 Daniel Kang
7
;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
9
;* This file is part of Libav.
11
;* Libav is free software; you can redistribute it and/or
12
;* modify it under the terms of the GNU Lesser General Public
13
;* License as published by the Free Software Foundation; either
14
;* version 2.1 of the License, or (at your option) any later version.
16
;* Libav is distributed in the hope that it will be useful,
17
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19
;* Lesser General Public License for more details.
21
;* You should have received a copy of the GNU Lesser General Public
22
;* License along with Libav; if not, write to the Free Software
23
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
;******************************************************************************
26
%include "libavutil/x86/x86util.asm"
56
%macro QPEL4_H_LOWPASS_OP 1
57
cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
96
; Expand QPEL4_H_LOWPASS_OP once per operation. The argument replaces %1 in
; the macro's cglobal name %1_h264_qpel4_h_lowpass (put_... and avg_...).
QPEL4_H_LOWPASS_OP put
97
QPEL4_H_LOWPASS_OP avg
99
%macro QPEL8_H_LOWPASS_OP 1
100
cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
156
; Expand QPEL8_H_LOWPASS_OP once per operation. The argument replaces %1 in
; the macro's cglobal name %1_h264_qpel8_h_lowpass (put_... and avg_...).
QPEL8_H_LOWPASS_OP put
157
QPEL8_H_LOWPASS_OP avg
159
%macro QPEL8_H_LOWPASS_OP_XMM 1
160
cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
199
; Expand QPEL8_H_LOWPASS_OP_XMM for put and avg. Its cglobal uses the same
; base name (%1_h264_qpel8_h_lowpass) as QPEL8_H_LOWPASS_OP.
; NOTE(review): presumably the two are told apart by an instruction-set
; selection made outside the visible lines -- confirm.
QPEL8_H_LOWPASS_OP_XMM put
200
QPEL8_H_LOWPASS_OP_XMM avg
203
%macro QPEL4_H_LOWPASS_L2_OP 1
204
; NOTE(review): fifth argument written as src2Stride to agree with the
; signature comments of QPEL8_H_LOWPASS_L2_OP_XMM and QPEL16_H_LOWPASS_L2_OP;
; confirm against the macro body.
cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, src2Stride
246
; Expand QPEL4_H_LOWPASS_L2_OP once per operation. The argument replaces %1
; in the macro's cglobal name %1_h264_qpel4_h_lowpass_l2.
QPEL4_H_LOWPASS_L2_OP put
247
QPEL4_H_LOWPASS_L2_OP avg
250
%macro QPEL8_H_LOWPASS_L2_OP 1
251
; NOTE(review): fifth argument written as src2Stride to agree with the
; signature comments of QPEL8_H_LOWPASS_L2_OP_XMM and QPEL16_H_LOWPASS_L2_OP;
; confirm against the macro body.
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, src2Stride
310
; Expand QPEL8_H_LOWPASS_L2_OP once per operation. The argument replaces %1
; in the macro's cglobal name %1_h264_qpel8_h_lowpass_l2.
QPEL8_H_LOWPASS_L2_OP put
311
QPEL8_H_LOWPASS_L2_OP avg
314
%macro QPEL8_H_LOWPASS_L2_OP_XMM 1
315
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride
357
; Expand QPEL8_H_LOWPASS_L2_OP_XMM for put and avg. Its cglobal uses the same
; base name (%1_h264_qpel8_h_lowpass_l2) as QPEL8_H_LOWPASS_L2_OP.
; NOTE(review): presumably the two are told apart by an instruction-set
; selection made outside the visible lines -- confirm.
QPEL8_H_LOWPASS_L2_OP_XMM put
358
QPEL8_H_LOWPASS_L2_OP_XMM avg
361
; All functions that call this are required to have function arguments of
362
; dst, src, dstStride, srcStride
378
op_%1h m6, [r0], m0 ; 1
380
SWAP 0, 1, 2, 3, 4, 5
383
%macro QPEL4_V_LOWPASS_OP 1
384
cglobal %1_h264_qpel4_v_lowpass, 4,4 ; dst, src, dstStride, srcStride
411
; Expand QPEL4_V_LOWPASS_OP once per operation. The argument replaces %1 in
; the macro's cglobal name %1_h264_qpel4_v_lowpass.
QPEL4_V_LOWPASS_OP put
412
QPEL4_V_LOWPASS_OP avg
416
%macro QPEL8OR16_V_LOWPASS_OP 1
418
cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h
424
cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride, h
465
; QPEL8OR16_V_LOWPASS_OP is expanded for put and avg twice in a row.
; NOTE(review): the lines between the two pairs are not visible here;
; presumably each pair is assembled under a different instruction-set
; selection, otherwise the second pair would redefine the same symbols --
; confirm.
QPEL8OR16_V_LOWPASS_OP put
466
QPEL8OR16_V_LOWPASS_OP avg
469
QPEL8OR16_V_LOWPASS_OP put
470
QPEL8OR16_V_LOWPASS_OP avg
473
; All functions that use this are required to have args:
475
%macro FILT_HV 1 ; offset
489
SWAP 0, 1, 2, 3, 4, 5
492
%macro QPEL4_HV1_LOWPASS_OP 1
493
cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride
515
cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
542
; Expand QPEL4_HV1_LOWPASS_OP once per operation. The argument replaces %1 in
; the cglobal names %1_h264_qpel4_hv_lowpass_v and %1_h264_qpel4_hv_lowpass_h.
QPEL4_HV1_LOWPASS_OP put
543
QPEL4_HV1_LOWPASS_OP avg
545
%macro QPEL8OR16_HV1_LOWPASS_OP 1
546
cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
585
; Expand QPEL8OR16_HV1_LOWPASS_OP (cglobal %1_h264_qpel8or16_hv1_lowpass_op)
; for put and avg, then once more for put only.
; NOTE(review): the third expansion has no avg counterpart, and the lines
; separating it from the first pair are not visible -- confirm that the
; put-only instantiation is intentional.
QPEL8OR16_HV1_LOWPASS_OP put
586
QPEL8OR16_HV1_LOWPASS_OP avg
589
QPEL8OR16_HV1_LOWPASS_OP put
593
%macro QPEL8OR16_HV2_LOWPASS_OP 1
594
; unused is to match ssse3 and mmxext args
595
cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
634
; Expand QPEL8OR16_HV2_LOWPASS_OP once per operation. The argument replaces
; %1 in the macro's cglobal name %1_h264_qpel8or16_hv2_lowpass_op.
QPEL8OR16_HV2_LOWPASS_OP put
635
QPEL8OR16_HV2_LOWPASS_OP avg
637
%macro QPEL8OR16_HV2_LOWPASS_OP_XMM 1
638
cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, size
725
; Expand QPEL8OR16_HV2_LOWPASS_OP_XMM once per operation. The argument
; replaces %1 in the macro's cglobal name %1_h264_qpel8or16_hv2_lowpass
; (note: no "_op" suffix, unlike QPEL8OR16_HV2_LOWPASS_OP).
QPEL8OR16_HV2_LOWPASS_OP_XMM put
726
QPEL8OR16_HV2_LOWPASS_OP_XMM avg
729
%macro PIXELS4_L2_SHIFT5 1
730
cglobal %1_pixels4_l2_shift5,6,6 ; dst, src16, src8, dstStride, src8Stride, h
742
op_%1h m1, [r0+r3], m5
754
op_%1h m1, [r0+r3], m5
759
; Expand PIXELS4_L2_SHIFT5 once per operation. The argument replaces %1 in
; the cglobal name %1_pixels4_l2_shift5 and in the op_%1h store helper.
PIXELS4_L2_SHIFT5 put
760
PIXELS4_L2_SHIFT5 avg
763
%macro PIXELS8_L2_SHIFT5 1
764
cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
781
op_%1 m2, [r0+r3], m5
791
; Expand PIXELS8_L2_SHIFT5 once per operation. The argument replaces %1 in
; the cglobal name %1_pixels8_l2_shift5 and in the op_%1 store helper.
PIXELS8_L2_SHIFT5 put
792
PIXELS8_L2_SHIFT5 avg
796
%macro QPEL16_H_LOWPASS_L2_OP 1
797
cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2Stride
860
; Expand QPEL16_H_LOWPASS_L2_OP once per operation. The argument replaces %1
; in the macro's cglobal name %1_h264_qpel16_h_lowpass_l2.
QPEL16_H_LOWPASS_L2_OP put
861
QPEL16_H_LOWPASS_L2_OP avg