~ubuntu-branches/ubuntu/jaunty/xvidcap/jaunty-proposed

« back to all changes in this revision

Viewing changes to ffmpeg/libavcodec/dsputil.c

Committer: Bazaar Package Importer
Author(s): Christian Marillat
Date: 2004-08-29 10:53:42 UTC
Revision ID: james.westby@ubuntu.com-20040829105342-qgmnry37eadfkoxx

Tags: upstream-1.1.3

Import upstream version 1.1.3

files added:

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

TODO

Xw/Base.c

Xw/Base.h

Xw/BaseP.h

Xw/Box.c

Xw/Box.h

Xw/BoxP.h

Xw/Button.c

Xw/Button.h

Xw/ButtonP.h

Xw/Field.c

Xw/Field.h

Xw/FieldP.h

Xw/Label.c

Xw/Label.h

Xw/LabelP.h

Xw/Makefile.am

Xw/Makefile.in

Xw/README

Xw/RootIcon.c

Xw/RootIcon.h

Xw/RootIconP.h

Xw/Toggle.c

Xw/Toggle.h

Xw/ToggleP.h

Xw/simple.c

Xw/testxw.c

Xw/testxw.xbm

aclocal.m4

compile

config.h.in

configure

configure.ac

depcomp

ffmpeg

ffmpeg/.nbattrs

ffmpeg/COPYING

ffmpeg/CREDITS

ffmpeg/Changelog

ffmpeg/INSTALL

ffmpeg/Makefile

ffmpeg/README

ffmpeg/berrno.h

ffmpeg/cmdutils.c

ffmpeg/cmdutils.h

ffmpeg/configure

ffmpeg/cygwin_inttypes.h

ffmpeg/doc

ffmpeg/doc/.nbattrs

ffmpeg/doc/Makefile

ffmpeg/doc/TODO

ffmpeg/doc/faq.html

ffmpeg/doc/faq.texi

ffmpeg/doc/ffmpeg-doc.html

ffmpeg/doc/ffmpeg-doc.texi

ffmpeg/doc/ffmpeg.1

ffmpeg/doc/ffmpeg_powerpc_performance_evaluation_howto.txt

ffmpeg/doc/ffplay-doc.html

ffmpeg/doc/ffplay-doc.texi

ffmpeg/doc/ffplay.1

ffmpeg/doc/ffserver-doc.html

ffmpeg/doc/ffserver-doc.texi

ffmpeg/doc/ffserver.1

ffmpeg/doc/ffserver.conf

ffmpeg/doc/hooks.html

ffmpeg/doc/hooks.texi

ffmpeg/doc/optimization.txt

ffmpeg/doc/texi2pod.pl

ffmpeg/ffinstall.nsi

ffmpeg/ffmpeg.c

ffmpeg/ffplay.c

ffmpeg/ffserver.c

ffmpeg/ffserver.h

ffmpeg/libavcodec

ffmpeg/libavcodec/.nbattrs

ffmpeg/libavcodec/4xm.c

ffmpeg/libavcodec/Doxyfile

ffmpeg/libavcodec/Makefile

ffmpeg/libavcodec/a52dec.c

ffmpeg/libavcodec/ac3.h

ffmpeg/libavcodec/ac3dec.c

ffmpeg/libavcodec/ac3enc.c

ffmpeg/libavcodec/ac3tab.h

ffmpeg/libavcodec/adpcm.c

ffmpeg/libavcodec/allcodecs.c

ffmpeg/libavcodec/alpha

ffmpeg/libavcodec/alpha/asm.h

ffmpeg/libavcodec/alpha/dsputil_alpha.c

ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S

ffmpeg/libavcodec/alpha/motion_est_alpha.c

ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S

ffmpeg/libavcodec/alpha/mpegvideo_alpha.c

ffmpeg/libavcodec/alpha/regdef.h

ffmpeg/libavcodec/alpha/simple_idct_alpha.c

ffmpeg/libavcodec/amr.c

ffmpeg/libavcodec/apiexample.c

ffmpeg/libavcodec/armv4l

ffmpeg/libavcodec/armv4l/dsputil_arm.c

ffmpeg/libavcodec/armv4l/jrevdct_arm.S

ffmpeg/libavcodec/armv4l/mpegvideo_arm.c

ffmpeg/libavcodec/armv4l/simple_idct_arm.S

ffmpeg/libavcodec/asv1.c

ffmpeg/libavcodec/avcodec.c

ffmpeg/libavcodec/avcodec.h

ffmpeg/libavcodec/bswap.h

ffmpeg/libavcodec/cabac.c

ffmpeg/libavcodec/cabac.h

ffmpeg/libavcodec/cljr.c

ffmpeg/libavcodec/common.c

ffmpeg/libavcodec/common.h

ffmpeg/libavcodec/cyuv.c

ffmpeg/libavcodec/dct-test.c

ffmpeg/libavcodec/dpcm.c

ffmpeg/libavcodec/dsputil.c

ffmpeg/libavcodec/dsputil.h

ffmpeg/libavcodec/dv.c

ffmpeg/libavcodec/dvdata.h

ffmpeg/libavcodec/error_resilience.c

ffmpeg/libavcodec/eval.c

ffmpeg/libavcodec/faad.c

ffmpeg/libavcodec/fastmemcpy.h

ffmpeg/libavcodec/fdctref.c

ffmpeg/libavcodec/fft-test.c

ffmpeg/libavcodec/fft.c

ffmpeg/libavcodec/ffv1.c

ffmpeg/libavcodec/golomb.c

ffmpeg/libavcodec/golomb.h

ffmpeg/libavcodec/h263.c

ffmpeg/libavcodec/h263data.h

ffmpeg/libavcodec/h263dec.c

ffmpeg/libavcodec/h264.c

ffmpeg/libavcodec/h264data.h

ffmpeg/libavcodec/huffyuv.c

ffmpeg/libavcodec/i386

ffmpeg/libavcodec/i386/cputest.c

ffmpeg/libavcodec/i386/dsputil_mmx.c

ffmpeg/libavcodec/i386/dsputil_mmx_avg.h

ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h

ffmpeg/libavcodec/i386/fdct_mmx.c

ffmpeg/libavcodec/i386/fft_sse.c

ffmpeg/libavcodec/i386/idct_mmx.c

ffmpeg/libavcodec/i386/mmx.h

ffmpeg/libavcodec/i386/motion_est_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c

ffmpeg/libavcodec/i386/simple_idct_mmx.c

ffmpeg/libavcodec/imgconvert.c

ffmpeg/libavcodec/imgconvert_template.h

ffmpeg/libavcodec/imgresample.c

ffmpeg/libavcodec/indeo3.c

ffmpeg/libavcodec/indeo3data.h

ffmpeg/libavcodec/interplayvideo.c

ffmpeg/libavcodec/jfdctfst.c

ffmpeg/libavcodec/jfdctint.c

ffmpeg/libavcodec/jrevdct.c

ffmpeg/libavcodec/liba52

ffmpeg/libavcodec/liba52/a52.h

ffmpeg/libavcodec/liba52/a52_internal.h

ffmpeg/libavcodec/liba52/a52_util.h

ffmpeg/libavcodec/liba52/bit_allocate.c

ffmpeg/libavcodec/liba52/bitstream.c

ffmpeg/libavcodec/liba52/bitstream.h

ffmpeg/libavcodec/liba52/crc.c

ffmpeg/libavcodec/liba52/downmix.c

ffmpeg/libavcodec/liba52/imdct.c

ffmpeg/libavcodec/liba52/mm_accel.h

ffmpeg/libavcodec/liba52/parse.c

ffmpeg/libavcodec/liba52/resample.c

ffmpeg/libavcodec/liba52/resample_c.c

ffmpeg/libavcodec/liba52/resample_mmx.c

ffmpeg/libavcodec/liba52/tables.h

ffmpeg/libavcodec/libpostproc

ffmpeg/libavcodec/libpostproc/.nbattrs

ffmpeg/libavcodec/libpostproc/Makefile

ffmpeg/libavcodec/libpostproc/mangle.h

ffmpeg/libavcodec/libpostproc/postprocess.c

ffmpeg/libavcodec/libpostproc/postprocess.h

ffmpeg/libavcodec/libpostproc/postprocess_internal.h

ffmpeg/libavcodec/libpostproc/postprocess_template.c

ffmpeg/libavcodec/mace.c

ffmpeg/libavcodec/mdct.c

ffmpeg/libavcodec/mdec.c

ffmpeg/libavcodec/mem.c

ffmpeg/libavcodec/mjpeg.c

ffmpeg/libavcodec/mlib

ffmpeg/libavcodec/mlib/dsputil_mlib.c

ffmpeg/libavcodec/motion_est.c

ffmpeg/libavcodec/motion_est_template.c

ffmpeg/libavcodec/motion_test.c

ffmpeg/libavcodec/mp3lameaudio.c

ffmpeg/libavcodec/mpeg12.c

ffmpeg/libavcodec/mpeg12data.h

ffmpeg/libavcodec/mpeg4data.h

ffmpeg/libavcodec/mpegaudio.c

ffmpeg/libavcodec/mpegaudio.h

ffmpeg/libavcodec/mpegaudiodec.c

ffmpeg/libavcodec/mpegaudiodectab.h

ffmpeg/libavcodec/mpegaudiotab.h

ffmpeg/libavcodec/mpegvideo.c

ffmpeg/libavcodec/mpegvideo.h

ffmpeg/libavcodec/msmpeg4.c

ffmpeg/libavcodec/msmpeg4data.h

ffmpeg/libavcodec/oggvorbis.c

ffmpeg/libavcodec/oggvorbis.h

ffmpeg/libavcodec/opts.c

ffmpeg/libavcodec/pcm.c

ffmpeg/libavcodec/ppc

ffmpeg/libavcodec/ppc/dsputil_altivec.c

ffmpeg/libavcodec/ppc/dsputil_altivec.h

ffmpeg/libavcodec/ppc/dsputil_ppc.c

ffmpeg/libavcodec/ppc/dsputil_ppc.h

ffmpeg/libavcodec/ppc/fft_altivec.c

ffmpeg/libavcodec/ppc/gcc_fixes.h

ffmpeg/libavcodec/ppc/gmc_altivec.c

ffmpeg/libavcodec/ppc/idct_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_ppc.c

ffmpeg/libavcodec/ps2

ffmpeg/libavcodec/ps2/dsputil_mmi.c

ffmpeg/libavcodec/ps2/idct_mmi.c

ffmpeg/libavcodec/ps2/mmi.h

ffmpeg/libavcodec/ps2/mpegvideo_mmi.c

ffmpeg/libavcodec/ra144.c

ffmpeg/libavcodec/ra144.h

ffmpeg/libavcodec/ra288.c

ffmpeg/libavcodec/ra288.h

ffmpeg/libavcodec/ratecontrol.c

ffmpeg/libavcodec/raw.c

ffmpeg/libavcodec/resample.c

ffmpeg/libavcodec/roqvideo.c

ffmpeg/libavcodec/rv10.c

ffmpeg/libavcodec/sh4

ffmpeg/libavcodec/sh4/dsputil_align.c

ffmpeg/libavcodec/sh4/dsputil_sh4.c

ffmpeg/libavcodec/sh4/idct_sh4.c

ffmpeg/libavcodec/sh4/qpel.c

ffmpeg/libavcodec/simple_idct.c

ffmpeg/libavcodec/simple_idct.h

ffmpeg/libavcodec/svq1.c

ffmpeg/libavcodec/svq1_cb.h

ffmpeg/libavcodec/svq1_vlc.h

ffmpeg/libavcodec/svq3.c

ffmpeg/libavcodec/utils.c

ffmpeg/libavcodec/vcr1.c

ffmpeg/libavcodec/vp3.c

ffmpeg/libavcodec/vp3data.h

ffmpeg/libavcodec/wmadata.h

ffmpeg/libavcodec/wmadec.c

ffmpeg/libavcodec/wmv2.c

ffmpeg/libavcodec/xan.c

ffmpeg/libavcodec/xvmcvideo.c

ffmpeg/libavformat

ffmpeg/libavformat/.nbattrs

ffmpeg/libavformat/4xm.c

ffmpeg/libavformat/Makefile

ffmpeg/libavformat/allformats.c

ffmpeg/libavformat/amr.c

ffmpeg/libavformat/asf.c

ffmpeg/libavformat/au.c

ffmpeg/libavformat/audio.c

ffmpeg/libavformat/avformat.h

ffmpeg/libavformat/avi.h

ffmpeg/libavformat/avidec.c

ffmpeg/libavformat/avienc.c

ffmpeg/libavformat/avio.c

ffmpeg/libavformat/avio.h

ffmpeg/libavformat/aviobuf.c

ffmpeg/libavformat/barpainet.c

ffmpeg/libavformat/barpainet.h

ffmpeg/libavformat/beosaudio.cpp

ffmpeg/libavformat/crc.c

ffmpeg/libavformat/cutils.c

ffmpeg/libavformat/dv.c

ffmpeg/libavformat/dv1394.c

ffmpeg/libavformat/dv1394.h

ffmpeg/libavformat/dvcore.c

ffmpeg/libavformat/dvcore.h

ffmpeg/libavformat/ffm.c

ffmpeg/libavformat/file.c

ffmpeg/libavformat/flvdec.c

ffmpeg/libavformat/flvenc.c

ffmpeg/libavformat/framehook.c

ffmpeg/libavformat/framehook.h

ffmpeg/libavformat/gif.c

ffmpeg/libavformat/gifdec.c

ffmpeg/libavformat/grab.c

ffmpeg/libavformat/http.c

ffmpeg/libavformat/idroq.c

ffmpeg/libavformat/img.c

ffmpeg/libavformat/ipmovie.c

ffmpeg/libavformat/jpeg.c

ffmpeg/libavformat/mov.c

ffmpeg/libavformat/movenc.c

ffmpeg/libavformat/mp3.c

ffmpeg/libavformat/mpeg.c

ffmpeg/libavformat/mpegts.c

ffmpeg/libavformat/mpegts.h

ffmpeg/libavformat/mpegtsenc.c

ffmpeg/libavformat/mpjpeg.c

ffmpeg/libavformat/nut.c

ffmpeg/libavformat/ogg.c

ffmpeg/libavformat/os_support.c

ffmpeg/libavformat/os_support.h

ffmpeg/libavformat/png.c

ffmpeg/libavformat/pnm.c

ffmpeg/libavformat/psxstr.c

ffmpeg/libavformat/raw.c

ffmpeg/libavformat/rm.c

ffmpeg/libavformat/rtp.c

ffmpeg/libavformat/rtp.h

ffmpeg/libavformat/rtpproto.c

ffmpeg/libavformat/rtsp.c

ffmpeg/libavformat/rtsp.h

ffmpeg/libavformat/rtspcodes.h

ffmpeg/libavformat/swf.c

ffmpeg/libavformat/tcp.c

ffmpeg/libavformat/udp.c

ffmpeg/libavformat/utils.c

ffmpeg/libavformat/wav.c

ffmpeg/libavformat/wc3movie.c

ffmpeg/libavformat/yuv.c

ffmpeg/libavformat/yuv4mpeg.c

ffmpeg/output_example.c

ffmpeg/tests

ffmpeg/tests/.nbattrs

ffmpeg/tests/Makefile

ffmpeg/tests/audiogen.c

ffmpeg/tests/dsptest.c

ffmpeg/tests/ffmpeg.regression.ref

ffmpeg/tests/ffserver.regression.ref

ffmpeg/tests/lena.pnm

ffmpeg/tests/libav.regression.ref

ffmpeg/tests/regression.sh

ffmpeg/tests/rotozoom.c

ffmpeg/tests/rotozoom.regression.ref

ffmpeg/tests/server-regression.sh

ffmpeg/tests/test.conf

ffmpeg/tests/tiny_psnr.c

ffmpeg/tests/videogen.c

ffmpeg/vhook

ffmpeg/vhook/.nbattrs

ffmpeg/vhook/Makefile

ffmpeg/vhook/drawtext.c

ffmpeg/vhook/fish.c

ffmpeg/vhook/imlib2.c

ffmpeg/vhook/null.c

ffmpeg/vhook/ppm.c

ffmpeg/xvmc_render.h

getopt

getopt/README

getopt/getopt.c

getopt/getopt.h

getopt/getopt1.c

gt/COPYING.LIB

gt/Makefile.am

gt/Makefile.in

gt/README

gt/gnuavi.c

gt/gnuavi.h

gt/gnuplay.c

gt/gt.c

gt/gt.h

gt/gtapi.c

gt/gtapi.h

gt/gtatoms.h

gt/gttypes.h

gt/gtvr.c

gt/gtvr.h

gt/parse_gt.c

gt/ppm2qt.c

gt/ppm2qtvr.c

gt/sound.c

gt/sound.h

gt/video.c

gt/video.h

gt/xt.c

gt/xt.h

install-sh

man/gvidcap.man

man/index.bt

man/xvidcap.man

missing

mkinstalldirs

src/Makefile.am

src/Makefile.in

src/app_data.h

src/capture.c

src/capture.h

src/codecs.h

src/colors.c

src/colors.h

src/control.h

src/fallback.h

src/frame.c

src/frame.h

src/gtk2_control.c

src/gtk2_control.h

src/gtk2_frame.c

src/gtk2_frame.h

src/gtk2_options.c

src/gtk2_options.h

src/gtk2_support.c

src/gtk2_support.h

src/job.c

src/job.h

src/led_meter.c

src/led_meter.h

src/main.c

src/main.h

src/malloc.c

src/mkresfile.sh

src/mngutil.c

src/mngutil.h

src/options.c

src/realloc.c

src/util.c

src/util.h

src/video.c

src/video.h

src/xt_control.c

src/xt_control.h

src/xt_frame.c

src/xt_frame.h

src/xt_options.c

src/xt_options.h

src/xtoffmpeg.c

src/xtoffmpeg.h

src/xtojpg.c

src/xtojpg.h

src/xtomng.c

src/xtomng.h

src/xtopng.c

src/xtopng.h

src/xtopnm.c

src/xtopnm.h

src/xtoqtf.c

src/xtoqtf.h

src/xtoxwd.c

src/xtoxwd.h

src/xutil.c

src/xutil.h

xbm/animate.png

xbm/animate.xbm

xbm/edit.png

xbm/edit.xbm

xbm/help.xbm

xbm/mkvideo.xbm

xbm/move.png

xbm/move.xbm

xbm/next.png

xbm/next.xbm

xbm/pause.png

xbm/pause.xbm

xbm/prev.png

xbm/prev.xbm

xbm/record.png

xbm/record.xbm

xbm/record_off.png

xbm/record_on.png

xbm/select.png

xbm/select.xbm

xbm/step.png

xbm/step.xbm

xbm/stop.png

xbm/stop.xbm

Show diffs side-by-side

added added

removed removed

ffmpeg/libavcodec/dsputil.c

/*
 * DSP utils
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 */

/**
 * @file dsputil.c
 * DSP utils
 */

#include "avcodec.h"

#include "dsputil.h"

#include "mpegvideo.h"

#include "simple_idct.h"

/* Lookup table that the *_pixels_clamped_c routines in this file address as
 * cropTbl + MAX_NEG_CROP and index with a (possibly out-of-range) sample
 * value. Presumably it clamps that value to 0..255; the table is filled in
 * outside this excerpt -- TODO confirm. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];

/* Lookup table that pix_norm1_c and the sse*_c routines address as
 * squareTbl + 256 and index with a sample or a sample difference.
 * Presumably entry 256 + d holds d*d; the table is filled in outside this
 * excerpt -- TODO confirm. */
uint32_t squareTbl[512];

/* Zigzag scan order for an 8x8 block: entry k is the raster index
 * (row * 8 + column) of the k-th coefficient visited by the scan. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* Non-permuted inverse of ff_zigzag_direct, plus 1, for the MMX quantizer.
 * The table is filled in outside this excerpt. */

uint16_t __align8 inv_zigzag_direct16[64];

/* Alternate (horizontal) scan order for an 8x8 block: entry k is the raster
 * index (row * 8 + column) of the k-th coefficient visited by the scan. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate (vertical) scan order for an 8x8 block: entry k is the raster
 * index (row * 8 + column) of the k-th coefficient visited by the scan. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/*
 * Reciprocal table for division by multiplication:
 *   a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 * The product does not fit in 32 bits (e.g. 65536 * inverse[2]), so it has
 * to be formed in a 64-bit type before the shift. Entries 0 and 1 lie
 * outside the range covered by the guarantee above.
 */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};

107

108

/* Input permutation for the simple_idct_mmx: a 64-entry table whose values
 * are the coefficient indices 0x00..0x3F, each appearing exactly once. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

119

120

/**
 * Sum every sample of a 16x16 block.
 *
 * @param pix       pointer to the top-left sample of the block
 * @param line_size offset, in bytes, from the start of one row to the next
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int row, col;
    int s = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            s += pix[col];
        pix += line_size;   /* step to the first sample of the next row */
    }
    return s;
}

141

142

static int pix_norm1_c(uint8_t * pix, int line_size)

143

{

144

int s, i, j;

145

uint32_t *sq = squareTbl + 256;

146

147

s = 0;

148

for (i = 0; i < 16; i++) {

149

for (j = 0; j < 16; j += 8) {

150

#if 0

151

s += sq[pix[0]];

152

s += sq[pix[1]];

153

s += sq[pix[2]];

154

s += sq[pix[3]];

155

s += sq[pix[4]];

156

s += sq[pix[5]];

157

s += sq[pix[6]];

158

s += sq[pix[7]];

159

#else

160

#if LONG_MAX > 2147483647

161

162

s += sq[x&0xff];

163

s += sq[(x>>8)&0xff];

164

s += sq[(x>>16)&0xff];

165

s += sq[(x>>24)&0xff];

166

s += sq[(x>>32)&0xff];

167

s += sq[(x>>40)&0xff];

168

s += sq[(x>>48)&0xff];

169

s += sq[(x>>56)&0xff];

170

#else

171

172

s += sq[x&0xff];

173

s += sq[(x>>8)&0xff];

174

s += sq[(x>>16)&0xff];

175

s += sq[(x>>24)&0xff];

176

x=*(uint32_t*)(pix+4);

177

s += sq[x&0xff];

178

s += sq[(x>>8)&0xff];

179

s += sq[(x>>16)&0xff];

180

s += sq[(x>>24)&0xff];

181

#endif

182

#endif

183

pix += 8;

184

}

185

pix += line_size - 16;

186

}

187

return s;

188

}

189

190

/**
 * Apply bswap_32() to each of the first w 32-bit words of src and store the
 * results in dst, in increasing index order.
 *
 * @param dst destination array, at least w words
 * @param src source array, at least w words
 * @param w   number of 32-bit words to process; nothing is written if w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int i;

    for(i=0; i<w; i++)
        dst[i]= bswap_32(src[i]);
}

207

208

static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)

209

{

210

int s, i;

211

uint32_t *sq = squareTbl + 256;

212

213

s = 0;

214

for (i = 0; i < 8; i++) {

215

s += sq[pix1[0] - pix2[0]];

216

s += sq[pix1[1] - pix2[1]];

217

s += sq[pix1[2] - pix2[2]];

218

s += sq[pix1[3] - pix2[3]];

219

s += sq[pix1[4] - pix2[4]];

220

s += sq[pix1[5] - pix2[5]];

221

s += sq[pix1[6] - pix2[6]];

222

s += sq[pix1[7] - pix2[7]];

223

pix1 += line_size;

224

pix2 += line_size;

225

}

226

return s;

227

}

228

229

static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)

230

{

231

int s, i;

232

uint32_t *sq = squareTbl + 256;

233

234

s = 0;

235

for (i = 0; i < 16; i++) {

236

s += sq[pix1[ 0] - pix2[ 0]];

237

s += sq[pix1[ 1] - pix2[ 1]];

238

s += sq[pix1[ 2] - pix2[ 2]];

239

s += sq[pix1[ 3] - pix2[ 3]];

240

s += sq[pix1[ 4] - pix2[ 4]];

241

s += sq[pix1[ 5] - pix2[ 5]];

242

s += sq[pix1[ 6] - pix2[ 6]];

243

s += sq[pix1[ 7] - pix2[ 7]];

244

s += sq[pix1[ 8] - pix2[ 8]];

245

s += sq[pix1[ 9] - pix2[ 9]];

246

s += sq[pix1[10] - pix2[10]];

247

s += sq[pix1[11] - pix2[11]];

248

s += sq[pix1[12] - pix2[12]];

249

s += sq[pix1[13] - pix2[13]];

250

s += sq[pix1[14] - pix2[14]];

251

s += sq[pix1[15] - pix2[15]];

252

253

pix1 += line_size;

254

pix2 += line_size;

255

}

256

return s;

257

}

258

259

/**
 * Copy an 8x8 block of 8-bit samples into a contiguous coefficient block.
 *
 * @param block     destination, 64 DCTELEM values stored row after row
 * @param pixels    top-left sample of the source block
 * @param line_size source row stride in bytes
 */
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;   /* next source row */
        block  += 8;           /* destination rows are packed, 8 per row */
    }
}

277

278

/**
 * Store the sample-wise difference s1 - s2 of two 8x8 blocks into a
 * contiguous coefficient block.
 *
 * @param block  destination, 64 DCTELEM values stored row after row
 * @param s1     top-left sample of the minuend block
 * @param s2     top-left sample of the subtrahend block
 * @param stride row stride in bytes, shared by s1 and s2
 */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1    += stride;
        s2    += stride;
        block += 8;            /* destination rows are packed, 8 per row */
    }
}

297

298

299

static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,

300

int line_size)

301

{

302

int i;

303

uint8_t *cm = cropTbl + MAX_NEG_CROP;

304

305

/* read the pixels */

306

for(i=0;i<8;i++) {

307

pixels[0] = cm[block[0]];

308

pixels[1] = cm[block[1]];

309

pixels[2] = cm[block[2]];

310

pixels[3] = cm[block[3]];

311

pixels[4] = cm[block[4]];

312

pixels[5] = cm[block[5]];

313

pixels[6] = cm[block[6]];

314

pixels[7] = cm[block[7]];

315

316

pixels += line_size;

317

block += 8;

318

}

319

}

320

321

static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,

322

int line_size)

323

{

324

int i;

325

uint8_t *cm = cropTbl + MAX_NEG_CROP;

326

327

/* read the pixels */

328

for(i=0;i<8;i++) {

329

pixels[0] = cm[pixels[0] + block[0]];

330

pixels[1] = cm[pixels[1] + block[1]];

331

pixels[2] = cm[pixels[2] + block[2]];

332

pixels[3] = cm[pixels[3] + block[3]];

333

pixels[4] = cm[pixels[4] + block[4]];

334

pixels[5] = cm[pixels[5] + block[5]];

335

pixels[6] = cm[pixels[6] + block[6]];

336

pixels[7] = cm[pixels[7] + block[7]];

337

pixels += line_size;

338

block += 8;

339

}

340

}

341

#if 0

342

343

#define PIXOP2(OPNAME, OP) \

344

static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

345

346

int i;\

347

for(i=0; i<h; i++){\

348

OP(*((uint64_t*)block), LD64(pixels));\

349

pixels+=line_size;\

350

block +=line_size;\

351

352

353

354

static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

355

356

int i;\

357

for(i=0; i<h; i++){\

358

const uint64_t a= LD64(pixels );\

359

const uint64_t b= LD64(pixels+1);\

360

OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\

361

pixels+=line_size;\

362

block +=line_size;\

363

364

365

366

static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

367

368

int i;\

369

for(i=0; i<h; i++){\

370

const uint64_t a= LD64(pixels );\

371

const uint64_t b= LD64(pixels+1);\

372

OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\

373

pixels+=line_size;\

374

block +=line_size;\

375

376

377

378

static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

379

380

int i;\

381

for(i=0; i<h; i++){\

382

const uint64_t a= LD64(pixels );\

383

const uint64_t b= LD64(pixels+line_size);\

384

OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\

385

pixels+=line_size;\

386

block +=line_size;\

387

388

389

390

static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

391

392

int i;\

393

for(i=0; i<h; i++){\

394

const uint64_t a= LD64(pixels );\

395

const uint64_t b= LD64(pixels+line_size);\

396

OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\

397

pixels+=line_size;\

398

block +=line_size;\

399

400

401

402

static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

403

404

int i;\

405

const uint64_t a= LD64(pixels );\

406

const uint64_t b= LD64(pixels+1);\

407

uint64_t l0= (a&0x0303030303030303ULL)\

408

+ (b&0x0303030303030303ULL)\

409

+ 0x0202020202020202ULL;\

410

uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

411

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

412

uint64_t l1,h1;\

413

414

pixels+=line_size;\

415

for(i=0; i<h; i+=2){\

416

uint64_t a= LD64(pixels );\

417

uint64_t b= LD64(pixels+1);\

418

l1= (a&0x0303030303030303ULL)\

419

+ (b&0x0303030303030303ULL);\

420

h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

421

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

422

OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\

423

pixels+=line_size;\

424

block +=line_size;\

425

a= LD64(pixels );\

426

b= LD64(pixels+1);\

427

l0= (a&0x0303030303030303ULL)\

428

+ (b&0x0303030303030303ULL)\

429

+ 0x0202020202020202ULL;\

430

h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

431

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

432

OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\

433

pixels+=line_size;\

434

block +=line_size;\

435

436

437

438

static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

439

440

int i;\

441

const uint64_t a= LD64(pixels );\

442

const uint64_t b= LD64(pixels+1);\

443

uint64_t l0= (a&0x0303030303030303ULL)\

444

+ (b&0x0303030303030303ULL)\

445

+ 0x0101010101010101ULL;\

446

uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

447

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

448

uint64_t l1,h1;\

449

450

pixels+=line_size;\

451

for(i=0; i<h; i+=2){\

452

uint64_t a= LD64(pixels );\

453

uint64_t b= LD64(pixels+1);\

454

l1= (a&0x0303030303030303ULL)\

455

+ (b&0x0303030303030303ULL);\

456

h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

457

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

458

OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\

459

pixels+=line_size;\

460

block +=line_size;\

461

a= LD64(pixels );\

462

b= LD64(pixels+1);\

463

l0= (a&0x0303030303030303ULL)\

464

+ (b&0x0303030303030303ULL)\

465

+ 0x0101010101010101ULL;\

466

h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\

467

+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\

468

OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\

469

pixels+=line_size;\

470

block +=line_size;\

471

472

473

474

CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\

475

CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\

476

CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\

477

CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\

478

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\

479

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\

480

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

481

482

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )

483

#else // 64 bit variant

484

485

#define PIXOP2(OPNAME, OP) \

486

static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

487

int i;\

488

for(i=0; i<h; i++){\

489

OP(*((uint16_t*)(block )), LD16(pixels ));\

490

pixels+=line_size;\

491

block +=line_size;\

492

493

494

static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

495

int i;\

496

for(i=0; i<h; i++){\

497

OP(*((uint32_t*)(block )), LD32(pixels ));\

498

pixels+=line_size;\

499

block +=line_size;\

500

501

502

static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

503

int i;\

504

for(i=0; i<h; i++){\

505

OP(*((uint32_t*)(block )), LD32(pixels ));\

506

OP(*((uint32_t*)(block+4)), LD32(pixels+4));\

507

pixels+=line_size;\

508

block +=line_size;\

509

510

511

static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

512

OPNAME ## _pixels8_c(block, pixels, line_size, h);\

513

514

515

static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

516

int src_stride1, int src_stride2, int h){\

517

int i;\

518

for(i=0; i<h; i++){\

519

uint32_t a,b;\

520

a= LD32(&src1[i*src_stride1 ]);\

521

b= LD32(&src2[i*src_stride2 ]);\

522

OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\

523

a= LD32(&src1[i*src_stride1+4]);\

524

b= LD32(&src2[i*src_stride2+4]);\

525

OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\

526

527

528

529

static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

530

int src_stride1, int src_stride2, int h){\

531

int i;\

532

for(i=0; i<h; i++){\

533

uint32_t a,b;\

534

a= LD32(&src1[i*src_stride1 ]);\

535

b= LD32(&src2[i*src_stride2 ]);\

536

OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\

537

a= LD32(&src1[i*src_stride1+4]);\

538

b= LD32(&src2[i*src_stride2+4]);\

539

OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\

540

541

542

543

static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

544

int src_stride1, int src_stride2, int h){\

545

int i;\

546

for(i=0; i<h; i++){\

547

uint32_t a,b;\

548

a= LD32(&src1[i*src_stride1 ]);\

549

b= LD32(&src2[i*src_stride2 ]);\

550

OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\

551

552

553

554

static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

555

int src_stride1, int src_stride2, int h){\

556

int i;\

557

for(i=0; i<h; i++){\

558

uint32_t a,b;\

559

a= LD16(&src1[i*src_stride1 ]);\

560

b= LD16(&src2[i*src_stride2 ]);\

561

OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\

562

563

564

565

static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

566

int src_stride1, int src_stride2, int h){\

567

OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\

568

OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\

569

570

571

static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \

572

int src_stride1, int src_stride2, int h){\

573

OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\

574

OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\

575

576

577

static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

578

OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\

579

580

581

static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

582

OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\

583

584

585

static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

586

OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\

587

588

589

static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

590

OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\

591

592

593

static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\

594

int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\

595

int i;\

596

for(i=0; i<h; i++){\

597

uint32_t a, b, c, d, l0, l1, h0, h1;\

598

a= LD32(&src1[i*src_stride1]);\

599

b= LD32(&src2[i*src_stride2]);\

600

c= LD32(&src3[i*src_stride3]);\

601

d= LD32(&src4[i*src_stride4]);\

602

l0= (a&0x03030303UL)\

603

+ (b&0x03030303UL)\

604

+ 0x02020202UL;\

605

h0= ((a&0xFCFCFCFCUL)>>2)\

606

+ ((b&0xFCFCFCFCUL)>>2);\

607

l1= (c&0x03030303UL)\

608

+ (d&0x03030303UL);\

609

h1= ((c&0xFCFCFCFCUL)>>2)\

610

+ ((d&0xFCFCFCFCUL)>>2);\

611

OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

612

a= LD32(&src1[i*src_stride1+4]);\

613

b= LD32(&src2[i*src_stride2+4]);\

614

c= LD32(&src3[i*src_stride3+4]);\

615

d= LD32(&src4[i*src_stride4+4]);\

616

l0= (a&0x03030303UL)\

617

+ (b&0x03030303UL)\

618

+ 0x02020202UL;\

619

h0= ((a&0xFCFCFCFCUL)>>2)\

620

+ ((b&0xFCFCFCFCUL)>>2);\

621

l1= (c&0x03030303UL)\

622

+ (d&0x03030303UL);\

623

h1= ((c&0xFCFCFCFCUL)>>2)\

624

+ ((d&0xFCFCFCFCUL)>>2);\

625

OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

626

627

628

629

static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

630

OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\

631

632

633

static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

634

OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\

635

636

637

static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

638

OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\

639

640

641

static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\

642

OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\

643

644

645

static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\

646

int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\

647

int i;\

648

for(i=0; i<h; i++){\

649

uint32_t a, b, c, d, l0, l1, h0, h1;\

650

a= LD32(&src1[i*src_stride1]);\

651

b= LD32(&src2[i*src_stride2]);\

652

c= LD32(&src3[i*src_stride3]);\

653

d= LD32(&src4[i*src_stride4]);\

654

l0= (a&0x03030303UL)\

655

+ (b&0x03030303UL)\

656

+ 0x01010101UL;\

657

h0= ((a&0xFCFCFCFCUL)>>2)\

658

+ ((b&0xFCFCFCFCUL)>>2);\

659

l1= (c&0x03030303UL)\

660

+ (d&0x03030303UL);\

661

h1= ((c&0xFCFCFCFCUL)>>2)\

662

+ ((d&0xFCFCFCFCUL)>>2);\

663

OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

664

a= LD32(&src1[i*src_stride1+4]);\

665

b= LD32(&src2[i*src_stride2+4]);\

666

c= LD32(&src3[i*src_stride3+4]);\

667

d= LD32(&src4[i*src_stride4+4]);\

668

l0= (a&0x03030303UL)\

669

+ (b&0x03030303UL)\

670

+ 0x01010101UL;\

671

h0= ((a&0xFCFCFCFCUL)>>2)\

672

+ ((b&0xFCFCFCFCUL)>>2);\

673

l1= (c&0x03030303UL)\

674

+ (d&0x03030303UL);\

675

h1= ((c&0xFCFCFCFCUL)>>2)\

676

+ ((d&0xFCFCFCFCUL)>>2);\

677

OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

678

679

680

static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\

681

int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\

682

OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\

683

OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\

684

685

static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\

686

int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\

687

OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\

688

OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\

689

690

691

static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

692

693

int i, a0, b0, a1, b1;\

694

a0= pixels[0];\

695

b0= pixels[1] + 2;\

696

a0 += b0;\

697

b0 += pixels[2];\

698

699

pixels+=line_size;\

700

for(i=0; i<h; i+=2){\

701

a1= pixels[0];\

702

b1= pixels[1];\

703

a1 += b1;\

704

b1 += pixels[2];\

705

706

block[0]= (a1+a0)>>2; /* FIXME non put */\

707

block[1]= (b1+b0)>>2;\

708

709

pixels+=line_size;\

710

block +=line_size;\

711

712

a0= pixels[0];\

713

b0= pixels[1] + 2;\

714

a0 += b0;\

715

b0 += pixels[2];\

716

717

block[0]= (a1+a0)>>2;\

718

block[1]= (b1+b0)>>2;\

719

pixels+=line_size;\

720

block +=line_size;\

721

722

723

724

static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

725

726

int i;\

727

const uint32_t a= LD32(pixels );\

728

const uint32_t b= LD32(pixels+1);\

729

uint32_t l0= (a&0x03030303UL)\

730

+ (b&0x03030303UL)\

731

+ 0x02020202UL;\

732

uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\

733

+ ((b&0xFCFCFCFCUL)>>2);\

734

uint32_t l1,h1;\

735

736

pixels+=line_size;\

737

for(i=0; i<h; i+=2){\

738

uint32_t a= LD32(pixels );\

739

uint32_t b= LD32(pixels+1);\

740

l1= (a&0x03030303UL)\

741

+ (b&0x03030303UL);\

742

h1= ((a&0xFCFCFCFCUL)>>2)\

743

+ ((b&0xFCFCFCFCUL)>>2);\

744

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

745

pixels+=line_size;\

746

block +=line_size;\

747

a= LD32(pixels );\

748

b= LD32(pixels+1);\

749

l0= (a&0x03030303UL)\

750

+ (b&0x03030303UL)\

751

+ 0x02020202UL;\

752

h0= ((a&0xFCFCFCFCUL)>>2)\

753

+ ((b&0xFCFCFCFCUL)>>2);\

754

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

755

pixels+=line_size;\

756

block +=line_size;\

757

758

759

760

static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

761

762

int j;\

763

for(j=0; j<2; j++){\

764

int i;\

765

const uint32_t a= LD32(pixels );\

766

const uint32_t b= LD32(pixels+1);\

767

uint32_t l0= (a&0x03030303UL)\

768

+ (b&0x03030303UL)\

769

+ 0x02020202UL;\

770

uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\

771

+ ((b&0xFCFCFCFCUL)>>2);\

772

uint32_t l1,h1;\

773

774

pixels+=line_size;\

775

for(i=0; i<h; i+=2){\

776

uint32_t a= LD32(pixels );\

777

uint32_t b= LD32(pixels+1);\

778

l1= (a&0x03030303UL)\

779

+ (b&0x03030303UL);\

780

h1= ((a&0xFCFCFCFCUL)>>2)\

781

+ ((b&0xFCFCFCFCUL)>>2);\

782

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

783

pixels+=line_size;\

784

block +=line_size;\

785

a= LD32(pixels );\

786

b= LD32(pixels+1);\

787

l0= (a&0x03030303UL)\

788

+ (b&0x03030303UL)\

789

+ 0x02020202UL;\

790

h0= ((a&0xFCFCFCFCUL)>>2)\

791

+ ((b&0xFCFCFCFCUL)>>2);\

792

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

793

pixels+=line_size;\

794

block +=line_size;\

795

796

pixels+=4-line_size*(h+1);\

797

block +=4-line_size*h;\

798

799

800

801

static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\

802

803

int j;\

804

for(j=0; j<2; j++){\

805

int i;\

806

const uint32_t a= LD32(pixels );\

807

const uint32_t b= LD32(pixels+1);\

808

uint32_t l0= (a&0x03030303UL)\

809

+ (b&0x03030303UL)\

810

+ 0x01010101UL;\

811

uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\

812

+ ((b&0xFCFCFCFCUL)>>2);\

813

uint32_t l1,h1;\

814

815

pixels+=line_size;\

816

for(i=0; i<h; i+=2){\

817

uint32_t a= LD32(pixels );\

818

uint32_t b= LD32(pixels+1);\

819

l1= (a&0x03030303UL)\

820

+ (b&0x03030303UL);\

821

h1= ((a&0xFCFCFCFCUL)>>2)\

822

+ ((b&0xFCFCFCFCUL)>>2);\

823

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

824

pixels+=line_size;\

825

block +=line_size;\

826

a= LD32(pixels );\

827

b= LD32(pixels+1);\

828

l0= (a&0x03030303UL)\

829

+ (b&0x03030303UL)\

830

+ 0x01010101UL;\

831

h0= ((a&0xFCFCFCFCUL)>>2)\

832

+ ((b&0xFCFCFCFCUL)>>2);\

833

OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\

834

pixels+=line_size;\

835

block +=line_size;\

836

837

pixels+=4-line_size*(h+1);\

838

block +=4-line_size*h;\

839

840

841

842

CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\

843

CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\

844

CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\

845

CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\

846

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\

847

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\

848

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\

849

CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

850

851

#define op_avg(a, b) a = rnd_avg32(a, b)

852

#endif

853

#define op_put(a, b) a = b

854

855

PIXOP2(avg, op_avg)

856

PIXOP2(put, op_put)

857

#undef op_avg

858

#undef op_put

859

860

/* Rounded average of two values: (a + b + 1) >> 1.
 * Every argument is parenthesized so that arguments containing operators of
 * lower precedence than '+' (e.g. '|', '&', '?:') expand correctly. */
#define avg2(a,b) (((a)+(b)+1)>>1)
/* Rounded average of four values: (a + b + c + d + 2) >> 2. */
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)

862

863

864

/**
 * Bilinear interpolation of an 8-pixel-wide, h-row block.
 *
 * @param dst     destination block; advanced by stride after each row
 * @param src     source block; each row reads 9 pixels from the current
 *                row and 9 from the row below (src[0..8], src[stride..stride+8])
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbour, in 1/16 units
 * @param y16     vertical weight of the lower neighbour, in 1/16 units
 * @param rounder value added to the weighted sum before the final >>8
 *
 * The four weights A..D always sum to 256, hence the shift by 8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16); /* top-left     */
    const int B = (     x16) * (16 - y16); /* top-right    */
    const int C = (16 - x16) * (     y16); /* bottom-left  */
    const int D = (     x16) * (     y16); /* bottom-right */
    int i, j;

    for (i = 0; i < h; i++) {
        /* Pixels are produced left to right, exactly as in the unrolled form. */
        for (j = 0; j < 8; j++) {
            dst[j] = (A * src[j] + B * src[j + 1]
                    + C * src[stride + j] + D * src[stride + j + 1]
                    + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}

886

887

/**
 * Warped (affine) bilinear fetch of an 8-pixel-wide, h-row block.
 *
 * For every output pixel a source position (vx, vy) is tracked: it starts at
 * (ox, oy) for the first pixel of a row, advances by (dxx, dyx) per pixel and
 * the row origin advances by (dxy, dyy) per row. The position is shifted right
 * by 16; the low `shift` bits of the result are the fractional part and the
 * remaining bits the integer sample coordinate.
 *
 * @param dst     destination, written at dst[y*stride + x] for x in 0..7
 * @param src     source plane, indexed as src[x + y*stride]
 * @param stride  row pitch shared by dst and src
 * @param h       number of rows to produce
 * @param shift   number of fractional bits (s = 1 << shift)
 * @param r       rounding constant added before the final >> (2*shift)
 * @param width   source width;  coordinates are clipped to [0, width-1]
 * @param height  source height; coordinates are clipped to [0, height-1]
 *
 * Samples whose integer coordinate is outside [0, width-1) or [0, height-1)
 * are clipped with clip(); interpolation is then done only along the axis
 * that is still in range (or not at all when both are out of range).
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* From here on width/height hold the largest valid coordinate. */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* The unsigned casts make negative coordinates fail the range test too. */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* Fully inside: 4-tap bilinear interpolation. */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* Row out of range: clip the row, interpolate horizontally only. */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* Column out of range: clip the column, interpolate vertically only. */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* Both out of range: copy the nearest clipped sample. */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}

944

945

/**
 * Zero-offset case of the put_tpel_pixels_mcXY_c family: forwards to the
 * put_pixelsN_c routine matching the block width.
 *
 * @param width  block width; only 2, 4, 8 and 16 are handled, any other
 *               value leaves dst untouched
 * @param height number of rows, passed through to the chosen routine
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    default: break; /* unsupported width: nothing is written */
    }
}

953

954

/**
 * Writes a width x height block where each pixel is the weighted mean
 * (2*src[j] + src[j+1]) / 3, i.e. the current pixel weighted twice as much
 * as its right-hand neighbour.
 *
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 ~= 1/3); the +1 is the rounding term.
 * Reads one pixel to the right of the block in every row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

964

965

/**
 * Writes a width x height block where each pixel is the weighted mean
 * (src[j] + 2*src[j+1]) / 3, i.e. the right-hand neighbour weighted twice as
 * much as the current pixel.
 *
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 ~= 1/3); the +1 is the rounding term.
 * Reads one pixel to the right of the block in every row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

975

976

/**
 * Writes a width x height block where each pixel is the weighted mean
 * (2*src[j] + src[j+stride]) / 3, i.e. the current pixel weighted twice as
 * much as the pixel directly below it.
 *
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 ~= 1/3); the +1 is the rounding term.
 * Reads one row below the block.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

986

987

/**
 * Writes a width x height block where each pixel is a 2x2 weighted mean with
 * weights 4 (current), 3 (right), 3 (below) and 2 (below-right), divided by 12.
 *
 * The division by 12 is done as a multiply by 2731 and a shift by 15
 * (2731/32768 ~= 1/12); the +6 is the rounding term.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

997

998

/**
 * Writes a width x height block where each pixel is a 2x2 weighted mean with
 * weights 3 (current), 2 (right), 4 (below) and 3 (below-right), divided by 12.
 *
 * The division by 12 is done as a multiply by 2731 and a shift by 15
 * (2731/32768 ~= 1/12); the +6 is the rounding term.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

1008

1009

/**
 * Writes a width x height block where each pixel is the weighted mean
 * (src[j] + 2*src[j+stride]) / 3, i.e. the pixel directly below weighted
 * twice as much as the current pixel.
 *
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 ~= 1/3); the +1 is the rounding term.
 * Reads one row below the block.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

1019

1020

/**
 * Writes a width x height block where each pixel is a 2x2 weighted mean with
 * weights 3 (current), 4 (right), 2 (below) and 3 (below-right), divided by 12.
 *
 * The division by 12 is done as a multiply by 2731 and a shift by 15
 * (2731/32768 ~= 1/12); the +6 is the rounding term.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

1030

1031

/**
 * Writes a width x height block where each pixel is a 2x2 weighted mean with
 * weights 2 (current), 3 (right), 3 (below) and 4 (below-right), divided by 12.
 *
 * The division by 12 is done as a multiply by 2731 and a shift by 15
 * (2731/32768 ~= 1/12); the +6 is the rounding term.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

1041

1042

/**
 * Zero-offset case of the avg_tpel_pixels_mcXY_c family: forwards to the
 * avg_pixelsN_c routine matching the block width.
 *
 * @param width  block width; only 2, 4, 8 and 16 are handled, any other
 *               value leaves dst untouched
 * @param height number of rows, passed through to the chosen routine
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    default: break; /* unsupported width: nothing is written */
    }
}

1050

1051

/**
 * Averaging variant of the (2*src[j] + src[j+1]) / 3 interpolation: the
 * interpolated value is computed first (multiply by 683, shift by 11, with
 * +1 rounding) and then averaged with the existing dst pixel, rounding up.
 * Reads one pixel to the right of the block in every row.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1061

1062

/**
 * Averaging variant of the (src[j] + 2*src[j+1]) / 3 interpolation: the
 * interpolated value is computed first (multiply by 683, shift by 11, with
 * +1 rounding) and then averaged with the existing dst pixel, rounding up.
 * Reads one pixel to the right of the block in every row.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1072

1073

/**
 * Averaging variant of the (2*src[j] + src[j+stride]) / 3 interpolation: the
 * interpolated value is computed first (multiply by 683, shift by 11, with
 * +1 rounding) and then averaged with the existing dst pixel, rounding up.
 * Reads one row below the block.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1083

1084

/**
 * Averaging variant of the 2x2 interpolation with weights 4 (current),
 * 3 (right), 3 (below), 2 (below-right) over 12: the interpolated value is
 * computed first (multiply by 2731, shift by 15, with +6 rounding) and then
 * averaged with the existing dst pixel, rounding up.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1094

1095

/**
 * Averaging variant of the 2x2 interpolation with weights 3 (current),
 * 2 (right), 4 (below), 3 (below-right) over 12: the interpolated value is
 * computed first (multiply by 2731, shift by 15, with +6 rounding) and then
 * averaged with the existing dst pixel, rounding up.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1105

1106

/**
 * Averaging variant of the (src[j] + 2*src[j+stride]) / 3 interpolation: the
 * interpolated value is computed first (multiply by 683, shift by 11, with
 * +1 rounding) and then averaged with the existing dst pixel, rounding up.
 * Reads one row below the block.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1116

1117

/**
 * Averaging variant of the 2x2 interpolation with weights 3 (current),
 * 4 (right), 2 (below), 3 (below-right) over 12: the interpolated value is
 * computed first (multiply by 2731, shift by 15, with +6 rounding) and then
 * averaged with the existing dst pixel, rounding up.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1127

1128

/**
 * Averaging variant of the 2x2 interpolation with weights 2 (current),
 * 3 (right), 3 (below), 4 (below-right) over 12: the interpolated value is
 * computed first (multiply by 2731, shift by 15, with +6 rounding) and then
 * averaged with the existing dst pixel, rounding up.
 * Reads one extra column to the right and one extra row below the block.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

1138

/* Disabled code: everything up to the matching #endif is compiled out.
 * TPEL_WIDTH(width) is a template for fixed-width wrappers named
 * put_tpel_pixels<width>_mcXY_c that pass `width` on to the generic
 * put_tpel_pixels_mcXY_c helpers.
 * NOTE(review): every wrapper body begins with `void`, so each line reads as
 * a declaration rather than a call; this would have to be removed before the
 * block could be enabled. */
#if 0

1139

#define TPEL_WIDTH(width)\

1140

static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1141

void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\

1142

static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1143

void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\

1144

static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1145

void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\

1146

static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1147

void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\

1148

static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1149

void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\

1150

static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1151

void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\

1152

static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1153

void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\

1154

static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1155

void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\

1156

static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\

1157

void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}

1158

#endif

1159

1160

/**
 * Template for the bilinear chroma interpolation routines
 * OPNAME##h264_chroma_mc{2,4,8}_c, which process blocks 2, 4 and 8 pixels wide.
 *
 * x and y (each asserted to be in 0..7) select the weights
 *   A=(8-x)(8-y)  B=x(8-y)  C=(8-x)y  D=xy
 * applied to the current, right, lower and lower-right source pixels. The
 * weights sum to 64; the un-normalized weighted sum is handed to OP(dst, sum),
 * which performs the rounding, the >>6 normalization and the store.
 *
 * Each generated function reads one extra column to the right and one extra
 * row below the block, and advances both dst and src by stride per row.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}

/* op_put: round (+32), normalize (>>6) and store.
 * op_avg: same normalization, then rounded average with the existing pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put

1230

1231

/**
 * Copies an h-row block of 4 bytes per row from src to dst.
 * Each row is moved as a single 32-bit load/store through LD32/ST32.
 *
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to copy
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}

1241

1242

/**
 * Copies an h-row block of 8 bytes per row from src to dst.
 * Each row is moved as two 32-bit load/store pairs through LD32/ST32.
 *
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to copy
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}

1253

1254

/**
 * Copies an h-row block of 16 bytes per row from src to dst.
 * Each row is moved as four 32-bit load/store pairs through LD32/ST32.
 *
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to copy
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}

1267

1268

/**
 * Copies an h-row block of 17 bytes per row from src to dst.
 * The first 16 bytes of a row are moved as four 32-bit load/store pairs
 * through LD32/ST32; the 17th byte is copied individually.
 *
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to copy
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}

1282

1283

/**
 * Copies an h-row block of 9 bytes per row from src to dst.
 * The first 8 bytes of a row are moved as two 32-bit load/store pairs
 * through LD32/ST32; the 9th byte is copied individually.
 *
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to copy
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}

1295

1296

1297

#define QPEL_MC(r, OPNAME, RND, OP) \

1298

static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\

1299

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1300

int i;\

1301

for(i=0; i<h; i++)\

1302

1303

OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\

1304

OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\

1305

OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\

1306

OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\

1307

OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\

1308

OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\

1309

OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\

1310

OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\

1311

dst+=dstStride;\

1312

src+=srcStride;\

1313

1314

1315

1316

static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\

1317

const int w=8;\

1318

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1319

int i;\

1320

for(i=0; i<w; i++)\

1321

1322

const int src0= src[0*srcStride];\

1323

const int src1= src[1*srcStride];\

1324

const int src2= src[2*srcStride];\

1325

const int src3= src[3*srcStride];\

1326

const int src4= src[4*srcStride];\

1327

const int src5= src[5*srcStride];\

1328

const int src6= src[6*srcStride];\

1329

const int src7= src[7*srcStride];\

1330

const int src8= src[8*srcStride];\

1331

OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\

1332

OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\

1333

OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\

1334

OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\

1335

OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\

1336

OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\

1337

OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\

1338

OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\

1339

dst++;\

1340

src++;\

1341

1342

1343

1344

static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\

1345

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1346

int i;\

1347

1348

for(i=0; i<h; i++)\

1349

1350

OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\

1351

OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\

1352

OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\

1353

OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\

1354

OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\

1355

OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\

1356

OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\

1357

OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\

1358

OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\

1359

OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\

1360

OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\

1361

OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\

1362

OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\

1363

OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\

1364

OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\

1365

OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\

1366

dst+=dstStride;\

1367

src+=srcStride;\

1368

1369

1370

1371

static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\

1372

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1373

int i;\

1374

const int w=16;\

1375

for(i=0; i<w; i++)\

1376

1377

const int src0= src[0*srcStride];\

1378

const int src1= src[1*srcStride];\

1379

const int src2= src[2*srcStride];\

1380

const int src3= src[3*srcStride];\

1381

const int src4= src[4*srcStride];\

1382

const int src5= src[5*srcStride];\

1383

const int src6= src[6*srcStride];\

1384

const int src7= src[7*srcStride];\

1385

const int src8= src[8*srcStride];\

1386

const int src9= src[9*srcStride];\

1387

const int src10= src[10*srcStride];\

1388

const int src11= src[11*srcStride];\

1389

const int src12= src[12*srcStride];\

1390

const int src13= src[13*srcStride];\

1391

const int src14= src[14*srcStride];\

1392

const int src15= src[15*srcStride];\

1393

const int src16= src[16*srcStride];\

1394

OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\

1395

OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\

1396

OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\

1397

OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\

1398

OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\

1399

OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\

1400

OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\

1401

OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\

1402

OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\

1403

OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\

1404

OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\

1405

OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\

1406

OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\

1407

OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\

1408

OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\

1409

OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\

1410

dst++;\

1411

src++;\

1412

1413

1414

1415

static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\

1416

OPNAME ## pixels8_c(dst, src, stride, 8);\

1417

1418

1419

static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\

1420

uint8_t half[64];\

1421

put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\

1422

OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\

1423

1424

1425

static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\

1426

OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\

1427

1428

1429

static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\

1430

uint8_t half[64];\

1431

put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\

1432

OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\

1433

1434

1435

static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\

1436

uint8_t full[16*9];\

1437

uint8_t half[64];\

1438

copy_block9(full, src, 16, stride, 9);\

1439

put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\

1440

OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\

1441

1442

1443

static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\

1444

uint8_t full[16*9];\

1445

copy_block9(full, src, 16, stride, 9);\

1446

OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\

1447

1448

1449

static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\

1450

uint8_t full[16*9];\

1451

uint8_t half[64];\

1452

copy_block9(full, src, 16, stride, 9);\

1453

put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\

1454

OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\

1455

1456

void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\

1457

uint8_t full[16*9];\

1458

uint8_t halfH[72];\

1459

uint8_t halfV[64];\

1460

uint8_t halfHV[64];\

1461

copy_block9(full, src, 16, stride, 9);\

1462

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1463

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\

1464

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1465

OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\

1466

1467

static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\

1468

uint8_t full[16*9];\

1469

uint8_t halfH[72];\

1470

uint8_t halfHV[64];\

1471

copy_block9(full, src, 16, stride, 9);\

1472

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1473

put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\

1474

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1475

OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\

1476

1477

void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\

1478

uint8_t full[16*9];\

1479

uint8_t halfH[72];\

1480

uint8_t halfV[64];\

1481

uint8_t halfHV[64];\

1482

copy_block9(full, src, 16, stride, 9);\

1483

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1484

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\

1485

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1486

OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\

1487

1488

static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\

1489

uint8_t full[16*9];\

1490

uint8_t halfH[72];\

1491

uint8_t halfHV[64];\

1492

copy_block9(full, src, 16, stride, 9);\

1493

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1494

put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\

1495

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1496

OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\

1497

1498

void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\

1499

uint8_t full[16*9];\

1500

uint8_t halfH[72];\

1501

uint8_t halfV[64];\

1502

uint8_t halfHV[64];\

1503

copy_block9(full, src, 16, stride, 9);\

1504

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1505

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\

1506

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1507

OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\

1508

1509

static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\

1510

uint8_t full[16*9];\

1511

uint8_t halfH[72];\

1512

uint8_t halfHV[64];\

1513

copy_block9(full, src, 16, stride, 9);\

1514

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1515

put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\

1516

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1517

OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\

1518

1519

void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\

1520

uint8_t full[16*9];\

1521

uint8_t halfH[72];\

1522

uint8_t halfV[64];\

1523

uint8_t halfHV[64];\

1524

copy_block9(full, src, 16, stride, 9);\

1525

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\

1526

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\

1527

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1528

OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\

1529

1530

static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\

1531

uint8_t full[16*9];\

1532

uint8_t halfH[72];\

1533

uint8_t halfHV[64];\

1534

copy_block9(full, src, 16, stride, 9);\

1535

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1536

put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\

1537

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1538

OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\

1539

1540

static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\

1541

uint8_t halfH[72];\

1542

uint8_t halfHV[64];\

1543

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\

1544

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1545

OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\

1546

1547

static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\

1548

uint8_t halfH[72];\

1549

uint8_t halfHV[64];\

1550

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\

1551

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1552

OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\

1553

1554

void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\

1555

uint8_t full[16*9];\

1556

uint8_t halfH[72];\

1557

uint8_t halfV[64];\

1558

uint8_t halfHV[64];\

1559

copy_block9(full, src, 16, stride, 9);\

1560

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1561

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\

1562

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1563

OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\

1564

1565

static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\

1566

uint8_t full[16*9];\

1567

uint8_t halfH[72];\

1568

copy_block9(full, src, 16, stride, 9);\

1569

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1570

put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\

1571

OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\

1572

1573

void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\

1574

uint8_t full[16*9];\

1575

uint8_t halfH[72];\

1576

uint8_t halfV[64];\

1577

uint8_t halfHV[64];\

1578

copy_block9(full, src, 16, stride, 9);\

1579

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1580

put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\

1581

put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\

1582

OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\

1583

1584

static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\

1585

uint8_t full[16*9];\

1586

uint8_t halfH[72];\

1587

copy_block9(full, src, 16, stride, 9);\

1588

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\

1589

put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\

1590

OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\

1591

1592

static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\

1593

uint8_t halfH[72];\

1594

put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\

1595

OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\

1596

1597

static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\

1598

OPNAME ## pixels16_c(dst, src, stride, 16);\

1599

1600

1601

static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\

1602

uint8_t half[256];\

1603

put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\

1604

OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\

1605

1606

1607

static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\

1608

OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\

1609

1610

1611

static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\

1612

uint8_t half[256];\

1613

put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\

1614

OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\

1615

1616

1617

static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\

1618

uint8_t full[24*17];\

1619

uint8_t half[256];\

1620

copy_block17(full, src, 24, stride, 17);\

1621

put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\

1622

OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\

1623

1624

1625

static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\

1626

uint8_t full[24*17];\

1627

copy_block17(full, src, 24, stride, 17);\

1628

OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\

1629

1630

1631

static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\

1632

uint8_t full[24*17];\

1633

uint8_t half[256];\

1634

copy_block17(full, src, 24, stride, 17);\

1635

put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\

1636

OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\

1637

1638

void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\

1639

uint8_t full[24*17];\

1640

uint8_t halfH[272];\

1641

uint8_t halfV[256];\

1642

uint8_t halfHV[256];\

1643

copy_block17(full, src, 24, stride, 17);\

1644

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1645

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\

1646

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1647

OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\

1648

1649

static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\

1650

uint8_t full[24*17];\

1651

uint8_t halfH[272];\

1652

uint8_t halfHV[256];\

1653

copy_block17(full, src, 24, stride, 17);\

1654

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1655

put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\

1656

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1657

OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\

1658

1659

void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\

1660

uint8_t full[24*17];\

1661

uint8_t halfH[272];\

1662

uint8_t halfV[256];\

1663

uint8_t halfHV[256];\

1664

copy_block17(full, src, 24, stride, 17);\

1665

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1666

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\

1667

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1668

OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\

1669

1670

static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\

1671

uint8_t full[24*17];\

1672

uint8_t halfH[272];\

1673

uint8_t halfHV[256];\

1674

copy_block17(full, src, 24, stride, 17);\

1675

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1676

put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\

1677

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1678

OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\

1679

1680

void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\

1681

uint8_t full[24*17];\

1682

uint8_t halfH[272];\

1683

uint8_t halfV[256];\

1684

uint8_t halfHV[256];\

1685

copy_block17(full, src, 24, stride, 17);\

1686

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1687

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\

1688

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1689

OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\

1690

1691

static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\

1692

uint8_t full[24*17];\

1693

uint8_t halfH[272];\

1694

uint8_t halfHV[256];\

1695

copy_block17(full, src, 24, stride, 17);\

1696

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1697

put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\

1698

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1699

OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\

1700

1701

void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\

1702

uint8_t full[24*17];\

1703

uint8_t halfH[272];\

1704

uint8_t halfV[256];\

1705

uint8_t halfHV[256];\

1706

copy_block17(full, src, 24, stride, 17);\

1707

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\

1708

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\

1709

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1710

OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\

1711

1712

static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\

1713

uint8_t full[24*17];\

1714

uint8_t halfH[272];\

1715

uint8_t halfHV[256];\

1716

copy_block17(full, src, 24, stride, 17);\

1717

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1718

put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\

1719

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1720

OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\

1721

1722

static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\

1723

uint8_t halfH[272];\

1724

uint8_t halfHV[256];\

1725

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\

1726

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1727

OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\

1728

1729

static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\

1730

uint8_t halfH[272];\

1731

uint8_t halfHV[256];\

1732

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\

1733

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1734

OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\

1735

1736

void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\

1737

uint8_t full[24*17];\

1738

uint8_t halfH[272];\

1739

uint8_t halfV[256];\

1740

uint8_t halfHV[256];\

1741

copy_block17(full, src, 24, stride, 17);\

1742

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1743

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\

1744

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1745

OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\

1746

1747

static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\

1748

uint8_t full[24*17];\

1749

uint8_t halfH[272];\

1750

copy_block17(full, src, 24, stride, 17);\

1751

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1752

put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\

1753

OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\

1754

1755

void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\

1756

uint8_t full[24*17];\

1757

uint8_t halfH[272];\

1758

uint8_t halfV[256];\

1759

uint8_t halfHV[256];\

1760

copy_block17(full, src, 24, stride, 17);\

1761

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1762

put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\

1763

put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\

1764

OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\

1765

1766

static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\

1767

uint8_t full[24*17];\

1768

uint8_t halfH[272];\

1769

copy_block17(full, src, 24, stride, 17);\

1770

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\

1771

put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\

1772

OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\

1773

1774

static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\

1775

uint8_t halfH[272];\

1776

put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\

1777

OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\

1778

}

1779

1780

/*
 * Store operators handed to QPEL_MC as its last argument.
 *
 * In each macro `a` is the destination lvalue and `b` is the raw filter sum.
 * All four index a table called `cm`; none of them declares it, so `cm` has
 * to be in scope at every place the macro is expanded.
 *
 *   op_put         a = cm[(b + 16) >> 5]
 *   op_put_no_rnd  a = cm[(b + 15) >> 5]            (bias 15 instead of 16)
 *   op_avg         a = (a + cm[(b + 16) >> 5] + 1) >> 1
 *   op_avg_no_rnd  a = (a + cm[(b + 15) >> 5]) >> 1  (no +1 before halving)
 *
 * The >>5 matches the filter expressions visible at the end of QPEL_MC,
 * whose taps (20, -6, 3, -1, each applied to a pair) add up to 32.
 * `a` is pasted unparenthesised and, in the avg forms, appears twice, so it
 * must be a side-effect-free lvalue expression.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

1781

#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)

1782

#define op_put(a, b) a = cm[((b) + 16)>>5]

1783

#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

1784

1785

/*
 * Instantiate the MPEG-4 qpel function families.  Judging by how the visible
 * part of the QPEL_MC body uses OPNAME, RND and OP, the arguments are
 * presumably (r, OPNAME, RND, OP) -- the #define line is not shown here, so
 * confirm against it.  With that reading the three calls generate the
 * put_*, put_no_rnd_* and avg_* function sets.
 */
QPEL_MC(0, put_ , _ , op_put)

1786

QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)

1787

QPEL_MC(0, avg_ , _ , op_avg)

1788

/* Disabled avg_no_rnd variant.  Note that it names op_avg, so op_avg_no_rnd
 * is not used by any instantiation in this section. */
//QPEL_MC(1, avg_no_rnd , _ , op_avg)

1789

/* The store operators are dropped again once the instantiations are done. */
#undef op_avg

1790

#undef op_avg_no_rnd

1791

#undef op_put

1792

#undef op_put_no_rnd

1793

1794

#if 1

1795

/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Generates nine static functions whose names start with OPNAME:
 *   h264_qpel4_{h,v,hv}_lowpass, h264_qpel8_{h,v,hv}_lowpass and
 *   h264_qpel16_{v,h,hv}_lowpass.
 *
 * Every output sample is the six-tap sum
 *     20*(p[0]+p[1]) - 5*(p[-1]+p[2]) + (p[-2]+p[3])
 * taken along a row (h), a column (v), or first along rows and then along
 * columns of the intermediate result (hv).  The taps add up to 32 for one
 * pass and therefore 32*32 = 1024 for the two-pass hv case.
 *
 * The raw sum is not stored directly; it is handed to OP(dst, sum) in the
 * h and v functions and to OP2(dst, sum) in the second pass of the hv
 * functions.  OP and OP2 are supplied by whoever instantiates the macro;
 * their definitions are not part of this section, so scaling, rounding and
 * clipping are presumably done there -- confirm at the instantiation site.
 * Each 4- and 8-sample function declares `cm = cropTbl + MAX_NEG_CROP`
 * without using it directly; presumably the OP/OP2 expansions reference it.
 *
 * Because the filter reaches two samples before and three samples after
 * each output position, callers must provide that much readable margin
 * around the source block.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \

1796

static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Horizontal filter for a 4x4 block.  For each of the 4 rows, writes
 * dst[0..3] from src[-2..6], then steps dst and src by their strides.
 */\

1797

const int h=4;\

1798

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1799

int i;\

1800

for(i=0; i<h; i++)\

1801

1802

OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\

1803

OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\

1804

OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\

1805

OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\

1806

dst+=dstStride;\

1807

src+=srcStride;\

1808

1809

1810

1811

static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Vertical filter for a 4x4 block.  For each of the 4 columns, loads source
 * rows -2..6 (srcB, srcA, src0..src6) once, writes the 4 outputs down the
 * column, then moves one sample to the right.
 */\

1812

const int w=4;\

1813

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1814

int i;\

1815

for(i=0; i<w; i++)\

1816

1817

const int srcB= src[-2*srcStride];\

1818

const int srcA= src[-1*srcStride];\

1819

const int src0= src[0 *srcStride];\

1820

const int src1= src[1 *srcStride];\

1821

const int src2= src[2 *srcStride];\

1822

const int src3= src[3 *srcStride];\

1823

const int src4= src[4 *srcStride];\

1824

const int src5= src[5 *srcStride];\

1825

const int src6= src[6 *srcStride];\

1826

OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\

1827

OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\

1828

OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\

1829

OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\

1830

dst++;\

1831

src++;\

1832

1833

1834

1835

static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){/*
 * Two-pass (horizontal then vertical) filter for a 4x4 block.
 * Pass 1 stores the raw horizontal sums for h+5 = 9 source rows in tmp,
 * bypassing OP; with 8-bit input each sum lies in [-2550, 10710] and so
 * fits int16_t.  Pass 2 runs the same six taps down each tmp column and
 * hands the result to OP2.  tmp must hold 9 rows of tmpStride entries.
 */\

1836

const int h=4;\

1837

const int w=4;\

1838

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1839

int i;\

1840

src -= 2*srcStride;/* pass 1 starts two rows above the block */\

1841

for(i=0; i<h+5; i++)\

1842

1843

tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\

1844

tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\

1845

tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\

1846

tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\

1847

tmp+=tmpStride;\

1848

src+=srcStride;\

1849

1850

tmp -= tmpStride*(h+5-2);/* rewind to the third stored row, i.e. the row of the original src pointer */\

1851

for(i=0; i<w; i++)\

1852

1853

const int tmpB= tmp[-2*tmpStride];\

1854

const int tmpA= tmp[-1*tmpStride];\

1855

const int tmp0= tmp[0 *tmpStride];\

1856

const int tmp1= tmp[1 *tmpStride];\

1857

const int tmp2= tmp[2 *tmpStride];\

1858

const int tmp3= tmp[3 *tmpStride];\

1859

const int tmp4= tmp[4 *tmpStride];\

1860

const int tmp5= tmp[5 *tmpStride];\

1861

const int tmp6= tmp[6 *tmpStride];\

1862

OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\

1863

OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\

1864

OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\

1865

OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\

1866

dst++;\

1867

tmp++;\

1868

1869

1870

1871

static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Horizontal filter for an 8x8 block.  For each of the 8 rows, writes
 * dst[0..7] from src[-2..10], then steps dst and src by their strides.
 */\

1872

const int h=8;\

1873

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1874

int i;\

1875

for(i=0; i<h; i++)\

1876

1877

OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\

1878

OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\

1879

OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\

1880

OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\

1881

OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\

1882

OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\

1883

OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\

1884

OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\

1885

dst+=dstStride;\

1886

src+=srcStride;\

1887

1888

1889

1890

static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Vertical filter for an 8x8 block.  For each of the 8 columns, loads source
 * rows -2..10 (srcB, srcA, src0..src10) once, writes the 8 outputs down the
 * column, then moves one sample to the right.
 */\

1891

const int w=8;\

1892

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1893

int i;\

1894

for(i=0; i<w; i++)\

1895

1896

const int srcB= src[-2*srcStride];\

1897

const int srcA= src[-1*srcStride];\

1898

const int src0= src[0 *srcStride];\

1899

const int src1= src[1 *srcStride];\

1900

const int src2= src[2 *srcStride];\

1901

const int src3= src[3 *srcStride];\

1902

const int src4= src[4 *srcStride];\

1903

const int src5= src[5 *srcStride];\

1904

const int src6= src[6 *srcStride];\

1905

const int src7= src[7 *srcStride];\

1906

const int src8= src[8 *srcStride];\

1907

const int src9= src[9 *srcStride];\

1908

const int src10=src[10*srcStride];\

1909

OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\

1910

OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\

1911

OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\

1912

OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\

1913

OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\

1914

OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\

1915

OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\

1916

OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\

1917

dst++;\

1918

src++;\

1919

1920

1921

1922

static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){/*
 * Two-pass (horizontal then vertical) filter for an 8x8 block.
 * Pass 1 stores the raw horizontal sums for h+5 = 13 source rows in tmp,
 * bypassing OP; pass 2 runs the six taps down each tmp column and hands the
 * result to OP2.  tmp must hold 13 rows of tmpStride entries.
 */\

1923

const int h=8;\

1924

const int w=8;\

1925

uint8_t *cm = cropTbl + MAX_NEG_CROP;\

1926

int i;\

1927

src -= 2*srcStride;/* pass 1 starts two rows above the block */\

1928

for(i=0; i<h+5; i++)\

1929

1930

tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\

1931

tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\

1932

tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\

1933

tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\

1934

tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\

1935

tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\

1936

tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\

1937

tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\

1938

tmp+=tmpStride;\

1939

src+=srcStride;\

1940

1941

tmp -= tmpStride*(h+5-2);/* rewind to the third stored row, i.e. the row of the original src pointer */\

1942

for(i=0; i<w; i++)\

1943

1944

const int tmpB= tmp[-2*tmpStride];\

1945

const int tmpA= tmp[-1*tmpStride];\

1946

const int tmp0= tmp[0 *tmpStride];\

1947

const int tmp1= tmp[1 *tmpStride];\

1948

const int tmp2= tmp[2 *tmpStride];\

1949

const int tmp3= tmp[3 *tmpStride];\

1950

const int tmp4= tmp[4 *tmpStride];\

1951

const int tmp5= tmp[5 *tmpStride];\

1952

const int tmp6= tmp[6 *tmpStride];\

1953

const int tmp7= tmp[7 *tmpStride];\

1954

const int tmp8= tmp[8 *tmpStride];\

1955

const int tmp9= tmp[9 *tmpStride];\

1956

const int tmp10=tmp[10*tmpStride];\

1957

OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\

1958

OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\

1959

OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\

1960

OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\

1961

OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\

1962

OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\

1963

OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\

1964

OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\

1965

dst++;\

1966

tmp++;\

1967

1968

1969

1970

static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Vertical filter for a 16x16 block, built from four 8x8 calls: left and
 * right halves of the top 8 rows, then the same for the bottom 8 rows.
 */\

1971

OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\

1972

OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\

1973

src += 8*srcStride;\

1974

dst += 8*dstStride;\

1975

OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\

1976

OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\

1977

1978

1979

static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Horizontal filter for a 16x16 block, built from four 8x8 calls: left and
 * right halves of the top 8 rows, then the same for the bottom 8 rows.
 */\

1980

OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\

1981

OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\

1982

src += 8*srcStride;\

1983

dst += 8*dstStride;\

1984

OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\

1985

OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\

1986

1987

1988

static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){/*
 * Two-pass filter for a 16x16 block, built from four 8x8 hv calls.  The
 * right-hand calls use tmp+8 and the bottom calls start 8 tmp rows further
 * down; since each 8x8 call fills 13 tmp rows, tmp needs room for
 * 8+13 = 21 rows of tmpStride entries, at least 16 of them used per row.
 * The bottom calls overwrite tmp rows that the top calls have finished with.
 */\

1989

OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\

1990

OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\

1991

src += 8*srcStride;\

1992

tmp += 8*tmpStride;\

1993

dst += 8*dstStride;\

1994

OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\

1995

OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\

1996

1997

1998

#define H264_MC(OPNAME, SIZE) \

1999

static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\

2000

OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\

2001

2002

2003

static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\

2004

uint8_t half[SIZE*SIZE];\

2005

put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\

2006

OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\

2007

2008

2009

static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\

2010

OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\

2011

2012

2013

static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\

2014

uint8_t half[SIZE*SIZE];\

2015

put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\

2016

OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\

2017

2018

2019

static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\

2020

uint8_t full[SIZE*(SIZE+5)];\

2021

uint8_t * const full_mid= full + SIZE*2;\

2022

uint8_t half[SIZE*SIZE];\

2023

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2024

put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\

2025

OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\

2026

2027

2028

static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\

2029

uint8_t full[SIZE*(SIZE+5)];\

2030

uint8_t * const full_mid= full + SIZE*2;\

2031

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2032

OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\

2033

2034

2035

static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\

2036

uint8_t full[SIZE*(SIZE+5)];\

2037

uint8_t * const full_mid= full + SIZE*2;\

2038

uint8_t half[SIZE*SIZE];\

2039

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2040

put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\

2041

OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\

2042

2043

2044

static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\

2045

uint8_t full[SIZE*(SIZE+5)];\

2046

uint8_t * const full_mid= full + SIZE*2;\

2047

uint8_t halfH[SIZE*SIZE];\

2048

uint8_t halfV[SIZE*SIZE];\

2049

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\

2050

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2051

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2052

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\

2053

2054

2055

static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\

2056

uint8_t full[SIZE*(SIZE+5)];\

2057

uint8_t * const full_mid= full + SIZE*2;\

2058

uint8_t halfH[SIZE*SIZE];\

2059

uint8_t halfV[SIZE*SIZE];\

2060

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\

2061

copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\

2062

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2063

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\

2064

2065

2066

static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\

2067

uint8_t full[SIZE*(SIZE+5)];\

2068

uint8_t * const full_mid= full + SIZE*2;\

2069

uint8_t halfH[SIZE*SIZE];\

2070

uint8_t halfV[SIZE*SIZE];\

2071

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\

2072

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2073

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2074

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\

2075

2076

2077

static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\

2078

uint8_t full[SIZE*(SIZE+5)];\

2079

uint8_t * const full_mid= full + SIZE*2;\

2080

uint8_t halfH[SIZE*SIZE];\

2081

uint8_t halfV[SIZE*SIZE];\

2082

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\

2083

copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\

2084

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2085

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\

2086

2087

2088

static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\

2089

int16_t tmp[SIZE*(SIZE+5)];\

2090

OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\

2091

2092

2093

static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\

2094

int16_t tmp[SIZE*(SIZE+5)];\

2095

uint8_t halfH[SIZE*SIZE];\

2096

uint8_t halfHV[SIZE*SIZE];\

2097

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\

2098

put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\

2099

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\

2100

2101

2102

static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\

2103

int16_t tmp[SIZE*(SIZE+5)];\

2104

uint8_t halfH[SIZE*SIZE];\

2105

uint8_t halfHV[SIZE*SIZE];\

2106

put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\

2107

put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\

2108

OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\

2109

2110

2111

static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\

2112

uint8_t full[SIZE*(SIZE+5)];\

2113

uint8_t * const full_mid= full + SIZE*2;\

2114

int16_t tmp[SIZE*(SIZE+5)];\

2115

uint8_t halfV[SIZE*SIZE];\

2116

uint8_t halfHV[SIZE*SIZE];\

2117

copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\

2118

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2119

put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\

2120

OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\

2121

2122

2123

static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\

2124

uint8_t full[SIZE*(SIZE+5)];\

2125

uint8_t * const full_mid= full + SIZE*2;\

2126

int16_t tmp[SIZE*(SIZE+5)];\

2127

uint8_t halfV[SIZE*SIZE];\

2128

uint8_t halfHV[SIZE*SIZE];\

2129

copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\

2130

put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\

2131

put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\

2132

OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\

2133

2134

2135

/*
 * Store operators handed to H264_LOWPASS below.
 * op_put / op_avg: round the filter sum (b) by adding 16 and shifting right by 5,
 * map it through the cm[] lookup, then either store it (put) or average it with
 * the value already in a, rounding up (avg).
 * op2_put / op2_avg: same, but with rounding constant 512 and a shift of 10.
 * NOTE(review): cm must be in scope where these expand; presumably it is the
 * cropTbl-based clipping table set up inside H264_LOWPASS -- confirm there.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

2136

//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)

2137

#define op_put(a, b) a = cm[((b) + 16)>>5]

2138

#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)

2139

#define op2_put(a, b) a = cm[((b) + 512)>>10]

2140

2141

/* Instantiate the lowpass helpers for the put_ and avg_ variants
 * (H264_LOWPASS is defined earlier in the file). */
H264_LOWPASS(put_ , op_put, op2_put)

2142

H264_LOWPASS(avg_ , op_avg, op2_avg)

2143

/* Instantiate the <put_|avg_>h264_qpel<SIZE>_mcXY_c functions for block sizes 4, 8 and 16. */
H264_MC(put_, 4)

2144

H264_MC(put_, 8)

2145

H264_MC(put_, 16)

2146

H264_MC(avg_, 4)

2147

H264_MC(avg_, 8)

2148

H264_MC(avg_, 16)

2149

2150

/* The operator macros are only needed for the instantiations above. */
#undef op_avg

2151

#undef op_put

2152

#undef op2_avg

2153

#undef op2_put

2154

#endif

2155

2156

/**
 * Horizontal 4-tap filter over rows of 8 pixels.
 * Every output is cm[(9*(p[x] + p[x+1]) - (p[x-1] + p[x+2]) + 8) >> 4],
 * so each row reads src[-1] .. src[9].
 * @param dst       destination, 8 bytes written per row
 * @param src       source, addressed from one pixel left to two pixels right of the 8-wide row
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param h         number of rows to filter
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++) {
            const int inner = src[x] + src[x + 1];
            const int outer = src[x - 1] + src[x + 2];

            dst[x] = clip[(9 * inner - outer + 8) >> 4];
        }
        dst += dstStride;
        src += srcStride;
    }
}

2173

2174

/**
 * Vertical counterpart of wmv2_mspel8_h_lowpass: filters w columns of 8 pixels.
 * Every output is cm[(9*(p[y] + p[y+1]) - (p[y-1] + p[y+2]) + 8) >> 4],
 * so each column reads the rows -1 .. 9 relative to src.
 * @param dst       destination, 8 rows written per column
 * @param src       source, addressed from one row above to two rows below the 8-high column
 * @param dstStride distance in bytes between destination rows
 * @param srcStride distance in bytes between source rows
 * @param w         number of columns to filter
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    int col, k;

    for (col = 0; col < w; col++) {
        int tap[11]; /* tap[k] is the sample (k-1) rows below src */

        /* fetch the whole column before any store, as the unrolled form did */
        for (k = 0; k < 11; k++)
            tap[k] = src[(k - 1) * srcStride];

        for (k = 0; k < 8; k++)
            dst[k * dstStride] = clip[(9 * (tap[k + 1] + tap[k + 2]) - (tap[k] + tap[k + 3]) + 8) >> 4];

        src++;
        dst++;
    }
}

2202

2203

/** mspel position (0,0): no filtering, hands the 8 rows straight to put_pixels8_c(). */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}

2206

2207

/**
 * mspel position (1,0): horizontally filters the block into a packed 8x8
 * scratch buffer, then lets put_pixels8_l2() combine src with that buffer.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}

2212

2213

/** mspel position (2,0): the horizontal lowpass output is written directly to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

2216

2217

/**
 * mspel position (3,0): like position (1,0), except that put_pixels8_l2()
 * combines the filtered block with src shifted one pixel to the right.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}

2222

2223

/** mspel position (0,2): the vertical lowpass output is written directly to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

2226

2227

/**
 * mspel position (1,2).
 * Builds two packed 8x8 intermediates and lets put_pixels8_l2() combine them:
 *  - vOnly: vertical lowpass of src
 *  - hv:    vertical lowpass of the horizontally filtered block
 * The horizontal pass covers 11 rows starting one row above src, because the
 * vertical filter needs one row above and two rows below the 8 output rows.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hRows[8 * 11];
    uint8_t vOnly[8 * 8];
    uint8_t hv[8 * 8];

    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vOnly, src, 8, stride, 8);
    /* hRows + 8 skips the extra top row so the filter is centred on row 0 */
    wmv2_mspel8_v_lowpass(hv, hRows + 8, 8, 8, 8);
    put_pixels8_l2(dst, vOnly, hv, stride, 8, 8, 8);
}

2236

/**
 * mspel position (3,2).
 * Same scheme as position (1,2), but the vertical-only intermediate is taken
 * from src shifted one pixel to the right.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hRows[8 * 11];
    uint8_t vOnly[8 * 8];
    uint8_t hv[8 * 8];

    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vOnly, src + 1, 8, stride, 8);
    /* hRows + 8 skips the extra top row so the filter is centred on row 0 */
    wmv2_mspel8_v_lowpass(hv, hRows + 8, 8, 8, 8);
    put_pixels8_l2(dst, vOnly, hv, stride, 8, 8, 8);
}

2245

/**
 * mspel position (2,2): horizontal lowpass into an 11-row packed scratch
 * buffer (starting one row above src), then vertical lowpass of that buffer
 * straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hRows[8 * 11];

    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hRows + 8, stride, 8, 8);
}

2250

2251

2252

/**
 * Sum of absolute differences between two 16x16 pixel blocks.
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, used for both blocks
 * @return the accumulated |pix1 - pix2| over all 256 positions
 */
static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2279

2280

/**
 * 16x16 sum of absolute differences where every reference sample is
 * avg2() of a pix2 pixel and its right-hand neighbour.
 * Reads 17 columns of pix2 per row.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2307

2308

/**
 * 16x16 sum of absolute differences where every reference sample is
 * avg2() of a pix2 pixel and the pixel directly below it.
 * Reads 17 rows of pix2.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2337

2338

/**
 * 16x16 sum of absolute differences where every reference sample is
 * avg4() of the 2x2 pix2 neighbourhood whose top-left corner is the pixel.
 * Reads 17 columns by 17 rows of pix2.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2367

2368

/**
 * Sum of absolute differences between two 8x8 pixel blocks.
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, used for both blocks
 * @return the accumulated |pix1 - pix2| over all 64 positions
 */
static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2387

2388

/**
 * 8x8 sum of absolute differences where every reference sample is
 * avg2() of a pix2 pixel and its right-hand neighbour.
 * Reads 9 columns of pix2 per row.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2407

2408

/**
 * 8x8 sum of absolute differences where every reference sample is
 * avg2() of a pix2 pixel and the pixel directly below it.
 * Reads 9 rows of pix2.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2429

2430

/**
 * 8x8 sum of absolute differences where every reference sample is
 * avg4() of the 2x2 pix2 neighbourhood whose top-left corner is the pixel.
 * Reads 9 columns by 9 rows of pix2.
 * @param pix1      block being compared
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, used for both blocks
 */
static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}

2451

2452

/**
 * Adapter giving pix_abs16x16_c() the (context, a, b, stride) signature;
 * the context pointer s is not used.
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    (void)s;
    return pix_abs16x16_c(a, b, stride);
}

2455

2456

/**
 * Adapter giving pix_abs8x8_c() the (context, a, b, stride) signature;
 * the context pointer s is not used.
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    (void)s;
    return pix_abs8x8_c(a, b, stride);
}

2459

2460

/**
 * Permutes an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non-zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the scantable in use; it is only used to speed the permutation up, the block is not
 *                  (inverse) permuted to scantable order!
 */

2467

2468

/*
 * Moves the coefficients of block to their permuted positions.
 * Only the first last+1 scantable entries are visited; nothing happens when
 * last <= 0. The visited coefficients are first copied aside and zeroed, then
 * written back at permutation[position], so overlapping source and target
 * positions cannot clobber each other.
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    DCTELEM saved[64];
    int pos;

    if (last <= 0)
        return;
    //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms

    /* pass 1: stash every visited coefficient and clear its old slot */
    for (pos = 0; pos <= last; pos++) {
        const int idx = scantable[pos];

        saved[idx] = block[idx];
        block[idx] = 0;
    }

    /* pass 2: drop each stashed coefficient into its permuted slot */
    for (pos = 0; pos <= last; pos++) {
        const int idx = scantable[pos];

        block[permutation[idx]] = saved[idx];
    }
}

2488

2489

/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */

2491

2492

/* Zeroes six consecutive 64-coefficient blocks starting at blocks. */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}

2496

2497

/**
 * Adds src to dst byte by byte: dst[i] += src[i] for i in [0, w).
 * Sums wrap modulo 256 because the destination is uint8_t.
 * Nothing is touched when w <= 0.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    int pos;

    for (pos = 0; pos < w; pos++)
        dst[pos] += src[pos];
}

2512

2513

/**
 * Byte-wise difference: dst[i] = src1[i] - src2[i] for i in [0, w).
 * Differences wrap modulo 256 because the destination is uint8_t.
 * Nothing is touched when w <= 0.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    int pos;

    for (pos = 0; pos < w; pos++)
        dst[pos] = src1[pos] - src2[pos];
}

2528

2529

/*
 * Butterfly helpers for the Hadamard transforms below.
 *
 * BUTTERFLY2(o1,o2,i1,i2): o1 = i1 + i2, o2 = i1 - i2.
 * BUTTERFLY1(x,y):         in-place version, x = x + y and y = x - y using
 *                          the values x and y had on entry.
 * BUTTERFLYA(x,y):         |x + y| + |x - y| as an expression.
 *
 * The two statement-like macros are wrapped in do{}while(0) so that
 * "BUTTERFLYn(...);" is exactly one statement and stays correct inside an
 * unbraced if/else. The temporaries of BUTTERFLY1 carry a bf1_ prefix so that
 * arguments named a or b are not shadowed.
 * Arguments are evaluated more than once; do not pass expressions with side effects.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
}while(0)

#define BUTTERFLY1(x,y) \
do{\
    const int bf1_a_= (x);\
    const int bf1_b_= (y);\
    (x)= bf1_a_+bf1_b_;\
    (y)= bf1_a_-bf1_b_;\
}while(0)

#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))

2543

2544

/**
 * Sum of absolute 8x8 Hadamard-transformed differences between src and dst.
 * A butterfly network is run over each row of (src - dst), then over each
 * column; the last column stage is folded into the accumulation through
 * BUTTERFLYA. The context pointer s is not used.
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride distance in bytes between rows, used for both blocks
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int temp[64];
    int sum = 0;
    int i;

    /* row pass: three butterfly stages over the pixel differences */
    for (i = 0; i < 8; i++) {
        int * const row = temp + 8*i;
        const uint8_t * const sp = src + stride*i;
        const uint8_t * const dp = dst + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0]-dp[0], sp[1]-dp[1]);
        BUTTERFLY2(row[2], row[3], sp[2]-dp[2], sp[3]-dp[3]);
        BUTTERFLY2(row[4], row[5], sp[4]-dp[4], sp[5]-dp[5]);
        BUTTERFLY2(row[6], row[7], sp[6]-dp[6], sp[7]-dp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass: two stored stages, the third is summed as absolute values */
    for (i = 0; i < 8; i++) {
        int * const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}

2593

2594

/**
 * Sum of absolute 8x8 Hadamard-transformed values of (src - mean).
 * Same butterfly network as hadamard8_diff_c, with the constant mean
 * subtracted from every pixel instead of a second block.
 * @param src    8x8 block
 * @param stride distance in bytes between rows
 * @param mean   value subtracted from every pixel before the transform
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum = 0;
    int i;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* row pass: three butterfly stages over the mean-removed pixels */
    for (i = 0; i < 8; i++) {
        int * const row = temp + 8*i;
        const uint8_t * const sp = src + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0]-mean, sp[1]-mean);
        BUTTERFLY2(row[2], row[3], sp[2]-mean, sp[3]-mean);
        BUTTERFLY2(row[4], row[5], sp[4]-mean, sp[5]-mean);
        BUTTERFLY2(row[6], row[7], sp[6]-mean, sp[7]-mean);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass: two stored stages, the third is summed as absolute values */
    for (i = 0; i < 8; i++) {
        int * const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}

2637

2638

static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){

2639

MpegEncContext * const s= (MpegEncContext *)c;

2640

uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];

2641

DCTELEM * const temp= (DCTELEM*)aligned_temp;

2642

int sum=0, i;

2643

2644

s->dsp.diff_pixels(temp, src1, src2, stride);

2645

s->dsp.fdct(temp);

2646

2647

for(i=0; i<64; i++)

2648

sum+= ABS(temp[i]);

2649

2650

return sum;

2651

}

2652

2653

/* Local prototype so that quant_psnr8x8_c() below can call simple_idct() directly. */
void simple_idct(DCTELEM *block); //FIXME

2654

2655

static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){

2656

MpegEncContext * const s= (MpegEncContext *)c;

2657

uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];

2658

DCTELEM * const temp= (DCTELEM*)aligned_temp;

2659

DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;

2660

int sum=0, i;

2661

2662

s->mb_intra=0;

2663

2664

s->dsp.diff_pixels(temp, src1, src2, stride);

2665

2666

memcpy(bak, temp, 64*sizeof(DCTELEM));

2667

2668

s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

2669

s->dct_unquantize(s, temp, 0, s->qscale);

2670

simple_idct(temp); //FIXME

2671

2672

for(i=0; i<64; i++)

2673

sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

2674

2675

return sum;

2676

}

2677

2678

static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){

2679

MpegEncContext * const s= (MpegEncContext *)c;

2680

const uint8_t *scantable= s->intra_scantable.permutated;

2681

uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];

2682

uint64_t __align8 aligned_bak[stride];

2683

DCTELEM * const temp= (DCTELEM*)aligned_temp;

2684

uint8_t * const bak= (uint8_t*)aligned_bak;

2685

int i, last, run, bits, level, distoration, start_i;

2686

const int esc_length= s->ac_esc_length;

2687

uint8_t * length;

2688

uint8_t * last_length;

2689

2690

for(i=0; i<8; i++){

2691

((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];

2692

((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];

2693

}

2694

2695

s->dsp.diff_pixels(temp, src1, src2, stride);

2696

2697

s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

2698

2699

bits=0;

2700

2701

if (s->mb_intra) {

2702

start_i = 1;

2703

length = s->intra_ac_vlc_length;

2704

last_length= s->intra_ac_vlc_last_length;

2705

bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma

2706

} else {

2707

start_i = 0;

2708

length = s->inter_ac_vlc_length;

2709

last_length= s->inter_ac_vlc_last_length;

2710

}

2711

2712

if(last>=start_i){

2713

run=0;

2714

for(i=start_i; i<last; i++){

2715

int j= scantable[i];

2716

level= temp[j];

2717

2718

if(level){

2719

level+=64;

2720

if((level&(~127)) == 0){

2721

bits+= length[UNI_AC_ENC_INDEX(run, level)];

2722

}else

2723

bits+= esc_length;

2724

run=0;

2725

}else

2726

run++;

2727

}

2728

i= scantable[last];

2729

2730

level= temp[i] + 64;

2731

2732

assert(level - 64);

2733

2734

if((level&(~127)) == 0){

2735

bits+= last_length[UNI_AC_ENC_INDEX(run, level)];

2736

}else

2737

bits+= esc_length;

2738

2739

}

2740

2741

if(last>=0){

2742

s->dct_unquantize(s, temp, 0, s->qscale);

2743

}

2744

2745

s->dsp.idct_add(bak, stride, temp);

2746

2747

distoration= s->dsp.sse[1](NULL, bak, src1, stride);

2748

2749

return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);

2750

}

2751

2752

static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){

2753

MpegEncContext * const s= (MpegEncContext *)c;

2754

const uint8_t *scantable= s->intra_scantable.permutated;

2755

uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];

2756

DCTELEM * const temp= (DCTELEM*)aligned_temp;

2757

int i, last, run, bits, level, start_i;

2758

const int esc_length= s->ac_esc_length;

2759

uint8_t * length;

2760

uint8_t * last_length;

2761

2762

s->dsp.diff_pixels(temp, src1, src2, stride);

2763

2764

s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

2765

2766

bits=0;

2767

2768

if (s->mb_intra) {

2769

start_i = 1;

2770

length = s->intra_ac_vlc_length;

2771

last_length= s->intra_ac_vlc_last_length;

2772

bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma

2773

} else {

2774

start_i = 0;

2775

length = s->inter_ac_vlc_length;

2776

last_length= s->inter_ac_vlc_last_length;

2777

}

2778

2779

if(last>=start_i){

2780

run=0;

2781

for(i=start_i; i<last; i++){

2782

int j= scantable[i];

2783

level= temp[j];

2784

2785

if(level){

2786

level+=64;

2787

if((level&(~127)) == 0){

2788

bits+= length[UNI_AC_ENC_INDEX(run, level)];

2789

}else

2790

bits+= esc_length;

2791

run=0;

2792

}else

2793

run++;

2794

}

2795

i= scantable[last];

2796

2797

level= temp[i] + 64;

2798

2799

assert(level - 64);

2800

2801

if((level&(~127)) == 0){

2802

bits+= last_length[UNI_AC_ENC_INDEX(run, level)];

2803

}else

2804

bits+= esc_length;

2805

}

2806

2807

return bits;

2808

}

2809

2810

2811

/*
 * Generate the 16x16 comparison functions that dsputil_init() stores in slot [0]
 * of the hadamard8_diff / dct_sad / quant_psnr / rd / bit tables.
 * NOTE(review): WARPER88_1616 is defined outside this part of the file; presumably
 * it wraps the 8x8 function named first into a 16x16 function named second -- confirm.
 */
WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)

2812

WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)

2813

WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)

2814

WARPER88_1616(rd8x8_c, rd16x16_c)

2815

WARPER88_1616(bit8x8_c, bit16x16_c)

2816

2817

/* XXX: these functions should be removed ASAP, once all IDCTs are
   converted */

2819

/**
 * Runs j_rev_dct() on block in place, then passes the result to
 * put_pixels_clamped_c() to be written to dest.
 * @param dest      destination pixels
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the inverse transform
 */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct(block);
    put_pixels_clamped_c(block, dest, line_size);
}

2824

/**
 * Runs j_rev_dct() on block in place, then passes the result to
 * add_pixels_clamped_c() to be combined with dest.
 * @param dest      destination pixels
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the inverse transform
 */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct(block);
    add_pixels_clamped_c(block, dest, line_size);
}

2829

2830

/* init static data */

2831

void dsputil_static_init(void)

2832

{

2833

int i;

2834

2835

for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;

2836

for(i=0;i<MAX_NEG_CROP;i++) {

2837

cropTbl[i] = 0;

2838

cropTbl[i + MAX_NEG_CROP + 256] = 255;

2839

}

2840

2841

for(i=0;i<512;i++) {

2842

squareTbl[i] = (i - 256) * (i - 256);

2843

}

2844

2845

for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;

2846

}

2847

2848

2849

void dsputil_init(DSPContext* c, AVCodecContext *avctx)

2850

{

2851

int i;

2852

2853

#ifdef CONFIG_ENCODERS

2854

if(avctx->dct_algo==FF_DCT_FASTINT)

2855

c->fdct = fdct_ifast;

2856

else

2857

c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default

2858

#endif //CONFIG_ENCODERS

2859

2860

if(avctx->idct_algo==FF_IDCT_INT){

2861

c->idct_put= ff_jref_idct_put;

2862

c->idct_add= ff_jref_idct_add;

2863

c->idct = j_rev_dct;

2864

c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;

2865

}else{ //accurate/default

2866

c->idct_put= simple_idct_put;

2867

c->idct_add= simple_idct_add;

2868

c->idct = simple_idct;

2869

c->idct_permutation_type= FF_NO_IDCT_PERM;

2870

}

2871

2872

c->get_pixels = get_pixels_c;

2873

c->diff_pixels = diff_pixels_c;

2874

c->put_pixels_clamped = put_pixels_clamped_c;

2875

c->add_pixels_clamped = add_pixels_clamped_c;

2876

c->gmc1 = gmc1_c;

2877

c->gmc = gmc_c;

2878

c->clear_blocks = clear_blocks_c;

2879

c->pix_sum = pix_sum_c;

2880

c->pix_norm1 = pix_norm1_c;

2881

c->sse[0]= sse16_c;

2882

c->sse[1]= sse8_c;

2883

2884

/* TODO [0] 16 [1] 8 */

2885

c->pix_abs16x16 = pix_abs16x16_c;

2886

c->pix_abs16x16_x2 = pix_abs16x16_x2_c;

2887

c->pix_abs16x16_y2 = pix_abs16x16_y2_c;

2888

c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;

2889

c->pix_abs8x8 = pix_abs8x8_c;

2890

c->pix_abs8x8_x2 = pix_abs8x8_x2_c;

2891

c->pix_abs8x8_y2 = pix_abs8x8_y2_c;

2892

c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;

2893

2894

#define dspfunc(PFX, IDX, NUM) \

2895

c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \

2896

c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \

2897

c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \

2898

c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

2899

2900

dspfunc(put, 0, 16);

2901

dspfunc(put_no_rnd, 0, 16);

2902

dspfunc(put, 1, 8);

2903

dspfunc(put_no_rnd, 1, 8);

2904

dspfunc(put, 2, 4);

2905

dspfunc(put, 3, 2);

2906

2907

dspfunc(avg, 0, 16);

2908

dspfunc(avg_no_rnd, 0, 16);

2909

dspfunc(avg, 1, 8);

2910

dspfunc(avg_no_rnd, 1, 8);

2911

dspfunc(avg, 2, 4);

2912

dspfunc(avg, 3, 2);

2913

#undef dspfunc

2914

2915

c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;

2916

c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;

2917

c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;

2918

c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;

2919

c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;

2920

c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;

2921

c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;

2922

c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;

2923

c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

2924

2925

c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;

2926

c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;

2927

c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;

2928

c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;

2929

c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;

2930

c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;

2931

c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;

2932

c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;

2933

c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

2934

2935

#define dspfunc(PFX, IDX, NUM) \

2936

c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \

2937

c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \

2938

c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \

2939

c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \

2940

c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \

2941

c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \

2942

c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \

2943

c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \

2944

c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \

2945

c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \

2946

c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \

2947

c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \

2948

c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \

2949

c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \

2950

c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \

2951

c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

2952

2953

dspfunc(put_qpel, 0, 16);

2954

dspfunc(put_no_rnd_qpel, 0, 16);

2955

2956

dspfunc(avg_qpel, 0, 16);

2957

/* dspfunc(avg_no_rnd_qpel, 0, 16); */

2958

2959

dspfunc(put_qpel, 1, 8);

2960

dspfunc(put_no_rnd_qpel, 1, 8);

2961

2962

dspfunc(avg_qpel, 1, 8);

2963

/* dspfunc(avg_no_rnd_qpel, 1, 8); */

2964

2965

dspfunc(put_h264_qpel, 0, 16);

2966

dspfunc(put_h264_qpel, 1, 8);

2967

dspfunc(put_h264_qpel, 2, 4);

2968

dspfunc(avg_h264_qpel, 0, 16);

2969

dspfunc(avg_h264_qpel, 1, 8);

2970

dspfunc(avg_h264_qpel, 2, 4);

2971

2972

#undef dspfunc

2973

c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;

2974

c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;

2975

c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;

2976

c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;

2977

c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;

2978

c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

2979

2980

c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;

2981

c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;

2982

c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;

2983

c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;

2984

c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;

2985

c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;

2986

c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;

2987

c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

2988

2989

c->hadamard8_diff[0]= hadamard8_diff16_c;

2990

c->hadamard8_diff[1]= hadamard8_diff_c;

2991

c->hadamard8_abs = hadamard8_abs_c;

2992

2993

c->dct_sad[0]= dct_sad16x16_c;

2994

c->dct_sad[1]= dct_sad8x8_c;

2995

2996

c->sad[0]= sad16x16_c;

2997

c->sad[1]= sad8x8_c;

2998

2999

c->quant_psnr[0]= quant_psnr16x16_c;

3000

c->quant_psnr[1]= quant_psnr8x8_c;

3001

3002

c->rd[0]= rd16x16_c;

3003

c->rd[1]= rd8x8_c;

3004

3005

c->bit[0]= bit16x16_c;

3006

c->bit[1]= bit8x8_c;

3007

3008

c->add_bytes= add_bytes_c;

3009

c->diff_bytes= diff_bytes_c;

3010

c->bswap_buf= bswap_buf;

3011

3012

#ifdef HAVE_MMX

3013

dsputil_init_mmx(c, avctx);

3014

#endif

3015

#ifdef ARCH_ARMV4L

3016

dsputil_init_armv4l(c, avctx);

3017

#endif

3018

#ifdef HAVE_MLIB

3019

dsputil_init_mlib(c, avctx);

3020

#endif

3021

#ifdef ARCH_ALPHA

3022

dsputil_init_alpha(c, avctx);

3023

#endif

3024

#ifdef ARCH_POWERPC

3025

dsputil_init_ppc(c, avctx);

3026

#endif

3027

#ifdef HAVE_MMI

3028

dsputil_init_mmi(c, avctx);

3029

#endif

3030

#ifdef ARCH_SH4

3031

dsputil_init_sh4(c,avctx);

3032

#endif

3033

3034

switch(c->idct_permutation_type){

3035

case FF_NO_IDCT_PERM:

3036

for(i=0; i<64; i++)

3037

c->idct_permutation[i]= i;

3038

break;

3039

case FF_LIBMPEG2_IDCT_PERM:

3040

for(i=0; i<64; i++)

3041

c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);

3042

break;

3043

case FF_SIMPLE_IDCT_PERM:

3044

for(i=0; i<64; i++)

3045

c->idct_permutation[i]= simple_mmx_permutation[i];

3046

break;

3047

case FF_TRANSPOSE_IDCT_PERM:

3048

for(i=0; i<64; i++)

3049

c->idct_permutation[i]= ((i&7)<<3) | (i>>3);

3050

break;

3051

default:

3052

fprintf(stderr, "Internal error, IDCT permutation not set\n");

3053

}

3054

}

3055

Older »