~ubuntu-branches/ubuntu/jaunty/xvidcap/jaunty-proposed

« back to all changes in this revision

Viewing changes to ffmpeg/libavcodec/i386/dsputil_mmx_avg.h

Committer: Bazaar Package Importer
Author(s): Christian Marillat
Date: 2004-08-29 10:53:42 UTC
Revision ID: james.westby@ubuntu.com-20040829105342-qgmnry37eadfkoxx

Tags: upstream-1.1.3

Import upstream version 1.1.3

files added:

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

TODO

Xw/Base.c

Xw/Base.h

Xw/BaseP.h

Xw/Box.c

Xw/Box.h

Xw/BoxP.h

Xw/Button.c

Xw/Button.h

Xw/ButtonP.h

Xw/Field.c

Xw/Field.h

Xw/FieldP.h

Xw/Label.c

Xw/Label.h

Xw/LabelP.h

Xw/Makefile.am

Xw/Makefile.in

Xw/README

Xw/RootIcon.c

Xw/RootIcon.h

Xw/RootIconP.h

Xw/Toggle.c

Xw/Toggle.h

Xw/ToggleP.h

Xw/simple.c

Xw/testxw.c

Xw/testxw.xbm

aclocal.m4

compile

config.h.in

configure

configure.ac

depcomp

ffmpeg

ffmpeg/.nbattrs

ffmpeg/COPYING

ffmpeg/CREDITS

ffmpeg/Changelog

ffmpeg/INSTALL

ffmpeg/Makefile

ffmpeg/README

ffmpeg/berrno.h

ffmpeg/cmdutils.c

ffmpeg/cmdutils.h

ffmpeg/configure

ffmpeg/cygwin_inttypes.h

ffmpeg/doc

ffmpeg/doc/.nbattrs

ffmpeg/doc/Makefile

ffmpeg/doc/TODO

ffmpeg/doc/faq.html

ffmpeg/doc/faq.texi

ffmpeg/doc/ffmpeg-doc.html

ffmpeg/doc/ffmpeg-doc.texi

ffmpeg/doc/ffmpeg.1

ffmpeg/doc/ffmpeg_powerpc_performance_evaluation_howto.txt

ffmpeg/doc/ffplay-doc.html

ffmpeg/doc/ffplay-doc.texi

ffmpeg/doc/ffplay.1

ffmpeg/doc/ffserver-doc.html

ffmpeg/doc/ffserver-doc.texi

ffmpeg/doc/ffserver.1

ffmpeg/doc/ffserver.conf

ffmpeg/doc/hooks.html

ffmpeg/doc/hooks.texi

ffmpeg/doc/optimization.txt

ffmpeg/doc/texi2pod.pl

ffmpeg/ffinstall.nsi

ffmpeg/ffmpeg.c

ffmpeg/ffplay.c

ffmpeg/ffserver.c

ffmpeg/ffserver.h

ffmpeg/libavcodec

ffmpeg/libavcodec/.nbattrs

ffmpeg/libavcodec/4xm.c

ffmpeg/libavcodec/Doxyfile

ffmpeg/libavcodec/Makefile

ffmpeg/libavcodec/a52dec.c

ffmpeg/libavcodec/ac3.h

ffmpeg/libavcodec/ac3dec.c

ffmpeg/libavcodec/ac3enc.c

ffmpeg/libavcodec/ac3tab.h

ffmpeg/libavcodec/adpcm.c

ffmpeg/libavcodec/allcodecs.c

ffmpeg/libavcodec/alpha

ffmpeg/libavcodec/alpha/asm.h

ffmpeg/libavcodec/alpha/dsputil_alpha.c

ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S

ffmpeg/libavcodec/alpha/motion_est_alpha.c

ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S

ffmpeg/libavcodec/alpha/mpegvideo_alpha.c

ffmpeg/libavcodec/alpha/regdef.h

ffmpeg/libavcodec/alpha/simple_idct_alpha.c

ffmpeg/libavcodec/amr.c

ffmpeg/libavcodec/apiexample.c

ffmpeg/libavcodec/armv4l

ffmpeg/libavcodec/armv4l/dsputil_arm.c

ffmpeg/libavcodec/armv4l/jrevdct_arm.S

ffmpeg/libavcodec/armv4l/mpegvideo_arm.c

ffmpeg/libavcodec/armv4l/simple_idct_arm.S

ffmpeg/libavcodec/asv1.c

ffmpeg/libavcodec/avcodec.c

ffmpeg/libavcodec/avcodec.h

ffmpeg/libavcodec/bswap.h

ffmpeg/libavcodec/cabac.c

ffmpeg/libavcodec/cabac.h

ffmpeg/libavcodec/cljr.c

ffmpeg/libavcodec/common.c

ffmpeg/libavcodec/common.h

ffmpeg/libavcodec/cyuv.c

ffmpeg/libavcodec/dct-test.c

ffmpeg/libavcodec/dpcm.c

ffmpeg/libavcodec/dsputil.c

ffmpeg/libavcodec/dsputil.h

ffmpeg/libavcodec/dv.c

ffmpeg/libavcodec/dvdata.h

ffmpeg/libavcodec/error_resilience.c

ffmpeg/libavcodec/eval.c

ffmpeg/libavcodec/faad.c

ffmpeg/libavcodec/fastmemcpy.h

ffmpeg/libavcodec/fdctref.c

ffmpeg/libavcodec/fft-test.c

ffmpeg/libavcodec/fft.c

ffmpeg/libavcodec/ffv1.c

ffmpeg/libavcodec/golomb.c

ffmpeg/libavcodec/golomb.h

ffmpeg/libavcodec/h263.c

ffmpeg/libavcodec/h263data.h

ffmpeg/libavcodec/h263dec.c

ffmpeg/libavcodec/h264.c

ffmpeg/libavcodec/h264data.h

ffmpeg/libavcodec/huffyuv.c

ffmpeg/libavcodec/i386

ffmpeg/libavcodec/i386/cputest.c

ffmpeg/libavcodec/i386/dsputil_mmx.c

ffmpeg/libavcodec/i386/dsputil_mmx_avg.h

ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h

ffmpeg/libavcodec/i386/fdct_mmx.c

ffmpeg/libavcodec/i386/fft_sse.c

ffmpeg/libavcodec/i386/idct_mmx.c

ffmpeg/libavcodec/i386/mmx.h

ffmpeg/libavcodec/i386/motion_est_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c

ffmpeg/libavcodec/i386/simple_idct_mmx.c

ffmpeg/libavcodec/imgconvert.c

ffmpeg/libavcodec/imgconvert_template.h

ffmpeg/libavcodec/imgresample.c

ffmpeg/libavcodec/indeo3.c

ffmpeg/libavcodec/indeo3data.h

ffmpeg/libavcodec/interplayvideo.c

ffmpeg/libavcodec/jfdctfst.c

ffmpeg/libavcodec/jfdctint.c

ffmpeg/libavcodec/jrevdct.c

ffmpeg/libavcodec/liba52

ffmpeg/libavcodec/liba52/a52.h

ffmpeg/libavcodec/liba52/a52_internal.h

ffmpeg/libavcodec/liba52/a52_util.h

ffmpeg/libavcodec/liba52/bit_allocate.c

ffmpeg/libavcodec/liba52/bitstream.c

ffmpeg/libavcodec/liba52/bitstream.h

ffmpeg/libavcodec/liba52/crc.c

ffmpeg/libavcodec/liba52/downmix.c

ffmpeg/libavcodec/liba52/imdct.c

ffmpeg/libavcodec/liba52/mm_accel.h

ffmpeg/libavcodec/liba52/parse.c

ffmpeg/libavcodec/liba52/resample.c

ffmpeg/libavcodec/liba52/resample_c.c

ffmpeg/libavcodec/liba52/resample_mmx.c

ffmpeg/libavcodec/liba52/tables.h

ffmpeg/libavcodec/libpostproc

ffmpeg/libavcodec/libpostproc/.nbattrs

ffmpeg/libavcodec/libpostproc/Makefile

ffmpeg/libavcodec/libpostproc/mangle.h

ffmpeg/libavcodec/libpostproc/postprocess.c

ffmpeg/libavcodec/libpostproc/postprocess.h

ffmpeg/libavcodec/libpostproc/postprocess_internal.h

ffmpeg/libavcodec/libpostproc/postprocess_template.c

ffmpeg/libavcodec/mace.c

ffmpeg/libavcodec/mdct.c

ffmpeg/libavcodec/mdec.c

ffmpeg/libavcodec/mem.c

ffmpeg/libavcodec/mjpeg.c

ffmpeg/libavcodec/mlib

ffmpeg/libavcodec/mlib/dsputil_mlib.c

ffmpeg/libavcodec/motion_est.c

ffmpeg/libavcodec/motion_est_template.c

ffmpeg/libavcodec/motion_test.c

ffmpeg/libavcodec/mp3lameaudio.c

ffmpeg/libavcodec/mpeg12.c

ffmpeg/libavcodec/mpeg12data.h

ffmpeg/libavcodec/mpeg4data.h

ffmpeg/libavcodec/mpegaudio.c

ffmpeg/libavcodec/mpegaudio.h

ffmpeg/libavcodec/mpegaudiodec.c

ffmpeg/libavcodec/mpegaudiodectab.h

ffmpeg/libavcodec/mpegaudiotab.h

ffmpeg/libavcodec/mpegvideo.c

ffmpeg/libavcodec/mpegvideo.h

ffmpeg/libavcodec/msmpeg4.c

ffmpeg/libavcodec/msmpeg4data.h

ffmpeg/libavcodec/oggvorbis.c

ffmpeg/libavcodec/oggvorbis.h

ffmpeg/libavcodec/opts.c

ffmpeg/libavcodec/pcm.c

ffmpeg/libavcodec/ppc

ffmpeg/libavcodec/ppc/dsputil_altivec.c

ffmpeg/libavcodec/ppc/dsputil_altivec.h

ffmpeg/libavcodec/ppc/dsputil_ppc.c

ffmpeg/libavcodec/ppc/dsputil_ppc.h

ffmpeg/libavcodec/ppc/fft_altivec.c

ffmpeg/libavcodec/ppc/gcc_fixes.h

ffmpeg/libavcodec/ppc/gmc_altivec.c

ffmpeg/libavcodec/ppc/idct_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_ppc.c

ffmpeg/libavcodec/ps2

ffmpeg/libavcodec/ps2/dsputil_mmi.c

ffmpeg/libavcodec/ps2/idct_mmi.c

ffmpeg/libavcodec/ps2/mmi.h

ffmpeg/libavcodec/ps2/mpegvideo_mmi.c

ffmpeg/libavcodec/ra144.c

ffmpeg/libavcodec/ra144.h

ffmpeg/libavcodec/ra288.c

ffmpeg/libavcodec/ra288.h

ffmpeg/libavcodec/ratecontrol.c

ffmpeg/libavcodec/raw.c

ffmpeg/libavcodec/resample.c

ffmpeg/libavcodec/roqvideo.c

ffmpeg/libavcodec/rv10.c

ffmpeg/libavcodec/sh4

ffmpeg/libavcodec/sh4/dsputil_align.c

ffmpeg/libavcodec/sh4/dsputil_sh4.c

ffmpeg/libavcodec/sh4/idct_sh4.c

ffmpeg/libavcodec/sh4/qpel.c

ffmpeg/libavcodec/simple_idct.c

ffmpeg/libavcodec/simple_idct.h

ffmpeg/libavcodec/svq1.c

ffmpeg/libavcodec/svq1_cb.h

ffmpeg/libavcodec/svq1_vlc.h

ffmpeg/libavcodec/svq3.c

ffmpeg/libavcodec/utils.c

ffmpeg/libavcodec/vcr1.c

ffmpeg/libavcodec/vp3.c

ffmpeg/libavcodec/vp3data.h

ffmpeg/libavcodec/wmadata.h

ffmpeg/libavcodec/wmadec.c

ffmpeg/libavcodec/wmv2.c

ffmpeg/libavcodec/xan.c

ffmpeg/libavcodec/xvmcvideo.c

ffmpeg/libavformat

ffmpeg/libavformat/.nbattrs

ffmpeg/libavformat/4xm.c

ffmpeg/libavformat/Makefile

ffmpeg/libavformat/allformats.c

ffmpeg/libavformat/amr.c

ffmpeg/libavformat/asf.c

ffmpeg/libavformat/au.c

ffmpeg/libavformat/audio.c

ffmpeg/libavformat/avformat.h

ffmpeg/libavformat/avi.h

ffmpeg/libavformat/avidec.c

ffmpeg/libavformat/avienc.c

ffmpeg/libavformat/avio.c

ffmpeg/libavformat/avio.h

ffmpeg/libavformat/aviobuf.c

ffmpeg/libavformat/barpainet.c

ffmpeg/libavformat/barpainet.h

ffmpeg/libavformat/beosaudio.cpp

ffmpeg/libavformat/crc.c

ffmpeg/libavformat/cutils.c

ffmpeg/libavformat/dv.c

ffmpeg/libavformat/dv1394.c

ffmpeg/libavformat/dv1394.h

ffmpeg/libavformat/dvcore.c

ffmpeg/libavformat/dvcore.h

ffmpeg/libavformat/ffm.c

ffmpeg/libavformat/file.c

ffmpeg/libavformat/flvdec.c

ffmpeg/libavformat/flvenc.c

ffmpeg/libavformat/framehook.c

ffmpeg/libavformat/framehook.h

ffmpeg/libavformat/gif.c

ffmpeg/libavformat/gifdec.c

ffmpeg/libavformat/grab.c

ffmpeg/libavformat/http.c

ffmpeg/libavformat/idroq.c

ffmpeg/libavformat/img.c

ffmpeg/libavformat/ipmovie.c

ffmpeg/libavformat/jpeg.c

ffmpeg/libavformat/mov.c

ffmpeg/libavformat/movenc.c

ffmpeg/libavformat/mp3.c

ffmpeg/libavformat/mpeg.c

ffmpeg/libavformat/mpegts.c

ffmpeg/libavformat/mpegts.h

ffmpeg/libavformat/mpegtsenc.c

ffmpeg/libavformat/mpjpeg.c

ffmpeg/libavformat/nut.c

ffmpeg/libavformat/ogg.c

ffmpeg/libavformat/os_support.c

ffmpeg/libavformat/os_support.h

ffmpeg/libavformat/png.c

ffmpeg/libavformat/pnm.c

ffmpeg/libavformat/psxstr.c

ffmpeg/libavformat/raw.c

ffmpeg/libavformat/rm.c

ffmpeg/libavformat/rtp.c

ffmpeg/libavformat/rtp.h

ffmpeg/libavformat/rtpproto.c

ffmpeg/libavformat/rtsp.c

ffmpeg/libavformat/rtsp.h

ffmpeg/libavformat/rtspcodes.h

ffmpeg/libavformat/swf.c

ffmpeg/libavformat/tcp.c

ffmpeg/libavformat/udp.c

ffmpeg/libavformat/utils.c

ffmpeg/libavformat/wav.c

ffmpeg/libavformat/wc3movie.c

ffmpeg/libavformat/yuv.c

ffmpeg/libavformat/yuv4mpeg.c

ffmpeg/output_example.c

ffmpeg/tests

ffmpeg/tests/.nbattrs

ffmpeg/tests/Makefile

ffmpeg/tests/audiogen.c

ffmpeg/tests/dsptest.c

ffmpeg/tests/ffmpeg.regression.ref

ffmpeg/tests/ffserver.regression.ref

ffmpeg/tests/lena.pnm

ffmpeg/tests/libav.regression.ref

ffmpeg/tests/regression.sh

ffmpeg/tests/rotozoom.c

ffmpeg/tests/rotozoom.regression.ref

ffmpeg/tests/server-regression.sh

ffmpeg/tests/test.conf

ffmpeg/tests/tiny_psnr.c

ffmpeg/tests/videogen.c

ffmpeg/vhook

ffmpeg/vhook/.nbattrs

ffmpeg/vhook/Makefile

ffmpeg/vhook/drawtext.c

ffmpeg/vhook/fish.c

ffmpeg/vhook/imlib2.c

ffmpeg/vhook/null.c

ffmpeg/vhook/ppm.c

ffmpeg/xvmc_render.h

getopt

getopt/README

getopt/getopt.c

getopt/getopt.h

getopt/getopt1.c

gt/COPYING.LIB

gt/Makefile.am

gt/Makefile.in

gt/README

gt/gnuavi.c

gt/gnuavi.h

gt/gnuplay.c

gt/gt.c

gt/gt.h

gt/gtapi.c

gt/gtapi.h

gt/gtatoms.h

gt/gttypes.h

gt/gtvr.c

gt/gtvr.h

gt/parse_gt.c

gt/ppm2qt.c

gt/ppm2qtvr.c

gt/sound.c

gt/sound.h

gt/video.c

gt/video.h

gt/xt.c

gt/xt.h

install-sh

man/gvidcap.man

man/index.bt

man/xvidcap.man

missing

mkinstalldirs

src/Makefile.am

src/Makefile.in

src/app_data.h

src/capture.c

src/capture.h

src/codecs.h

src/colors.c

src/colors.h

src/control.h

src/fallback.h

src/frame.c

src/frame.h

src/gtk2_control.c

src/gtk2_control.h

src/gtk2_frame.c

src/gtk2_frame.h

src/gtk2_options.c

src/gtk2_options.h

src/gtk2_support.c

src/gtk2_support.h

src/job.c

src/job.h

src/led_meter.c

src/led_meter.h

src/main.c

src/main.h

src/malloc.c

src/mkresfile.sh

src/mngutil.c

src/mngutil.h

src/options.c

src/realloc.c

src/util.c

src/util.h

src/video.c

src/video.h

src/xt_control.c

src/xt_control.h

src/xt_frame.c

src/xt_frame.h

src/xt_options.c

src/xt_options.h

src/xtoffmpeg.c

src/xtoffmpeg.h

src/xtojpg.c

src/xtojpg.h

src/xtomng.c

src/xtomng.h

src/xtopng.c

src/xtopng.h

src/xtopnm.c

src/xtopnm.h

src/xtoqtf.c

src/xtoqtf.h

src/xtoxwd.c

src/xtoxwd.h

src/xutil.c

src/xutil.h

xbm/animate.png

xbm/animate.xbm

xbm/edit.png

xbm/edit.xbm

xbm/help.xbm

xbm/mkvideo.xbm

xbm/move.png

xbm/move.xbm

xbm/next.png

xbm/next.xbm

xbm/pause.png

xbm/pause.xbm

xbm/prev.png

xbm/prev.xbm

xbm/record.png

xbm/record.xbm

xbm/record_off.png

xbm/record_on.png

xbm/select.png

xbm/select.xbm

xbm/step.png

xbm/step.xbm

xbm/stop.png

xbm/stop.xbm

Show diffs side-by-side

added added

removed removed

ffmpeg/libavcodec/i386/dsputil_mmx_avg.h

/*
 * DSP utils : average functions are compiled twice for 3dnow/mmx2
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
 * and improved by Zdenek Kabelac <kabi@users.sf.net>
 */

/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
 * clobber bug - now it will work with 2.95.2 and also with -fPIC
 */

/*
 * For each row, combine the 8 bytes at pixels with the 8 bytes at pixels + 1
 * using PAVGB and store the result to block.
 *
 * block     - destination, written 8 bytes per row
 * pixels    - source, 9 bytes are read per row (offsets 0..8)
 * line_size - byte distance between rows, used for both source and destination
 * h         - row count; the loop handles 4 rows per pass and only exits when
 *             h reaches exactly zero, so h must be a positive multiple of 4
 *
 * DEF and PAVGB are macros supplied by whatever includes this file; PAVGB is
 * presumably the packed-byte average mnemonic of the target CPU (TODO confirm).
 * The code applies 32-bit addl to the pointer operands and uses eax as scratch.
 */
static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

{

__asm __volatile(

"lea (%3, %3), %%eax \n\t" /* eax = 2 * line_size (two-row step) */

"1: \n\t"

"movq (%1), %%mm0 \n\t" /* row 0 */

"movq (%1, %3), %%mm1 \n\t" /* row 1 */

PAVGB" 1(%1), %%mm0 \n\t" /* combine with the same row shifted by one byte */

PAVGB" 1(%1, %3), %%mm1 \n\t"

"movq %%mm0, (%2) \n\t"

"movq %%mm1, (%2, %3) \n\t"

"addl %%eax, %1 \n\t" /* advance source by two rows */

"addl %%eax, %2 \n\t" /* advance destination by two rows */

"movq (%1), %%mm0 \n\t" /* rows 2 and 3: same pattern */

"movq (%1, %3), %%mm1 \n\t"

PAVGB" 1(%1), %%mm0 \n\t"

PAVGB" 1(%1, %3), %%mm1 \n\t"

"addl %%eax, %1 \n\t"

"movq %%mm0, (%2) \n\t"

"movq %%mm1, (%2, %3) \n\t"

"addl %%eax, %2 \n\t"

"subl $4, %0 \n\t" /* four rows done */

"jnz 1b \n\t"

:"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels (esi), %2 = block (edi) */

:"r" (line_size) /* %3 */

:"%eax", "memory");

}

/*
 * Combine two 8-byte-wide sources row by row with PAVGB and store to dst.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as consecutive 8-byte rows (offsets 0, 8,
 *              16, 24, then the pointer moves on by 32)
 * dstStride  - byte distance between destination rows
 * src1Stride - byte distance between src1 rows
 * h          - row count; 4 rows are handled per pass and the loop only exits
 *              when h reaches exactly zero, so h must be a positive multiple of 4
 *
 * DEF and PAVGB are macros supplied by whatever includes this file; PAVGB is
 * presumably the packed-byte average mnemonic of the target CPU (TODO confirm).
 */
static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)

{

__asm __volatile(

"1: \n\t"

"movq (%1), %%mm0 \n\t" /* src1 row 0 */

"addl %4, %1 \n\t"

"movq (%1), %%mm1 \n\t" /* src1 row 1 */

"addl %4, %1 \n\t"

PAVGB" (%2), %%mm0 \n\t" /* src2 row 0 */

PAVGB" 8(%2), %%mm1 \n\t" /* src2 row 1 */

"movq %%mm0, (%3) \n\t"

"addl %5, %3 \n\t"

"movq %%mm1, (%3) \n\t"

"addl %5, %3 \n\t"

"movq (%1), %%mm0 \n\t" /* src1 row 2 */

"addl %4, %1 \n\t"

"movq (%1), %%mm1 \n\t" /* src1 row 3 */

"addl %4, %1 \n\t"

PAVGB" 16(%2), %%mm0 \n\t" /* src2 row 2 */

PAVGB" 24(%2), %%mm1 \n\t" /* src2 row 3 */

"movq %%mm0, (%3) \n\t"

"addl %5, %3 \n\t"

"movq %%mm1, (%3) \n\t"

"addl %5, %3 \n\t"

"addl $32, %2 \n\t" /* four 8-byte src2 rows consumed */

"subl $4, %0 \n\t"

"jnz 1b \n\t"

:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* %0..%3 */

:"r"(src1Stride), "r"(dstStride) /* %4, %5 */

:"memory");

}

/*
 * 16-byte-wide version of the horizontal half-pel "put": for each row, the
 * bytes at pixels are combined with the bytes at pixels + 1 using PAVGB and
 * stored to block.
 *
 * block     - destination, written 16 bytes per row
 * pixels    - source, 17 bytes are read per row (offsets 0..16)
 * line_size - byte distance between rows, used for both source and destination
 * h         - row count; 4 rows are handled per pass and the loop only exits
 *             when h reaches exactly zero, so h must be a positive multiple of 4
 *
 * DEF and PAVGB are macros supplied by whatever includes this file; PAVGB is
 * presumably the packed-byte average mnemonic of the target CPU (TODO confirm).
 * The code applies 32-bit addl to the pointer operands and uses eax as scratch.
 */
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm __volatile(
        "lea (%3, %3), %%eax \n\t"      /* eax = 2 * line_size */
        "1: \n\t"
        /* rows 0 and 1: left 8 bytes in mm0/mm1, right 8 bytes in mm2/mm3 */
        "movq (%1), %%mm0 \n\t"
        "movq (%1, %3), %%mm1 \n\t"
        "movq 8(%1), %%mm2 \n\t"
        "movq 8(%1, %3), %%mm3 \n\t"
        PAVGB" 1(%1), %%mm0 \n\t"
        PAVGB" 1(%1, %3), %%mm1 \n\t"
        PAVGB" 9(%1), %%mm2 \n\t"
        PAVGB" 9(%1, %3), %%mm3 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "movq %%mm2, 8(%2) \n\t"
        "movq %%mm3, 8(%2, %3) \n\t"
        "addl %%eax, %1 \n\t"
        "addl %%eax, %2 \n\t"
        /* rows 2 and 3: same pattern */
        "movq (%1), %%mm0 \n\t"
        "movq (%1, %3), %%mm1 \n\t"
        "movq 8(%1), %%mm2 \n\t"
        "movq 8(%1, %3), %%mm3 \n\t"
        PAVGB" 1(%1), %%mm0 \n\t"
        PAVGB" 1(%1, %3), %%mm1 \n\t"
        PAVGB" 9(%1), %%mm2 \n\t"
        PAVGB" 9(%1, %3), %%mm3 \n\t"
        "addl %%eax, %1 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "movq %%mm2, 8(%2) \n\t"
        "movq %%mm3, 8(%2, %3) \n\t"
        "addl %%eax, %2 \n\t"
        "subl $4, %0 \n\t"
        "jnz 1b \n\t"
        :"+g"(h), "+S"(pixels), "+D"(block)   /* %0, %1 (esi), %2 (edi) */
        :"r" (line_size)                      /* %3 */
        :"%eax", "memory");
}

127

128

static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)

129

{

130

__asm __volatile(

131

"1: \n\t"

132

"movq (%1), %%mm0 \n\t"

133

"movq 8(%1), %%mm1 \n\t"

134

"addl %4, %1 \n\t"

135

PAVGB" (%2), %%mm0 \n\t"

136

PAVGB" 8(%2), %%mm1 \n\t"

137

"movq %%mm0, (%3) \n\t"

138

"movq %%mm1, 8(%3) \n\t"

139

"addl %5, %3 \n\t"

140

"movq (%1), %%mm0 \n\t"

141

"movq 8(%1), %%mm1 \n\t"

142

"addl %4, %1 \n\t"

143

PAVGB" 16(%2), %%mm0 \n\t"

144

PAVGB" 24(%2), %%mm1 \n\t"

145

"movq %%mm0, (%3) \n\t"

146

"movq %%mm1, 8(%3) \n\t"

147

"addl %5, %3 \n\t"

148

"addl $32, %2 \n\t"

149

"subl $2, %0 \n\t"

150

"jnz 1b \n\t"

151

:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)

152

:"r"(src1Stride), "r"(dstStride)

153

:"memory");

154

}

155

156

/* GL: this function does incorrect rounding if overflow */

157

static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

158

{

159

MOVQ_BONE(mm6);

160

__asm __volatile(

161

"lea (%3, %3), %%eax \n\t"

162

"1: \n\t"

163

"movq (%1), %%mm0 \n\t"

164

"movq (%1, %3), %%mm2 \n\t"

165

"movq 1(%1), %%mm1 \n\t"

166

"movq 1(%1, %3), %%mm3 \n\t"

167

"addl %%eax, %1 \n\t"

168

"psubusb %%mm6, %%mm0 \n\t"

169

"psubusb %%mm6, %%mm2 \n\t"

170

PAVGB" %%mm1, %%mm0 \n\t"

171

PAVGB" %%mm3, %%mm2 \n\t"

172

"movq %%mm0, (%2) \n\t"

173

"movq %%mm2, (%2, %3) \n\t"

174

"movq (%1), %%mm0 \n\t"

175

"movq 1(%1), %%mm1 \n\t"

176

"movq (%1, %3), %%mm2 \n\t"

177

"movq 1(%1, %3), %%mm3 \n\t"

178

"addl %%eax, %2 \n\t"

179

"addl %%eax, %1 \n\t"

180

"psubusb %%mm6, %%mm0 \n\t"

181

"psubusb %%mm6, %%mm2 \n\t"

182

PAVGB" %%mm1, %%mm0 \n\t"

183

PAVGB" %%mm3, %%mm2 \n\t"

184

"movq %%mm0, (%2) \n\t"

185

"movq %%mm2, (%2, %3) \n\t"

186

"addl %%eax, %2 \n\t"

187

"subl $4, %0 \n\t"

188

"jnz 1b \n\t"

189

:"+g"(h), "+S"(pixels), "+D"(block)

190

:"r" (line_size)

191

:"%eax", "memory");

192

}

193

194

static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

195

{

196

__asm __volatile(

197

"lea (%3, %3), %%eax \n\t"

198

"movq (%1), %%mm0 \n\t"

199

"subl %3, %2 \n\t"

200

"1: \n\t"

201

"movq (%1, %3), %%mm1 \n\t"

202

"movq (%1, %%eax), %%mm2 \n\t"

203

"addl %%eax, %1 \n\t"

204

PAVGB" %%mm1, %%mm0 \n\t"

205

PAVGB" %%mm2, %%mm1 \n\t"

206

"movq %%mm0, (%2, %3) \n\t"

207

"movq %%mm1, (%2, %%eax) \n\t"

208

"movq (%1, %3), %%mm1 \n\t"

209

"movq (%1, %%eax), %%mm0 \n\t"

210

"addl %%eax, %2 \n\t"

211

"addl %%eax, %1 \n\t"

212

PAVGB" %%mm1, %%mm2 \n\t"

213

PAVGB" %%mm0, %%mm1 \n\t"

214

"movq %%mm2, (%2, %3) \n\t"

215

"movq %%mm1, (%2, %%eax) \n\t"

216

"addl %%eax, %2 \n\t"

217

"subl $4, %0 \n\t"

218

"jnz 1b \n\t"

219

:"+g"(h), "+S"(pixels), "+D" (block)

220

:"r" (line_size)

221

:"%eax", "memory");

222

}

223

224

/* GL: this function does incorrect rounding if overflow */

225

static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

226

{

227

MOVQ_BONE(mm6);

228

__asm __volatile(

229

"lea (%3, %3), %%eax \n\t"

230

"movq (%1), %%mm0 \n\t"

231

"subl %3, %2 \n\t"

232

"1: \n\t"

233

"movq (%1, %3), %%mm1 \n\t"

234

"movq (%1, %%eax), %%mm2 \n\t"

235

"addl %%eax, %1 \n\t"

236

"psubusb %%mm6, %%mm1 \n\t"

237

PAVGB" %%mm1, %%mm0 \n\t"

238

PAVGB" %%mm2, %%mm1 \n\t"

239

"movq %%mm0, (%2, %3) \n\t"

240

"movq %%mm1, (%2, %%eax) \n\t"

241

"movq (%1, %3), %%mm1 \n\t"

242

"movq (%1, %%eax), %%mm0 \n\t"

243

"addl %%eax, %2 \n\t"

244

"addl %%eax, %1 \n\t"

245

"psubusb %%mm6, %%mm1 \n\t"

246

PAVGB" %%mm1, %%mm2 \n\t"

247

PAVGB" %%mm0, %%mm1 \n\t"

248

"movq %%mm2, (%2, %3) \n\t"

249

"movq %%mm1, (%2, %%eax) \n\t"

250

"addl %%eax, %2 \n\t"

251

"subl $4, %0 \n\t"

252

"jnz 1b \n\t"

253

:"+g"(h), "+S"(pixels), "+D" (block)

254

:"r" (line_size)

255

:"%eax", "memory");

256

}

257

258

static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

259

{

260

__asm __volatile(

261

"lea (%3, %3), %%eax \n\t"

262

"1: \n\t"

263

"movq (%2), %%mm0 \n\t"

264

"movq (%2, %3), %%mm1 \n\t"

265

PAVGB" (%1), %%mm0 \n\t"

266

PAVGB" (%1, %3), %%mm1 \n\t"

267

"movq %%mm0, (%2) \n\t"

268

"movq %%mm1, (%2, %3) \n\t"

269

"addl %%eax, %1 \n\t"

270

"addl %%eax, %2 \n\t"

271

"movq (%2), %%mm0 \n\t"

272

"movq (%2, %3), %%mm1 \n\t"

273

PAVGB" (%1), %%mm0 \n\t"

274

PAVGB" (%1, %3), %%mm1 \n\t"

275

"addl %%eax, %1 \n\t"

276

"movq %%mm0, (%2) \n\t"

277

"movq %%mm1, (%2, %3) \n\t"

278

"addl %%eax, %2 \n\t"

279

"subl $4, %0 \n\t"

280

"jnz 1b \n\t"

281

:"+g"(h), "+S"(pixels), "+D"(block)

282

:"r" (line_size)

283

:"%eax", "memory");

284

}

285

286

static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

287

{

288

__asm __volatile(

289

"lea (%3, %3), %%eax \n\t"

290

"1: \n\t"

291

"movq (%1), %%mm0 \n\t"

292

"movq (%1, %3), %%mm2 \n\t"

293

PAVGB" 1(%1), %%mm0 \n\t"

294

PAVGB" 1(%1, %3), %%mm2 \n\t"

295

PAVGB" (%2), %%mm0 \n\t"

296

PAVGB" (%2, %3), %%mm2 \n\t"

297

"addl %%eax, %1 \n\t"

298

"movq %%mm0, (%2) \n\t"

299

"movq %%mm2, (%2, %3) \n\t"

300

"movq (%1), %%mm0 \n\t"

301

"movq (%1, %3), %%mm2 \n\t"

302

PAVGB" 1(%1), %%mm0 \n\t"

303

PAVGB" 1(%1, %3), %%mm2 \n\t"

304

"addl %%eax, %2 \n\t"

305

"addl %%eax, %1 \n\t"

306

PAVGB" (%2), %%mm0 \n\t"

307

PAVGB" (%2, %3), %%mm2 \n\t"

308

"movq %%mm0, (%2) \n\t"

309

"movq %%mm2, (%2, %3) \n\t"

310

"addl %%eax, %2 \n\t"

311

"subl $4, %0 \n\t"

312

"jnz 1b \n\t"

313

:"+g"(h), "+S"(pixels), "+D"(block)

314

:"r" (line_size)

315

:"%eax", "memory");

316

}

317

318

static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

319

{

320

__asm __volatile(

321

"lea (%3, %3), %%eax \n\t"

322

"movq (%1), %%mm0 \n\t"

323

"subl %3, %2 \n\t"

324

"1: \n\t"

325

"movq (%1, %3), %%mm1 \n\t"

326

"movq (%1, %%eax), %%mm2 \n\t"

327

"addl %%eax, %1 \n\t"

328

PAVGB" %%mm1, %%mm0 \n\t"

329

PAVGB" %%mm2, %%mm1 \n\t"

330

"movq (%2, %3), %%mm3 \n\t"

331

"movq (%2, %%eax), %%mm4 \n\t"

332

PAVGB" %%mm3, %%mm0 \n\t"

333

PAVGB" %%mm4, %%mm1 \n\t"

334

"movq %%mm0, (%2, %3) \n\t"

335

"movq %%mm1, (%2, %%eax) \n\t"

336

"movq (%1, %3), %%mm1 \n\t"

337

"movq (%1, %%eax), %%mm0 \n\t"

338

PAVGB" %%mm1, %%mm2 \n\t"

339

PAVGB" %%mm0, %%mm1 \n\t"

340

"addl %%eax, %2 \n\t"

341

"addl %%eax, %1 \n\t"

342

"movq (%2, %3), %%mm3 \n\t"

343

"movq (%2, %%eax), %%mm4 \n\t"

344

PAVGB" %%mm3, %%mm2 \n\t"

345

PAVGB" %%mm4, %%mm1 \n\t"

346

"movq %%mm2, (%2, %3) \n\t"

347

"movq %%mm1, (%2, %%eax) \n\t"

348

"addl %%eax, %2 \n\t"

349

"subl $4, %0 \n\t"

350

"jnz 1b \n\t"

351

:"+g"(h), "+S"(pixels), "+D"(block)

352

:"r" (line_size)

353

:"%eax", "memory");

354

}

355

356

// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter

357

static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)

358

{

359

MOVQ_BONE(mm6);

360

__asm __volatile(

361

"lea (%3, %3), %%eax \n\t"

362

"movq (%1), %%mm0 \n\t"

363

PAVGB" 1(%1), %%mm0 \n\t"

364

".balign 8 \n\t"

365

"1: \n\t"

366

"movq (%1, %%eax), %%mm2 \n\t"

367

"movq (%1, %3), %%mm1 \n\t"

368

"psubusb %%mm6, %%mm2 \n\t"

369

PAVGB" 1(%1, %3), %%mm1 \n\t"

370

PAVGB" 1(%1, %%eax), %%mm2 \n\t"

371

"addl %%eax, %1 \n\t"

372

PAVGB" %%mm1, %%mm0 \n\t"

373

PAVGB" %%mm2, %%mm1 \n\t"

374

PAVGB" (%2), %%mm0 \n\t"

375

PAVGB" (%2, %3), %%mm1 \n\t"

376

"movq %%mm0, (%2) \n\t"

377

"movq %%mm1, (%2, %3) \n\t"

378

"movq (%1, %3), %%mm1 \n\t"

379

"movq (%1, %%eax), %%mm0 \n\t"

380

PAVGB" 1(%1, %3), %%mm1 \n\t"

381

PAVGB" 1(%1, %%eax), %%mm0 \n\t"

382

"addl %%eax, %2 \n\t"

383

"addl %%eax, %1 \n\t"

384

PAVGB" %%mm1, %%mm2 \n\t"

385

PAVGB" %%mm0, %%mm1 \n\t"

386

PAVGB" (%2), %%mm2 \n\t"

387

PAVGB" (%2, %3), %%mm1 \n\t"

388

"movq %%mm2, (%2) \n\t"

389

"movq %%mm1, (%2, %3) \n\t"

390

"addl %%eax, %2 \n\t"

391

"subl $4, %0 \n\t"

392

"jnz 1b \n\t"

393

:"+g"(h), "+S"(pixels), "+D"(block)

394

:"r" (line_size)

395

:"%eax", "memory");

396

}

397

398

//FIXME the following could be optimized too ...

399

static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

400

DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);

401

DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);

402

}

403

static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

404

DEF(put_pixels8_y2)(block , pixels , line_size, h);

405

DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);

406

}

407

static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

408

DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);

409

DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);

410

}

411

static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

412

DEF(avg_pixels8)(block , pixels , line_size, h);

413

DEF(avg_pixels8)(block+8, pixels+8, line_size, h);

414

}

415

static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

416

DEF(avg_pixels8_x2)(block , pixels , line_size, h);

417

DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);

418

}

419

static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

420

DEF(avg_pixels8_y2)(block , pixels , line_size, h);

421

DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);

422

}

423

static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){

424

DEF(avg_pixels8_xy2)(block , pixels , line_size, h);

425

DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);

426

}

427

Older »