~ubuntu-branches/ubuntu/precise/ffmpeg-debian/precise

« back to all changes in this revision

Viewing changes to libavcodec/x86/vc1dsp_mmx.c

Committer: Bazaar Package Importer
Author(s): Reinhard Tartler
Date: 2009-01-20 09:20:53 UTC
mfrom: (1.1.3 upstream)
Revision ID: james.westby@ubuntu.com-20090120092053-izz63p40hc98qfgp

Tags: 3:0.svn20090119-1ubuntu1

https://launchpad.net/bugs/318501

https://launchpad.net/bugs/253767

* merge from debian. LP: #318501
* new version fixes CVE-2008-3230, LP: #253767

files added:
.svnrevision

debian/README.upstream-upgrade

ffpresets/libx264-baseline.ffpreset

ffpresets/libx264-ipod320.ffpreset

ffpresets/libx264-ipod640.ffpreset

ffpresets/libx264-lossless_fast.ffpreset

ffpresets/libx264-lossless_max.ffpreset

ffpresets/libx264-lossless_medium.ffpreset

ffpresets/libx264-lossless_slow.ffpreset

ffpresets/libx264-lossless_slower.ffpreset

ffpresets/libx264-lossless_ultrafast.ffpreset

ffpresets/libx264-main.ffpreset

ffpresets/libx264-slowfirstpass.ffpreset

libavcodec/aac_parser.h

libavcodec/aandcttab.c

libavcodec/aandcttab.h

libavcodec/arm

libavcodec/arm/asm.S

libavcodec/arm/dsputil_arm.c

libavcodec/arm/dsputil_arm_s.S

libavcodec/arm/dsputil_iwmmxt.c

libavcodec/arm/dsputil_iwmmxt_rnd_template.c

libavcodec/arm/dsputil_neon.c

libavcodec/arm/dsputil_neon_s.S

libavcodec/arm/dsputil_vfp.S

libavcodec/arm/float_arm_vfp.c

libavcodec/arm/h264dsp_neon.S

libavcodec/arm/h264idct_neon.S

libavcodec/arm/jrevdct_arm.S

libavcodec/arm/mathops.h

libavcodec/arm/mpegvideo_arm.c

libavcodec/arm/mpegvideo_armv5te.c

libavcodec/arm/mpegvideo_armv5te_s.S

libavcodec/arm/mpegvideo_iwmmxt.c

libavcodec/arm/simple_idct_arm.S

libavcodec/arm/simple_idct_armv5te.S

libavcodec/arm/simple_idct_armv6.S

libavcodec/arm/simple_idct_neon.S

libavcodec/dnxhdenc.h

libavcodec/faxcompr.c

libavcodec/faxcompr.h

libavcodec/internal.h

libavcodec/options.c

libavcodec/qcelp_lsp.c

libavcodec/rv40dsp.c

libavcodec/sh4/sh4.h

libavcodec/vdpau.h

libavcodec/vdpau_internal.h

libavcodec/vdpauvideo.c

libavcodec/x86

libavcodec/x86/cavsdsp_mmx.c

libavcodec/x86/cpuid.c

libavcodec/x86/dnxhd_mmx.c

libavcodec/x86/dsputil_h264_template_mmx.c

libavcodec/x86/dsputil_h264_template_ssse3.c

libavcodec/x86/dsputil_mmx.c

libavcodec/x86/dsputil_mmx.h

libavcodec/x86/dsputil_mmx_avg_template.c

libavcodec/x86/dsputil_mmx_qns_template.c

libavcodec/x86/dsputil_mmx_rnd_template.c

libavcodec/x86/dsputil_yasm.asm

libavcodec/x86/dsputilenc_mmx.c

libavcodec/x86/fdct_mmx.c

libavcodec/x86/fft_3dn.c

libavcodec/x86/fft_3dn2.c

libavcodec/x86/fft_mmx.asm

libavcodec/x86/fft_sse.c

libavcodec/x86/flacdsp_mmx.c

libavcodec/x86/h264_deblock_sse2.asm

libavcodec/x86/h264_i386.h

libavcodec/x86/h264_idct_sse2.asm

libavcodec/x86/h264dsp_mmx.c

libavcodec/x86/idct_mmx.c

libavcodec/x86/idct_mmx_xvid.c

libavcodec/x86/idct_sse2_xvid.c

libavcodec/x86/idct_xvid.h

libavcodec/x86/mathops.h

libavcodec/x86/mmx.h

libavcodec/x86/motion_est_mmx.c

libavcodec/x86/mpegvideo_mmx.c

libavcodec/x86/mpegvideo_mmx_template.c

libavcodec/x86/rv40dsp_mmx.c

libavcodec/x86/simple_idct_mmx.c

libavcodec/x86/snowdsp_mmx.c

libavcodec/x86/vc1dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.h

libavcodec/x86/vp3dsp_sse2.c

libavcodec/x86/vp3dsp_sse2.h

libavcodec/x86/x86inc.asm

libavcodec/x86/x86util.asm

libavformat/id3v2.c

libavformat/id3v2.h

libavformat/iss.c

libavformat/metadata.c

libavformat/metadata.h

libavformat/metadata_compat.c

libavformat/options.c

libavutil/timer.h

libswscale/.svnrevision

files removed:
debian/patches/050_fix_pkgconfig_files.patch

libavcodec/armv4l

libavcodec/armv4l/asm.S

libavcodec/armv4l/dsputil_arm.c

libavcodec/armv4l/dsputil_arm_s.S

libavcodec/armv4l/dsputil_iwmmxt.c

libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c

libavcodec/armv4l/dsputil_vfp.S

libavcodec/armv4l/float_arm_vfp.c

libavcodec/armv4l/jrevdct_arm.S

libavcodec/armv4l/mathops.h

libavcodec/armv4l/mpegvideo_arm.c

libavcodec/armv4l/mpegvideo_armv5te.c

libavcodec/armv4l/mpegvideo_iwmmxt.c

libavcodec/armv4l/simple_idct_arm.S

libavcodec/armv4l/simple_idct_armv5te.S

libavcodec/armv4l/simple_idct_armv6.S

libavcodec/i386

libavcodec/i386/cavsdsp_mmx.c

libavcodec/i386/cpuid.c

libavcodec/i386/dsputil_h264_template_mmx.c

libavcodec/i386/dsputil_h264_template_ssse3.c

libavcodec/i386/dsputil_mmx.c

libavcodec/i386/dsputil_mmx.h

libavcodec/i386/dsputil_mmx_avg_template.c

libavcodec/i386/dsputil_mmx_qns_template.c

libavcodec/i386/dsputil_mmx_rnd_template.c

libavcodec/i386/dsputil_yasm.asm

libavcodec/i386/dsputilenc_mmx.c

libavcodec/i386/fdct_mmx.c

libavcodec/i386/fft_3dn.c

libavcodec/i386/fft_3dn2.c

libavcodec/i386/fft_mmx.asm

libavcodec/i386/fft_sse.c

libavcodec/i386/flacdsp_mmx.c

libavcodec/i386/h264_i386.h

libavcodec/i386/h264dsp_mmx.c

libavcodec/i386/idct_mmx.c

libavcodec/i386/idct_mmx_xvid.c

libavcodec/i386/idct_sse2_xvid.c

libavcodec/i386/idct_xvid.h

libavcodec/i386/mathops.h

libavcodec/i386/mmx.h

libavcodec/i386/motion_est_mmx.c

libavcodec/i386/mpegvideo_mmx.c

libavcodec/i386/mpegvideo_mmx_template.c

libavcodec/i386/simple_idct_mmx.c

libavcodec/i386/snowdsp_mmx.c

libavcodec/i386/vc1dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.h

libavcodec/i386/vp3dsp_sse2.c

libavcodec/i386/vp3dsp_sse2.h

libavcodec/i386/x86inc.asm

files modified:
Changelog

MAINTAINERS

Makefile

README

cmdutils.c

cmdutils.h

common.mak

configure

debian/changelog

debian/confflags

debian/control

debian/control.common

debian/control.ffmpeg-debian

debian/fixup-config.sh

debian/get-orig-source.sh

debian/patches/series

debian/rules

doc/faq.texi

doc/ffmpeg-doc.texi

doc/general.texi

doc/issue_tracker.txt

doc/optimization.txt

ffmpeg.c

ffplay.c

ffpresets/libx264-fastfirstpass.ffpreset

ffpresets/libx264-hq.ffpreset

ffpresets/libx264-max.ffpreset

ffpresets/libx264-normal.ffpreset

ffserver.c

libavcodec/4xm.c

libavcodec/8bps.c

libavcodec/Makefile

libavcodec/aac.c

libavcodec/aac.h

libavcodec/aac_ac3_parser.h

libavcodec/aac_parser.c

libavcodec/aactab.c

libavcodec/aactab.h

libavcodec/aasc.c

libavcodec/ac3.c

libavcodec/ac3.h

libavcodec/ac3_parser.c

libavcodec/ac3_parser.h

libavcodec/ac3dec.c

libavcodec/acelp_pitch_delay.c

libavcodec/acelp_pitch_delay.h

libavcodec/adpcm.c

libavcodec/adxdec.c

libavcodec/adxenc.c

libavcodec/alacenc.c

libavcodec/allcodecs.c

libavcodec/alpha/asm.h

libavcodec/alpha/dsputil_alpha.c

libavcodec/alpha/dsputil_alpha_asm.S

libavcodec/alpha/motion_est_mvi_asm.S

libavcodec/alpha/mpegvideo_alpha.c

libavcodec/alpha/simple_idct_alpha.c

libavcodec/apedec.c

libavcodec/asv1.c

libavcodec/audioconvert.c

libavcodec/avcodec.h

libavcodec/avs.c

libavcodec/bfin/dsputil_bfin.c

libavcodec/bfin/mathops.h

libavcodec/bfin/mpegvideo_bfin.c

libavcodec/bfin/vp3_bfin.c

libavcodec/bitstream.c

libavcodec/bitstream.h

libavcodec/bmp.c

libavcodec/bytestream.h

libavcodec/cabac.h

libavcodec/cavs.c

libavcodec/cavs.h

libavcodec/cavsdata.h

libavcodec/cavsdec.c

libavcodec/celp_math.h

libavcodec/cinepak.c

libavcodec/cljr.c

libavcodec/cook.c

libavcodec/cscd.c

libavcodec/dca.c

libavcodec/dca.h

libavcodec/dca_parser.c

libavcodec/dct-test.c

libavcodec/dirac_parser.c

libavcodec/dnxhddata.c

libavcodec/dnxhddec.c

libavcodec/dnxhdenc.c

libavcodec/dpcm.c

libavcodec/dsputil.c

libavcodec/dsputil.h

libavcodec/dv.c

libavcodec/dvbsub.c

libavcodec/dvbsubdec.c

libavcodec/dvdata.h

libavcodec/dvdsub_parser.c

libavcodec/dvdsubdec.c

libavcodec/dvdsubenc.c

libavcodec/dxa.c

libavcodec/eac3dec.c

libavcodec/eacmv.c

libavcodec/eatgq.c

libavcodec/eatgv.c

libavcodec/error_resilience.c

libavcodec/eval.c

libavcodec/eval.h

libavcodec/fft.c

libavcodec/ffv1.c

libavcodec/flacenc.c

libavcodec/flashsv.c

libavcodec/flicvideo.c

libavcodec/fraps.c

libavcodec/g726.c

libavcodec/gif.c

libavcodec/golomb.c

libavcodec/golomb.h

libavcodec/h263.c

libavcodec/h263.h

libavcodec/h263dec.c

libavcodec/h264.c

libavcodec/h264.h

libavcodec/h264_mp4toannexb_bsf.c

libavcodec/h264_parser.c

libavcodec/h264data.h

libavcodec/h264enc.c

libavcodec/h264idct.c

libavcodec/h264pred.c

libavcodec/h264pred.h

libavcodec/huffman.c

libavcodec/huffman.h

libavcodec/huffyuv.c

libavcodec/idcinvideo.c

libavcodec/imc.c

libavcodec/imgconvert.c

libavcodec/imgconvert.h

libavcodec/imgconvert_template.c

libavcodec/imgresample.c

libavcodec/indeo3.c

libavcodec/intrax8.c

libavcodec/jpeglsdec.c

libavcodec/jpeglsenc.c

libavcodec/lcldec.c

libavcodec/lclenc.c

libavcodec/libamr.c

libavcodec/libdirac.h

libavcodec/libdiracdec.c

libavcodec/libfaad.c

libavcodec/libmp3lame.c

libavcodec/libschroedinger.h

libavcodec/libschroedingerdec.c

libavcodec/libtheoraenc.c

libavcodec/libxvidff.c

libavcodec/loco.c

libavcodec/lsp.c

libavcodec/mathops.h

libavcodec/mdct.c

libavcodec/mimic.c

libavcodec/mjpega_dump_header_bsf.c

libavcodec/mjpegdec.c

libavcodec/mjpegdec.h

libavcodec/mlp.c

libavcodec/mlp.h

libavcodec/mlp_parser.c

libavcodec/mlpdec.c

libavcodec/mmvideo.c

libavcodec/motion_est.c

libavcodec/movsub_bsf.c

libavcodec/mpc.c

libavcodec/mpc.h

libavcodec/mpc7.c

libavcodec/mpc8.c

libavcodec/mpeg12.c

libavcodec/mpegaudio.h

libavcodec/mpegaudio_parser.c

libavcodec/mpegaudiodec.c

libavcodec/mpegaudioenc.c

libavcodec/mpegvideo.c

libavcodec/mpegvideo.h

libavcodec/mpegvideo_common.h

libavcodec/mpegvideo_enc.c

libavcodec/msmpeg4.c

libavcodec/msmpeg4.h

libavcodec/msmpeg4data.c

libavcodec/msrledec.c

libavcodec/msvideo1.c

libavcodec/nellymoserdec.c

libavcodec/nellymoserenc.c

libavcodec/opt.c

libavcodec/opt.h

libavcodec/pcm.c

libavcodec/png.h

libavcodec/pnmenc.c

libavcodec/ppc/dsputil_altivec.c

libavcodec/ppc/dsputil_altivec.h

libavcodec/ppc/dsputil_ppc.c

libavcodec/ppc/dsputil_ppc.h

libavcodec/ppc/fft_altivec.c

libavcodec/ppc/float_altivec.c

libavcodec/ppc/gcc_fixes.h

libavcodec/ppc/h264_altivec.c

libavcodec/ppc/h264_template_altivec.c

libavcodec/ppc/idct_altivec.c

libavcodec/ppc/imgresample_altivec.c

libavcodec/ppc/int_altivec.c

libavcodec/ppc/mathops.h

libavcodec/ppc/mpegvideo_altivec.c

libavcodec/ppc/types_altivec.h

libavcodec/ppc/util_altivec.h

libavcodec/pthread.c

libavcodec/ptx.c

libavcodec/qcelpdata.h

libavcodec/qcelpdec.c

libavcodec/qdm2.c

libavcodec/qdrw.c

libavcodec/qtrle.c

libavcodec/ratecontrol.c

libavcodec/raw.c

libavcodec/rectangle.h

libavcodec/rl2.c

libavcodec/roqaudioenc.c

libavcodec/roqvideoenc.c

libavcodec/rpza.c

libavcodec/rv10.c

libavcodec/rv30.c

libavcodec/rv30data.h

libavcodec/rv30dsp.c

libavcodec/rv34.c

libavcodec/rv34.h

libavcodec/rv34data.h

libavcodec/rv40.c

libavcodec/s3tc.c

libavcodec/sh4/dsputil_align.c

libavcodec/sh4/dsputil_sh4.c

libavcodec/sh4/idct_sh4.c

libavcodec/sh4/qpel.c

libavcodec/shorten.c

libavcodec/simple_idct.c

libavcodec/smacker.c

libavcodec/smc.c

libavcodec/snow.c

libavcodec/snow.h

libavcodec/sonic.c

libavcodec/sparc/dsputil_vis.c

libavcodec/sparc/simple_idct_vis.c

libavcodec/sunrast.c

libavcodec/svq1.c

libavcodec/svq1.h

libavcodec/svq1dec.c

libavcodec/svq1enc.c

libavcodec/svq3.c

libavcodec/targa.c

libavcodec/targaenc.c

libavcodec/tiff.c

libavcodec/tiff.h

libavcodec/tiffenc.c

libavcodec/truespeech.c

libavcodec/tta.c

libavcodec/txd.c

libavcodec/utils.c

libavcodec/vc1.c

libavcodec/vc1.h

libavcodec/vcr1.c

libavcodec/vmdav.c

libavcodec/vmnc.c

libavcodec/vorbis.c

libavcodec/vorbis.h

libavcodec/vorbis_dec.c

libavcodec/vorbis_enc.c

libavcodec/vp3.c

libavcodec/vp5.c

libavcodec/vp56.c

libavcodec/vp56.h

libavcodec/vp56data.c

libavcodec/vp56data.h

libavcodec/vp6.c

libavcodec/vp6data.h

libavcodec/vqavideo.c

libavcodec/wavpack.c

libavcodec/wmv2.c

libavcodec/wmv2dec.c

libavcodec/ws-snd1.c

libavcodec/xan.c

libavcodec/xiph.c

libavcodec/xl.c

libavcodec/xsubdec.c

libavcodec/xvmc_render.h

libavcodec/xvmcvideo.c

libavcodec/zmbv.c

libavcodec/zmbvenc.c

libavdevice/alldevices.c

libavdevice/audio.c

libavdevice/beosaudio.cpp

libavdevice/bktr.c

libavdevice/libdc1394.c

libavdevice/v4l.c

libavdevice/v4l2.c

libavdevice/vfwcap.c

libavdevice/x11grab.c

libavfilter/Makefile

libavfilter/allfilters.c

libavfilter/avfilter.c

libavfilter/avfilter.h

libavfilter/avfiltergraph.h

libavfilter/defaults.c

libavfilter/graphparser.c

libavfilter/graphparser.h

libavformat/4xm.c

libavformat/Makefile

libavformat/aiff.c

libavformat/allformats.c

libavformat/amr.c

libavformat/ape.c

libavformat/asf-enc.c

libavformat/asf.c

libavformat/asf.h

libavformat/au.c

libavformat/avc.c

libavformat/avc.h

libavformat/avformat.h

libavformat/avidec.c

libavformat/avienc.c

libavformat/avio.h

libavformat/aviobuf.c

libavformat/avs.c

libavformat/bethsoftvid.c

libavformat/bfi.c

libavformat/c93.c

libavformat/daud.c

libavformat/dsicin.c

libavformat/dv.c

libavformat/dv.h

libavformat/dvenc.c

libavformat/dxa.c

libavformat/electronicarts.c

libavformat/ffmdec.c

libavformat/ffmenc.c

libavformat/file.c

libavformat/flic.c

libavformat/flvdec.c

libavformat/flvenc.c

libavformat/framehook.c

libavformat/framehook.h

libavformat/gif.c

libavformat/gxf.c

libavformat/gxf.h

libavformat/gxfenc.c

libavformat/idcin.c

libavformat/idroq.c

libavformat/iff.c

libavformat/img2.c

libavformat/internal.h

libavformat/ipmovie.c

libavformat/isom.c

libavformat/isom.h

libavformat/libnut.c

libavformat/lmlm4.c

libavformat/matroska.c

libavformat/matroskadec.c

libavformat/matroskaenc.c

libavformat/mm.c

libavformat/mmf.c

libavformat/mov.c

libavformat/movenc.c

libavformat/mp3.c

libavformat/mpegenc.c

libavformat/mpegts.c

libavformat/mpegtsenc.c

libavformat/mtv.c

libavformat/mxf.h

libavformat/mxfdec.c

libavformat/network.h

libavformat/nut.c

libavformat/nut.h

libavformat/nutdec.c

libavformat/nutenc.c

libavformat/nuv.c

libavformat/oggdec.h

libavformat/oggparseflac.c

libavformat/oggparsevorbis.c

libavformat/os_support.c

libavformat/os_support.h

libavformat/psxstr.c

libavformat/raw.c

libavformat/rdt.c

libavformat/rdt.h

libavformat/riff.c

libavformat/riff.h

libavformat/rl2.c

libavformat/rm.h

libavformat/rmdec.c

libavformat/rmenc.c

libavformat/rtp.h

libavformat/rtp_h264.c

libavformat/rtp_internal.h

libavformat/rtpdec.c

libavformat/rtpproto.c

libavformat/rtsp.c

libavformat/rtsp.h

libavformat/sdp.c

libavformat/segafilm.c

libavformat/sierravmd.c

libavformat/siff.c

libavformat/smacker.c

libavformat/swfdec.c

libavformat/swfenc.c

libavformat/tcp.c

libavformat/thp.c

libavformat/tta.c

libavformat/txd.c

libavformat/udp.c

libavformat/utils.c

libavformat/vc1test.c

libavformat/voc.h

libavformat/vocdec.c

libavformat/vocenc.c

libavformat/wav.c

libavformat/wc3movie.c

libavformat/westwood.c

libavformat/wv.c

libavformat/xa.c

libavformat/yuv4mpeg.c

libavutil/Makefile

libavutil/adler32.c

libavutil/aes.c

libavutil/arm/bswap.h

libavutil/avutil.h

libavutil/bswap.h

libavutil/common.h

libavutil/crc.c

libavutil/des.c

libavutil/fifo.h

libavutil/internal.h

libavutil/intreadwrite.h

libavutil/log.h

libavutil/mathematics.c

libavutil/mathematics.h

libavutil/md5.c

libavutil/mem.c

libavutil/mem.h

libavutil/pca.c

libavutil/random.c

libavutil/random.h

libavutil/rational.c

libavutil/sha1.c

libavutil/tree.c

libavutil/x86/bswap.h

libavutil/x86_cpu.h

libpostproc/postprocess.c

libpostproc/postprocess.h

libpostproc/postprocess_template.c

libswscale/Makefile

libswscale/rgb2rgb.c

libswscale/rgb2rgb.h

libswscale/rgb2rgb_template.c

libswscale/swscale-example.c

libswscale/swscale.c

libswscale/swscale_bfin.c

libswscale/swscale_internal.h

libswscale/swscale_template.c

libswscale/yuv2rgb.c

libswscale/yuv2rgb_altivec.c

libswscale/yuv2rgb_bfin.c

libswscale/yuv2rgb_template.c

subdir.mak

tests/ffmpeg.regression.ref

tests/libav.regression.ref

tests/regression.sh

tests/rotozoom.regression.ref

tests/seek.regression.ref

tests/seek_test.sh

Show diffs side-by-side

added added

removed removed

libavcodec/x86/vc1dsp_mmx.c

/*
 * VC-1 and WMV3 - DSP functions MMX-optimized
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "libavutil/x86_cpu.h"

#include "libavcodec/dsputil.h"

#include "dsputil_mmx.h"

/**
 * Add the rounder held in mm7 to every word lane of mm3 and mm4, then
 * arithmetic-shift both registers right by SHIFT.
 *
 * SHIFT is an asm operand string such as "$7" or "%6". This macro does not
 * pack or store anything; that is done by the TRANSFER_* macros.
 */
#define NORMALIZE_MMX(SHIFT) \
    "paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \
    "paddw %%mm7, %%mm4 \n\t" /* +bias-r */ \
    "psraw "SHIFT", %%mm3 \n\t" \
    "psraw "SHIFT", %%mm4 \n\t"

/**
 * Pack the signed words of mm3 (low half) and mm4 (high half) into eight
 * unsigned-saturated bytes and store them at the address in operand %2.
 */
#define TRANSFER_DO_PACK \
    "packuswb %%mm4, %%mm3 \n\t" \
    "movq %%mm3, (%2) \n\t"

/**
 * Store mm3 and mm4 unpacked (16 bytes of 16-bit words) at the address in
 * operand %2: mm3 at offset 0, mm4 at offset 8.
 */
#define TRANSFER_DONT_PACK \
    "movq %%mm3, 0(%2) \n\t" \
    "movq %%mm4, 8(%2) \n\t"

/**
 * Candidates for the UNPACK argument of MSPEL_FILTER13_CORE.
 *
 * DO_UNPACK(reg) interleaves the low four bytes of reg with mm0; with mm0
 * cleared (vc1_mspel_mc() does a pxor on it before dispatching) this widens
 * the bytes to 16-bit words. DONT_UNPACK(reg) expands to nothing, for data
 * that is already 16 bits wide.
 *
 * @see MSPEL_FILTER13_CORE for use as UNPACK macro
 */

#define DO_UNPACK(reg) "punpcklbw %%mm0, " reg "\n\t"

#define DONT_UNPACK(reg)

/**
 * Broadcast the rounder into mm7.
 *
 * Loads 32 bits from ROUND (an asm operand string such as "%5") and
 * replicates its low 16-bit word into all four word lanes of mm7. The
 * rounder value itself (e.g. 32-r or 8-r) is computed by the caller.
 */
#define LOAD_ROUNDER_MMX(ROUND) \
    "movd "ROUND", %%mm7 \n\t" \
    "punpcklwd %%mm7, %%mm7 \n\t" \
    "punpckldq %%mm7, %%mm7 \n\t"

/**
 * One output row of the vertical (-1, 9, 9, -1) half-pel filter.
 *
 * R0..R3 are MMX register numbers. On entry R1 and R2 hold the two centre
 * rows (already widened to words). The macro loads the outer rows into R0
 * (from (%0,%3)) and R3 (from (%0,%2)), computes
 *   R1 = ((R1 + R2) * mm6 - R0 - R3 + mm7) >> %4
 * stores R1 at OFF(%1) and advances %0 by %2. Operand numbering is that of
 * the enclosing asm statement; mm0 is used as the zero source for unpacking.
 */
#define SHIFT2_LINE(OFF, R0,R1,R2,R3) \
    "paddw %%mm"#R2", %%mm"#R1" \n\t" \
    "movd (%0,%3), %%mm"#R0" \n\t" \
    "pmullw %%mm6, %%mm"#R1" \n\t" \
    "punpcklbw %%mm0, %%mm"#R0" \n\t" \
    "movd (%0,%2), %%mm"#R3" \n\t" \
    "psubw %%mm"#R0", %%mm"#R1" \n\t" \
    "punpcklbw %%mm0, %%mm"#R3" \n\t" \
    "paddw %%mm7, %%mm"#R1" \n\t" \
    "psubw %%mm"#R3", %%mm"#R1" \n\t" \
    "psraw %4, %%mm"#R1" \n\t" \
    "movq %%mm"#R1", "#OFF"(%1) \n\t" \
    "add %2, %0 \n\t"

/** Four packed 16-bit words each equal to 9: the multiplier loaded for the centre taps of the half-pel filters in this file. */
DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;

/**
 * Vertical half-pel (-1, 9, 9, -1) filter with 16-bit output.
 *
 * Sacrificing mm6 (it holds the *9 multiplier) allows the loads from src
 * to be pipelined.
 *
 * The outer loop runs three times; each pass filters a 4-pixel-wide column
 * over 8 rows, writing rows 24 bytes apart in dst, then moves src 4 bytes
 * and dst 8 bytes to the right. mm0 must already be zero on entry.
 *
 * @param dst    Output buffer of 16-bit values, 24 bytes per row.
 * @param src    Source pixels.
 * @param stride Source stride in bytes.
 * @param rnd    Rounder added before the shift.
 * @param shift  Right-shift count, passed to psraw as a memory operand
 *               (presumably why it is 64 bits wide).
 */
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
                                       const uint8_t *src, x86_reg stride,
                                       int rnd, int64_t shift)
{
    __asm__ volatile(
        "mov $3, %%"REG_c" \n\t"
        LOAD_ROUNDER_MMX("%5")
        "movq "MANGLE(ff_pw_9)", %%mm6 \n\t"
        "1: \n\t"
        "movd (%0), %%mm2 \n\t"
        "add %2, %0 \n\t"
        "movd (%0), %%mm3 \n\t"
        "punpcklbw %%mm0, %%mm2 \n\t"
        "punpcklbw %%mm0, %%mm3 \n\t"
        /* Eight rows; the register roles rotate from one line to the next */
        SHIFT2_LINE( 0, 1, 2, 3, 4)
        SHIFT2_LINE( 24, 2, 3, 4, 1)
        SHIFT2_LINE( 48, 3, 4, 1, 2)
        SHIFT2_LINE( 72, 4, 1, 2, 3)
        SHIFT2_LINE( 96, 1, 2, 3, 4)
        SHIFT2_LINE(120, 2, 3, 4, 1)
        SHIFT2_LINE(144, 3, 4, 1, 2)
        SHIFT2_LINE(168, 4, 1, 2, 3)
        /* src advanced 9 strides above: rewind and step 4 pixels right */
        "sub %6, %0 \n\t"
        "add $8, %1 \n\t"
        "dec %%"REG_c" \n\t"
        "jnz 1b \n\t"
        : "+r"(src), "+r"(dst)                      /* %0, %1 */
        : "r"(stride), "r"(-2*stride),              /* %2, %3 */
          "m"(shift), "m"(rnd), "r"(9*stride-4)     /* %4, %5, %6 */
        : "%"REG_c, "memory"
    );
}

105

106

/**

107

* Data is already unpacked, so some operations can directly be made from

108

* memory.

109

110

static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,

111

const int16_t *src, int rnd)

112

{

113

int h = 8;

114

115

src -= 1;

116

rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */

117

__asm__ volatile(

118

LOAD_ROUNDER_MMX("%4")

119

"movq "MANGLE(ff_pw_128)", %%mm6\n\t"

120

"movq "MANGLE(ff_pw_9)", %%mm5 \n\t"

121

"1: \n\t"

122

"movq 2*0+0(%1), %%mm1 \n\t"

123

"movq 2*0+8(%1), %%mm2 \n\t"

124

"movq 2*1+0(%1), %%mm3 \n\t"

125

"movq 2*1+8(%1), %%mm4 \n\t"

126

"paddw 2*3+0(%1), %%mm1 \n\t"

127

"paddw 2*3+8(%1), %%mm2 \n\t"

128

"paddw 2*2+0(%1), %%mm3 \n\t"

129

"paddw 2*2+8(%1), %%mm4 \n\t"

130

"pmullw %%mm5, %%mm3 \n\t"

131

"pmullw %%mm5, %%mm4 \n\t"

132

"psubw %%mm1, %%mm3 \n\t"

133

"psubw %%mm2, %%mm4 \n\t"

134

NORMALIZE_MMX("$7")

135

/* Remove bias */

136

"paddw %%mm6, %%mm3 \n\t"

137

"paddw %%mm6, %%mm4 \n\t"

138

TRANSFER_DO_PACK

139

"add $24, %1 \n\t"

140

"add %3, %2 \n\t"

141

"decl %0 \n\t"

142

"jnz 1b \n\t"

143

: "+r"(h), "+r" (src), "+r" (dst)

144

: "r"(stride), "m"(rnd)

145

: "memory"

146

);

147

}

148

149

150

/**

151

* Purely vertical or horizontal 1/2 shift interpolation.

152

* Sacrify mm6 for *9 factor.

153

154

static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src,

155

x86_reg stride, int rnd, x86_reg offset)

156

{

157

rnd = 8-rnd;

158

__asm__ volatile(

159

"mov $8, %%"REG_c" \n\t"

160

LOAD_ROUNDER_MMX("%5")

161

"movq "MANGLE(ff_pw_9)", %%mm6\n\t"

162

"1: \n\t"

163

"movd 0(%0 ), %%mm3 \n\t"

164

"movd 4(%0 ), %%mm4 \n\t"

165

"movd 0(%0,%2), %%mm1 \n\t"

166

"movd 4(%0,%2), %%mm2 \n\t"

167

"add %2, %0 \n\t"

168

"punpcklbw %%mm0, %%mm3 \n\t"

169

"punpcklbw %%mm0, %%mm4 \n\t"

170

"punpcklbw %%mm0, %%mm1 \n\t"

171

"punpcklbw %%mm0, %%mm2 \n\t"

172

"paddw %%mm1, %%mm3 \n\t"

173

"paddw %%mm2, %%mm4 \n\t"

174

"movd 0(%0,%3), %%mm1 \n\t"

175

"movd 4(%0,%3), %%mm2 \n\t"

176

"pmullw %%mm6, %%mm3 \n\t" /* 0,9,9,0*/

177

"pmullw %%mm6, %%mm4 \n\t" /* 0,9,9,0*/

178

"punpcklbw %%mm0, %%mm1 \n\t"

179

"punpcklbw %%mm0, %%mm2 \n\t"

180

"psubw %%mm1, %%mm3 \n\t" /*-1,9,9,0*/

181

"psubw %%mm2, %%mm4 \n\t" /*-1,9,9,0*/

182

"movd 0(%0,%2), %%mm1 \n\t"

183

"movd 4(%0,%2), %%mm2 \n\t"

184

"punpcklbw %%mm0, %%mm1 \n\t"

185

"punpcklbw %%mm0, %%mm2 \n\t"

186

"psubw %%mm1, %%mm3 \n\t" /*-1,9,9,-1*/

187

"psubw %%mm2, %%mm4 \n\t" /*-1,9,9,-1*/

188

NORMALIZE_MMX("$4")

189

"packuswb %%mm4, %%mm3 \n\t"

190

"movq %%mm3, (%1) \n\t"

191

"add %6, %0 \n\t"

192

"add %4, %1 \n\t"

193

"dec %%"REG_c" \n\t"

194

"jnz 1b \n\t"

195

: "+r"(src), "+r"(dst)

196

: "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),

197

"g"(stride-offset)

198

: "%"REG_c, "memory"

199

);

200

}

201

202

/**

203

* Filter coefficients made global to allow access by all 1 or 3 quarter shift

204

* interpolation functions.

205

206

DECLARE_ASM_CONST(16, uint64_t, ff_pw_53) = 0x0035003500350035ULL;

207

DECLARE_ASM_CONST(16, uint64_t, ff_pw_18) = 0x0012001200120012ULL;


/**
 * Core of the 1/4 and 3/4 shift bicubic interpolation.
 *
 * Leaves in mm3 (first four lanes) and mm4 (next four lanes) the sum
 *   -3*A1 + 18*A2 + 53*A3 - 4*A4
 * Expects mm5 = 53 and mm6 = 18 in every word lane; clobbers mm1 and mm2.
 *
 * @param UNPACK Macro unpacking arguments from 8 to 16bits (can be empty).
 * @param MOVQ   "movd 1" or "movq 2", if data read is already unpacked.
 *               The trailing digit is the element size in bytes; it scales
 *               the "*0+" / "*4+" displacements to reach the second group
 *               of four elements.
 * @param A1     Address of 1st tap (beware of unpacked/packed).
 * @param A2     Address of 2nd tap
 * @param A3     Address of 3rd tap
 * @param A4     Address of 4th tap
 */
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4) \
    MOVQ "*0+"A1", %%mm1 \n\t" \
    MOVQ "*4+"A1", %%mm2 \n\t" \
    UNPACK("%%mm1") \
    UNPACK("%%mm2") \
    "pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \
    "pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \
    MOVQ "*0+"A2", %%mm3 \n\t" \
    MOVQ "*4+"A2", %%mm4 \n\t" \
    UNPACK("%%mm3") \
    UNPACK("%%mm4") \
    "pmullw %%mm6, %%mm3 \n\t" /* *18 */ \
    "pmullw %%mm6, %%mm4 \n\t" /* *18 */ \
    "psubw %%mm1, %%mm3 \n\t" /* 18,-3 */ \
    "psubw %%mm2, %%mm4 \n\t" /* 18,-3 */ \
    MOVQ "*0+"A4", %%mm1 \n\t" \
    MOVQ "*4+"A4", %%mm2 \n\t" \
    UNPACK("%%mm1") \
    UNPACK("%%mm2") \
    "psllw $2, %%mm1 \n\t" /* 4* */ \
    "psllw $2, %%mm2 \n\t" /* 4* */ \
    "psubw %%mm1, %%mm3 \n\t" /* -4,18,-3 */ \
    "psubw %%mm2, %%mm4 \n\t" /* -4,18,-3 */ \
    MOVQ "*0+"A3", %%mm1 \n\t" \
    MOVQ "*4+"A3", %%mm2 \n\t" \
    UNPACK("%%mm1") \
    UNPACK("%%mm2") \
    "pmullw %%mm5, %%mm1 \n\t" /* *53 */ \
    "pmullw %%mm5, %%mm2 \n\t" /* *53 */ \
    "paddw %%mm1, %%mm3 \n\t" /* -4,53,18,-3 */ \
    "paddw %%mm2, %%mm4 \n\t" /* -4,53,18,-3 */


/**
 * Macro to build the vertical 16bits version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (src_stride) and %4 (3*src_stride).
 *
 * The generated function filters 8 rows. For each row it writes twelve
 * 16-bit results (24 bytes): eight through MSPEL_FILTER13_CORE and four
 * more computed inline. src is moved back one stride before the loop.
 * Asm operands: %0 h, %1 src, %2 dst, %3 src_stride, %4 3*src_stride,
 * %5 rnd, %6 shift.
 *
 * @param NAME Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \
static void \
vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
                                 x86_reg src_stride, \
                                 int rnd, int64_t shift) \
{ \
    int h = 8; \
    src -= src_stride; \
    __asm__ volatile( \
        LOAD_ROUNDER_MMX("%5") \
        "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \
        "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
        NORMALIZE_MMX("%6") \
        TRANSFER_DONT_PACK \
        /* Last 3 (in fact 4) bytes on the line */ \
        "movd 8+"A1", %%mm1 \n\t" \
        DO_UNPACK("%%mm1") \
        "movq %%mm1, %%mm3 \n\t" \
        "paddw %%mm1, %%mm1 \n\t" \
        "paddw %%mm3, %%mm1 \n\t" /* 3* */ \
        "movd 8+"A2", %%mm3 \n\t" \
        DO_UNPACK("%%mm3") \
        "pmullw %%mm6, %%mm3 \n\t" /* *18 */ \
        "psubw %%mm1, %%mm3 \n\t" /*18,-3 */ \
        "movd 8+"A3", %%mm1 \n\t" \
        DO_UNPACK("%%mm1") \
        "pmullw %%mm5, %%mm1 \n\t" /* *53 */ \
        "paddw %%mm1, %%mm3 \n\t" /*53,18,-3 */ \
        "movd 8+"A4", %%mm1 \n\t" \
        DO_UNPACK("%%mm1") \
        "psllw $2, %%mm1 \n\t" /* 4* */ \
        "psubw %%mm1, %%mm3 \n\t" \
        "paddw %%mm7, %%mm3 \n\t" \
        "psraw %6, %%mm3 \n\t" \
        "movq %%mm3, 16(%2) \n\t" \
        "add %3, %1 \n\t" \
        "add $24, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(src_stride), "r"(3*src_stride), \
          "m"(rnd), "m"(shift) \
        : "memory" \
    ); \
}


/**
 * Macro to build the horizontal 16bits version of vc1_put_shift[13].
 * Here, offset=16bits, so parameters passed A1 to A4 should be simple.
 *
 * The generated function filters 8 rows of 8 pixels from 16-bit
 * intermediates laid out 24 bytes per row, and writes packed bytes to dst.
 * The rounder is biased by -64*256 before the >>7 and 128 is added back
 * afterwards. The literal terms in "(-4+58+13-3)" differ from the
 * -4,53,18,-3 taps actually applied, but both sum to 64, so the bias value
 * is the same.
 * Asm operands: %0 h, %1 src, %2 dst, %3 stride, %4 rnd.
 *
 * @param NAME Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4) \
static void \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
                                 const int16_t *src, int rnd) \
{ \
    int h = 8; \
    src -= 1; \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
    __asm__ volatile( \
        LOAD_ROUNDER_MMX("%4") \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \
        NORMALIZE_MMX("$7") \
        /* Remove bias */ \
        "paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \
        "paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \
        TRANSFER_DO_PACK \
        "add $24, %1 \n\t" \
        "add %3, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(stride), "m"(rnd) \
        : "memory" \
    ); \
}


/**
 * Macro to build the 8bits, any direction, version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (offset) and %4 (3*offset).
 *
 * The generated function filters 8 rows of 8 pixels, using 32 - rnd as the
 * rounder and a right shift of 6, and stores saturated bytes. src is moved
 * back by offset before the loop; both src and dst advance by stride.
 * Asm operands: %0 h, %1 src, %2 dst, %3 offset, %4 3*offset, %5 stride,
 * %6 rnd.
 *
 * @param NAME Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4) \
static void \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
                        x86_reg stride, int rnd, x86_reg offset) \
{ \
    int h = 8; \
    src -= offset; \
    rnd = 32-rnd; \
    __asm__ volatile ( \
        LOAD_ROUNDER_MMX("%6") \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
        NORMALIZE_MMX("$6") \
        TRANSFER_DO_PACK \
        "add %5, %1 \n\t" \
        "add %5, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
        : "memory" \
    ); \
}

379

380

/** 1/4 shift bicubic interpolation */

381

MSPEL_FILTER13_8B (shift1, "0(%1,%4 )", "0(%1,%3,2)", "0(%1,%3 )", "0(%1 )")

382

MSPEL_FILTER13_VER_16B(shift1, "0(%1,%4 )", "0(%1,%3,2)", "0(%1,%3 )", "0(%1 )")

383

MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)")

384

385

/** 3/4 shift bicubic interpolation */

386

MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )")

387

MSPEL_FILTER13_VER_16B(shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )")

388

MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)")

389

390

typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, x86_reg src_stride, int rnd, int64_t shift);

391

typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, x86_reg dst_stride, const int16_t *src, int rnd);

392

typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset);

393

394

/**

395

* Interpolates fractional pel values by applying proper vertical then

396

* horizontal filter.

397

398

* @param dst Destination buffer for interpolated pels.

399

* @param src Source buffer.

400

* @param stride Stride for both src and dst buffers.

401

* @param hmode Horizontal filter (expressed in quarter pixels shift).

402

* @param hmode Vertical filter.

403

* @param rnd Rounding bias.

404

405

static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,

406

int hmode, int vmode, int rnd)

407

{

408

static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =

409

{ NULL, vc1_put_ver_16b_shift1_mmx, vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };

410

static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =

411

{ NULL, vc1_put_hor_16b_shift1_mmx, vc1_put_hor_16b_shift2_mmx, vc1_put_hor_16b_shift3_mmx };

412

static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =

413

{ NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx };

414

415

__asm__ volatile(

416

"pxor %%mm0, %%mm0 \n\t"

417

::: "memory"

418

);

419

420

if (vmode) { /* Vertical filter to apply */

421

if (hmode) { /* Horizontal filter to apply, output to tmp */

422

static const int shift_value[] = { 0, 5, 1, 5 };

423

int shift = (shift_value[hmode]+shift_value[vmode])>>1;

424

int r;

425

DECLARE_ALIGNED_16(int16_t, tmp[12*8]);

426

427

r = (1<<(shift-1)) + rnd-1;

428

vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);

429

430

vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);

431

return;

432

}

433

else { /* No horizontal filter, output 8 lines to dst */

434

vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);

435

return;

436

}

437

}

438

439

/* Horizontal mode with no vertical mode */

440

vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);

441

}


/* Declared only; ff_vc1dsp_init_mmx() installs it as table entry 0, the
 * case with neither a horizontal nor a vertical shift. */
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);


/**
 * Macro to ease bicubic filter interpolation functions declarations.
 * Defines put_vc1_mspel_mc<a><b>_mmx(), which forwards to vc1_mspel_mc()
 * with hmode = a and vmode = b.
 */
#define DECLARE_FUNCTION(a, b) \
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
    vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}

450

451

DECLARE_FUNCTION(0, 1)

452

DECLARE_FUNCTION(0, 2)

453

DECLARE_FUNCTION(0, 3)

454

455

DECLARE_FUNCTION(1, 0)

456

DECLARE_FUNCTION(1, 1)

457

DECLARE_FUNCTION(1, 2)

458

DECLARE_FUNCTION(1, 3)

459

460

DECLARE_FUNCTION(2, 0)

461

DECLARE_FUNCTION(2, 1)

462

DECLARE_FUNCTION(2, 2)

463

DECLARE_FUNCTION(2, 3)

464

465

DECLARE_FUNCTION(3, 0)

466

DECLARE_FUNCTION(3, 1)

467

DECLARE_FUNCTION(3, 2)

468

DECLARE_FUNCTION(3, 3)

469

470

void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {

471

dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;

472

dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;

473

dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;

474

dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;

475

476

dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;

477

dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;

478

dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;

479

dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;

480

481

dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;

482

dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;

483

dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;

484

dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;

485

486

dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;

487

dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;

488

dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;

489

dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;

490

}

Older »