~ubuntu-branches/ubuntu/utopic/ffmpeg-debian/utopic

« back to all changes in this revision

Viewing changes to libavcodec/x86/idct_sse2_xvid.c

Committer: Bazaar Package Importer
Author(s): Reinhard Tartler
Date: 2009-01-20 09:20:53 UTC
mfrom: (1.1.3 upstream)
Revision ID: james.westby@ubuntu.com-20090120092053-izz63p40hc98qfgp

Tags: 3:0.svn20090119-1ubuntu1

https://launchpad.net/bugs/318501

https://launchpad.net/bugs/253767

* merge from debian. LP: #318501
* new version fixes CVE-2008-3230, LP: #253767

files added:
.svnrevision

debian/README.upstream-upgrade

ffpresets/libx264-baseline.ffpreset

ffpresets/libx264-ipod320.ffpreset

ffpresets/libx264-ipod640.ffpreset

ffpresets/libx264-lossless_fast.ffpreset

ffpresets/libx264-lossless_max.ffpreset

ffpresets/libx264-lossless_medium.ffpreset

ffpresets/libx264-lossless_slow.ffpreset

ffpresets/libx264-lossless_slower.ffpreset

ffpresets/libx264-lossless_ultrafast.ffpreset

ffpresets/libx264-main.ffpreset

ffpresets/libx264-slowfirstpass.ffpreset

libavcodec/aac_parser.h

libavcodec/aandcttab.c

libavcodec/aandcttab.h

libavcodec/arm

libavcodec/arm/asm.S

libavcodec/arm/dsputil_arm.c

libavcodec/arm/dsputil_arm_s.S

libavcodec/arm/dsputil_iwmmxt.c

libavcodec/arm/dsputil_iwmmxt_rnd_template.c

libavcodec/arm/dsputil_neon.c

libavcodec/arm/dsputil_neon_s.S

libavcodec/arm/dsputil_vfp.S

libavcodec/arm/float_arm_vfp.c

libavcodec/arm/h264dsp_neon.S

libavcodec/arm/h264idct_neon.S

libavcodec/arm/jrevdct_arm.S

libavcodec/arm/mathops.h

libavcodec/arm/mpegvideo_arm.c

libavcodec/arm/mpegvideo_armv5te.c

libavcodec/arm/mpegvideo_armv5te_s.S

libavcodec/arm/mpegvideo_iwmmxt.c

libavcodec/arm/simple_idct_arm.S

libavcodec/arm/simple_idct_armv5te.S

libavcodec/arm/simple_idct_armv6.S

libavcodec/arm/simple_idct_neon.S

libavcodec/dnxhdenc.h

libavcodec/faxcompr.c

libavcodec/faxcompr.h

libavcodec/internal.h

libavcodec/options.c

libavcodec/qcelp_lsp.c

libavcodec/rv40dsp.c

libavcodec/sh4/sh4.h

libavcodec/vdpau.h

libavcodec/vdpau_internal.h

libavcodec/vdpauvideo.c

libavcodec/x86

libavcodec/x86/cavsdsp_mmx.c

libavcodec/x86/cpuid.c

libavcodec/x86/dnxhd_mmx.c

libavcodec/x86/dsputil_h264_template_mmx.c

libavcodec/x86/dsputil_h264_template_ssse3.c

libavcodec/x86/dsputil_mmx.c

libavcodec/x86/dsputil_mmx.h

libavcodec/x86/dsputil_mmx_avg_template.c

libavcodec/x86/dsputil_mmx_qns_template.c

libavcodec/x86/dsputil_mmx_rnd_template.c

libavcodec/x86/dsputil_yasm.asm

libavcodec/x86/dsputilenc_mmx.c

libavcodec/x86/fdct_mmx.c

libavcodec/x86/fft_3dn.c

libavcodec/x86/fft_3dn2.c

libavcodec/x86/fft_mmx.asm

libavcodec/x86/fft_sse.c

libavcodec/x86/flacdsp_mmx.c

libavcodec/x86/h264_deblock_sse2.asm

libavcodec/x86/h264_i386.h

libavcodec/x86/h264_idct_sse2.asm

libavcodec/x86/h264dsp_mmx.c

libavcodec/x86/idct_mmx.c

libavcodec/x86/idct_mmx_xvid.c

libavcodec/x86/idct_sse2_xvid.c

libavcodec/x86/idct_xvid.h

libavcodec/x86/mathops.h

libavcodec/x86/mmx.h

libavcodec/x86/motion_est_mmx.c

libavcodec/x86/mpegvideo_mmx.c

libavcodec/x86/mpegvideo_mmx_template.c

libavcodec/x86/rv40dsp_mmx.c

libavcodec/x86/simple_idct_mmx.c

libavcodec/x86/snowdsp_mmx.c

libavcodec/x86/vc1dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.h

libavcodec/x86/vp3dsp_sse2.c

libavcodec/x86/vp3dsp_sse2.h

libavcodec/x86/x86inc.asm

libavcodec/x86/x86util.asm

libavformat/id3v2.c

libavformat/id3v2.h

libavformat/iss.c

libavformat/metadata.c

libavformat/metadata.h

libavformat/metadata_compat.c

libavformat/options.c

libavutil/timer.h

libswscale/.svnrevision

files removed:
debian/patches/050_fix_pkgconfig_files.patch

libavcodec/armv4l

libavcodec/armv4l/asm.S

libavcodec/armv4l/dsputil_arm.c

libavcodec/armv4l/dsputil_arm_s.S

libavcodec/armv4l/dsputil_iwmmxt.c

libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c

libavcodec/armv4l/dsputil_vfp.S

libavcodec/armv4l/float_arm_vfp.c

libavcodec/armv4l/jrevdct_arm.S

libavcodec/armv4l/mathops.h

libavcodec/armv4l/mpegvideo_arm.c

libavcodec/armv4l/mpegvideo_armv5te.c

libavcodec/armv4l/mpegvideo_iwmmxt.c

libavcodec/armv4l/simple_idct_arm.S

libavcodec/armv4l/simple_idct_armv5te.S

libavcodec/armv4l/simple_idct_armv6.S

libavcodec/i386

libavcodec/i386/cavsdsp_mmx.c

libavcodec/i386/cpuid.c

libavcodec/i386/dsputil_h264_template_mmx.c

libavcodec/i386/dsputil_h264_template_ssse3.c

libavcodec/i386/dsputil_mmx.c

libavcodec/i386/dsputil_mmx.h

libavcodec/i386/dsputil_mmx_avg_template.c

libavcodec/i386/dsputil_mmx_qns_template.c

libavcodec/i386/dsputil_mmx_rnd_template.c

libavcodec/i386/dsputil_yasm.asm

libavcodec/i386/dsputilenc_mmx.c

libavcodec/i386/fdct_mmx.c

libavcodec/i386/fft_3dn.c

libavcodec/i386/fft_3dn2.c

libavcodec/i386/fft_mmx.asm

libavcodec/i386/fft_sse.c

libavcodec/i386/flacdsp_mmx.c

libavcodec/i386/h264_i386.h

libavcodec/i386/h264dsp_mmx.c

libavcodec/i386/idct_mmx.c

libavcodec/i386/idct_mmx_xvid.c

libavcodec/i386/idct_sse2_xvid.c

libavcodec/i386/idct_xvid.h

libavcodec/i386/mathops.h

libavcodec/i386/mmx.h

libavcodec/i386/motion_est_mmx.c

libavcodec/i386/mpegvideo_mmx.c

libavcodec/i386/mpegvideo_mmx_template.c

libavcodec/i386/simple_idct_mmx.c

libavcodec/i386/snowdsp_mmx.c

libavcodec/i386/vc1dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.h

libavcodec/i386/vp3dsp_sse2.c

libavcodec/i386/vp3dsp_sse2.h

libavcodec/i386/x86inc.asm

files modified:
Changelog

MAINTAINERS

Makefile

README

cmdutils.c

cmdutils.h

common.mak

configure

debian/changelog

debian/confflags

debian/control

debian/control.common

debian/control.ffmpeg-debian

debian/fixup-config.sh

debian/get-orig-source.sh

debian/patches/series

debian/rules

doc/faq.texi

doc/ffmpeg-doc.texi

doc/general.texi

doc/issue_tracker.txt

doc/optimization.txt

ffmpeg.c

ffplay.c

ffpresets/libx264-fastfirstpass.ffpreset

ffpresets/libx264-hq.ffpreset

ffpresets/libx264-max.ffpreset

ffpresets/libx264-normal.ffpreset

ffserver.c

libavcodec/4xm.c

libavcodec/8bps.c

libavcodec/Makefile

libavcodec/aac.c

libavcodec/aac.h

libavcodec/aac_ac3_parser.h

libavcodec/aac_parser.c

libavcodec/aactab.c

libavcodec/aactab.h

libavcodec/aasc.c

libavcodec/ac3.c

libavcodec/ac3.h

libavcodec/ac3_parser.c

libavcodec/ac3_parser.h

libavcodec/ac3dec.c

libavcodec/acelp_pitch_delay.c

libavcodec/acelp_pitch_delay.h

libavcodec/adpcm.c

libavcodec/adxdec.c

libavcodec/adxenc.c

libavcodec/alacenc.c

libavcodec/allcodecs.c

libavcodec/alpha/asm.h

libavcodec/alpha/dsputil_alpha.c

libavcodec/alpha/dsputil_alpha_asm.S

libavcodec/alpha/motion_est_mvi_asm.S

libavcodec/alpha/mpegvideo_alpha.c

libavcodec/alpha/simple_idct_alpha.c

libavcodec/apedec.c

libavcodec/asv1.c

libavcodec/audioconvert.c

libavcodec/avcodec.h

libavcodec/avs.c

libavcodec/bfin/dsputil_bfin.c

libavcodec/bfin/mathops.h

libavcodec/bfin/mpegvideo_bfin.c

libavcodec/bfin/vp3_bfin.c

libavcodec/bitstream.c

libavcodec/bitstream.h

libavcodec/bmp.c

libavcodec/bytestream.h

libavcodec/cabac.h

libavcodec/cavs.c

libavcodec/cavs.h

libavcodec/cavsdata.h

libavcodec/cavsdec.c

libavcodec/celp_math.h

libavcodec/cinepak.c

libavcodec/cljr.c

libavcodec/cook.c

libavcodec/cscd.c

libavcodec/dca.c

libavcodec/dca.h

libavcodec/dca_parser.c

libavcodec/dct-test.c

libavcodec/dirac_parser.c

libavcodec/dnxhddata.c

libavcodec/dnxhddec.c

libavcodec/dnxhdenc.c

libavcodec/dpcm.c

libavcodec/dsputil.c

libavcodec/dsputil.h

libavcodec/dv.c

libavcodec/dvbsub.c

libavcodec/dvbsubdec.c

libavcodec/dvdata.h

libavcodec/dvdsub_parser.c

libavcodec/dvdsubdec.c

libavcodec/dvdsubenc.c

libavcodec/dxa.c

libavcodec/eac3dec.c

libavcodec/eacmv.c

libavcodec/eatgq.c

libavcodec/eatgv.c

libavcodec/error_resilience.c

libavcodec/eval.c

libavcodec/eval.h

libavcodec/fft.c

libavcodec/ffv1.c

libavcodec/flacenc.c

libavcodec/flashsv.c

libavcodec/flicvideo.c

libavcodec/fraps.c

libavcodec/g726.c

libavcodec/gif.c

libavcodec/golomb.c

libavcodec/golomb.h

libavcodec/h263.c

libavcodec/h263.h

libavcodec/h263dec.c

libavcodec/h264.c

libavcodec/h264.h

libavcodec/h264_mp4toannexb_bsf.c

libavcodec/h264_parser.c

libavcodec/h264data.h

libavcodec/h264enc.c

libavcodec/h264idct.c

libavcodec/h264pred.c

libavcodec/h264pred.h

libavcodec/huffman.c

libavcodec/huffman.h

libavcodec/huffyuv.c

libavcodec/idcinvideo.c

libavcodec/imc.c

libavcodec/imgconvert.c

libavcodec/imgconvert.h

libavcodec/imgconvert_template.c

libavcodec/imgresample.c

libavcodec/indeo3.c

libavcodec/intrax8.c

libavcodec/jpeglsdec.c

libavcodec/jpeglsenc.c

libavcodec/lcldec.c

libavcodec/lclenc.c

libavcodec/libamr.c

libavcodec/libdirac.h

libavcodec/libdiracdec.c

libavcodec/libfaad.c

libavcodec/libmp3lame.c

libavcodec/libschroedinger.h

libavcodec/libschroedingerdec.c

libavcodec/libtheoraenc.c

libavcodec/libxvidff.c

libavcodec/loco.c

libavcodec/lsp.c

libavcodec/mathops.h

libavcodec/mdct.c

libavcodec/mimic.c

libavcodec/mjpega_dump_header_bsf.c

libavcodec/mjpegdec.c

libavcodec/mjpegdec.h

libavcodec/mlp.c

libavcodec/mlp.h

libavcodec/mlp_parser.c

libavcodec/mlpdec.c

libavcodec/mmvideo.c

libavcodec/motion_est.c

libavcodec/movsub_bsf.c

libavcodec/mpc.c

libavcodec/mpc.h

libavcodec/mpc7.c

libavcodec/mpc8.c

libavcodec/mpeg12.c

libavcodec/mpegaudio.h

libavcodec/mpegaudio_parser.c

libavcodec/mpegaudiodec.c

libavcodec/mpegaudioenc.c

libavcodec/mpegvideo.c

libavcodec/mpegvideo.h

libavcodec/mpegvideo_common.h

libavcodec/mpegvideo_enc.c

libavcodec/msmpeg4.c

libavcodec/msmpeg4.h

libavcodec/msmpeg4data.c

libavcodec/msrledec.c

libavcodec/msvideo1.c

libavcodec/nellymoserdec.c

libavcodec/nellymoserenc.c

libavcodec/opt.c

libavcodec/opt.h

libavcodec/pcm.c

libavcodec/png.h

libavcodec/pnmenc.c

libavcodec/ppc/dsputil_altivec.c

libavcodec/ppc/dsputil_altivec.h

libavcodec/ppc/dsputil_ppc.c

libavcodec/ppc/dsputil_ppc.h

libavcodec/ppc/fft_altivec.c

libavcodec/ppc/float_altivec.c

libavcodec/ppc/gcc_fixes.h

libavcodec/ppc/h264_altivec.c

libavcodec/ppc/h264_template_altivec.c

libavcodec/ppc/idct_altivec.c

libavcodec/ppc/imgresample_altivec.c

libavcodec/ppc/int_altivec.c

libavcodec/ppc/mathops.h

libavcodec/ppc/mpegvideo_altivec.c

libavcodec/ppc/types_altivec.h

libavcodec/ppc/util_altivec.h

libavcodec/pthread.c

libavcodec/ptx.c

libavcodec/qcelpdata.h

libavcodec/qcelpdec.c

libavcodec/qdm2.c

libavcodec/qdrw.c

libavcodec/qtrle.c

libavcodec/ratecontrol.c

libavcodec/raw.c

libavcodec/rectangle.h

libavcodec/rl2.c

libavcodec/roqaudioenc.c

libavcodec/roqvideoenc.c

libavcodec/rpza.c

libavcodec/rv10.c

libavcodec/rv30.c

libavcodec/rv30data.h

libavcodec/rv30dsp.c

libavcodec/rv34.c

libavcodec/rv34.h

libavcodec/rv34data.h

libavcodec/rv40.c

libavcodec/s3tc.c

libavcodec/sh4/dsputil_align.c

libavcodec/sh4/dsputil_sh4.c

libavcodec/sh4/idct_sh4.c

libavcodec/sh4/qpel.c

libavcodec/shorten.c

libavcodec/simple_idct.c

libavcodec/smacker.c

libavcodec/smc.c

libavcodec/snow.c

libavcodec/snow.h

libavcodec/sonic.c

libavcodec/sparc/dsputil_vis.c

libavcodec/sparc/simple_idct_vis.c

libavcodec/sunrast.c

libavcodec/svq1.c

libavcodec/svq1.h

libavcodec/svq1dec.c

libavcodec/svq1enc.c

libavcodec/svq3.c

libavcodec/targa.c

libavcodec/targaenc.c

libavcodec/tiff.c

libavcodec/tiff.h

libavcodec/tiffenc.c

libavcodec/truespeech.c

libavcodec/tta.c

libavcodec/txd.c

libavcodec/utils.c

libavcodec/vc1.c

libavcodec/vc1.h

libavcodec/vcr1.c

libavcodec/vmdav.c

libavcodec/vmnc.c

libavcodec/vorbis.c

libavcodec/vorbis.h

libavcodec/vorbis_dec.c

libavcodec/vorbis_enc.c

libavcodec/vp3.c

libavcodec/vp5.c

libavcodec/vp56.c

libavcodec/vp56.h

libavcodec/vp56data.c

libavcodec/vp56data.h

libavcodec/vp6.c

libavcodec/vp6data.h

libavcodec/vqavideo.c

libavcodec/wavpack.c

libavcodec/wmv2.c

libavcodec/wmv2dec.c

libavcodec/ws-snd1.c

libavcodec/xan.c

libavcodec/xiph.c

libavcodec/xl.c

libavcodec/xsubdec.c

libavcodec/xvmc_render.h

libavcodec/xvmcvideo.c

libavcodec/zmbv.c

libavcodec/zmbvenc.c

libavdevice/alldevices.c

libavdevice/audio.c

libavdevice/beosaudio.cpp

libavdevice/bktr.c

libavdevice/libdc1394.c

libavdevice/v4l.c

libavdevice/v4l2.c

libavdevice/vfwcap.c

libavdevice/x11grab.c

libavfilter/Makefile

libavfilter/allfilters.c

libavfilter/avfilter.c

libavfilter/avfilter.h

libavfilter/avfiltergraph.h

libavfilter/defaults.c

libavfilter/graphparser.c

libavfilter/graphparser.h

libavformat/4xm.c

libavformat/Makefile

libavformat/aiff.c

libavformat/allformats.c

libavformat/amr.c

libavformat/ape.c

libavformat/asf-enc.c

libavformat/asf.c

libavformat/asf.h

libavformat/au.c

libavformat/avc.c

libavformat/avc.h

libavformat/avformat.h

libavformat/avidec.c

libavformat/avienc.c

libavformat/avio.h

libavformat/aviobuf.c

libavformat/avs.c

libavformat/bethsoftvid.c

libavformat/bfi.c

libavformat/c93.c

libavformat/daud.c

libavformat/dsicin.c

libavformat/dv.c

libavformat/dv.h

libavformat/dvenc.c

libavformat/dxa.c

libavformat/electronicarts.c

libavformat/ffmdec.c

libavformat/ffmenc.c

libavformat/file.c

libavformat/flic.c

libavformat/flvdec.c

libavformat/flvenc.c

libavformat/framehook.c

libavformat/framehook.h

libavformat/gif.c

libavformat/gxf.c

libavformat/gxf.h

libavformat/gxfenc.c

libavformat/idcin.c

libavformat/idroq.c

libavformat/iff.c

libavformat/img2.c

libavformat/internal.h

libavformat/ipmovie.c

libavformat/isom.c

libavformat/isom.h

libavformat/libnut.c

libavformat/lmlm4.c

libavformat/matroska.c

libavformat/matroskadec.c

libavformat/matroskaenc.c

libavformat/mm.c

libavformat/mmf.c

libavformat/mov.c

libavformat/movenc.c

libavformat/mp3.c

libavformat/mpegenc.c

libavformat/mpegts.c

libavformat/mpegtsenc.c

libavformat/mtv.c

libavformat/mxf.h

libavformat/mxfdec.c

libavformat/network.h

libavformat/nut.c

libavformat/nut.h

libavformat/nutdec.c

libavformat/nutenc.c

libavformat/nuv.c

libavformat/oggdec.h

libavformat/oggparseflac.c

libavformat/oggparsevorbis.c

libavformat/os_support.c

libavformat/os_support.h

libavformat/psxstr.c

libavformat/raw.c

libavformat/rdt.c

libavformat/rdt.h

libavformat/riff.c

libavformat/riff.h

libavformat/rl2.c

libavformat/rm.h

libavformat/rmdec.c

libavformat/rmenc.c

libavformat/rtp.h

libavformat/rtp_h264.c

libavformat/rtp_internal.h

libavformat/rtpdec.c

libavformat/rtpproto.c

libavformat/rtsp.c

libavformat/rtsp.h

libavformat/sdp.c

libavformat/segafilm.c

libavformat/sierravmd.c

libavformat/siff.c

libavformat/smacker.c

libavformat/swfdec.c

libavformat/swfenc.c

libavformat/tcp.c

libavformat/thp.c

libavformat/tta.c

libavformat/txd.c

libavformat/udp.c

libavformat/utils.c

libavformat/vc1test.c

libavformat/voc.h

libavformat/vocdec.c

libavformat/vocenc.c

libavformat/wav.c

libavformat/wc3movie.c

libavformat/westwood.c

libavformat/wv.c

libavformat/xa.c

libavformat/yuv4mpeg.c

libavutil/Makefile

libavutil/adler32.c

libavutil/aes.c

libavutil/arm/bswap.h

libavutil/avutil.h

libavutil/bswap.h

libavutil/common.h

libavutil/crc.c

libavutil/des.c

libavutil/fifo.h

libavutil/internal.h

libavutil/intreadwrite.h

libavutil/log.h

libavutil/mathematics.c

libavutil/mathematics.h

libavutil/md5.c

libavutil/mem.c

libavutil/mem.h

libavutil/pca.c

libavutil/random.c

libavutil/random.h

libavutil/rational.c

libavutil/sha1.c

libavutil/tree.c

libavutil/x86/bswap.h

libavutil/x86_cpu.h

libpostproc/postprocess.c

libpostproc/postprocess.h

libpostproc/postprocess_template.c

libswscale/Makefile

libswscale/rgb2rgb.c

libswscale/rgb2rgb.h

libswscale/rgb2rgb_template.c

libswscale/swscale-example.c

libswscale/swscale.c

libswscale/swscale_bfin.c

libswscale/swscale_internal.h

libswscale/swscale_template.c

libswscale/yuv2rgb.c

libswscale/yuv2rgb_altivec.c

libswscale/yuv2rgb_bfin.c

libswscale/yuv2rgb_template.c

subdir.mak

tests/ffmpeg.regression.ref

tests/libav.regression.ref

tests/regression.sh

tests/rotozoom.regression.ref

tests/seek.regression.ref

tests/seek_test.sh

Show diffs side-by-side

added added

removed removed

libavcodec/x86/idct_sse2_xvid.c

/*
 * XVID MPEG-4 VIDEO CODEC
 * - SSE2 inverse discrete cosine transform -
 *
 * Conversion to gcc syntax with modifications
 * by Alexander Strange <astrange@ithinksw.com>
 *
 * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
 *
 * This file is part of FFmpeg.
 *
 * Vertical pass is an implementation of the scheme:
 *  Loeffler C., Ligtenberg A., and Moschytz C.S.:
 *  Practical Fast 1D DCT Algorithm with Eleven Multiplications,
 *  Proc. ICASSP 1989, 988-991.
 *
 * Horizontal pass is a double 4x4 vector/matrix multiplication
 * (see also Intel's Application Note 922:
 *  http://developer.intel.com/vtune/cbts/strmsimd/922down.htm).
 *
 * More details at http://skal.planet-d.net/coding/dct.html
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavcodec/dsputil.h"

#include "idct_xvid.h"

/*!
 * @file idct_sse2_xvid.c
 * @brief SSE2 idct compatible with xvidmmx
 */

// Expands to eight comma-separated copies of x; used by the constant
// vectors below to repeat one value eight times.
#define X8(x) x,x,x,x,x,x,x,x

// Same value as the literal "psrad $11" shift in iMTX_MULT (row pass).
// The macro itself is not referenced by the code visible in this file.
#define ROW_SHIFT 11

// Same value as the literal "psraw $6" shifts in iLLM_PASS and
// iLLM_PASS_SPARSE (column pass). Not referenced by the visible code either.
#define COL_SHIFT 6

// Column-pass multipliers, each one 16-bit value repeated eight times.
// tan1 and tan3 are loaded into registers by iLLM_HEAD; tan2 and sqrt2 are
// loaded directly inside iLLM_PASS / iLLM_PASS_SPARSE and used with pmulhw.
DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16)

DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1

// NOTE(review): 43790 is larger than INT16_MAX, so the stored int16_t value
// depends on the conversion wrapping — presumably intended, given the "-1".
DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1

DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2)

// Eight bytes of 127, loaded into mm0 at the start of ff_idct_xvid_sse2.
// TEST_ONE_ROW / TEST_TWO_ROWS add it with unsigned saturation (paddusb), which
// sets the top bit of every non-zero byte so pmovmskb can report "row != 0".
DECLARE_ASM_CONST(8, uint8_t, m127[]) = {X8(127)};

// Row-pass coefficient table: 32 16-bit words that iMTX_MULT reads as four
// 16-byte pmaddwd operands (offsets 0, 16, 32 and 48).
// ff_idct_xvid_sse2 uses this table for rows 0 and 4.
DECLARE_ASM_CONST(16, int16_t, iTab1[]) = {

0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,

0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,

0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,

0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b

};

// Row-pass coefficient table with the same four-vector layout that iMTX_MULT
// expects (offsets 0, 16, 32 and 48).
// ff_idct_xvid_sse2 uses this table for rows 1 and 7.
DECLARE_ASM_CONST(16, int16_t, iTab2[]) = {

0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,

0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,

0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,

0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df

};

// Row-pass coefficient table with the same four-vector layout that iMTX_MULT
// expects (offsets 0, 16, 32 and 48).
// ff_idct_xvid_sse2 uses this table for rows 2 and 6.
DECLARE_ASM_CONST(16, int16_t, iTab3[]) = {

0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,

0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,

0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,

0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04

};

// Row-pass coefficient table with the same four-vector layout that iMTX_MULT
// expects (offsets 0, 16, 32 and 48).
// ff_idct_xvid_sse2 uses this table for rows 3 and 5.
DECLARE_ASM_CONST(16, int16_t, iTab4[]) = {

0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,

0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,

0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,

0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e

};

// Per-row rounding constants for the row pass: six 16-byte vectors, each one
// 32-bit value repeated four times, added via ROUND(walkenIdctRounders+k*16).
// As used in ff_idct_xvid_sse2: vector 0 -> row 0, 1 -> row 1, 2 -> row 2,
// 3 -> row 3, 4 -> row 5, 5 -> rows 6 and 7. Row 4 is transformed with no
// rounder (its rounder argument is "#").
DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = {

65536, 65536, 65536, 65536,

3597, 3597, 3597, 3597,

2260, 2260, 2260, 2260,

1203, 1203, 1203, 1203,

120, 120, 120, 120,

512, 512, 512, 512

};

// Temporary storage before the column pass: the row-pass results of the odd
// rows are kept in these SSE2 registers on both 32-bit and 64-bit builds.
#define ROW1 "%%xmm6"
#define ROW3 "%%xmm4"
#define ROW5 "%%xmm5"
#define ROW7 "%%xmm7"

// Zero register r, so that an odd row whose row pass is skipped reads as 0.
#define CLEAR_ODD(r) "pxor "r","r" \n\t"

// Copy the row-pass result from xmm2 into register dst; pshufhw with
// immediate 0x1B reverses the order of the four high words on the way.
#define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t"

// Architecture-specific storage for the even rows and for scratch registers.
//   ROWn        where the row-pass result of even row n is kept
//   REGn        register that holds row n during the column pass
//   SREG2       register that holds row 2 in iLLM_PASS_SPARSE
//   XMMS        scratch register used by the column pass
//   TAN1, TAN3  registers loaded with the tan1 / tan3 vectors by iLLM_HEAD
//   MOV_32_ONLY instruction prefix for moves that only the 32-bit build needs
#if ARCH_X86_64

// x86-64: even rows stay in xmm8-xmm11, so no reloads are necessary.
# define ROW0 "%%xmm8"
# define REG0 ROW0
# define ROW2 "%%xmm9"
# define REG2 ROW2
# define ROW4 "%%xmm10"
# define REG4 ROW4
# define ROW6 "%%xmm11"
# define REG6 ROW6
# define CLEAR_EVEN(r) CLEAR_ODD(r)
# define PUT_EVEN(dst) PUT_ODD(dst)
# define XMMS "%%xmm12"
// "#" starts an assembler comment, turning each MOV_32_ONLY line into a no-op.
# define MOV_32_ONLY "#"
# define SREG2 REG2
# define TAN3 "%%xmm13"
# define TAN1 "%%xmm14"

#else

// 32-bit x86: only xmm0-xmm7 are used, so even rows are written back to the
// block in memory (%0 is the block pointer) and reloaded for the column pass.
# define ROW0 "(%0)"
# define REG0 "%%xmm4"
# define ROW2 "2*16(%0)"
# define REG2 "%%xmm4"
# define ROW4 "4*16(%0)"
# define REG4 "%%xmm6"
# define ROW6 "6*16(%0)"
# define REG6 "%%xmm6"
// Expands to nothing: no instruction is emitted to clear an even row.
# define CLEAR_EVEN(r)
// Reverse the four high words of xmm2 in place, then store it to dst.
# define PUT_EVEN(dst) \
    "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \
    "movdqa %%xmm2, "dst" \n\t"
# define XMMS "%%xmm2"
# define MOV_32_ONLY "movdqa "
# define SREG2 "%%xmm7"
# define TAN3 "%%xmm0"
# define TAN1 "%%xmm2"

#endif

// Builds the "paddd <mangled symbol x>" prefix that is passed to iMTX_MULT as
// its rounder argument; iMTX_MULT appends the destination register.
#define ROUND(x) "paddd "MANGLE(x)

// Test 32-bit register reg against itself and jump to label `to` if it is zero.
#define JZ(reg, to) \
    "testl "reg","reg" \n\t" \
    "jz "to" \n\t"

// Test 32-bit register reg against itself and jump to label `to` if it is
// non-zero.
#define JNZ(reg, to) \
    "testl "reg","reg" \n\t" \
    "jnz "to" \n\t"

// Check whether the 16-byte row at memory operand src holds any non-zero byte.
//   src   - memory operand of the row (its two 8-byte halves are OR-ed together)
//   reg   - 32-bit register that receives the byte mask; 0 means "row is zero"
//   clear - instructions emitted first (e.g. CLEAR_ODD for the row's register)
// Expects mm0 to hold m127: the saturating add sets the top bit of every
// non-zero byte, and pmovmskb collects those top bits into reg. Uses mm1.
#define TEST_ONE_ROW(src, reg, clear) \
    clear \
    "movq "src", %%mm1 \n\t" \
    "por 8+"src", %%mm1 \n\t" \
    "paddusb %%mm0, %%mm1 \n\t" \
    "pmovmskb %%mm1, "reg" \n\t"

// Two-row variant of TEST_ONE_ROW: checks the 16-byte rows at row1 and row2
// for non-zero bytes, with the two instruction streams interleaved.
//   row1, row2     - memory operands of the rows
//   reg1, reg2     - 32-bit registers receiving each row's byte mask (0 = zero)
//   clear1, clear2 - instructions emitted first (CLEAR_ODD / CLEAR_EVEN)
// Expects mm0 to hold m127. Uses mm1 and mm2.
#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
    clear1 \
    clear2 \
    "movq "row1", %%mm1 \n\t" \
    "por 8+"row1", %%mm1 \n\t" \
    "movq "row2", %%mm2 \n\t" \
    "por 8+"row2", %%mm2 \n\t" \
    "paddusb %%mm0, %%mm1 \n\t" \
    "paddusb %%mm0, %%mm2 \n\t" \
    "pmovmskb %%mm1, "reg1" \n\t" \
    "pmovmskb %%mm2, "reg2" \n\t"

/// IDCT pass on rows.
///   src     - memory operand of the 16-byte input row
///   table   - operand of a 64-byte coefficient table, read at +0/+16/+32/+48
///   rounder - instruction prefix (see ROUND) to which ", %%xmm0" is appended;
///             passing "#" turns that line into an assembler comment
///   put     - instructions that move the result out of xmm2
///             (PUT_ODD / PUT_EVEN)
/// The two 32-bit halves are shifted right by 11 and packed with signed
/// saturation into xmm2 before `put` runs. Uses xmm0-xmm3 as scratch.
/// The expansion ends with the local label "1:", which is the target of the
/// JZ(..., "1f") row skips in ff_idct_xvid_sse2.
#define iMTX_MULT(src, table, rounder, put) \
    "movdqa "src", %%xmm3 \n\t" \
    "movdqa %%xmm3, %%xmm0 \n\t" \
    "pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \
    "punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \
    "pmaddwd "table", %%xmm0 \n\t" \
    "pmaddwd 16+"table", %%xmm1 \n\t" \
    "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \
    "punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \
    "pmaddwd 32+"table", %%xmm2 \n\t" \
    "pmaddwd 48+"table", %%xmm3 \n\t" \
    "paddd %%xmm1, %%xmm0 \n\t" \
    "paddd %%xmm3, %%xmm2 \n\t" \
    rounder", %%xmm0 \n\t" \
    "movdqa %%xmm2, %%xmm3 \n\t" \
    "paddd %%xmm0, %%xmm2 \n\t" \
    "psubd %%xmm3, %%xmm0 \n\t" \
    "psrad $11, %%xmm2 \n\t" \
    "psrad $11, %%xmm0 \n\t" \
    "packssdw %%xmm0, %%xmm2 \n\t" \
    put \
    "1: \n\t"

// Load the tan3 and tan1 constant vectors into the TAN3 / TAN1 registers,
// which is where iLLM_PASS and iLLM_PASS_SPARSE expect to find them.
#define iLLM_HEAD \
    "movdqa "MANGLE(tan3)", "TAN3" \n\t" \
    "movdqa "MANGLE(tan1)", "TAN1" \n\t"

/// IDCT pass on columns.
///   dct - base register of the output block; the eight result rows are
///         stored at k*16(dct) for k = 0..7
/// On entry: odd rows are in xmm6 (row 1), xmm4 (row 3), xmm5 (row 5) and
/// xmm7 (row 7); even rows are in ROW0/ROW2/ROW4/ROW6; TAN3 and TAN1 have been
/// loaded by iLLM_HEAD. All arithmetic is saturating 16-bit, and every output
/// row is shifted right by 6 before it is stored.
/// On 32-bit builds the MOV_32_ONLY lines reload the even rows from memory and
/// spill/restore TAN1 through (%0); on x86-64 they are assembler comments.
/// The instruction order is significant because registers are reused throughout.
#define iLLM_PASS(dct) \
    "movdqa "TAN3", %%xmm1 \n\t" \
    "movdqa "TAN1", %%xmm3 \n\t" \
    "pmulhw %%xmm4, "TAN3" \n\t" \
    "pmulhw %%xmm5, %%xmm1 \n\t" \
    "paddsw %%xmm4, "TAN3" \n\t" \
    "paddsw %%xmm5, %%xmm1 \n\t" \
    "psubsw %%xmm5, "TAN3" \n\t" \
    "paddsw %%xmm4, %%xmm1 \n\t" \
    "pmulhw %%xmm7, %%xmm3 \n\t" \
    "pmulhw %%xmm6, "TAN1" \n\t" \
    "paddsw %%xmm6, %%xmm3 \n\t" \
    "psubsw %%xmm7, "TAN1" \n\t" \
    "movdqa %%xmm3, %%xmm7 \n\t" \
    "movdqa "TAN1", %%xmm6 \n\t" \
    "psubsw %%xmm1, %%xmm3 \n\t" \
    "psubsw "TAN3", "TAN1" \n\t" \
    "paddsw %%xmm7, %%xmm1 \n\t" \
    "paddsw %%xmm6, "TAN3" \n\t" \
    "movdqa %%xmm3, %%xmm6 \n\t" \
    "psubsw "TAN3", %%xmm3 \n\t" \
    "paddsw %%xmm6, "TAN3" \n\t" \
    "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw %%xmm4, %%xmm3 \n\t" \
    "pmulhw %%xmm4, "TAN3" \n\t" \
    "paddsw "TAN3", "TAN3" \n\t" \
    "paddsw %%xmm3, %%xmm3 \n\t" \
    "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \
    MOV_32_ONLY ROW2", "REG2" \n\t" \
    MOV_32_ONLY ROW6", "REG6" \n\t" \
    "movdqa %%xmm7, %%xmm5 \n\t" \
    "pmulhw "REG6", %%xmm7 \n\t" \
    "pmulhw "REG2", %%xmm5 \n\t" \
    "paddsw "REG2", %%xmm7 \n\t" \
    "psubsw "REG6", %%xmm5 \n\t" \
    MOV_32_ONLY ROW0", "REG0" \n\t" \
    MOV_32_ONLY ROW4", "REG4" \n\t" \
    MOV_32_ONLY" "TAN1", (%0) \n\t" \
    "movdqa "REG0", "XMMS" \n\t" \
    "psubsw "REG4", "REG0" \n\t" \
    "paddsw "XMMS", "REG4" \n\t" \
    "movdqa "REG4", "XMMS" \n\t" \
    "psubsw %%xmm7, "REG4" \n\t" \
    "paddsw "XMMS", %%xmm7 \n\t" \
    "movdqa "REG0", "XMMS" \n\t" \
    "psubsw %%xmm5, "REG0" \n\t" \
    "paddsw "XMMS", %%xmm5 \n\t" \
    "movdqa %%xmm5, "XMMS" \n\t" \
    "psubsw "TAN3", %%xmm5 \n\t" \
    "paddsw "XMMS", "TAN3" \n\t" \
    "movdqa "REG0", "XMMS" \n\t" \
    "psubsw %%xmm3, "REG0" \n\t" \
    "paddsw "XMMS", %%xmm3 \n\t" \
    MOV_32_ONLY" (%0), "TAN1" \n\t" \
    "psraw $6, %%xmm5 \n\t" \
    "psraw $6, "REG0" \n\t" \
    "psraw $6, "TAN3" \n\t" \
    "psraw $6, %%xmm3 \n\t" \
    "movdqa "TAN3", 1*16("dct") \n\t" \
    "movdqa %%xmm3, 2*16("dct") \n\t" \
    "movdqa "REG0", 5*16("dct") \n\t" \
    "movdqa %%xmm5, 6*16("dct") \n\t" \
    "movdqa %%xmm7, %%xmm0 \n\t" \
    "movdqa "REG4", %%xmm4 \n\t" \
    "psubsw %%xmm1, %%xmm7 \n\t" \
    "psubsw "TAN1", "REG4" \n\t" \
    "paddsw %%xmm0, %%xmm1 \n\t" \
    "paddsw %%xmm4, "TAN1" \n\t" \
    "psraw $6, %%xmm1 \n\t" \
    "psraw $6, %%xmm7 \n\t" \
    "psraw $6, "TAN1" \n\t" \
    "psraw $6, "REG4" \n\t" \
    "movdqa %%xmm1, ("dct") \n\t" \
    "movdqa "TAN1", 3*16("dct") \n\t" \
    "movdqa "REG4", 4*16("dct") \n\t" \
    "movdqa %%xmm7, 7*16("dct") \n\t"

/// IDCT pass on columns, assuming rows 4-7 are zero.
///   dct - base register of the output block; the eight result rows are
///         stored at k*16(dct) for k = 0..7
/// ff_idct_xvid_sse2 only reaches this variant when the byte masks of rows 4,
/// 5, 6 and 7 are all zero. Only row 1 (xmm6), row 3 (xmm4), ROW0 and ROW2 are
/// read; TAN3 and TAN1 have been loaded by iLLM_HEAD. Arithmetic is
/// saturating 16-bit, and every output row is shifted right by 6 before it is
/// stored.
/// On 32-bit builds the MOV_32_ONLY lines reload rows 0 and 2 from memory and
/// spill/restore TAN1 through (%0); on x86-64 they are assembler comments.
/// The instruction order is significant because registers are reused throughout.
#define iLLM_PASS_SPARSE(dct) \
    "pmulhw %%xmm4, "TAN3" \n\t" \
    "paddsw %%xmm4, "TAN3" \n\t" \
    "movdqa %%xmm6, %%xmm3 \n\t" \
    "pmulhw %%xmm6, "TAN1" \n\t" \
    "movdqa %%xmm4, %%xmm1 \n\t" \
    "psubsw %%xmm1, %%xmm3 \n\t" \
    "paddsw %%xmm6, %%xmm1 \n\t" \
    "movdqa "TAN1", %%xmm6 \n\t" \
    "psubsw "TAN3", "TAN1" \n\t" \
    "paddsw %%xmm6, "TAN3" \n\t" \
    "movdqa %%xmm3, %%xmm6 \n\t" \
    "psubsw "TAN3", %%xmm3 \n\t" \
    "paddsw %%xmm6, "TAN3" \n\t" \
    "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw %%xmm4, %%xmm3 \n\t" \
    "pmulhw %%xmm4, "TAN3" \n\t" \
    "paddsw "TAN3", "TAN3" \n\t" \
    "paddsw %%xmm3, %%xmm3 \n\t" \
    "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \
    MOV_32_ONLY ROW2", "SREG2" \n\t" \
    "pmulhw "SREG2", %%xmm5 \n\t" \
    MOV_32_ONLY ROW0", "REG0" \n\t" \
    "movdqa "REG0", %%xmm6 \n\t" \
    "psubsw "SREG2", %%xmm6 \n\t" \
    "paddsw "REG0", "SREG2" \n\t" \
    MOV_32_ONLY" "TAN1", (%0) \n\t" \
    "movdqa "REG0", "XMMS" \n\t" \
    "psubsw %%xmm5, "REG0" \n\t" \
    "paddsw "XMMS", %%xmm5 \n\t" \
    "movdqa %%xmm5, "XMMS" \n\t" \
    "psubsw "TAN3", %%xmm5 \n\t" \
    "paddsw "XMMS", "TAN3" \n\t" \
    "movdqa "REG0", "XMMS" \n\t" \
    "psubsw %%xmm3, "REG0" \n\t" \
    "paddsw "XMMS", %%xmm3 \n\t" \
    MOV_32_ONLY" (%0), "TAN1" \n\t" \
    "psraw $6, %%xmm5 \n\t" \
    "psraw $6, "REG0" \n\t" \
    "psraw $6, "TAN3" \n\t" \
    "psraw $6, %%xmm3 \n\t" \
    "movdqa "TAN3", 1*16("dct") \n\t" \
    "movdqa %%xmm3, 2*16("dct") \n\t" \
    "movdqa "REG0", 5*16("dct") \n\t" \
    "movdqa %%xmm5, 6*16("dct") \n\t" \
    "movdqa "SREG2", %%xmm0 \n\t" \
    "movdqa %%xmm6, %%xmm4 \n\t" \
    "psubsw %%xmm1, "SREG2" \n\t" \
    "psubsw "TAN1", %%xmm6 \n\t" \
    "paddsw %%xmm0, %%xmm1 \n\t" \
    "paddsw %%xmm4, "TAN1" \n\t" \
    "psraw $6, %%xmm1 \n\t" \
    "psraw $6, "SREG2" \n\t" \
    "psraw $6, "TAN1" \n\t" \
    "psraw $6, %%xmm6 \n\t" \
    "movdqa %%xmm1, ("dct") \n\t" \
    "movdqa "TAN1", 3*16("dct") \n\t" \
    "movdqa %%xmm6, 4*16("dct") \n\t" \
    "movdqa "SREG2", 7*16("dct") \n\t"

inline void ff_idct_xvid_sse2(short *block)

343

{

344

__asm__ volatile(

345

"movq "MANGLE(m127)", %%mm0 \n\t"

346

iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))

347

iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))

348

iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))

349

350

TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))

351

JZ("%%eax", "1f")

352

iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))

353

354

TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))

355

TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))

356

iLLM_HEAD

357

ASMALIGN(4)

358

JNZ("%%ecx", "2f")

359

JNZ("%%eax", "3f")

360

JNZ("%%edx", "4f")

361

JNZ("%%esi", "5f")

362

iLLM_PASS_SPARSE("%0")

363

"jmp 6f \n\t"

364

"2: \n\t"

365

iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))

366

"3: \n\t"

367

iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))

368

JZ("%%edx", "1f")

369

"4: \n\t"

370

iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))

371

JZ("%%esi", "1f")

372

"5: \n\t"

373

iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))

374

#if !ARCH_X86_64

375

iLLM_HEAD

376

#endif

377

iLLM_PASS("%0")

378

"6: \n\t"

379

: "+r"(block)

380

381

: "%eax", "%ecx", "%edx", "%esi", "memory");

382

}

383

384

/**
 * Run the in-place IDCT on block, then pass the result to
 * put_pixels_clamped_mmx().
 *
 * @param dest      destination pointer, forwarded unchanged
 * @param line_size forwarded unchanged (presumably the destination stride
 *                  in bytes — confirm against put_pixels_clamped_mmx)
 * @param block     64 coefficients; overwritten by ff_idct_xvid_sse2()
 */
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    put_pixels_clamped_mmx(block, dest, line_size);
}

/**
 * Run the in-place IDCT on block, then pass the result to
 * add_pixels_clamped_mmx().
 *
 * @param dest      destination pointer, forwarded unchanged
 * @param line_size forwarded unchanged (presumably the destination stride
 *                  in bytes — confirm against add_pixels_clamped_mmx)
 * @param block     64 coefficients; overwritten by ff_idct_xvid_sse2()
 */
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    add_pixels_clamped_mmx(block, dest, line_size);
}

Older »