~ubuntu-branches/ubuntu/lucid/ffmpeg/lucid-updates

« back to all changes in this revision

Viewing changes to libavcodec/i386/mpegvideo_mmx.c

Committer: Bazaar Package Importer
Author(s): Reinhard Tartler
Date: 2009-01-20 17:51:19 UTC
mfrom: (1.1.6 upstream)
Revision ID: james.westby@ubuntu.com-20090120175119-gu6kw1arv5tmf1vr

Tags: 3:0.svn20090119-1ubuntu1+unstripped1

https://launchpad.net/bugs/303537

* merge with the ubuntu.jaunty branch
* reenable x264 LP: #303537
* build against vdpau
* enable xvmc support

files added:
.svnrevision

debian/README.upstream-upgrade

ffpresets/libx264-baseline.ffpreset

ffpresets/libx264-ipod320.ffpreset

ffpresets/libx264-ipod640.ffpreset

ffpresets/libx264-lossless_fast.ffpreset

ffpresets/libx264-lossless_max.ffpreset

ffpresets/libx264-lossless_medium.ffpreset

ffpresets/libx264-lossless_slow.ffpreset

ffpresets/libx264-lossless_slower.ffpreset

ffpresets/libx264-lossless_ultrafast.ffpreset

ffpresets/libx264-main.ffpreset

ffpresets/libx264-slowfirstpass.ffpreset

libavcodec/aac_parser.h

libavcodec/aandcttab.c

libavcodec/aandcttab.h

libavcodec/arm

libavcodec/arm/asm.S

libavcodec/arm/dsputil_arm.c

libavcodec/arm/dsputil_arm_s.S

libavcodec/arm/dsputil_iwmmxt.c

libavcodec/arm/dsputil_iwmmxt_rnd_template.c

libavcodec/arm/dsputil_neon.c

libavcodec/arm/dsputil_neon_s.S

libavcodec/arm/dsputil_vfp.S

libavcodec/arm/float_arm_vfp.c

libavcodec/arm/h264dsp_neon.S

libavcodec/arm/h264idct_neon.S

libavcodec/arm/jrevdct_arm.S

libavcodec/arm/mathops.h

libavcodec/arm/mpegvideo_arm.c

libavcodec/arm/mpegvideo_armv5te.c

libavcodec/arm/mpegvideo_armv5te_s.S

libavcodec/arm/mpegvideo_iwmmxt.c

libavcodec/arm/simple_idct_arm.S

libavcodec/arm/simple_idct_armv5te.S

libavcodec/arm/simple_idct_armv6.S

libavcodec/arm/simple_idct_neon.S

libavcodec/dnxhdenc.h

libavcodec/faxcompr.c

libavcodec/faxcompr.h

libavcodec/internal.h

libavcodec/options.c

libavcodec/qcelp_lsp.c

libavcodec/rv40dsp.c

libavcodec/sh4/sh4.h

libavcodec/vdpau.h

libavcodec/vdpau_internal.h

libavcodec/vdpauvideo.c

libavcodec/x86

libavcodec/x86/cavsdsp_mmx.c

libavcodec/x86/cpuid.c

libavcodec/x86/dnxhd_mmx.c

libavcodec/x86/dsputil_h264_template_mmx.c

libavcodec/x86/dsputil_h264_template_ssse3.c

libavcodec/x86/dsputil_mmx.c

libavcodec/x86/dsputil_mmx.h

libavcodec/x86/dsputil_mmx_avg_template.c

libavcodec/x86/dsputil_mmx_qns_template.c

libavcodec/x86/dsputil_mmx_rnd_template.c

libavcodec/x86/dsputil_yasm.asm

libavcodec/x86/dsputilenc_mmx.c

libavcodec/x86/fdct_mmx.c

libavcodec/x86/fft_3dn.c

libavcodec/x86/fft_3dn2.c

libavcodec/x86/fft_mmx.asm

libavcodec/x86/fft_sse.c

libavcodec/x86/flacdsp_mmx.c

libavcodec/x86/h264_deblock_sse2.asm

libavcodec/x86/h264_i386.h

libavcodec/x86/h264_idct_sse2.asm

libavcodec/x86/h264dsp_mmx.c

libavcodec/x86/idct_mmx.c

libavcodec/x86/idct_mmx_xvid.c

libavcodec/x86/idct_sse2_xvid.c

libavcodec/x86/idct_xvid.h

libavcodec/x86/mathops.h

libavcodec/x86/mmx.h

libavcodec/x86/motion_est_mmx.c

libavcodec/x86/mpegvideo_mmx.c

libavcodec/x86/mpegvideo_mmx_template.c

libavcodec/x86/rv40dsp_mmx.c

libavcodec/x86/simple_idct_mmx.c

libavcodec/x86/snowdsp_mmx.c

libavcodec/x86/vc1dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.c

libavcodec/x86/vp3dsp_mmx.h

libavcodec/x86/vp3dsp_sse2.c

libavcodec/x86/vp3dsp_sse2.h

libavcodec/x86/x86inc.asm

libavcodec/x86/x86util.asm

libavformat/id3v2.c

libavformat/id3v2.h

libavformat/iss.c

libavformat/metadata.c

libavformat/metadata.h

libavformat/metadata_compat.c

libavformat/options.c

libavutil/timer.h

libswscale/.svnrevision

files removed:
debian/patches/050_fix_pkgconfig_files.patch

libavcodec/armv4l

libavcodec/armv4l/asm.S

libavcodec/armv4l/dsputil_arm.c

libavcodec/armv4l/dsputil_arm_s.S

libavcodec/armv4l/dsputil_iwmmxt.c

libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c

libavcodec/armv4l/dsputil_vfp.S

libavcodec/armv4l/float_arm_vfp.c

libavcodec/armv4l/jrevdct_arm.S

libavcodec/armv4l/mathops.h

libavcodec/armv4l/mpegvideo_arm.c

libavcodec/armv4l/mpegvideo_armv5te.c

libavcodec/armv4l/mpegvideo_iwmmxt.c

libavcodec/armv4l/simple_idct_arm.S

libavcodec/armv4l/simple_idct_armv5te.S

libavcodec/armv4l/simple_idct_armv6.S

libavcodec/i386

libavcodec/i386/cavsdsp_mmx.c

libavcodec/i386/cpuid.c

libavcodec/i386/dsputil_h264_template_mmx.c

libavcodec/i386/dsputil_h264_template_ssse3.c

libavcodec/i386/dsputil_mmx.c

libavcodec/i386/dsputil_mmx.h

libavcodec/i386/dsputil_mmx_avg_template.c

libavcodec/i386/dsputil_mmx_qns_template.c

libavcodec/i386/dsputil_mmx_rnd_template.c

libavcodec/i386/dsputil_yasm.asm

libavcodec/i386/dsputilenc_mmx.c

libavcodec/i386/fdct_mmx.c

libavcodec/i386/fft_3dn.c

libavcodec/i386/fft_3dn2.c

libavcodec/i386/fft_mmx.asm

libavcodec/i386/fft_sse.c

libavcodec/i386/flacdsp_mmx.c

libavcodec/i386/h264_i386.h

libavcodec/i386/h264dsp_mmx.c

libavcodec/i386/idct_mmx.c

libavcodec/i386/idct_mmx_xvid.c

libavcodec/i386/idct_sse2_xvid.c

libavcodec/i386/idct_xvid.h

libavcodec/i386/mathops.h

libavcodec/i386/mmx.h

libavcodec/i386/motion_est_mmx.c

libavcodec/i386/mpegvideo_mmx.c

libavcodec/i386/mpegvideo_mmx_template.c

libavcodec/i386/simple_idct_mmx.c

libavcodec/i386/snowdsp_mmx.c

libavcodec/i386/vc1dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.c

libavcodec/i386/vp3dsp_mmx.h

libavcodec/i386/vp3dsp_sse2.c

libavcodec/i386/vp3dsp_sse2.h

libavcodec/i386/x86inc.asm

files modified:
Changelog

MAINTAINERS

Makefile

README

cmdutils.c

cmdutils.h

common.mak

configure

debian/changelog

debian/confflags

debian/control

debian/control.common

debian/control.ffmpeg

debian/control.ffmpeg-debian

debian/fixup-config.sh

debian/get-orig-source.sh

debian/patches/series

debian/rules

doc/faq.texi

doc/ffmpeg-doc.texi

doc/general.texi

doc/issue_tracker.txt

doc/optimization.txt

ffmpeg.c

ffplay.c

ffpresets/libx264-fastfirstpass.ffpreset

ffpresets/libx264-hq.ffpreset

ffpresets/libx264-max.ffpreset

ffpresets/libx264-normal.ffpreset

ffserver.c

libavcodec/4xm.c

libavcodec/8bps.c

libavcodec/Makefile

libavcodec/aac.c

libavcodec/aac.h

libavcodec/aac_ac3_parser.h

libavcodec/aac_parser.c

libavcodec/aactab.c

libavcodec/aactab.h

libavcodec/aasc.c

libavcodec/ac3.c

libavcodec/ac3.h

libavcodec/ac3_parser.c

libavcodec/ac3_parser.h

libavcodec/ac3dec.c

libavcodec/acelp_pitch_delay.c

libavcodec/acelp_pitch_delay.h

libavcodec/adpcm.c

libavcodec/adxdec.c

libavcodec/adxenc.c

libavcodec/alacenc.c

libavcodec/allcodecs.c

libavcodec/alpha/asm.h

libavcodec/alpha/dsputil_alpha.c

libavcodec/alpha/dsputil_alpha_asm.S

libavcodec/alpha/motion_est_mvi_asm.S

libavcodec/alpha/mpegvideo_alpha.c

libavcodec/alpha/simple_idct_alpha.c

libavcodec/apedec.c

libavcodec/asv1.c

libavcodec/audioconvert.c

libavcodec/avcodec.h

libavcodec/avs.c

libavcodec/bfin/dsputil_bfin.c

libavcodec/bfin/mathops.h

libavcodec/bfin/mpegvideo_bfin.c

libavcodec/bfin/vp3_bfin.c

libavcodec/bitstream.c

libavcodec/bitstream.h

libavcodec/bmp.c

libavcodec/bytestream.h

libavcodec/cabac.h

libavcodec/cavs.c

libavcodec/cavs.h

libavcodec/cavsdata.h

libavcodec/cavsdec.c

libavcodec/celp_math.h

libavcodec/cinepak.c

libavcodec/cljr.c

libavcodec/cook.c

libavcodec/cscd.c

libavcodec/dca.c

libavcodec/dca.h

libavcodec/dca_parser.c

libavcodec/dct-test.c

libavcodec/dirac_parser.c

libavcodec/dnxhddata.c

libavcodec/dnxhddec.c

libavcodec/dnxhdenc.c

libavcodec/dpcm.c

libavcodec/dsputil.c

libavcodec/dsputil.h

libavcodec/dv.c

libavcodec/dvbsub.c

libavcodec/dvbsubdec.c

libavcodec/dvdata.h

libavcodec/dvdsub_parser.c

libavcodec/dvdsubdec.c

libavcodec/dvdsubenc.c

libavcodec/dxa.c

libavcodec/eac3dec.c

libavcodec/eacmv.c

libavcodec/eatgq.c

libavcodec/eatgv.c

libavcodec/error_resilience.c

libavcodec/eval.c

libavcodec/eval.h

libavcodec/fft.c

libavcodec/ffv1.c

libavcodec/flacenc.c

libavcodec/flashsv.c

libavcodec/flicvideo.c

libavcodec/fraps.c

libavcodec/g726.c

libavcodec/gif.c

libavcodec/golomb.c

libavcodec/golomb.h

libavcodec/h263.c

libavcodec/h263.h

libavcodec/h263dec.c

libavcodec/h264.c

libavcodec/h264.h

libavcodec/h264_mp4toannexb_bsf.c

libavcodec/h264_parser.c

libavcodec/h264data.h

libavcodec/h264enc.c

libavcodec/h264idct.c

libavcodec/h264pred.c

libavcodec/h264pred.h

libavcodec/huffman.c

libavcodec/huffman.h

libavcodec/huffyuv.c

libavcodec/idcinvideo.c

libavcodec/imc.c

libavcodec/imgconvert.c

libavcodec/imgconvert.h

libavcodec/imgconvert_template.c

libavcodec/imgresample.c

libavcodec/indeo3.c

libavcodec/intrax8.c

libavcodec/jpeglsdec.c

libavcodec/jpeglsenc.c

libavcodec/lcldec.c

libavcodec/lclenc.c

libavcodec/libamr.c

libavcodec/libdirac.h

libavcodec/libdiracdec.c

libavcodec/libfaad.c

libavcodec/libmp3lame.c

libavcodec/libschroedinger.h

libavcodec/libschroedingerdec.c

libavcodec/libtheoraenc.c

libavcodec/libxvidff.c

libavcodec/loco.c

libavcodec/lsp.c

libavcodec/mathops.h

libavcodec/mdct.c

libavcodec/mimic.c

libavcodec/mjpega_dump_header_bsf.c

libavcodec/mjpegdec.c

libavcodec/mjpegdec.h

libavcodec/mlp.c

libavcodec/mlp.h

libavcodec/mlp_parser.c

libavcodec/mlpdec.c

libavcodec/mmvideo.c

libavcodec/motion_est.c

libavcodec/movsub_bsf.c

libavcodec/mpc.c

libavcodec/mpc.h

libavcodec/mpc7.c

libavcodec/mpc8.c

libavcodec/mpeg12.c

libavcodec/mpegaudio.h

libavcodec/mpegaudio_parser.c

libavcodec/mpegaudiodec.c

libavcodec/mpegaudioenc.c

libavcodec/mpegvideo.c

libavcodec/mpegvideo.h

libavcodec/mpegvideo_common.h

libavcodec/mpegvideo_enc.c

libavcodec/msmpeg4.c

libavcodec/msmpeg4.h

libavcodec/msmpeg4data.c

libavcodec/msrledec.c

libavcodec/msvideo1.c

libavcodec/nellymoserdec.c

libavcodec/nellymoserenc.c

libavcodec/opt.c

libavcodec/opt.h

libavcodec/pcm.c

libavcodec/png.h

libavcodec/pnmenc.c

libavcodec/ppc/dsputil_altivec.c

libavcodec/ppc/dsputil_altivec.h

libavcodec/ppc/dsputil_ppc.c

libavcodec/ppc/dsputil_ppc.h

libavcodec/ppc/fft_altivec.c

libavcodec/ppc/float_altivec.c

libavcodec/ppc/gcc_fixes.h

libavcodec/ppc/h264_altivec.c

libavcodec/ppc/h264_template_altivec.c

libavcodec/ppc/idct_altivec.c

libavcodec/ppc/imgresample_altivec.c

libavcodec/ppc/int_altivec.c

libavcodec/ppc/mathops.h

libavcodec/ppc/mpegvideo_altivec.c

libavcodec/ppc/types_altivec.h

libavcodec/ppc/util_altivec.h

libavcodec/pthread.c

libavcodec/ptx.c

libavcodec/qcelpdata.h

libavcodec/qcelpdec.c

libavcodec/qdm2.c

libavcodec/qdrw.c

libavcodec/qtrle.c

libavcodec/ratecontrol.c

libavcodec/raw.c

libavcodec/rectangle.h

libavcodec/rl2.c

libavcodec/roqaudioenc.c

libavcodec/roqvideoenc.c

libavcodec/rpza.c

libavcodec/rv10.c

libavcodec/rv30.c

libavcodec/rv30data.h

libavcodec/rv30dsp.c

libavcodec/rv34.c

libavcodec/rv34.h

libavcodec/rv34data.h

libavcodec/rv40.c

libavcodec/s3tc.c

libavcodec/sh4/dsputil_align.c

libavcodec/sh4/dsputil_sh4.c

libavcodec/sh4/idct_sh4.c

libavcodec/sh4/qpel.c

libavcodec/shorten.c

libavcodec/simple_idct.c

libavcodec/smacker.c

libavcodec/smc.c

libavcodec/snow.c

libavcodec/snow.h

libavcodec/sonic.c

libavcodec/sparc/dsputil_vis.c

libavcodec/sparc/simple_idct_vis.c

libavcodec/sunrast.c

libavcodec/svq1.c

libavcodec/svq1.h

libavcodec/svq1dec.c

libavcodec/svq1enc.c

libavcodec/svq3.c

libavcodec/targa.c

libavcodec/targaenc.c

libavcodec/tiff.c

libavcodec/tiff.h

libavcodec/tiffenc.c

libavcodec/truespeech.c

libavcodec/tta.c

libavcodec/txd.c

libavcodec/utils.c

libavcodec/vc1.c

libavcodec/vc1.h

libavcodec/vcr1.c

libavcodec/vmdav.c

libavcodec/vmnc.c

libavcodec/vorbis.c

libavcodec/vorbis.h

libavcodec/vorbis_dec.c

libavcodec/vorbis_enc.c

libavcodec/vp3.c

libavcodec/vp5.c

libavcodec/vp56.c

libavcodec/vp56.h

libavcodec/vp56data.c

libavcodec/vp56data.h

libavcodec/vp6.c

libavcodec/vp6data.h

libavcodec/vqavideo.c

libavcodec/wavpack.c

libavcodec/wmv2.c

libavcodec/wmv2dec.c

libavcodec/ws-snd1.c

libavcodec/xan.c

libavcodec/xiph.c

libavcodec/xl.c

libavcodec/xsubdec.c

libavcodec/xvmc_render.h

libavcodec/xvmcvideo.c

libavcodec/zmbv.c

libavcodec/zmbvenc.c

libavdevice/alldevices.c

libavdevice/audio.c

libavdevice/beosaudio.cpp

libavdevice/bktr.c

libavdevice/libdc1394.c

libavdevice/v4l.c

libavdevice/v4l2.c

libavdevice/vfwcap.c

libavdevice/x11grab.c

libavfilter/Makefile

libavfilter/allfilters.c

libavfilter/avfilter.c

libavfilter/avfilter.h

libavfilter/avfiltergraph.h

libavfilter/defaults.c

libavfilter/graphparser.c

libavfilter/graphparser.h

libavformat/4xm.c

libavformat/Makefile

libavformat/aiff.c

libavformat/allformats.c

libavformat/amr.c

libavformat/ape.c

libavformat/asf-enc.c

libavformat/asf.c

libavformat/asf.h

libavformat/au.c

libavformat/avc.c

libavformat/avc.h

libavformat/avformat.h

libavformat/avidec.c

libavformat/avienc.c

libavformat/avio.h

libavformat/aviobuf.c

libavformat/avs.c

libavformat/bethsoftvid.c

libavformat/bfi.c

libavformat/c93.c

libavformat/daud.c

libavformat/dsicin.c

libavformat/dv.c

libavformat/dv.h

libavformat/dvenc.c

libavformat/dxa.c

libavformat/electronicarts.c

libavformat/ffmdec.c

libavformat/ffmenc.c

libavformat/file.c

libavformat/flic.c

libavformat/flvdec.c

libavformat/flvenc.c

libavformat/framehook.c

libavformat/framehook.h

libavformat/gif.c

libavformat/gxf.c

libavformat/gxf.h

libavformat/gxfenc.c

libavformat/idcin.c

libavformat/idroq.c

libavformat/iff.c

libavformat/img2.c

libavformat/internal.h

libavformat/ipmovie.c

libavformat/isom.c

libavformat/isom.h

libavformat/libnut.c

libavformat/lmlm4.c

libavformat/matroska.c

libavformat/matroskadec.c

libavformat/matroskaenc.c

libavformat/mm.c

libavformat/mmf.c

libavformat/mov.c

libavformat/movenc.c

libavformat/mp3.c

libavformat/mpegenc.c

libavformat/mpegts.c

libavformat/mpegtsenc.c

libavformat/mtv.c

libavformat/mxf.h

libavformat/mxfdec.c

libavformat/network.h

libavformat/nut.c

libavformat/nut.h

libavformat/nutdec.c

libavformat/nutenc.c

libavformat/nuv.c

libavformat/oggdec.h

libavformat/oggparseflac.c

libavformat/oggparsevorbis.c

libavformat/os_support.c

libavformat/os_support.h

libavformat/psxstr.c

libavformat/raw.c

libavformat/rdt.c

libavformat/rdt.h

libavformat/riff.c

libavformat/riff.h

libavformat/rl2.c

libavformat/rm.h

libavformat/rmdec.c

libavformat/rmenc.c

libavformat/rtp.h

libavformat/rtp_h264.c

libavformat/rtp_internal.h

libavformat/rtpdec.c

libavformat/rtpproto.c

libavformat/rtsp.c

libavformat/rtsp.h

libavformat/sdp.c

libavformat/segafilm.c

libavformat/sierravmd.c

libavformat/siff.c

libavformat/smacker.c

libavformat/swfdec.c

libavformat/swfenc.c

libavformat/tcp.c

libavformat/thp.c

libavformat/tta.c

libavformat/txd.c

libavformat/udp.c

libavformat/utils.c

libavformat/vc1test.c

libavformat/voc.h

libavformat/vocdec.c

libavformat/vocenc.c

libavformat/wav.c

libavformat/wc3movie.c

libavformat/westwood.c

libavformat/wv.c

libavformat/xa.c

libavformat/yuv4mpeg.c

libavutil/Makefile

libavutil/adler32.c

libavutil/aes.c

libavutil/arm/bswap.h

libavutil/avutil.h

libavutil/bswap.h

libavutil/common.h

libavutil/crc.c

libavutil/des.c

libavutil/fifo.h

libavutil/internal.h

libavutil/intreadwrite.h

libavutil/log.h

libavutil/mathematics.c

libavutil/mathematics.h

libavutil/md5.c

libavutil/mem.c

libavutil/mem.h

libavutil/pca.c

libavutil/random.c

libavutil/random.h

libavutil/rational.c

libavutil/sha1.c

libavutil/tree.c

libavutil/x86/bswap.h

libavutil/x86_cpu.h

libpostproc/postprocess.c

libpostproc/postprocess.h

libpostproc/postprocess_template.c

libswscale/Makefile

libswscale/rgb2rgb.c

libswscale/rgb2rgb.h

libswscale/rgb2rgb_template.c

libswscale/swscale-example.c

libswscale/swscale.c

libswscale/swscale_bfin.c

libswscale/swscale_internal.h

libswscale/swscale_template.c

libswscale/yuv2rgb.c

libswscale/yuv2rgb_altivec.c

libswscale/yuv2rgb_bfin.c

libswscale/yuv2rgb_template.c

subdir.mak

tests/ffmpeg.regression.ref

tests/libav.regression.ref

tests/regression.sh

tests/rotozoom.regression.ref

tests/seek.regression.ref

tests/seek_test.sh

Show diffs side-by-side

added added

removed removed

libavcodec/i386/mpegvideo_mmx.c

* The simplest mpeg encoder (well, it was the simplest!)

* Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>

* h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>

* This file is part of FFmpeg.

* FFmpeg is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public

* License as published by the Free Software Foundation; either

* version 2.1 of the License, or (at your option) any later version.

* FFmpeg is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

* You should have received a copy of the GNU Lesser General Public

* License along with FFmpeg; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#include "libavutil/x86_cpu.h"

#include "libavcodec/avcodec.h"

#include "libavcodec/dsputil.h"

#include "libavcodec/mpegvideo.h"

#include "dsputil_mmx.h"

/* 64-entry table of 16-bit values declared with external linkage.
 * NOTE(review): presumably defined elsewhere in libavcodec; it is not
 * referenced by any function in the portion of this file shown here. */
extern uint16_t inv_zigzag_direct16[64];

/**
 * Dequantize one H.263-style intra block in place using MMX.
 *
 * Each coefficient c handled by the loop becomes
 *     c > 0 :  c*qmul + qadd
 *     c < 0 :  c*qmul - qadd
 *     c == 0:  0
 * with qmul = 2*qscale. All arithmetic is done on 16-bit lanes (pmullw keeps
 * only the low 16 bits of each product). The DC value is computed in C before
 * the loop and stored to block[0] afterwards, replacing the loop's result.
 *
 * @param s      codec context; reads h263_aic, ac_pred, y_dc_scale,
 *               c_dc_scale, block_last_index[] and
 *               inter_scantable.raster_end[]
 * @param block  coefficients, rewritten in place. The loop handles 8
 *               coefficients per iteration, so it may rewrite entries beyond
 *               index nCoeffs (up to the end of that group of 8).
 * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
{
    x86_reg level, qmul, qadd, nCoeffs;

    qmul = qscale << 1;

    assert(s->block_last_index[n]>=0 || s->h263_aic);

    if (!s->h263_aic) {
        /* DC is scaled separately; AC coefficients get the rounding offset */
        if (n < 4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd = (qscale - 1) | 1;
    }else{
        /* with h263_aic set: no rounding offset, DC passes through as-is */
        qadd = 0;
        level= block[0];
    }
    if(s->ac_pred)
        nCoeffs=63;
    else
        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

    /*
     * %0 = block+nCoeffs, %3 = -2*nCoeffs (byte offset), so (%0,%3) starts at
     * block[0]; the offset advances by 16 bytes until it becomes positive.
     * NOTE(review): %3 is declared as an input-only operand but is modified
     * by "add"; GCC's Extended Asm rules do not allow that.
     */
    __asm__ volatile(
                "movd %1, %%mm6 \n\t" //qmul
                /* two packssdw are needed to replicate the value into all
                 * four 16-bit lanes; a single one fills lanes 0 and 2 only */
                "packssdw %%mm6, %%mm6 \n\t"
                "packssdw %%mm6, %%mm6 \n\t"
                "movd %2, %%mm5 \n\t" //qadd
                "pxor %%mm7, %%mm7 \n\t"
                "packssdw %%mm5, %%mm5 \n\t"
                "packssdw %%mm5, %%mm5 \n\t"
                "psubw %%mm5, %%mm7 \n\t" // mm7 = -qadd in every lane
                "pxor %%mm4, %%mm4 \n\t" // mm4 = 0
                ASMALIGN(4)
                "1: \n\t"
                "movq (%0, %3), %%mm0 \n\t"
                "movq 8(%0, %3), %%mm1 \n\t"

                "pmullw %%mm6, %%mm0 \n\t" // block[i]*qmul
                "pmullw %%mm6, %%mm1 \n\t" // block[i]*qmul

                "movq (%0, %3), %%mm2 \n\t"
                "movq 8(%0, %3), %%mm3 \n\t"

                "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0
                "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0

                "pxor %%mm2, %%mm0 \n\t" // complement the positive lanes
                "pxor %%mm3, %%mm1 \n\t"

                "paddw %%mm7, %%mm0 \n\t" // subtract qadd
                "paddw %%mm7, %%mm1 \n\t"

                "pxor %%mm0, %%mm2 \n\t" // undo complement: +qadd for positive lanes
                "pxor %%mm1, %%mm3 \n\t"

                "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0

                "pandn %%mm2, %%mm0 \n\t" // keep zero coefficients at zero
                "pandn %%mm3, %%mm1 \n\t"

                "movq %%mm0, (%0, %3) \n\t"
                "movq %%mm1, 8(%0, %3) \n\t"

                "add $16, %3 \n\t"
                "jng 1b \n\t" // loop while the byte offset is <= 0
                ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
                : "memory"
        );
    /* the loop also rewrote block[0]; replace it with the DC value */
    block[0]= level;
}

107

108

109

static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,

110

DCTELEM *block, int n, int qscale)

111

{

112

x86_reg qmul, qadd, nCoeffs;

113

114

qmul = qscale << 1;

115

qadd = (qscale - 1) | 1;

116

117

assert(s->block_last_index[n]>=0 || s->h263_aic);

118

119

nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

120

//printf("%d %d ", qmul, qadd);

121

__asm__ volatile(

122

"movd %1, %%mm6 \n\t" //qmul

123

"packssdw %%mm6, %%mm6 \n\t"

124

"packssdw %%mm6, %%mm6 \n\t"

125

"movd %2, %%mm5 \n\t" //qadd

126

"pxor %%mm7, %%mm7 \n\t"

127

"packssdw %%mm5, %%mm5 \n\t"

128

"packssdw %%mm5, %%mm5 \n\t"

129

"psubw %%mm5, %%mm7 \n\t"

130

"pxor %%mm4, %%mm4 \n\t"

131

ASMALIGN(4)

132

"1: \n\t"

133

"movq (%0, %3), %%mm0 \n\t"

134

"movq 8(%0, %3), %%mm1 \n\t"

135

136

"pmullw %%mm6, %%mm0 \n\t"

137

"pmullw %%mm6, %%mm1 \n\t"

138

139

"movq (%0, %3), %%mm2 \n\t"

140

"movq 8(%0, %3), %%mm3 \n\t"

141

142

"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0

143

"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0

144

145

"pxor %%mm2, %%mm0 \n\t"

146

"pxor %%mm3, %%mm1 \n\t"

147

148

"paddw %%mm7, %%mm0 \n\t"

149

"paddw %%mm7, %%mm1 \n\t"

150

151

"pxor %%mm0, %%mm2 \n\t"

152

"pxor %%mm1, %%mm3 \n\t"

153

154

"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0

155

"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0

156

157

"pandn %%mm2, %%mm0 \n\t"

158

"pandn %%mm3, %%mm1 \n\t"

159

160

"movq %%mm0, (%0, %3) \n\t"

161

"movq %%mm1, 8(%0, %3) \n\t"

162

163

"add $16, %3 \n\t"

164

"jng 1b \n\t"

165

::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))

166

: "memory"

167

);

168

}

169

170

171

172

NK:

173

Note: looking at PARANOID:

174

"enable all paranoid tests for rounding, overflows, etc..."

175

176

#ifdef PARANOID

177

if (level < -2048 || level > 2047)

178

fprintf(stderr, "unquant error %d %d\n", i, level);

179

#endif

180

We can suppose that result of two multiplications can't be greater than 0xFFFF

181

i.e. is 16-bit, so we use here only PMULLW instruction and can avoid

182

a complex multiplication.

183

=====================================================

184

Full formula for multiplication of 2 integer numbers

185

which are represented as high:low words:

186

input: value1 = high1:low1

187

value2 = high2:low2

188

output: value3 = value1*value2

189

value3=high3:low3 (on overflow: modulus 2^32 wrap-around)

190

this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4

191

but this algorithm will compute only 0x66cb0ce4

192

this is limited by the 16-bit size of the operands

193

---------------------------------

194

tlow1 = high1*low2

195

tlow2 = high2*low1

196

tlow1 = tlow1 + tlow2

197

high3:low3 = low1*low2

198

high3 += tlow1

199

200

static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,

201

DCTELEM *block, int n, int qscale)

202

{

203

x86_reg nCoeffs;

204

const uint16_t *quant_matrix;

205

int block0;

206

207

assert(s->block_last_index[n]>=0);

208

209

nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;

210

211

if (n < 4)

212

block0 = block[0] * s->y_dc_scale;

213

else

214

block0 = block[0] * s->c_dc_scale;

215

/* XXX: only mpeg1 */

216

quant_matrix = s->intra_matrix;

217

__asm__ volatile(

218

"pcmpeqw %%mm7, %%mm7 \n\t"

219

"psrlw $15, %%mm7 \n\t"

220

"movd %2, %%mm6 \n\t"

221

"packssdw %%mm6, %%mm6 \n\t"

222

"packssdw %%mm6, %%mm6 \n\t"

223

"mov %3, %%"REG_a" \n\t"

224

ASMALIGN(4)

225

"1: \n\t"

226

"movq (%0, %%"REG_a"), %%mm0 \n\t"

227

"movq 8(%0, %%"REG_a"), %%mm1 \n\t"

228

"movq (%1, %%"REG_a"), %%mm4 \n\t"

229

"movq 8(%1, %%"REG_a"), %%mm5 \n\t"

230

"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]

231

"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]

232

"pxor %%mm2, %%mm2 \n\t"

233

"pxor %%mm3, %%mm3 \n\t"

234

"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0

235

"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0

236

"pxor %%mm2, %%mm0 \n\t"

237

"pxor %%mm3, %%mm1 \n\t"

238

"psubw %%mm2, %%mm0 \n\t" // abs(block[i])

239

"psubw %%mm3, %%mm1 \n\t" // abs(block[i])

240

"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q

241

"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q

242

"pxor %%mm4, %%mm4 \n\t"

243

"pxor %%mm5, %%mm5 \n\t" // FIXME slow

244

"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0

245

"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0

246

"psraw $3, %%mm0 \n\t"

247

"psraw $3, %%mm1 \n\t"

248

"psubw %%mm7, %%mm0 \n\t"

249

"psubw %%mm7, %%mm1 \n\t"

250

"por %%mm7, %%mm0 \n\t"

251

"por %%mm7, %%mm1 \n\t"

252

"pxor %%mm2, %%mm0 \n\t"

253

"pxor %%mm3, %%mm1 \n\t"

254

"psubw %%mm2, %%mm0 \n\t"

255

"psubw %%mm3, %%mm1 \n\t"

256

"pandn %%mm0, %%mm4 \n\t"

257

"pandn %%mm1, %%mm5 \n\t"

258

"movq %%mm4, (%0, %%"REG_a") \n\t"

259

"movq %%mm5, 8(%0, %%"REG_a") \n\t"

260

261

"add $16, %%"REG_a" \n\t"

262

"js 1b \n\t"

263

::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)

264

: "%"REG_a, "memory"

265

);

266

block[0]= block0;

267

}

268

269

static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,

270

DCTELEM *block, int n, int qscale)

271

{

272

x86_reg nCoeffs;

273

const uint16_t *quant_matrix;

274

275

assert(s->block_last_index[n]>=0);

276

277

nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;

278

279

quant_matrix = s->inter_matrix;

280

__asm__ volatile(

281

"pcmpeqw %%mm7, %%mm7 \n\t"

282

"psrlw $15, %%mm7 \n\t"

283

"movd %2, %%mm6 \n\t"

284

"packssdw %%mm6, %%mm6 \n\t"

285

"packssdw %%mm6, %%mm6 \n\t"

286

"mov %3, %%"REG_a" \n\t"

287

ASMALIGN(4)

288

"1: \n\t"

289

"movq (%0, %%"REG_a"), %%mm0 \n\t"

290

"movq 8(%0, %%"REG_a"), %%mm1 \n\t"

291

"movq (%1, %%"REG_a"), %%mm4 \n\t"

292

"movq 8(%1, %%"REG_a"), %%mm5 \n\t"

293

"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]

294

"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]

295

"pxor %%mm2, %%mm2 \n\t"

296

"pxor %%mm3, %%mm3 \n\t"

297

"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0

298

"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0

299

"pxor %%mm2, %%mm0 \n\t"

300

"pxor %%mm3, %%mm1 \n\t"

301

"psubw %%mm2, %%mm0 \n\t" // abs(block[i])

302

"psubw %%mm3, %%mm1 \n\t" // abs(block[i])

303

"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2

304

"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2

305

"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1

306

"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1

307

"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q

308

"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q

309

"pxor %%mm4, %%mm4 \n\t"

310

"pxor %%mm5, %%mm5 \n\t" // FIXME slow

311

"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0

312

"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0

313

"psraw $4, %%mm0 \n\t"

314

"psraw $4, %%mm1 \n\t"

315

"psubw %%mm7, %%mm0 \n\t"

316

"psubw %%mm7, %%mm1 \n\t"

317

"por %%mm7, %%mm0 \n\t"

318

"por %%mm7, %%mm1 \n\t"

319

"pxor %%mm2, %%mm0 \n\t"

320

"pxor %%mm3, %%mm1 \n\t"

321

"psubw %%mm2, %%mm0 \n\t"

322

"psubw %%mm3, %%mm1 \n\t"

323

"pandn %%mm0, %%mm4 \n\t"

324

"pandn %%mm1, %%mm5 \n\t"

325

"movq %%mm4, (%0, %%"REG_a") \n\t"

326

"movq %%mm5, 8(%0, %%"REG_a") \n\t"

327

328

"add $16, %%"REG_a" \n\t"

329

"js 1b \n\t"

330

::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)

331

: "%"REG_a, "memory"

332

);

333

}

334

335

static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,

336

DCTELEM *block, int n, int qscale)

337

{

338

x86_reg nCoeffs;

339

const uint16_t *quant_matrix;

340

int block0;

341

342

assert(s->block_last_index[n]>=0);

343

344

if(s->alternate_scan) nCoeffs= 63; //FIXME

345

else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];

346

347

if (n < 4)

348

block0 = block[0] * s->y_dc_scale;

349

else

350

block0 = block[0] * s->c_dc_scale;

351

quant_matrix = s->intra_matrix;

352

__asm__ volatile(

353

"pcmpeqw %%mm7, %%mm7 \n\t"

354

"psrlw $15, %%mm7 \n\t"

355

"movd %2, %%mm6 \n\t"

356

"packssdw %%mm6, %%mm6 \n\t"

357

"packssdw %%mm6, %%mm6 \n\t"

358

"mov %3, %%"REG_a" \n\t"

359

ASMALIGN(4)

360

"1: \n\t"

361

"movq (%0, %%"REG_a"), %%mm0 \n\t"

362

"movq 8(%0, %%"REG_a"), %%mm1 \n\t"

363

"movq (%1, %%"REG_a"), %%mm4 \n\t"

364

"movq 8(%1, %%"REG_a"), %%mm5 \n\t"

365

"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]

366

"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]

367

"pxor %%mm2, %%mm2 \n\t"

368

"pxor %%mm3, %%mm3 \n\t"

369

"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0

370

"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0

371

"pxor %%mm2, %%mm0 \n\t"

372

"pxor %%mm3, %%mm1 \n\t"

373

"psubw %%mm2, %%mm0 \n\t" // abs(block[i])

374

"psubw %%mm3, %%mm1 \n\t" // abs(block[i])

375

"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q

376

"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q

377

"pxor %%mm4, %%mm4 \n\t"

378

"pxor %%mm5, %%mm5 \n\t" // FIXME slow

379

"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0

380

"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0

381

"psraw $3, %%mm0 \n\t"

382

"psraw $3, %%mm1 \n\t"

383

"pxor %%mm2, %%mm0 \n\t"

384

"pxor %%mm3, %%mm1 \n\t"

385

"psubw %%mm2, %%mm0 \n\t"

386

"psubw %%mm3, %%mm1 \n\t"

387

"pandn %%mm0, %%mm4 \n\t"

388

"pandn %%mm1, %%mm5 \n\t"

389

"movq %%mm4, (%0, %%"REG_a") \n\t"

390

"movq %%mm5, 8(%0, %%"REG_a") \n\t"

391

392

"add $16, %%"REG_a" \n\t"

393

"jng 1b \n\t"

394

::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)

395

: "%"REG_a, "memory"

396

);

397

block[0]= block0;

398

//Note, we do not do mismatch control for intra as errors cannot accumulate

399

}

400

401

static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,

402

DCTELEM *block, int n, int qscale)

403

{

404

x86_reg nCoeffs;

405

const uint16_t *quant_matrix;

406

407

assert(s->block_last_index[n]>=0);

408

409

if(s->alternate_scan) nCoeffs= 63; //FIXME

410

else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];

411

412

quant_matrix = s->inter_matrix;

413

__asm__ volatile(

414

"pcmpeqw %%mm7, %%mm7 \n\t"

415

"psrlq $48, %%mm7 \n\t"

416

"movd %2, %%mm6 \n\t"

417

"packssdw %%mm6, %%mm6 \n\t"

418

"packssdw %%mm6, %%mm6 \n\t"

419

"mov %3, %%"REG_a" \n\t"

420

ASMALIGN(4)

421

"1: \n\t"

422

"movq (%0, %%"REG_a"), %%mm0 \n\t"

423

"movq 8(%0, %%"REG_a"), %%mm1 \n\t"

424

"movq (%1, %%"REG_a"), %%mm4 \n\t"

425

"movq 8(%1, %%"REG_a"), %%mm5 \n\t"

426

"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]

427

"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]

428

"pxor %%mm2, %%mm2 \n\t"

429

"pxor %%mm3, %%mm3 \n\t"

430

"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0

431

"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0

432

"pxor %%mm2, %%mm0 \n\t"

433

"pxor %%mm3, %%mm1 \n\t"

434

"psubw %%mm2, %%mm0 \n\t" // abs(block[i])

435

"psubw %%mm3, %%mm1 \n\t" // abs(block[i])

436

"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2

437

"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2

438

"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q

439

"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q

440

"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q

441

"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q

442

"pxor %%mm4, %%mm4 \n\t"

443

"pxor %%mm5, %%mm5 \n\t" // FIXME slow

444

"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0

445

"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0

446

"psrlw $4, %%mm0 \n\t"

447

"psrlw $4, %%mm1 \n\t"

448

"pxor %%mm2, %%mm0 \n\t"

449

"pxor %%mm3, %%mm1 \n\t"

450

"psubw %%mm2, %%mm0 \n\t"

451

"psubw %%mm3, %%mm1 \n\t"

452

"pandn %%mm0, %%mm4 \n\t"

453

"pandn %%mm1, %%mm5 \n\t"

454

"pxor %%mm4, %%mm7 \n\t"

455

"pxor %%mm5, %%mm7 \n\t"

456

"movq %%mm4, (%0, %%"REG_a") \n\t"

457

"movq %%mm5, 8(%0, %%"REG_a") \n\t"

458

459

"add $16, %%"REG_a" \n\t"

460

"jng 1b \n\t"

461

"movd 124(%0, %3), %%mm0 \n\t"

462

"movq %%mm7, %%mm6 \n\t"

463

"psrlq $32, %%mm7 \n\t"

464

"pxor %%mm6, %%mm7 \n\t"

465

"movq %%mm7, %%mm6 \n\t"

466

"psrlq $16, %%mm7 \n\t"

467

"pxor %%mm6, %%mm7 \n\t"

468

"pslld $31, %%mm7 \n\t"

469

"psrlq $15, %%mm7 \n\t"

470

"pxor %%mm7, %%mm0 \n\t"

471

"movd %%mm0, 124(%0, %3) \n\t"

472

473

::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)

474

: "%"REG_a, "memory"

475

);

476

}

477

478

/**
 * Accumulate coefficient magnitudes and shrink coefficients toward zero (MMX).
 *
 * For each of the 64 coefficients c in block:
 *   - |c| is added to the matching 32-bit entry of s->dct_error_sum[intra];
 *   - c is replaced by sign(c) * max(|c| - offset[i], 0), where offset is
 *     s->dct_offset[intra] (psubusw saturates the subtraction at zero).
 * s->dct_count[intra] is incremented once per call.
 *
 * @param s     codec context; reads mb_intra, updates dct_count[] and
 *              dct_error_sum[], reads dct_offset[]
 * @param block 64 coefficients, rewritten in place
 */
static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
    const int intra= s->mb_intra;
    int *sum= s->dct_error_sum[intra];
    uint16_t *offset= s->dct_offset[intra];

    s->dct_count[intra]++;

    /* 8 coefficients per iteration; ends when block reaches block+64 */
    __asm__ volatile(
        "pxor %%mm7, %%mm7 \n\t" // mm7 = 0, used for zero-extension
        "1: \n\t"
        "pxor %%mm0, %%mm0 \n\t"
        "pxor %%mm1, %%mm1 \n\t"
        "movq (%0), %%mm2 \n\t"
        "movq 8(%0), %%mm3 \n\t"
        "pcmpgtw %%mm2, %%mm0 \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm3, %%mm1 \n\t"
        "pxor %%mm0, %%mm2 \n\t"
        "pxor %%mm1, %%mm3 \n\t"
        "psubw %%mm0, %%mm2 \n\t" // abs(block[i])
        "psubw %%mm1, %%mm3 \n\t"
        "movq %%mm2, %%mm4 \n\t" // keep abs values for the sums
        "movq %%mm3, %%mm5 \n\t"
        "psubusw (%2), %%mm2 \n\t" // abs - offset, saturating at 0
        "psubusw 8(%2), %%mm3 \n\t"
        "pxor %%mm0, %%mm2 \n\t" // restore the sign
        "pxor %%mm1, %%mm3 \n\t"
        "psubw %%mm0, %%mm2 \n\t"
        "psubw %%mm1, %%mm3 \n\t"
        "movq %%mm2, (%0) \n\t"
        "movq %%mm3, 8(%0) \n\t"
        "movq %%mm4, %%mm2 \n\t"
        "movq %%mm5, %%mm3 \n\t"
        "punpcklwd %%mm7, %%mm4 \n\t" // widen abs values to 32 bits
        "punpckhwd %%mm7, %%mm2 \n\t"
        "punpcklwd %%mm7, %%mm5 \n\t"
        "punpckhwd %%mm7, %%mm3 \n\t"
        "paddd (%1), %%mm4 \n\t" // sum[i] += abs(block[i])
        "paddd 8(%1), %%mm2 \n\t"
        "paddd 16(%1), %%mm5 \n\t"
        "paddd 24(%1), %%mm3 \n\t"
        "movq %%mm4, (%1) \n\t"
        "movq %%mm2, 8(%1) \n\t"
        "movq %%mm5, 16(%1) \n\t"
        "movq %%mm3, 24(%1) \n\t"
        "add $16, %0 \n\t"
        "add $32, %1 \n\t"
        "add $16, %2 \n\t"
        "cmp %3, %0 \n\t"
            " jb 1b \n\t"
        : "+r" (block), "+r" (sum), "+r" (offset)
        : "r"(block+64)
        /* the asm stores through block and sum, so the compiler must be told
         * that memory is modified */
        : "memory"
    );
}

531

532

/**
 * SSE2 noise-reduction pass over one block of DCT coefficients.
 *
 * Same operation as the MMX variant, but sixteen 16-bit words (32 bytes)
 * are handled per iteration while walking from block up to block+64:
 *  - abs(block[i]) is added to the matching 32-bit counter in
 *    s->dct_error_sum[intra], and
 *  - block[i] becomes sign(block[i]) * max(abs(block[i]) - offset[i], 0),
 *    with offset taken from s->dct_offset[intra] (unsigned saturating
 *    subtraction).
 * s->dct_count[intra] is incremented once per call.
 *
 * block, s->dct_error_sum[intra] and s->dct_offset[intra] must be 16-byte
 * aligned: they are accessed with movdqa and as SSE memory operands.
 *
 * NOTE(review): the word-sized SIMD operations assume DCTELEM is a 16-bit
 * type - confirm against its typedef. The xmm registers used are not listed
 * as clobbers; naming them unconditionally breaks builds without SSE
 * enabled, so that is left to a build-system-aware macro.
 *
 * @param s     context; s->mb_intra selects which statistics set is used
 * @param block coefficients, modified in place
 */
static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
    const int intra= s->mb_intra;
    int *sum= s->dct_error_sum[intra];
    uint16_t *offset= s->dct_offset[intra];

    s->dct_count[intra]++;

    __asm__ volatile(
        "pxor %%xmm7, %%xmm7 \n\t"      // xmm7 = 0, used to zero-extend words
        "1: \n\t"
        "pxor %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm1, %%xmm1 \n\t"
        "movdqa (%0), %%xmm2 \n\t"      // load 2x8 coefficients
        "movdqa 16(%0), %%xmm3 \n\t"
        "pcmpgtw %%xmm2, %%xmm0 \n\t"   // sign mask: block[i] < 0 ? -1 : 0
        "pcmpgtw %%xmm3, %%xmm1 \n\t"
        "pxor %%xmm0, %%xmm2 \n\t"
        "pxor %%xmm1, %%xmm3 \n\t"
        "psubw %%xmm0, %%xmm2 \n\t"     // abs(block[i])
        "psubw %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"    // keep abs() for the error sum
        "movdqa %%xmm3, %%xmm5 \n\t"
        "psubusw (%2), %%xmm2 \n\t"     // max(abs - offset, 0)
        "psubusw 16(%2), %%xmm3 \n\t"
        "pxor %%xmm0, %%xmm2 \n\t"
        "pxor %%xmm1, %%xmm3 \n\t"
        "psubw %%xmm0, %%xmm2 \n\t"     // restore the original sign
        "psubw %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm2, (%0) \n\t"      // store denoised coefficients
        "movdqa %%xmm3, 16(%0) \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm0 \n\t"
        "punpcklwd %%xmm7, %%xmm4 \n\t" // widen abs() to 32 bit
        "punpckhwd %%xmm7, %%xmm6 \n\t"
        "punpcklwd %%xmm7, %%xmm5 \n\t"
        "punpckhwd %%xmm7, %%xmm0 \n\t"
        "paddd (%1), %%xmm4 \n\t"       // sum[i] += abs(block[i])
        "paddd 16(%1), %%xmm6 \n\t"
        "paddd 32(%1), %%xmm5 \n\t"
        "paddd 48(%1), %%xmm0 \n\t"
        "movdqa %%xmm4, (%1) \n\t"
        "movdqa %%xmm6, 16(%1) \n\t"
        "movdqa %%xmm5, 32(%1) \n\t"
        "movdqa %%xmm0, 48(%1) \n\t"
        "add $32, %0 \n\t"
        "add $64, %1 \n\t"
        "add $32, %2 \n\t"
        "cmp %3, %0 \n\t"
        " jb 1b \n\t"
        : "+r" (block), "+r" (sum), "+r" (offset)
        : "r"(block+64)
        /* The asm reads and writes block[] and sum[] through pointer
         * operands only, so the compiler must be told memory changes. */
        : "memory"
    );
}

585

586

/*
 * Instantiate mpegvideo_mmx_template.c once per instruction-set level.
 * Before each inclusion the HAVE_* feature macros are set to the level being
 * built, and RENAME()/RENAMEl() are redefined to append the matching
 * upper-/lower-case suffix. MPV_common_init_mmx() refers to the resulting
 * dct_quantize_MMX, _MMX2, _SSE2 and _SSSE3 symbols.
 * NOTE(review): how the template uses RENAMEl() is not visible here; the
 * SSSE3 pass deliberately-looking reuses the _sse2 lower-case suffix -
 * confirm against the template.
 */

/* Remember whether SSSE3 was configured, then start from plain MMX. */
#ifdef HAVE_SSSE3
#define HAVE_SSSE3_BAK
#endif
#undef HAVE_SSSE3

/* Pass 1: MMX only. */
#undef HAVE_SSE2
#undef HAVE_MMX2
#define RENAME(a) a ## _MMX
#define RENAMEl(a) a ## _mmx
#include "mpegvideo_mmx_template.c"

/* Pass 2: MMX2 (MMXEXT). */
#define HAVE_MMX2
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _MMX2
#define RENAMEl(a) a ## _mmx2
#include "mpegvideo_mmx_template.c"

/* Pass 3: SSE2 (HAVE_MMX2 stays defined). */
#define HAVE_SSE2
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _SSE2
#define RENAMEl(a) a ## _sse2
#include "mpegvideo_mmx_template.c"

/* Pass 4: SSSE3, only when it was enabled before this section. */
#ifdef HAVE_SSSE3_BAK
#define HAVE_SSSE3
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _SSSE3
#define RENAMEl(a) a ## _sse2
#include "mpegvideo_mmx_template.c"
#endif

619

620

/**
 * Install the x86 SIMD routines into an MpegEncContext.
 *
 * Nothing is touched unless mm_flags reports MMX. When it does, the H.263,
 * MPEG-1 and MPEG-2 dequantizers and the DCT denoiser are set, and, if the
 * codec context's dct_algo is FF_DCT_AUTO or FF_DCT_MMX, dct_quantize is set
 * to the most capable variant mm_flags allows
 * (SSSE3 > SSE2 > MMX2 > MMX; SSSE3 only when compiled in).
 *
 * @param s context whose function pointers are filled in
 */
void MPV_common_init_mmx(MpegEncContext *s)
{
    if (mm_flags & FF_MM_MMX) {
        const int dct_algo = s->avctx->dct_algo;

        s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
        s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
        s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
        s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
        /* Only the MPEG-2 intra dequantizer is withheld in bit-exact mode;
         * the inter one below is installed unconditionally.
         * NOTE(review): presumably the MMX intra version is not
         * bit-identical to the C reference - confirm. */
        if (!(s->flags & CODEC_FLAG_BITEXACT)) {
            s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
        }
        s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;

        if (mm_flags & FF_MM_SSE2) {
            s->denoise_dct= denoise_dct_sse2;
        } else {
            s->denoise_dct= denoise_dct_mmx;
        }

        if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
#ifdef HAVE_SSSE3
            if (mm_flags & FF_MM_SSSE3) {
                s->dct_quantize= dct_quantize_SSSE3;
            } else
#endif
            if (mm_flags & FF_MM_SSE2) {
                s->dct_quantize= dct_quantize_SSE2;
            } else if (mm_flags & FF_MM_MMXEXT) {
                s->dct_quantize= dct_quantize_MMX2;
            } else {
                s->dct_quantize= dct_quantize_MMX;
            }
        }
    }
}

Older »