/*
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * based on code by Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avutil.h"
#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
    do {                                                    \
        __typeof__(src_a) tempA1, tempB1, tempC1, tempD1;   \
        __typeof__(src_a) tempE1, tempF1, tempG1, tempH1;   \
        __typeof__(src_a) tempA2, tempB2, tempC2, tempD2;   \
        __typeof__(src_a) tempE2, tempF2, tempG2, tempH2;   \
        tempA1 = vec_mergeh (src_a, src_e);                 \
        tempB1 = vec_mergel (src_a, src_e);                 \
        tempC1 = vec_mergeh (src_b, src_f);                 \
        tempD1 = vec_mergel (src_b, src_f);                 \
        tempE1 = vec_mergeh (src_c, src_g);                 \
        tempF1 = vec_mergel (src_c, src_g);                 \
        tempG1 = vec_mergeh (src_d, src_h);                 \
        tempH1 = vec_mergel (src_d, src_h);                 \
        tempA2 = vec_mergeh (tempA1, tempE1);               \
        tempB2 = vec_mergel (tempA1, tempE1);               \
        tempC2 = vec_mergeh (tempB1, tempF1);               \
        tempD2 = vec_mergel (tempB1, tempF1);               \
        tempE2 = vec_mergeh (tempC1, tempG1);               \
        tempF2 = vec_mergel (tempC1, tempG1);               \
        tempG2 = vec_mergeh (tempD1, tempH1);               \
        tempH2 = vec_mergel (tempD1, tempH1);               \
        src_a = vec_mergeh (tempA2, tempE2);                \
        src_b = vec_mergel (tempA2, tempE2);                \
        src_c = vec_mergeh (tempB2, tempF2);                \
        src_d = vec_mergel (tempB2, tempF2);                \
        src_e = vec_mergeh (tempC2, tempG2);                \
        src_f = vec_mergel (tempC2, tempG2);                \
        src_g = vec_mergeh (tempD2, tempH2);                \
        src_h = vec_mergel (tempD2, tempH2);                \
    } while (0)
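
/* Classify an 8-line block for the vertical deblocker: count, for each
 * column, how many of the seven vertical neighbour pairs differ by less
 * than the DC threshold, and compare the total against
 * c->ppMode.flatnessThreshold. This mirrors the contract of the scalar
 * vertClassify in postprocess_template.c. */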
static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
    this code makes no assumption on src or stride.
    One could remove the recomputation of the perm
    vector by assuming (stride % 16) == 0, unfortunately
    this is not always true.
    */
    DECLARE_ALIGNED(16, short, data[8]) =
                    {
                        ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1,
                        data[0] * 2 + 1,
                        c->QP * 2,
                        c->QP * 4
                    };
    int numEq;
    uint8_t *src2 = src;
    vector signed short v_dcOffset;
    vector signed short v2QP;
    vector unsigned short v4QP;
    vector unsigned short v_dcThreshold;
    const int properStride = (stride % 16);
    const int srcAlign = ((unsigned long)src2 % 16);
    const int two_vectors = ((srcAlign > 8) || properStride) ? 1 : 0;
    const vector signed int zero = vec_splat_s32(0);
    const vector signed short mask = vec_splat_s16(1);
    vector signed int v_numEq = vec_splat_s32(0);
    vector signed short v_data = vec_ld(0, data);
    vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3,
                        v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;
//FIXME avoid this mess if possible
    register int j0 = 0,
                 j1 = stride,
                 j2 = 2 * stride,
                 j3 = 3 * stride,
                 j4 = 4 * stride,
                 j5 = 5 * stride,
                 j6 = 6 * stride,
                 j7 = 7 * stride;
    vector unsigned char v_srcA0, v_srcA1, v_srcA2, v_srcA3,
                         v_srcA4, v_srcA5, v_srcA6, v_srcA7;

    v_dcOffset = vec_splat(v_data, 0);
    v_dcThreshold = (vector unsigned short)vec_splat(v_data, 1);
    v2QP = vec_splat(v_data, 2);
    v4QP = (vector unsigned short)vec_splat(v_data, 3);

    src2 += stride * 4;
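
    /* Two load paths: LOAD_LINE handles arbitrary alignment with
     * vec_lvsl + vec_perm across (up to) two 16-byte loads, while
     * LOAD_LINE_ALIGNED can use a single vec_ld. Both then widen the
     * low 8 bytes to signed 16-bit by merging with zero. */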
#define LOAD_LINE(i)                                                    \
    {                                                                   \
    vector unsigned char perm##i = vec_lvsl(j##i, src2);                \
    vector unsigned char v_srcA2##i;                                    \
    vector unsigned char v_srcA1##i = vec_ld(j##i, src2);               \
    if (two_vectors)                                                    \
        v_srcA2##i = vec_ld(j##i + 16, src2);                           \
    v_srcA##i =                                                         \
        vec_perm(v_srcA1##i, v_srcA2##i, perm##i);                      \
    v_srcAss##i =                                                       \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)v_srcA##i); }

#define LOAD_LINE_ALIGNED(i)                                            \
    v_srcA##i = vec_ld(j##i, src2);                                     \
    v_srcAss##i =                                                       \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)v_srcA##i)
    /* Special-casing the aligned case is worthwhile, as all calls from
     * the (transposed) horizontal deblocks will be aligned, in addition
     * to the naturally aligned vertical deblocks. */
    if (properStride && srcAlign) {
        LOAD_LINE_ALIGNED(0);
        LOAD_LINE_ALIGNED(1);
        LOAD_LINE_ALIGNED(2);
        LOAD_LINE_ALIGNED(3);
        LOAD_LINE_ALIGNED(4);
        LOAD_LINE_ALIGNED(5);
        LOAD_LINE_ALIGNED(6);
        LOAD_LINE_ALIGNED(7);
    } else {
        LOAD_LINE(0);
        LOAD_LINE(1);
        LOAD_LINE(2);
        LOAD_LINE(3);
        LOAD_LINE(4);
        LOAD_LINE(5);
        LOAD_LINE(6);
        LOAD_LINE(7);
    }
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED
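
    /* ITER(i, j) tests the difference between line i and line j against
     * the DC threshold for all eight columns at once: the biased
     * difference is compared as unsigned, and v_part##i keeps a 1 per
     * column that passed, so the vec_sum4s chain below counts matching
     * neighbour pairs. */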
#define ITER(i, j)                                                      \
    const vector signed short v_diff##i =                               \
        vec_sub(v_srcAss##i, v_srcAss##j);                              \
    const vector signed short v_sum##i =                                \
        vec_add(v_diff##i, v_dcOffset);                                 \
    const vector signed short v_comp##i =                               \
        (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
                                       v_dcThreshold);                  \
    const vector signed short v_part##i = vec_and(mask, v_comp##i);

    {
    ITER(0, 1)
    ITER(1, 2)
    ITER(2, 3)
    ITER(3, 4)
    ITER(4, 5)
    ITER(5, 6)
    ITER(6, 7)

    v_numEq = vec_sum4s(v_part0, v_numEq);
    v_numEq = vec_sum4s(v_part1, v_numEq);
    v_numEq = vec_sum4s(v_part2, v_numEq);
    v_numEq = vec_sum4s(v_part3, v_numEq);
    v_numEq = vec_sum4s(v_part4, v_numEq);
    v_numEq = vec_sum4s(v_part5, v_numEq);
    v_numEq = vec_sum4s(v_part6, v_numEq);
    }
#undef ITER

    v_numEq = vec_sums(v_numEq, zero);
    v_numEq = vec_splat(v_numEq, 3);
    vec_ste(v_numEq, 0, &numEq);
    if (numEq > c->ppMode.flatnessThreshold){
        const vector unsigned char mmoP1 = (const vector unsigned char)
            {0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
             0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B};
        const vector unsigned char mmoP2 = (const vector unsigned char)
            {0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
             0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f};
        const vector unsigned char mmoP = (const vector unsigned char)
            vec_lvsl(8, (unsigned char*)0);

        vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
        vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
        vector signed short mmoL = vec_perm(mmoL1, mmoL2, mmoP);
        vector signed short mmoR1 = vec_perm(v_srcAss5, v_srcAss7, mmoP1);
        vector signed short mmoR2 = vec_perm(v_srcAss1, v_srcAss3, mmoP2);
        vector signed short mmoR = vec_perm(mmoR1, mmoR2, mmoP);
        vector signed short mmoDiff = vec_sub(mmoL, mmoR);
        vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);

        if (vec_any_gt(mmoSum, v4QP))
            return 0;
        else
            return 1;
    }
    else return 2;
}
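
/* Vertical low-pass: the AltiVec counterpart of the scalar
 * doVertLowPass, filtering 8 columns over 10 lines with a sliding
 * 9-tap average. The running sums v_sumsB0..v_sumsB9 are updated line
 * by line: subtract the sample leaving the window, add the one
 * entering it. */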
static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) {
    /*
    this code makes no assumption on src or stride.
    One could remove the recomputation of the perm
    vector by assuming (stride % 16) == 0, unfortunately
    this is not always true. Quite a lot of load/stores
    can be removed by assuming proper alignment of
    src & stride :-(
    */
    uint8_t *src2 = src;
    const vector signed int zero = vec_splat_s32(0);
    const int properStride = (stride % 16);
    const int srcAlign = ((unsigned long)src2 % 16);
    DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
    vector signed short vqp = vec_ld(0, qp);
    vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
    vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
    vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
    vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
    vector unsigned char perml0, perml1, perml2, perml3, perml4,
                         perml5, perml6, perml7, perml8, perml9;
    register int j0 = 0,
                 j1 = stride,
                 j2 = 2 * stride,
                 j3 = 3 * stride,
                 j4 = 4 * stride,
                 j5 = 5 * stride,
                 j6 = 6 * stride,
                 j7 = 7 * stride,
                 j8 = 8 * stride,
                 j9 = 9 * stride;

    vqp = vec_splat(vqp, 0);

    src2 += stride*3;
#define LOAD_LINE(i)                                                    \
    perml##i = vec_lvsl(i * stride, src2);                              \
    vbA##i = vec_ld(i * stride, src2);                                  \
    vbB##i = vec_ld(i * stride + 16, src2);                             \
    vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                        \
    vb##i =                                                             \
        (vector signed short)vec_mergeh((vector unsigned char)zero,     \
                                        (vector unsigned char)vbT##i)

#define LOAD_LINE_ALIGNED(i)                                            \
    vbT##i = vec_ld(j##i, src2);                                        \
    vb##i =                                                             \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)vbT##i)
    /* Special-casing the aligned case is worthwhile, as all calls from
     * the (transposed) horizontal deblocks will be aligned, in addition
     * to the naturally aligned vertical deblocks. */
    if (properStride && srcAlign) {
        LOAD_LINE_ALIGNED(0);
        LOAD_LINE_ALIGNED(1);
        LOAD_LINE_ALIGNED(2);
        LOAD_LINE_ALIGNED(3);
        LOAD_LINE_ALIGNED(4);
        LOAD_LINE_ALIGNED(5);
        LOAD_LINE_ALIGNED(6);
        LOAD_LINE_ALIGNED(7);
        LOAD_LINE_ALIGNED(8);
        LOAD_LINE_ALIGNED(9);
    } else {
        LOAD_LINE(0);
        LOAD_LINE(1);
        LOAD_LINE(2);
        LOAD_LINE(3);
        LOAD_LINE(4);
        LOAD_LINE(5);
        LOAD_LINE(6);
        LOAD_LINE(7);
        LOAD_LINE(8);
        LOAD_LINE(9);
    }
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED
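
    /* v_first and v_last replicate the boundary samples, with a
     * QP-based outlier test selecting the inner sample instead when the
     * boundary step is too large; the v_sumsB* chain then builds the
     * sliding window sums consumed by COMPUTE_VR below. */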
    {
    const vector unsigned short v_2 = vec_splat_u16(2);
    const vector unsigned short v_4 = vec_splat_u16(4);

    const vector signed short v_diff01 = vec_sub(vb0, vb1);
    const vector unsigned short v_cmp01 =
        (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp);
    const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01);
    const vector signed short v_diff89 = vec_sub(vb8, vb9);
    const vector unsigned short v_cmp89 =
        (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
    const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);

    const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
    const vector signed short temp02 = vec_add(vb2, vb3);
    const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
    const vector signed short v_sumsB0 = vec_add(temp02, temp03);

    const vector signed short temp11 = vec_sub(v_sumsB0, v_first);
    const vector signed short v_sumsB1 = vec_add(temp11, vb4);

    const vector signed short temp21 = vec_sub(v_sumsB1, v_first);
    const vector signed short v_sumsB2 = vec_add(temp21, vb5);

    const vector signed short temp31 = vec_sub(v_sumsB2, v_first);
    const vector signed short v_sumsB3 = vec_add(temp31, vb6);

    const vector signed short temp41 = vec_sub(v_sumsB3, v_first);
    const vector signed short v_sumsB4 = vec_add(temp41, vb7);

    const vector signed short temp51 = vec_sub(v_sumsB4, vb1);
    const vector signed short v_sumsB5 = vec_add(temp51, vb8);

    const vector signed short temp61 = vec_sub(v_sumsB5, vb2);
    const vector signed short v_sumsB6 = vec_add(temp61, v_last);

    const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
    const vector signed short v_sumsB7 = vec_add(temp71, v_last);

    const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
    const vector signed short v_sumsB8 = vec_add(temp81, v_last);

    const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
    const vector signed short v_sumsB9 = vec_add(temp91, v_last);
#define COMPUTE_VR(i, j, k)                                             \
    const vector signed short temps1##i =                               \
        vec_add(v_sumsB##i, v_sumsB##k);                                \
    const vector signed short temps2##i =                               \
        vec_mladd(vb##j, (vector signed short)v_2, temps1##i);          \
    const vector signed short vr##j = vec_sra(temps2##i, v_4)

    COMPUTE_VR(0, 1, 2);
    COMPUTE_VR(1, 2, 3);
    COMPUTE_VR(2, 3, 4);
    COMPUTE_VR(3, 4, 5);
    COMPUTE_VR(4, 5, 6);
    COMPUTE_VR(5, 6, 7);
    COMPUTE_VR(6, 7, 8);
    COMPUTE_VR(7, 8, 9);

    const vector signed char neg1 = vec_splat_s8(-1);
    const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                                                     0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
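
    /* Unaligned store: pack the filtered line back to unsigned bytes,
     * rotate it into place with vec_lvsr, and vec_sel it into the two
     * destination vectors so the bytes outside the line are preserved. */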
#define PACK_AND_STORE(i)                                               \
    {   const vector unsigned char perms##i =                           \
            vec_lvsr(i * stride, src2);                                 \
        const vector unsigned char vf##i =                              \
            vec_packsu(vr##i, (vector signed short)zero);               \
        const vector unsigned char vg##i =                              \
            vec_perm(vf##i, vbT##i, permHH);                            \
        const vector unsigned char mask##i =                            \
            vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
        const vector unsigned char vg2##i =                             \
            vec_perm(vg##i, vg##i, perms##i);                           \
        const vector unsigned char svA##i =                             \
            vec_sel(vbA##i, vg2##i, mask##i);                           \
        const vector unsigned char svB##i =                             \
            vec_sel(vg2##i, vbB##i, mask##i);                           \
        vec_st(svA##i, i * stride, src2);                               \
        vec_st(svB##i, i * stride + 16, src2);}

#define PACK_AND_STORE_ALIGNED(i)                                       \
    {   const vector unsigned char vf##i =                              \
            vec_packsu(vr##i, (vector signed short)zero);               \
        const vector unsigned char vg##i =                              \
            vec_perm(vf##i, vbT##i, permHH);                            \
        vec_st(vg##i, i * stride, src2);}
    /* Special-casing the aligned case is worthwhile, as all calls from
     * the (transposed) horizontal deblocks will be aligned, in addition
     * to the naturally aligned vertical deblocks. */
    if (properStride && srcAlign) {
        PACK_AND_STORE_ALIGNED(1)
        PACK_AND_STORE_ALIGNED(2)
        PACK_AND_STORE_ALIGNED(3)
        PACK_AND_STORE_ALIGNED(4)
        PACK_AND_STORE_ALIGNED(5)
        PACK_AND_STORE_ALIGNED(6)
        PACK_AND_STORE_ALIGNED(7)
        PACK_AND_STORE_ALIGNED(8)
    } else {
        PACK_AND_STORE(1)
        PACK_AND_STORE(2)
        PACK_AND_STORE(3)
        PACK_AND_STORE(4)
        PACK_AND_STORE(5)
        PACK_AND_STORE(6)
        PACK_AND_STORE(7)
        PACK_AND_STORE(8)
    }
#undef PACK_AND_STORE
#undef PACK_AND_STORE_ALIGNED
    }
}
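
/* Vertical default filter: the AltiVec counterpart of the scalar
 * doVertDefFilter. It measures the energy around the block boundary
 * (middle, left and right differences) and moves lines 4 and 5 towards
 * each other by a clipped correction d. */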
static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
    this code makes no assumption on src or stride.
    One could remove the recomputation of the perm
    vector by assuming (stride % 16) == 0, unfortunately
    this is not always true. Quite a lot of load/stores
    can be removed by assuming proper alignment of
    src & stride :-(
    */
    uint8_t *src2 = src + stride*3;
    const vector signed int zero = vec_splat_s32(0);
    DECLARE_ALIGNED(16, short, qp[8]) = {8*c->QP};
    vector signed short vqp = vec_splat(
        (vector signed short)vec_ld(0, qp), 0);
#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, src2);                                     \
    const vector unsigned char vbA##i =                                 \
        vec_ld(i * stride, src2);                                       \
    const vector unsigned char vbB##i =                                 \
        vec_ld(i * stride + 16, src2);                                  \
    const vector unsigned char vbT##i =                                 \
        vec_perm(vbA##i, vbB##i, perm##i);                              \
    const vector signed short vb##i =                                   \
        (vector signed short)vec_mergeh((vector unsigned char)zero,     \
                                        (vector unsigned char)vbT##i)

    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
#undef LOAD_LINE
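
    /* Middle energy mE = 2*(l3-l6) + 5*(l5-l4); the filter strength is
     * d = max(|mE| - min(|lE|, |rE|), 0), scaled by 5/64 with rounding
     * and clipped to at most half the boundary step q = (l4-l5)/2. The
     * correction is applied only where |mE| < 8*QP, matching the scalar
     * default filter. */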
    const vector signed short v_1 = vec_splat_s16(1);
    const vector signed short v_2 = vec_splat_s16(2);
    const vector signed short v_5 = vec_splat_s16(5);
    const vector signed short v_32 = vec_sl(v_1,
                                            (vector unsigned short)v_5);

    const vector signed short l3minusl6 = vec_sub(vb3, vb6);
    const vector signed short l5minusl4 = vec_sub(vb5, vb4);
    const vector signed short twotimes_l3minusl6 = vec_mladd(v_2, l3minusl6, (vector signed short)zero);
    const vector signed short mE = vec_mladd(v_5, l5minusl4, twotimes_l3minusl6);
    const vector signed short absmE = vec_abs(mE);
    /* left & right energy */
    const vector signed short l1minusl4 = vec_sub(vb1, vb4);
    const vector signed short l3minusl2 = vec_sub(vb3, vb2);
    const vector signed short l5minusl8 = vec_sub(vb5, vb8);
    const vector signed short l7minusl6 = vec_sub(vb7, vb6);
    const vector signed short twotimes_l1minusl4 = vec_mladd(v_2, l1minusl4, (vector signed short)zero);
    const vector signed short twotimes_l5minusl8 = vec_mladd(v_2, l5minusl8, (vector signed short)zero);
    const vector signed short lE = vec_mladd(v_5, l3minusl2, twotimes_l1minusl4);
    const vector signed short rE = vec_mladd(v_5, l7minusl6, twotimes_l5minusl8);

    const vector signed short ddiff = vec_sub(absmE,
                                              vec_min(vec_abs(lE),
                                                      vec_abs(rE)));
    const vector signed short ddiffclamp = vec_max(ddiff, (vector signed short)zero);
    const vector signed short dtimes64 = vec_mladd(v_5, ddiffclamp, v_32);
    const vector signed short d = vec_sra(dtimes64, vec_splat_u16(6));
    const vector signed short minusd = vec_sub((vector signed short)zero, d);
    const vector signed short finald = vec_sel(minusd,
                                               d,
                                               vec_cmpgt(vec_sub((vector signed short)zero, mE),
                                                         (vector signed short)zero));
    const vector signed short qtimes2 = vec_sub(vb4, vb5);
    /* for a shift right to behave like /2, we need to add one
       to all negative integers */
    const vector signed short rounddown = vec_sel((vector signed short)zero,
                                                  v_1,
                                                  vec_cmplt(qtimes2, (vector signed short)zero));
    const vector signed short q = vec_sra(vec_add(qtimes2, rounddown), vec_splat_u16(1));

    const vector signed short dclamp_P1 = vec_max((vector signed short)zero, finald);
    const vector signed short dclamp_P = vec_min(dclamp_P1, q);
    const vector signed short dclamp_N1 = vec_min((vector signed short)zero, finald);
    const vector signed short dclamp_N = vec_max(dclamp_N1, q);

    const vector signed short dclampedfinal = vec_sel(dclamp_N,
                                                      dclamp_P,
                                                      vec_cmpgt(q, (vector signed short)zero));
    const vector signed short dornotd = vec_sel((vector signed short)zero,
                                                dclampedfinal,
                                                vec_cmplt(absmE, vqp));
    /* add/subtract to l4 and l5 */
    const vector signed short vb4minusd = vec_sub(vb4, dornotd);
    const vector signed short vb5plusd  = vec_add(vb5, dornotd);
    /* finally, stores */
    const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero);
    const vector unsigned char st5 = vec_packsu(vb5plusd,  (vector signed short)zero);
    const vector signed char neg1 = vec_splat_s8(-1);
    const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                                                     0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};

#define STORE(i)                                                        \
    {   const vector unsigned char perms##i =                           \
            vec_lvsr(i * stride, src2);                                 \
        const vector unsigned char vg##i =                              \
            vec_perm(st##i, vbT##i, permHH);                            \
        const vector unsigned char mask##i =                            \
            vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
        const vector unsigned char vg2##i =                             \
            vec_perm(vg##i, vg##i, perms##i);                           \
        const vector unsigned char svA##i =                             \
            vec_sel(vbA##i, vg2##i, mask##i);                           \
        const vector unsigned char svB##i =                             \
            vec_sel(vg2##i, vbB##i, mask##i);                           \
        vec_st(svA##i, i * stride, src2);                               \
        vec_st(svB##i, i * stride + 16, src2);}

    STORE(4)
    STORE(5)
#undef STORE
}
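
/* Dering filter: smooth isolated outliers inside a block while leaving
 * edges alone. First the min/max of the 8x8 core is found; if the range
 * is below deringThreshold the block is left untouched. Otherwise each
 * pixel is compared against the mid-range average and selectively
 * replaced by a 3x3 weighted mean of like-classified neighbours. */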
static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
    this code makes no assumption on src or stride.
    One could remove the recomputation of the perm
    vector by assuming (stride % 16) == 0, unfortunately
    this is not always true. Quite a lot of load/stores
    can be removed by assuming proper alignment of
    src & stride :-(
    */
    uint8_t *srcCopy = src;
    DECLARE_ALIGNED(16, uint8_t, dt[16]);
    const vector signed int zero = vec_splat_s32(0);
    vector unsigned char v_dt;
    dt[0] = deringThreshold;
    v_dt = vec_splat(vec_ld(0, dt), 0);
#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, srcCopy);                                  \
    vector unsigned char sA##i = vec_ld(i * stride, srcCopy);           \
    vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy);      \
    vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)

    LOAD_LINE(0);
    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
    LOAD_LINE(9);
#undef LOAD_LINE
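
    /* EXTRACT(op) reduces the 8x8 core (bytes 1-8 of lines 1-8) to a
     * single splatted byte: pairwise vec_min/vec_max steps fold the
     * eight truncated vectors to one, then repeated mergeh/mergel
     * halving folds the 16 lanes down to one value replicated in every
     * lane of v_min or v_max. */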
    vector unsigned char v_avg;
    {
    const vector unsigned char trunc_perm = (vector unsigned char)
        {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
         0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18};
    const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
    const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
    const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
    const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);

#define EXTRACT(op) do {                                                \
    const vector unsigned char s##op##_1   = vec_##op(trunc_src12, trunc_src34); \
    const vector unsigned char s##op##_2   = vec_##op(trunc_src56, trunc_src78); \
    const vector unsigned char s##op##_6   = vec_##op(s##op##_1, s##op##_2);     \
    const vector unsigned char s##op##_8h  = vec_mergeh(s##op##_6, s##op##_6);   \
    const vector unsigned char s##op##_8l  = vec_mergel(s##op##_6, s##op##_6);   \
    const vector unsigned char s##op##_9   = vec_##op(s##op##_8h, s##op##_8l);   \
    const vector unsigned char s##op##_9h  = vec_mergeh(s##op##_9, s##op##_9);   \
    const vector unsigned char s##op##_9l  = vec_mergel(s##op##_9, s##op##_9);   \
    const vector unsigned char s##op##_10  = vec_##op(s##op##_9h, s##op##_9l);   \
    const vector unsigned char s##op##_10h = vec_mergeh(s##op##_10, s##op##_10); \
    const vector unsigned char s##op##_10l = vec_mergel(s##op##_10, s##op##_10); \
    const vector unsigned char s##op##_11  = vec_##op(s##op##_10h, s##op##_10l); \
    const vector unsigned char s##op##_11h = vec_mergeh(s##op##_11, s##op##_11); \
    const vector unsigned char s##op##_11l = vec_mergel(s##op##_11, s##op##_11); \
    v_##op = vec_##op(s##op##_11h, s##op##_11l); } while (0)
    vector unsigned char v_min;
    vector unsigned char v_max;
    EXTRACT(min);
    EXTRACT(max);
#undef EXTRACT

    if (vec_all_lt(vec_sub(v_max, v_min), v_dt))
        return;

    v_avg = vec_avg(v_min, v_max);
    }

    DECLARE_ALIGNED(16, signed int, S[8]);
    {
    const vector unsigned short mask1 = (vector unsigned short)
        {0x0001, 0x0002, 0x0004, 0x0008,
         0x0010, 0x0020, 0x0040, 0x0080};
    const vector unsigned short mask2 = (vector unsigned short)
        {0x0100, 0x0200, 0x0000, 0x0000,
         0x0000, 0x0000, 0x0000, 0x0000};

    const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
    const vector unsigned int vuint32_1 = vec_splat_u32(1);
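
    /* COMPARE(i) builds, for line i, a small bitmask of the ten pixels
     * that are brighter than v_avg (roughly: bit k set if pixel k >
     * avg). mask1 assigns bits 0-7, mask2 bits 8-9, and the
     * vec_sum4s/vec_sums pair collapses them into the low word of
     * sum##i. */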
#define COMPARE(i)                                                      \
    vector signed int sum##i;                                           \
    do {                                                                \
        const vector unsigned char cmp##i =                             \
            (vector unsigned char)vec_cmpgt(src##i, v_avg);             \
        const vector unsigned short cmpHi##i =                          \
            (vector unsigned short)vec_mergeh(cmp##i, cmp##i);          \
        const vector unsigned short cmpLi##i =                          \
            (vector unsigned short)vec_mergel(cmp##i, cmp##i);          \
        const vector signed short cmpHf##i =                            \
            (vector signed short)vec_and(cmpHi##i, mask1);              \
        const vector signed short cmpLf##i =                            \
            (vector signed short)vec_and(cmpLi##i, mask2);              \
        const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);    \
        const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
        sum##i = vec_sums(sumq##i, zero); } while (0)

    COMPARE(0);
    COMPARE(1);
    COMPARE(2);
    COMPARE(3);
    COMPARE(4);
    COMPARE(5);
    COMPARE(6);
    COMPARE(7);
    COMPARE(8);
    COMPARE(9);
#undef COMPARE
    vector signed int sumA2;
    vector signed int sumB2;
    {
    const vector signed int sump02 = vec_mergel(sum0, sum2);
    const vector signed int sump13 = vec_mergel(sum1, sum3);
    const vector signed int sumA = vec_mergel(sump02, sump13);

    const vector signed int sump46 = vec_mergel(sum4, sum6);
    const vector signed int sump57 = vec_mergel(sum5, sum7);
    const vector signed int sumB = vec_mergel(sump46, sump57);

    const vector signed int sump8A = vec_mergel(sum8, zero);
    const vector signed int sump9B = vec_mergel(sum9, zero);
    const vector signed int sumC = vec_mergel(sump8A, sump9B);

    const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);
    const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);
    const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);
    const vector signed int t2A = vec_or(sumA, tA);
    const vector signed int t2B = vec_or(sumB, tB);
    const vector signed int t2C = vec_or(sumC, tC);
    const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
                                          vec_sl(t2A, vuint32_1));
    const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
                                          vec_sl(t2B, vuint32_1));
    const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
                                          vec_sl(t2C, vuint32_1));
    const vector signed int yA = vec_and(t2A, t3A);
    const vector signed int yB = vec_and(t2B, t3B);
    const vector signed int yC = vec_and(t2C, t3C);

    const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);
    const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);
    const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);
    const vector signed int sumAd8 = vec_perm(yA, yB, strangeperm2);
    const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
    const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
    const vector signed int sumAp = vec_and(yA,
                                            vec_and(sumAd4, sumAd8));
    const vector signed int sumBp = vec_and(yB,
                                            vec_and(sumBd4, sumBd8));
    sumA2 = vec_or(sumAp,
                   vec_sl(sumAp,
                          vuint32_16));
    sumB2 = vec_or(sumBp,
                   vec_sl(sumBp,
                          vuint32_16));
    }
    vec_st(sumA2, 0, S);
    vec_st(sumB2, 16, S);
    }
    /* I'm not sure the following is actually faster
       than straight, unvectorized C code :-( */

    {
    DECLARE_ALIGNED(16, int, tQP2[4]);
    tQP2[0]= c->QP/2 + 1;
    vector signed int vQP2 = vec_ld(0, tQP2);
    vQP2 = vec_splat(vQP2, 0);
    const vector signed int vsint32_8 = vec_splat_s32(8);
    const vector unsigned int vuint32_4 = vec_splat_u32(4);
    const vector unsigned char permA1 = (vector unsigned char)
        {0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
         0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
    const vector unsigned char permA2 = (vector unsigned char)
        {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
         0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
    const vector unsigned char permA1inc = (vector unsigned char)
        {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
    const vector unsigned char permA2inc = (vector unsigned char)
        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
         0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
    const vector unsigned char magic = (vector unsigned char)
        {0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
         0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
    const vector unsigned char extractPerm = (vector unsigned char)
        {0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
         0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01};
    const vector unsigned char extractPermInc = (vector unsigned char)
        {0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
         0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01};
    const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
    const vector unsigned char tenRight = (vector unsigned char)
        {0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
    const vector unsigned char eightLeft = (vector unsigned char)
        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08};
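
    /* Per-pixel filtering: for each set bit in the row mask S[i], F2
     * gathers a 3x3 neighbourhood with the sliding permA1M/permA2M
     * patterns, takes a weighted sum with the 1-2-1 / 2-4-2 / 1-2-1
     * "magic" kernel (weights sum to 16; vsint32_8 rounds, vuint32_4
     * shifts), clamps the result to the pixel value +/- QP/2, and
     * inserts it back into the line. The *inc vectors slide all the
     * permutation patterns one pixel to the right per step. */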
#define F_INIT(i)                                           \
    vector unsigned char tenRightM##i = tenRight;           \
    vector unsigned char permA1M##i = permA1;               \
    vector unsigned char permA2M##i = permA2;               \
    vector unsigned char extractPermM##i = extractPerm
#define F2(i, j, k, l)                                                  \
    if (S[i] & (1 << (l+1))) {                                          \
        const vector unsigned char a_##j##_A##l =                       \
            vec_perm(src##i, src##j, permA1M##i);                       \
        const vector unsigned char a_##j##_B##l =                       \
            vec_perm(a_##j##_A##l, src##k, permA2M##i);                 \
        const vector signed int a_##j##_sump##l =                       \
            (vector signed int)vec_msum(a_##j##_B##l, magic,            \
                                        (vector unsigned int)zero);     \
        vector signed int F_##j##_##l =                                 \
            vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);    \
        F_##j##_##l = vec_splat(F_##j##_##l, 3);                        \
        const vector signed int p_##j##_##l =                           \
            (vector signed int)vec_perm(src##j,                         \
                                        (vector unsigned char)zero,     \
                                        extractPermM##i);               \
        const vector signed int sum_##j##_##l  = vec_add( p_##j##_##l, vQP2); \
        const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
        vector signed int newpm_##j##_##l;                              \
        if (vec_all_lt(sum_##j##_##l, F_##j##_##l))                     \
            newpm_##j##_##l = sum_##j##_##l;                            \
        else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))               \
            newpm_##j##_##l = diff_##j##_##l;                           \
        else newpm_##j##_##l = F_##j##_##l;                             \
        const vector unsigned char newpm2_##j##_##l =                   \
            vec_splat((vector unsigned char)newpm_##j##_##l, 15);       \
        const vector unsigned char mask##j##l = vec_add(identity,       \
                                                        tenRightM##i);  \
        src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);        \
    }                                                                   \
    permA1M##i = vec_add(permA1M##i, permA1inc);                        \
    permA2M##i = vec_add(permA2M##i, permA2inc);                        \
    tenRightM##i = vec_sro(tenRightM##i, eightLeft);                    \
    extractPermM##i = vec_add(extractPermM##i, extractPermInc)
#define ITER(i, j, k)                                       \
    F_INIT(i);                                              \
    F2(i, j, k, 0);                                         \
    F2(i, j, k, 1);                                         \
    F2(i, j, k, 2);                                         \
    F2(i, j, k, 3);                                         \
    F2(i, j, k, 4);                                         \
    F2(i, j, k, 5);                                         \
    F2(i, j, k, 6);                                         \
    F2(i, j, k, 7)

    ITER(0, 1, 2);
    ITER(1, 2, 3);
    ITER(2, 3, 4);
    ITER(3, 4, 5);
    ITER(4, 5, 6);
    ITER(5, 6, 7);
    ITER(6, 7, 8);
    ITER(7, 8, 9);
#undef ITER
#undef F2
#undef F_INIT

    const vector signed char neg1 = vec_splat_s8(-1);
#define STORE_LINE(i)                                                   \
    const vector unsigned char permST##i =                              \
        vec_lvsr(i * stride, srcCopy);                                  \
    const vector unsigned char maskST##i =                              \
        vec_perm((vector unsigned char)zero,                            \
                 (vector unsigned char)neg1, permST##i);                \
    src##i = vec_perm(src##i, src##i, permST##i);                       \
    sA##i = vec_sel(sA##i, src##i, maskST##i);                          \
    sB##i = vec_sel(src##i, sB##i, maskST##i);                          \
    vec_st(sA##i, i * stride, srcCopy);                                 \
    vec_st(sB##i, i * stride + 16, srcCopy)

    STORE_LINE(1);
    STORE_LINE(2);
    STORE_LINE(3);
    STORE_LINE(4);
    STORE_LINE(5);
    STORE_LINE(6);
    STORE_LINE(7);
    STORE_LINE(8);
#undef STORE_LINE
    }
}

#define doHorizLowPass_altivec(a...) doHorizLowPass_C(a)
#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
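
/* Temporal noise reducer: compares the current 8x8 block with its
 * history in tempBlurred. Depending on how the (neighbour-smoothed)
 * squared difference relates to the three maxNoise thresholds, the
 * block is copied, averaged 1:1, or blended 7:1 / 3:1 with the stored
 * version. */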
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
                                            uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{
    const vector signed int zero = vec_splat_s32(0);
    const vector signed short vsint16_1 = vec_splat_s16(1);
    vector signed int v_dp = zero;
    vector signed int v_sysdp = zero;
    int d, sysd, i;

    tempBlurredPast[127]= maxNoise[0];
    tempBlurredPast[128]= maxNoise[1];
    tempBlurredPast[129]= maxNoise[2];
#define LOAD_LINE(src, i)                                               \
    register int j##src##i = i * stride;                                \
    vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);       \
    const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
    const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
    const vector unsigned char v_##src##A##i =                          \
        vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);         \
    vector signed short v_##src##Ass##i =                               \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)v_##src##A##i)

    LOAD_LINE(src, 0);
    LOAD_LINE(src, 1);
    LOAD_LINE(src, 2);
    LOAD_LINE(src, 3);
    LOAD_LINE(src, 4);
    LOAD_LINE(src, 5);
    LOAD_LINE(src, 6);
    LOAD_LINE(src, 7);
    LOAD_LINE(tempBlurred, 0);
    LOAD_LINE(tempBlurred, 1);
    LOAD_LINE(tempBlurred, 2);
    LOAD_LINE(tempBlurred, 3);
    LOAD_LINE(tempBlurred, 4);
    LOAD_LINE(tempBlurred, 5);
    LOAD_LINE(tempBlurred, 6);
    LOAD_LINE(tempBlurred, 7);
#undef LOAD_LINE
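
    /* v_dp accumulates the sum of squared differences via vec_msums,
     * v_sysdp the plain sum of differences (multiplying by vsint16_1). */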
#define ACCUMULATE_DIFFS(i)                                             \
    vector signed short v_d##i = vec_sub(v_tempBlurredAss##i,           \
                                         v_srcAss##i);                  \
    v_dp = vec_msums(v_d##i, v_d##i, v_dp);                             \
    v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)

    ACCUMULATE_DIFFS(0);
    ACCUMULATE_DIFFS(1);
    ACCUMULATE_DIFFS(2);
    ACCUMULATE_DIFFS(3);
    ACCUMULATE_DIFFS(4);
    ACCUMULATE_DIFFS(5);
    ACCUMULATE_DIFFS(6);
    ACCUMULATE_DIFFS(7);
#undef ACCUMULATE_DIFFS

    v_dp = vec_sums(v_dp, zero);
    v_sysdp = vec_sums(v_sysdp, zero);

    v_dp = vec_splat(v_dp, 3);
    v_sysdp = vec_splat(v_sysdp, 3);

    vec_ste(v_dp, 0, &d);
    vec_ste(v_sysdp, 0, &sysd);
    i = d;
    d = (4*d
         +(*(tempBlurredPast-256))
         +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
         +(*(tempBlurredPast+256))
         +4)>>3;
    *tempBlurredPast = i;
    if (d > maxNoise[1]) {
        if (d < maxNoise[2]) {
#define OP(i) v_tempBlurredAss##i = vec_avg(v_tempBlurredAss##i, v_srcAss##i);

            OP(0);
            OP(1);
            OP(2);
            OP(3);
            OP(4);
            OP(5);
            OP(6);
            OP(7);
#undef OP
        } else {
#define OP(i) v_tempBlurredAss##i = v_srcAss##i;

            OP(0);
            OP(1);
            OP(2);
            OP(3);
            OP(4);
            OP(5);
            OP(6);
            OP(7);
#undef OP
        }
    } else {
        if (d < maxNoise[0]) {
            const vector signed short vsint16_7 = vec_splat_s16(7);
            const vector signed short vsint16_4 = vec_splat_s16(4);
            const vector unsigned short vuint16_3 = vec_splat_u16(3);

#define OP(i)                                                           \
            const vector signed short v_temp##i =                       \
                vec_mladd(v_tempBlurredAss##i,                          \
                          vsint16_7, v_srcAss##i);                      \
            const vector signed short v_temp2##i =                      \
                vec_add(v_temp##i, vsint16_4);                          \
            v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3)

            OP(0);
            OP(1);
            OP(2);
            OP(3);
            OP(4);
            OP(5);
            OP(6);
            OP(7);
#undef OP
        } else {
            const vector signed short vsint16_3 = vec_splat_s16(3);
            const vector signed short vsint16_2 = vec_splat_s16(2);

#define OP(i)                                                           \
            const vector signed short v_temp##i =                       \
                vec_mladd(v_tempBlurredAss##i,                          \
                          vsint16_3, v_srcAss##i);                      \
            const vector signed short v_temp2##i =                      \
                vec_add(v_temp##i, vsint16_2);                          \
            v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)

            OP(0);
            OP(1);
            OP(2);
            OP(3);
            OP(4);
            OP(5);
            OP(6);
            OP(7);
#undef OP
        }
    }
    const vector signed char neg1 = vec_splat_s8(-1);
    const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                                                     0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
#define PACK_AND_STORE(src, i)                                          \
    const vector unsigned char perms##src##i =                          \
        vec_lvsr(i * stride, src);                                      \
    const vector unsigned char vf##src##i =                             \
        vec_packsu(v_tempBlurredAss##i, (vector signed short)zero);     \
    const vector unsigned char vg##src##i =                             \
        vec_perm(vf##src##i, v_##src##A##i, permHH);                    \
    const vector unsigned char mask##src##i =                           \
        vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
    const vector unsigned char vg2##src##i =                            \
        vec_perm(vg##src##i, vg##src##i, perms##src##i);                \
    const vector unsigned char svA##src##i =                            \
        vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i);             \
    const vector unsigned char svB##src##i =                            \
        vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i);             \
    vec_st(svA##src##i, i * stride, src);                               \
    vec_st(svB##src##i, i * stride + 16, src)
    PACK_AND_STORE(src, 0);
    PACK_AND_STORE(src, 1);
    PACK_AND_STORE(src, 2);
    PACK_AND_STORE(src, 3);
    PACK_AND_STORE(src, 4);
    PACK_AND_STORE(src, 5);
    PACK_AND_STORE(src, 6);
    PACK_AND_STORE(src, 7);
    PACK_AND_STORE(tempBlurred, 0);
    PACK_AND_STORE(tempBlurred, 1);
    PACK_AND_STORE(tempBlurred, 2);
    PACK_AND_STORE(tempBlurred, 3);
    PACK_AND_STORE(tempBlurred, 4);
    PACK_AND_STORE(tempBlurred, 5);
    PACK_AND_STORE(tempBlurred, 6);
    PACK_AND_STORE(tempBlurred, 7);
#undef PACK_AND_STORE
}
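
/* Transpose helpers used by the horizontal filters: the first writes a
 * 16x8 block of bytes into a packed, 16-byte aligned scratch buffer
 * (so the "vertical" routines above can run on it), the second writes
 * the filtered 8x16 result back into the strided image. */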
static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
    const vector unsigned char zero = vec_splat_u8(0);

#define LOAD_DOUBLE_LINE(i, j)                                          \
    vector unsigned char perm1##i = vec_lvsl(i * stride, src);          \
    vector unsigned char perm2##i = vec_lvsl(j * stride, src);          \
    vector unsigned char srcA##i = vec_ld(i * stride, src);             \
    vector unsigned char srcB##i = vec_ld(i * stride + 16, src);        \
    vector unsigned char srcC##i = vec_ld(j * stride, src);             \
    vector unsigned char srcD##i = vec_ld(j * stride + 16, src);        \
    vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
    vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)

    LOAD_DOUBLE_LINE(0, 1);
    LOAD_DOUBLE_LINE(2, 3);
    LOAD_DOUBLE_LINE(4, 5);
    LOAD_DOUBLE_LINE(6, 7);
#undef LOAD_DOUBLE_LINE
    vector unsigned char tempA = vec_mergeh(src0, zero);
    vector unsigned char tempB = vec_mergel(src0, zero);
    vector unsigned char tempC = vec_mergeh(src1, zero);
    vector unsigned char tempD = vec_mergel(src1, zero);
    vector unsigned char tempE = vec_mergeh(src2, zero);
    vector unsigned char tempF = vec_mergel(src2, zero);
    vector unsigned char tempG = vec_mergeh(src3, zero);
    vector unsigned char tempH = vec_mergel(src3, zero);
    vector unsigned char tempI = vec_mergeh(src4, zero);
    vector unsigned char tempJ = vec_mergel(src4, zero);
    vector unsigned char tempK = vec_mergeh(src5, zero);
    vector unsigned char tempL = vec_mergel(src5, zero);
    vector unsigned char tempM = vec_mergeh(src6, zero);
    vector unsigned char tempN = vec_mergel(src6, zero);
    vector unsigned char tempO = vec_mergeh(src7, zero);
    vector unsigned char tempP = vec_mergel(src7, zero);

    vector unsigned char temp0  = vec_mergeh(tempA, tempI);
    vector unsigned char temp1  = vec_mergel(tempA, tempI);
    vector unsigned char temp2  = vec_mergeh(tempB, tempJ);
    vector unsigned char temp3  = vec_mergel(tempB, tempJ);
    vector unsigned char temp4  = vec_mergeh(tempC, tempK);
    vector unsigned char temp5  = vec_mergel(tempC, tempK);
    vector unsigned char temp6  = vec_mergeh(tempD, tempL);
    vector unsigned char temp7  = vec_mergel(tempD, tempL);
    vector unsigned char temp8  = vec_mergeh(tempE, tempM);
    vector unsigned char temp9  = vec_mergel(tempE, tempM);
    vector unsigned char temp10 = vec_mergeh(tempF, tempN);
    vector unsigned char temp11 = vec_mergel(tempF, tempN);
    vector unsigned char temp12 = vec_mergeh(tempG, tempO);
    vector unsigned char temp13 = vec_mergel(tempG, tempO);
    vector unsigned char temp14 = vec_mergeh(tempH, tempP);
    vector unsigned char temp15 = vec_mergel(tempH, tempP);

    tempA = vec_mergeh(temp0, temp8);
    tempB = vec_mergel(temp0, temp8);
    tempC = vec_mergeh(temp1, temp9);
    tempD = vec_mergel(temp1, temp9);
    tempE = vec_mergeh(temp2, temp10);
    tempF = vec_mergel(temp2, temp10);
    tempG = vec_mergeh(temp3, temp11);
    tempH = vec_mergel(temp3, temp11);
    tempI = vec_mergeh(temp4, temp12);
    tempJ = vec_mergel(temp4, temp12);
    tempK = vec_mergeh(temp5, temp13);
    tempL = vec_mergel(temp5, temp13);
    tempM = vec_mergeh(temp6, temp14);
    tempN = vec_mergel(temp6, temp14);
    tempO = vec_mergeh(temp7, temp15);
    tempP = vec_mergel(temp7, temp15);

    temp0  = vec_mergeh(tempA, tempI);
    temp1  = vec_mergel(tempA, tempI);
    temp2  = vec_mergeh(tempB, tempJ);
    temp3  = vec_mergel(tempB, tempJ);
    temp4  = vec_mergeh(tempC, tempK);
    temp5  = vec_mergel(tempC, tempK);
    temp6  = vec_mergeh(tempD, tempL);
    temp7  = vec_mergel(tempD, tempL);
    temp8  = vec_mergeh(tempE, tempM);
    temp9  = vec_mergel(tempE, tempM);
    temp10 = vec_mergeh(tempF, tempN);
    temp11 = vec_mergel(tempF, tempN);
    temp12 = vec_mergeh(tempG, tempO);
    temp13 = vec_mergel(tempG, tempO);
    temp14 = vec_mergeh(tempH, tempP);
    temp15 = vec_mergel(tempH, tempP);

    vec_st(temp0,    0, dst);
    vec_st(temp1,   16, dst);
    vec_st(temp2,   32, dst);
    vec_st(temp3,   48, dst);
    vec_st(temp4,   64, dst);
    vec_st(temp5,   80, dst);
    vec_st(temp6,   96, dst);
    vec_st(temp7,  112, dst);
    vec_st(temp8,  128, dst);
    vec_st(temp9,  144, dst);
    vec_st(temp10, 160, dst);
    vec_st(temp11, 176, dst);
    vec_st(temp12, 192, dst);
    vec_st(temp13, 208, dst);
    vec_st(temp14, 224, dst);
    vec_st(temp15, 240, dst);
}
static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
    const vector unsigned char zero = vec_splat_u8(0);

#define LOAD_DOUBLE_LINE(i, j)                              \
    vector unsigned char src##i = vec_ld(i * 16, src);      \
    vector unsigned char src##j = vec_ld(j * 16, src)

    LOAD_DOUBLE_LINE(0, 1);
    LOAD_DOUBLE_LINE(2, 3);
    LOAD_DOUBLE_LINE(4, 5);
    LOAD_DOUBLE_LINE(6, 7);
    LOAD_DOUBLE_LINE(8, 9);
    LOAD_DOUBLE_LINE(10, 11);
    LOAD_DOUBLE_LINE(12, 13);
    LOAD_DOUBLE_LINE(14, 15);
#undef LOAD_DOUBLE_LINE
    vector unsigned char tempA = vec_mergeh(src0, src8);
    vector unsigned char tempB;
    vector unsigned char tempC = vec_mergeh(src1, src9);
    vector unsigned char tempD;
    vector unsigned char tempE = vec_mergeh(src2, src10);
    vector unsigned char tempG = vec_mergeh(src3, src11);
    vector unsigned char tempI = vec_mergeh(src4, src12);
    vector unsigned char tempJ;
    vector unsigned char tempK = vec_mergeh(src5, src13);
    vector unsigned char tempL;
    vector unsigned char tempM = vec_mergeh(src6, src14);
    vector unsigned char tempO = vec_mergeh(src7, src15);

    vector unsigned char temp0 = vec_mergeh(tempA, tempI);
    vector unsigned char temp1 = vec_mergel(tempA, tempI);
    vector unsigned char temp2;
    vector unsigned char temp3;
    vector unsigned char temp4 = vec_mergeh(tempC, tempK);
    vector unsigned char temp5 = vec_mergel(tempC, tempK);
    vector unsigned char temp6;
    vector unsigned char temp7;
    vector unsigned char temp8 = vec_mergeh(tempE, tempM);
    vector unsigned char temp9 = vec_mergel(tempE, tempM);
    vector unsigned char temp12 = vec_mergeh(tempG, tempO);
    vector unsigned char temp13 = vec_mergel(tempG, tempO);

    tempA = vec_mergeh(temp0, temp8);
    tempB = vec_mergel(temp0, temp8);
    tempC = vec_mergeh(temp1, temp9);
    tempD = vec_mergel(temp1, temp9);
    tempI = vec_mergeh(temp4, temp12);
    tempJ = vec_mergel(temp4, temp12);
    tempK = vec_mergeh(temp5, temp13);
    tempL = vec_mergel(temp5, temp13);

    temp0 = vec_mergeh(tempA, tempI);
    temp1 = vec_mergel(tempA, tempI);
    temp2 = vec_mergeh(tempB, tempJ);
    temp3 = vec_mergel(tempB, tempJ);
    temp4 = vec_mergeh(tempC, tempK);
    temp5 = vec_mergel(tempC, tempK);
    temp6 = vec_mergeh(tempD, tempL);
    temp7 = vec_mergel(tempD, tempL);
    const vector signed char neg1 = vec_splat_s8(-1);

#define STORE_DOUBLE_LINE(i, j)                                         \
    vector unsigned char dstA##i = vec_ld(i * stride, dst);             \
    vector unsigned char dstB##i = vec_ld(i * stride + 16, dst);        \
    vector unsigned char dstA##j = vec_ld(j * stride, dst);             \
    vector unsigned char dstB##j = vec_ld(j * stride + 16, dst);        \
    vector unsigned char align##i = vec_lvsr(i * stride, dst);          \
    vector unsigned char align##j = vec_lvsr(j * stride, dst);          \
    vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
    vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
    vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);\
    vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);\
    vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
    vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
    vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
    vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
    vec_st(dstAF##i, i * stride, dst);                                  \
    vec_st(dstBF##i, i * stride + 16, dst);                             \
    vec_st(dstAF##j, j * stride, dst);                                  \
    vec_st(dstBF##j, j * stride + 16, dst)

    STORE_DOUBLE_LINE(0, 1);
    STORE_DOUBLE_LINE(2, 3);
    STORE_DOUBLE_LINE(4, 5);
    STORE_DOUBLE_LINE(6, 7);
#undef STORE_DOUBLE_LINE
}