~ubuntu-branches/ubuntu/jaunty/xvidcap/jaunty-proposed

Viewing changes to ffmpeg/libpostproc/postprocess_altivec_template.c

Committer: Bazaar Package Importer
Author(s): John Dong
Date: 2008-02-25 15:47:12 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20080225154712-qvr11ekcea4c9ry8

Tags: 1.1.6-0.1ubuntu1

https://launchpad.net/bugs/120003

* Merge from debian-multimedia (LP: #120003), Ubuntu Changes:
- For ffmpeg-related build-deps, remove cvs from package names.
- Standards-Version 3.7.3
- Maintainer Spec

files added:
TODO.tasks

autogen.sh

config.guess

config.sub

configure.in

debian/patches/01_xterm.dpatch

debian/watch

doc/Makefile.am

doc/Makefile.in

doc/man

doc/man/C

doc/man/C/Makefile.am

doc/man/C/Makefile.in

doc/man/C/xvidcap.1

doc/man/C/xvidcap.1.xml

doc/man/C/xvidcap.1.xml.pot

doc/man/Makefile.am

doc/man/Makefile.in

doc/man/de

doc/man/de/Makefile.am

doc/man/de/Makefile.in

doc/man/de/xvidcap.1

doc/man/de/xvidcap.1-de.po

doc/man/de/xvidcap.1.xml

doc/man/es

doc/man/es/Makefile.am

doc/man/es/Makefile.in

doc/man/es/xvidcap.1

doc/man/es/xvidcap.1.xml

doc/man/it

doc/man/it/Makefile.am

doc/man/it/Makefile.in

doc/man/it/xvidcap.1

doc/man/it/xvidcap.1-it.po

doc/man/it/xvidcap.1.xml

doc/man/man.make

doc/omf.make

doc/xmldocs.make

doc/xvidcap

doc/xvidcap/C

doc/xvidcap/C/Makefile.am

doc/xvidcap/C/Makefile.in

doc/xvidcap/C/figures

doc/xvidcap/C/figures/back-button.png

doc/xvidcap/C/figures/edit-button.png

doc/xvidcap/C/figures/filename-button.png

doc/xvidcap/C/figures/frame-drop-meter.png

doc/xvidcap/C/figures/fwd-button.png

doc/xvidcap/C/figures/lock-toggle.png

doc/xvidcap/C/figures/menu-about.png

doc/xvidcap/C/figures/menu-animate.png

doc/xvidcap/C/figures/menu-cont.png

doc/xvidcap/C/figures/menu-help.png

doc/xvidcap/C/figures/menu-make-video.png

doc/xvidcap/C/figures/menu-mf.png

doc/xvidcap/C/figures/menu-prefs.png

doc/xvidcap/C/figures/menu-quit.png

doc/xvidcap/C/figures/menu-save-prefs.png

doc/xvidcap/C/figures/menu-sf.png

doc/xvidcap/C/figures/menu.png

doc/xvidcap/C/figures/pause-toggle.png

doc/xvidcap/C/figures/prefs-commands.png

doc/xvidcap/C/figures/prefs-general.png

doc/xvidcap/C/figures/prefs-mf.png

doc/xvidcap/C/figures/prefs-sf.png

doc/xvidcap/C/figures/record-toggle.png

doc/xvidcap/C/figures/results-dialog-ask.png

doc/xvidcap/C/figures/results-dialog.png

doc/xvidcap/C/figures/selector-button.png

doc/xvidcap/C/figures/step-button.png

doc/xvidcap/C/figures/stop-toggle.png

doc/xvidcap/C/figures/xvidcap-main-ctrl.png

doc/xvidcap/C/figures/xvidcap-warning.png

doc/xvidcap/C/xvidcap-C.omf

doc/xvidcap/C/xvidcap.xml

doc/xvidcap/Makefile.am

doc/xvidcap/Makefile.in

doc/xvidcap/de

doc/xvidcap/de/Makefile.am

doc/xvidcap/de/Makefile.in

doc/xvidcap/de/figures

doc/xvidcap/de/figures/back-button.png

doc/xvidcap/de/figures/edit-button.png

doc/xvidcap/de/figures/filename-button.png

doc/xvidcap/de/figures/frame-drop-meter.png

doc/xvidcap/de/figures/fwd-button.png

doc/xvidcap/de/figures/lock-toggle.png

doc/xvidcap/de/figures/menu-about.png

doc/xvidcap/de/figures/menu-animate.png

doc/xvidcap/de/figures/menu-cont.png

doc/xvidcap/de/figures/menu-help.png

doc/xvidcap/de/figures/menu-make-video.png

doc/xvidcap/de/figures/menu-mf.png

doc/xvidcap/de/figures/menu-prefs.png

doc/xvidcap/de/figures/menu-quit.png

doc/xvidcap/de/figures/menu-save-prefs.png

doc/xvidcap/de/figures/menu-sf.png

doc/xvidcap/de/figures/menu.png

doc/xvidcap/de/figures/pause-toggle.png

doc/xvidcap/de/figures/prefs-commands.png

doc/xvidcap/de/figures/prefs-general.png

doc/xvidcap/de/figures/prefs-mf.png

doc/xvidcap/de/figures/prefs-sf.png

doc/xvidcap/de/figures/record-toggle.png

doc/xvidcap/de/figures/results-dialog-ask.png

doc/xvidcap/de/figures/results-dialog.png

doc/xvidcap/de/figures/selector-button.png

doc/xvidcap/de/figures/step-button.png

doc/xvidcap/de/figures/stop-toggle.png

doc/xvidcap/de/figures/xvidcap-main-ctrl.png

doc/xvidcap/de/figures/xvidcap-warning.png

doc/xvidcap/de/xvidcap-de.omf

doc/xvidcap/de/xvidcap.xml

doc/xvidcap/es

doc/xvidcap/es/Makefile.am

doc/xvidcap/es/Makefile.in

doc/xvidcap/es/figures

doc/xvidcap/es/figures/back-button.png

doc/xvidcap/es/figures/edit-button.png

doc/xvidcap/es/figures/filename-button.png

doc/xvidcap/es/figures/frame-drop-meter.png

doc/xvidcap/es/figures/fwd-button.png

doc/xvidcap/es/figures/lock-toggle.png

doc/xvidcap/es/figures/menu-about.png

doc/xvidcap/es/figures/menu-animate.png

doc/xvidcap/es/figures/menu-cont.png

doc/xvidcap/es/figures/menu-help.png

doc/xvidcap/es/figures/menu-make-video.png

doc/xvidcap/es/figures/menu-mf.png

doc/xvidcap/es/figures/menu-prefs.png

doc/xvidcap/es/figures/menu-quit.png

doc/xvidcap/es/figures/menu-save-prefs.png

doc/xvidcap/es/figures/menu-sf.png

doc/xvidcap/es/figures/menu.png

doc/xvidcap/es/figures/pause-toggle.png

doc/xvidcap/es/figures/prefs-commands.png

doc/xvidcap/es/figures/prefs-general.png

doc/xvidcap/es/figures/prefs-mf.png

doc/xvidcap/es/figures/prefs-sf.png

doc/xvidcap/es/figures/record-toggle.png

doc/xvidcap/es/figures/results-dialog-ask.png

doc/xvidcap/es/figures/results-dialog.png

doc/xvidcap/es/figures/selector-button.png

doc/xvidcap/es/figures/step-button.png

doc/xvidcap/es/figures/stop-toggle.png

doc/xvidcap/es/figures/xvidcap-main-ctrl.png

doc/xvidcap/es/figures/xvidcap-warning.png

doc/xvidcap/es/xvidcap-es.omf

doc/xvidcap/es/xvidcap.xml

doc/xvidcap/it

doc/xvidcap/it/Makefile.am

doc/xvidcap/it/Makefile.in

doc/xvidcap/it/figures

doc/xvidcap/it/figures/back-button.png

doc/xvidcap/it/figures/edit-button.png

doc/xvidcap/it/figures/filename-button.png

doc/xvidcap/it/figures/frame-drop-meter.png

doc/xvidcap/it/figures/fwd-button.png

doc/xvidcap/it/figures/lock-toggle.png

doc/xvidcap/it/figures/menu-about.png

doc/xvidcap/it/figures/menu-animate.png

doc/xvidcap/it/figures/menu-cont.png

doc/xvidcap/it/figures/menu-help.png

doc/xvidcap/it/figures/menu-make-video.png

doc/xvidcap/it/figures/menu-mf.png

doc/xvidcap/it/figures/menu-prefs.png

doc/xvidcap/it/figures/menu-quit.png

doc/xvidcap/it/figures/menu-save-prefs.png

doc/xvidcap/it/figures/menu-sf.png

doc/xvidcap/it/figures/menu.png

doc/xvidcap/it/figures/pause-toggle.png

doc/xvidcap/it/figures/prefs-commands.png

doc/xvidcap/it/figures/prefs-general.png

doc/xvidcap/it/figures/prefs-mf.png

doc/xvidcap/it/figures/prefs-sf.png

doc/xvidcap/it/figures/record-toggle.png

doc/xvidcap/it/figures/results-dialog-ask.png

doc/xvidcap/it/figures/results-dialog.png

doc/xvidcap/it/figures/selector-button.png

doc/xvidcap/it/figures/step-button.png

doc/xvidcap/it/figures/stop-toggle.png

doc/xvidcap/it/figures/xvidcap-ctrl.png

doc/xvidcap/it/figures/xvidcap-main-ctrl.png

doc/xvidcap/it/figures/xvidcap-warning.png

doc/xvidcap/it/xvidcap-it.omf

doc/xvidcap/it/xvidcap.xml

ffmpeg/Doxyfile

ffmpeg/MAINTAINERS

ffmpeg/avcodec.h

ffmpeg/avformat.h

ffmpeg/avio.h

ffmpeg/avutil.h

ffmpeg/build_avopt

ffmpeg/clean-diff

ffmpeg/common.h

ffmpeg/common.mak

ffmpeg/cws2fws.c

ffmpeg/doc/avutil.txt

ffmpeg/doc/snow.txt

ffmpeg/doc/soc.txt

ffmpeg/dsputil.h

ffmpeg/fifo.h

ffmpeg/integer.h

ffmpeg/intfloat_readwrite.h

ffmpeg/libavcodec/8bps.c

ffmpeg/libavcodec/aasc.c

ffmpeg/libavcodec/adx.c

ffmpeg/libavcodec/alac.c

ffmpeg/libavcodec/armv4l/dsputil_arm_s.S

ffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c

ffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h

ffmpeg/libavcodec/armv4l/mathops.h

ffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c

ffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c

ffmpeg/libavcodec/armv4l/simple_idct_armv5te.S

ffmpeg/libavcodec/armv4l/simple_idct_armv6.S

ffmpeg/libavcodec/audioconvert.c

ffmpeg/libavcodec/avs.c

ffmpeg/libavcodec/beosthread.c

ffmpeg/libavcodec/bfin

ffmpeg/libavcodec/bfin/dsputil_bfin.c

ffmpeg/libavcodec/bitstream.c

ffmpeg/libavcodec/bitstream.h

ffmpeg/libavcodec/bitstream_filter.c

ffmpeg/libavcodec/bmp.c

ffmpeg/libavcodec/bmp.h

ffmpeg/libavcodec/bmpenc.c

ffmpeg/libavcodec/bytestream.h

ffmpeg/libavcodec/cavs.c

ffmpeg/libavcodec/cavsdata.h

ffmpeg/libavcodec/cavsdsp.c

ffmpeg/libavcodec/cinepak.c

ffmpeg/libavcodec/cook.c

ffmpeg/libavcodec/cookdata.h

ffmpeg/libavcodec/cscd.c

ffmpeg/libavcodec/dca.c

ffmpeg/libavcodec/dcadata.h

ffmpeg/libavcodec/dcahuff.h

ffmpeg/libavcodec/dsicinav.c

ffmpeg/libavcodec/dtsdec.c

ffmpeg/libavcodec/dvbsub.c

ffmpeg/libavcodec/dvbsubdec.c

ffmpeg/libavcodec/dvdsubdec.c

ffmpeg/libavcodec/dvdsubenc.c

ffmpeg/libavcodec/eval.h

ffmpeg/libavcodec/faac.c

ffmpeg/libavcodec/faandct.c

ffmpeg/libavcodec/faandct.h

ffmpeg/libavcodec/flac.c

ffmpeg/libavcodec/flacenc.c

ffmpeg/libavcodec/flashsv.c

ffmpeg/libavcodec/flashsvenc.c

ffmpeg/libavcodec/flicvideo.c

ffmpeg/libavcodec/fraps.c

ffmpeg/libavcodec/g726.c

ffmpeg/libavcodec/gif.c

ffmpeg/libavcodec/gifdec.c

ffmpeg/libavcodec/h261.c

ffmpeg/libavcodec/h261data.h

ffmpeg/libavcodec/h264dsp.c

ffmpeg/libavcodec/h264enc.c

ffmpeg/libavcodec/h264idct.c

ffmpeg/libavcodec/i386/cavsdsp_mmx.c

ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c

ffmpeg/libavcodec/i386/fft_3dn.c

ffmpeg/libavcodec/i386/fft_3dn2.c

ffmpeg/libavcodec/i386/h264dsp_mmx.c

ffmpeg/libavcodec/i386/idct_mmx_xvid.c

ffmpeg/libavcodec/i386/mathops.h

ffmpeg/libavcodec/i386/snowdsp_mmx.c

ffmpeg/libavcodec/i386/vp3dsp_mmx.c

ffmpeg/libavcodec/i386/vp3dsp_sse2.c

ffmpeg/libavcodec/idcinvideo.c

ffmpeg/libavcodec/imc.c

ffmpeg/libavcodec/imcdata.h

ffmpeg/libavcodec/indeo2.c

ffmpeg/libavcodec/indeo2data.h

ffmpeg/libavcodec/jpeg_ls.c

ffmpeg/libavcodec/kmvc.c

ffmpeg/libavcodec/lcl.c

ffmpeg/libavcodec/libgsm.c

ffmpeg/libavcodec/libtheoraenc.c

ffmpeg/libavcodec/loco.c

ffmpeg/libavcodec/lzw.c

ffmpeg/libavcodec/lzw.h

ffmpeg/libavcodec/mathops.h

ffmpeg/libavcodec/mmvideo.c

ffmpeg/libavcodec/mpc.c

ffmpeg/libavcodec/mpcdata.h

ffmpeg/libavcodec/msrle.c

ffmpeg/libavcodec/msvideo1.c

ffmpeg/libavcodec/nuv.c

ffmpeg/libavcodec/opt.c

ffmpeg/libavcodec/opt.h

ffmpeg/libavcodec/os2thread.c

ffmpeg/libavcodec/parser.c

ffmpeg/libavcodec/parser.h

ffmpeg/libavcodec/png.c

ffmpeg/libavcodec/pnm.c

ffmpeg/libavcodec/ppc/fdct_altivec.c

ffmpeg/libavcodec/ppc/float_altivec.c

ffmpeg/libavcodec/ppc/h264_altivec.c

ffmpeg/libavcodec/ppc/h264_template_altivec.c

ffmpeg/libavcodec/ppc/mathops.h

ffmpeg/libavcodec/ppc/snow_altivec.c

ffmpeg/libavcodec/ppc/types_altivec.h

ffmpeg/libavcodec/ppc/vc1dsp_altivec.c

ffmpeg/libavcodec/pthread.c

ffmpeg/libavcodec/qdm2.c

ffmpeg/libavcodec/qdm2data.h

ffmpeg/libavcodec/qdrw.c

ffmpeg/libavcodec/qpeg.c

ffmpeg/libavcodec/qtrle.c

ffmpeg/libavcodec/rangecoder.c

ffmpeg/libavcodec/rangecoder.h

ffmpeg/libavcodec/ratecontrol.h

ffmpeg/libavcodec/resample2.c

ffmpeg/libavcodec/rpza.c

ffmpeg/libavcodec/rtjpeg.c

ffmpeg/libavcodec/rtjpeg.h

ffmpeg/libavcodec/sedlKOs8K

ffmpeg/libavcodec/shorten.c

ffmpeg/libavcodec/smacker.c

ffmpeg/libavcodec/smc.c

ffmpeg/libavcodec/snow.c

ffmpeg/libavcodec/snow.h

ffmpeg/libavcodec/sonic.c

ffmpeg/libavcodec/sp5x.h

ffmpeg/libavcodec/sparc

ffmpeg/libavcodec/sparc/dsputil_vis.c

ffmpeg/libavcodec/sparc/vis.h

ffmpeg/libavcodec/targa.c

ffmpeg/libavcodec/tiertexseqv.c

ffmpeg/libavcodec/tiff.c

ffmpeg/libavcodec/truemotion1.c

ffmpeg/libavcodec/truemotion1data.h

ffmpeg/libavcodec/truemotion2.c

ffmpeg/libavcodec/truespeech.c

ffmpeg/libavcodec/truespeech_data.h

ffmpeg/libavcodec/tscc.c

ffmpeg/libavcodec/tta.c

ffmpeg/libavcodec/ulti.c

ffmpeg/libavcodec/ulti_cb.h

ffmpeg/libavcodec/vc1.c

ffmpeg/libavcodec/vc1acdata.h

ffmpeg/libavcodec/vc1data.h

ffmpeg/libavcodec/vc1dsp.c

ffmpeg/libavcodec/vmdav.c

ffmpeg/libavcodec/vmnc.c

ffmpeg/libavcodec/vorbis.c

ffmpeg/libavcodec/vorbis.h

ffmpeg/libavcodec/vorbis_data.c

ffmpeg/libavcodec/vorbis_enc.c

ffmpeg/libavcodec/vorbis_enc_data.h

ffmpeg/libavcodec/vp3dsp.c

ffmpeg/libavcodec/vp5.c

ffmpeg/libavcodec/vp56.c

ffmpeg/libavcodec/vp56.h

ffmpeg/libavcodec/vp56data.c

ffmpeg/libavcodec/vp56data.h

ffmpeg/libavcodec/vp5data.h

ffmpeg/libavcodec/vp6.c

ffmpeg/libavcodec/vp6data.h

ffmpeg/libavcodec/vqavideo.c

ffmpeg/libavcodec/w32thread.c

ffmpeg/libavcodec/wavpack.c

ffmpeg/libavcodec/wma.c

ffmpeg/libavcodec/wma.h

ffmpeg/libavcodec/wmaenc.c

ffmpeg/libavcodec/wnv1.c

ffmpeg/libavcodec/ws-snd1.c

ffmpeg/libavcodec/x264.c

ffmpeg/libavcodec/xl.c

ffmpeg/libavcodec/xvid_internal.h

ffmpeg/libavcodec/xvid_rc.c

ffmpeg/libavcodec/xvidff.c

ffmpeg/libavcodec/xvmc_render.h

ffmpeg/libavcodec/zmbv.c

ffmpeg/libavcodec/zmbvenc.c

ffmpeg/libavformat/adtsenc.c

ffmpeg/libavformat/aiff.c

ffmpeg/libavformat/allformats.h

ffmpeg/libavformat/asf-enc.c

ffmpeg/libavformat/asf.h

ffmpeg/libavformat/avisynth.c

ffmpeg/libavformat/avs.c

ffmpeg/libavformat/daud.c

ffmpeg/libavformat/dc1394.c

ffmpeg/libavformat/dsicin.c

ffmpeg/libavformat/dv.h

ffmpeg/libavformat/dvenc.c

ffmpeg/libavformat/electronicarts.c

ffmpeg/libavformat/flic.c

ffmpeg/libavformat/flv.h

ffmpeg/libavformat/grab_bktr.c

ffmpeg/libavformat/gxf.c

ffmpeg/libavformat/gxf.h

ffmpeg/libavformat/gxfenc.c

ffmpeg/libavformat/idcin.c

ffmpeg/libavformat/img2.c

ffmpeg/libavformat/isom.c

ffmpeg/libavformat/isom.h

ffmpeg/libavformat/libnut.c

ffmpeg/libavformat/matroska.c

ffmpeg/libavformat/mm.c

ffmpeg/libavformat/mmf.c

ffmpeg/libavformat/mpc.c

ffmpeg/libavformat/mtv.c

ffmpeg/libavformat/mxf.c

ffmpeg/libavformat/network.h

ffmpeg/libavformat/nsvdec.c

ffmpeg/libavformat/nut.h

ffmpeg/libavformat/nutdec.c

ffmpeg/libavformat/nuv.c

ffmpeg/libavformat/ogg2.c

ffmpeg/libavformat/ogg2.h

ffmpeg/libavformat/oggparseflac.c

ffmpeg/libavformat/oggparseogm.c

ffmpeg/libavformat/oggparsetheora.c

ffmpeg/libavformat/oggparsevorbis.c

ffmpeg/libavformat/qtpalette.h

ffmpeg/libavformat/riff.c

ffmpeg/libavformat/riff.h

ffmpeg/libavformat/rtp_h264.c

ffmpeg/libavformat/rtp_h264.h

ffmpeg/libavformat/rtp_internal.h

ffmpeg/libavformat/segafilm.c

ffmpeg/libavformat/sierravmd.c

ffmpeg/libavformat/smacker.c

ffmpeg/libavformat/sol.c

ffmpeg/libavformat/tiertexseq.c

ffmpeg/libavformat/tta.c

ffmpeg/libavformat/v4l2.c

ffmpeg/libavformat/voc.c

ffmpeg/libavformat/voc.h

ffmpeg/libavformat/vocdec.c

ffmpeg/libavformat/vocenc.c

ffmpeg/libavformat/westwood.c

ffmpeg/libavformat/wv.c

ffmpeg/libavformat/x11grab.c

ffmpeg/libavutil

ffmpeg/libavutil/Makefile

ffmpeg/libavutil/adler32.c

ffmpeg/libavutil/adler32.h

ffmpeg/libavutil/aes.c

ffmpeg/libavutil/aes.h

ffmpeg/libavutil/avutil.h

ffmpeg/libavutil/base64.c

ffmpeg/libavutil/base64.h

ffmpeg/libavutil/bswap.h

ffmpeg/libavutil/common.h

ffmpeg/libavutil/crc.c

ffmpeg/libavutil/crc.h

ffmpeg/libavutil/fifo.c

ffmpeg/libavutil/fifo.h

ffmpeg/libavutil/integer.c

ffmpeg/libavutil/integer.h

ffmpeg/libavutil/internal.h

ffmpeg/libavutil/intfloat_readwrite.c

ffmpeg/libavutil/intfloat_readwrite.h

ffmpeg/libavutil/intreadwrite.h

ffmpeg/libavutil/lls.c

ffmpeg/libavutil/lls.h

ffmpeg/libavutil/log.c

ffmpeg/libavutil/log.h

ffmpeg/libavutil/lzo.c

ffmpeg/libavutil/lzo.h

ffmpeg/libavutil/mathematics.c

ffmpeg/libavutil/mathematics.h

ffmpeg/libavutil/md5.c

ffmpeg/libavutil/md5.h

ffmpeg/libavutil/mem.c

ffmpeg/libavutil/random.c

ffmpeg/libavutil/random.h

ffmpeg/libavutil/rational.c

ffmpeg/libavutil/rational.h

ffmpeg/libavutil/softfloat.c

ffmpeg/libavutil/softfloat.h

ffmpeg/libavutil/tree.c

ffmpeg/libavutil/tree.h

ffmpeg/libavutil/x86_cpu.h

ffmpeg/libpostproc

ffmpeg/libpostproc/Makefile

ffmpeg/libpostproc/mangle.h

ffmpeg/libpostproc/postprocess.c

ffmpeg/libpostproc/postprocess.h

ffmpeg/libpostproc/postprocess_altivec_template.c

ffmpeg/libpostproc/postprocess_internal.h

ffmpeg/libpostproc/postprocess_template.c

ffmpeg/libswscale

ffmpeg/libswscale/Makefile

ffmpeg/libswscale/cs_test.c

ffmpeg/libswscale/rgb2rgb.c

ffmpeg/libswscale/rgb2rgb.h

ffmpeg/libswscale/rgb2rgb_template.c

ffmpeg/libswscale/swscale-example.c

ffmpeg/libswscale/swscale.c

ffmpeg/libswscale/swscale.h

ffmpeg/libswscale/swscale_altivec_template.c

ffmpeg/libswscale/swscale_internal.h

ffmpeg/libswscale/swscale_template.c

ffmpeg/libswscale/yuv2rgb.c

ffmpeg/libswscale/yuv2rgb_altivec.c

ffmpeg/libswscale/yuv2rgb_init.c

ffmpeg/libswscale/yuv2rgb_mlib.c

ffmpeg/libswscale/yuv2rgb_template.c

ffmpeg/mathematics.h

ffmpeg/pktdumper.c

ffmpeg/qt-faststart.c

ffmpeg/rational.h

ffmpeg/rgb2rgb.h

ffmpeg/rtp.h

ffmpeg/rtp_h264.h

ffmpeg/rtp_internal.h

ffmpeg/rtsp.h

ffmpeg/rtspcodes.h

ffmpeg/swscale.h

ffmpeg/test.dv

ffmpeg/tests/seek_test.c

ffmpeg/tests/seek_test.sh

ffmpeg/unwrap-diff

ffmpeg/version.sh

ffmpeg/vhook/watermark.c

intltool-extract.in

intltool-merge.in

intltool-update.in

po/ChangeLog

po/Makefile.in.in

po/POTFILES.in

po/de.gmo

po/de.po

po/en.gmo

po/en.po

po/es.gmo

po/es.po

po/it.gmo

po/it.po

po/xvidcap.pot

ppm2mpeg.sh

src/app_data.c

src/codecs.c

src/eggtrayicon.c

src/eggtrayicon.h

src/gnome-xvidcap.glade

src/gnome_frame.c

src/gnome_frame.h

src/gnome_options.c

src/gnome_options.h

src/gnome_ui.c

src/gnome_ui.h

src/gnome_warning.c

src/gnome_warning.h

src/pixmaps

src/pixmaps/xvidcap_logo.png

src/xv_error_item.c

src/xv_error_item.h

src/xvidcap-intl.h

xvidcap.desktop

xvidcap.png

files removed:
TODO

Xw/Base.c

Xw/Base.h

Xw/BaseP.h

Xw/Box.c

Xw/Box.h

Xw/BoxP.h

Xw/Button.c

Xw/Button.h

Xw/ButtonP.h

Xw/Field.c

Xw/Field.h

Xw/FieldP.h

Xw/Label.c

Xw/Label.h

Xw/LabelP.h

Xw/Makefile.am

Xw/Makefile.in

Xw/README

Xw/RootIcon.c

Xw/RootIcon.h

Xw/RootIconP.h

Xw/Toggle.c

Xw/Toggle.h

Xw/ToggleP.h

Xw/simple.c

Xw/testxw.c

Xw/testxw.xbm

compile

configure.ac

debian/bts

debian/gvidcap.files

debian/patches/01_rxvt.dpatch

ffmpeg/.nbattrs

ffmpeg/berrno.h

ffmpeg/cygwin_inttypes.h

ffmpeg/doc/.nbattrs

ffmpeg/doc/faq.html

ffmpeg/doc/ffmpeg-doc.html

ffmpeg/doc/ffmpeg.1

ffmpeg/doc/ffplay-doc.html

ffmpeg/doc/ffplay.1

ffmpeg/doc/ffserver-doc.html

ffmpeg/doc/ffserver.1

ffmpeg/doc/hooks.html

ffmpeg/libavcodec/.nbattrs

ffmpeg/libavcodec/Doxyfile

ffmpeg/libavcodec/ac3dec.c

ffmpeg/libavcodec/avcodec.c

ffmpeg/libavcodec/bswap.h

ffmpeg/libavcodec/common.c

ffmpeg/libavcodec/common.h

ffmpeg/libavcodec/fastmemcpy.h

ffmpeg/libavcodec/liba52

ffmpeg/libavcodec/liba52/a52.h

ffmpeg/libavcodec/liba52/a52_internal.h

ffmpeg/libavcodec/liba52/a52_util.h

ffmpeg/libavcodec/liba52/bit_allocate.c

ffmpeg/libavcodec/liba52/bitstream.c

ffmpeg/libavcodec/liba52/bitstream.h

ffmpeg/libavcodec/liba52/crc.c

ffmpeg/libavcodec/liba52/downmix.c

ffmpeg/libavcodec/liba52/imdct.c

ffmpeg/libavcodec/liba52/mm_accel.h

ffmpeg/libavcodec/liba52/parse.c

ffmpeg/libavcodec/liba52/resample.c

ffmpeg/libavcodec/liba52/resample_c.c

ffmpeg/libavcodec/liba52/resample_mmx.c

ffmpeg/libavcodec/liba52/tables.h

ffmpeg/libavcodec/libpostproc

ffmpeg/libavcodec/libpostproc/.nbattrs

ffmpeg/libavcodec/libpostproc/Makefile

ffmpeg/libavcodec/libpostproc/mangle.h

ffmpeg/libavcodec/libpostproc/postprocess.c

ffmpeg/libavcodec/libpostproc/postprocess.h

ffmpeg/libavcodec/libpostproc/postprocess_internal.h

ffmpeg/libavcodec/libpostproc/postprocess_template.c

ffmpeg/libavcodec/mem.c

ffmpeg/libavcodec/oggvorbis.h

ffmpeg/libavcodec/opts.c

ffmpeg/libavformat/.nbattrs

ffmpeg/libavformat/barpainet.c

ffmpeg/libavformat/dvcore.c

ffmpeg/libavformat/dvcore.h

ffmpeg/libavformat/img.c

ffmpeg/libavformat/jpeg.c

ffmpeg/libavformat/png.c

ffmpeg/libavformat/pnm.c

ffmpeg/libavformat/yuv.c

ffmpeg/tests/.nbattrs

ffmpeg/vhook/.nbattrs

ffmpeg/xvmc_render.h

getopt

getopt/README

getopt/getopt.c

getopt/getopt.h

getopt/getopt1.c

gt/COPYING.LIB

gt/Makefile.am

gt/Makefile.in

gt/README

gt/gnuavi.c

gt/gnuavi.h

gt/gnuplay.c

gt/gt.c

gt/gt.h

gt/gtapi.c

gt/gtapi.h

gt/gtatoms.h

gt/gttypes.h

gt/gtvr.c

gt/gtvr.h

gt/parse_gt.c

gt/ppm2qt.c

gt/ppm2qtvr.c

gt/sound.c

gt/sound.h

gt/video.c

gt/video.h

gt/xt.c

gt/xt.h

man/gvidcap.man

man/index.bt

man/xvidcap.man

src/fallback.h

src/gtk2_control.c

src/gtk2_control.h

src/gtk2_frame.c

src/gtk2_frame.h

src/gtk2_options.c

src/gtk2_options.h

src/gtk2_support.c

src/gtk2_support.h

src/main.h

src/malloc.c

src/mkresfile.sh

src/mngutil.c

src/mngutil.h

src/realloc.c

src/util.c

src/util.h

src/video.c

src/video.h

src/xt_control.c

src/xt_control.h

src/xt_frame.c

src/xt_frame.h

src/xt_options.c

src/xt_options.h

src/xtojpg.c

src/xtojpg.h

src/xtomng.c

src/xtomng.h

src/xtopng.c

src/xtopng.h

src/xtopnm.c

src/xtopnm.h

src/xtoqtf.c

src/xtoqtf.h

src/xutil.c

src/xutil.h

xbm/animate.png

xbm/animate.xbm

xbm/edit.png

xbm/edit.xbm

xbm/help.xbm

xbm/mkvideo.xbm

xbm/move.png

xbm/move.xbm

xbm/next.png

xbm/next.xbm

xbm/pause.png

xbm/pause.xbm

xbm/prev.png

xbm/prev.xbm

xbm/record.png

xbm/record.xbm

xbm/record_off.png

xbm/record_on.png

xbm/select.png

xbm/select.xbm

xbm/step.png

xbm/step.xbm

xbm/stop.png

xbm/stop.xbm

files modified:
AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

aclocal.m4

config.h.in

configure

debian/changelog

debian/compat

debian/control

debian/copyright

debian/patches/00list

debian/rules

depcomp

ffmpeg/COPYING

ffmpeg/CREDITS

ffmpeg/Changelog

ffmpeg/INSTALL

ffmpeg/Makefile

ffmpeg/README

ffmpeg/cmdutils.c

ffmpeg/cmdutils.h

ffmpeg/configure

ffmpeg/doc/Makefile

ffmpeg/doc/TODO

ffmpeg/doc/faq.texi

ffmpeg/doc/ffmpeg-doc.texi

ffmpeg/doc/ffmpeg_powerpc_performance_evaluation_howto.txt

ffmpeg/doc/ffplay-doc.texi

ffmpeg/doc/ffserver-doc.texi

ffmpeg/doc/ffserver.conf

ffmpeg/doc/hooks.texi

ffmpeg/doc/optimization.txt

ffmpeg/doc/texi2pod.pl

ffmpeg/ffinstall.nsi

ffmpeg/ffmpeg.c

ffmpeg/ffplay.c

ffmpeg/ffserver.c

ffmpeg/ffserver.h

ffmpeg/libavcodec/4xm.c

ffmpeg/libavcodec/Makefile

ffmpeg/libavcodec/a52dec.c

ffmpeg/libavcodec/ac3.h

ffmpeg/libavcodec/ac3enc.c

ffmpeg/libavcodec/ac3tab.h

ffmpeg/libavcodec/adpcm.c

ffmpeg/libavcodec/allcodecs.c

ffmpeg/libavcodec/alpha/asm.h

ffmpeg/libavcodec/alpha/dsputil_alpha.c

ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S

ffmpeg/libavcodec/alpha/motion_est_alpha.c

ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S

ffmpeg/libavcodec/alpha/mpegvideo_alpha.c

ffmpeg/libavcodec/alpha/regdef.h

ffmpeg/libavcodec/alpha/simple_idct_alpha.c

ffmpeg/libavcodec/amr.c

ffmpeg/libavcodec/apiexample.c

ffmpeg/libavcodec/armv4l/dsputil_arm.c

ffmpeg/libavcodec/armv4l/jrevdct_arm.S

ffmpeg/libavcodec/armv4l/mpegvideo_arm.c

ffmpeg/libavcodec/armv4l/simple_idct_arm.S

ffmpeg/libavcodec/asv1.c

ffmpeg/libavcodec/avcodec.h

ffmpeg/libavcodec/cabac.c

ffmpeg/libavcodec/cabac.h

ffmpeg/libavcodec/cljr.c

ffmpeg/libavcodec/cyuv.c

ffmpeg/libavcodec/dct-test.c

ffmpeg/libavcodec/dpcm.c

ffmpeg/libavcodec/dsputil.c

ffmpeg/libavcodec/dsputil.h

ffmpeg/libavcodec/dv.c

ffmpeg/libavcodec/dvdata.h

ffmpeg/libavcodec/error_resilience.c

ffmpeg/libavcodec/eval.c

ffmpeg/libavcodec/faad.c

ffmpeg/libavcodec/fdctref.c

ffmpeg/libavcodec/fft-test.c

ffmpeg/libavcodec/fft.c

ffmpeg/libavcodec/ffv1.c

ffmpeg/libavcodec/golomb.c

ffmpeg/libavcodec/golomb.h

ffmpeg/libavcodec/h263.c

ffmpeg/libavcodec/h263data.h

ffmpeg/libavcodec/h263dec.c

ffmpeg/libavcodec/h264.c

ffmpeg/libavcodec/h264data.h

ffmpeg/libavcodec/huffyuv.c

ffmpeg/libavcodec/i386/cputest.c

ffmpeg/libavcodec/i386/dsputil_mmx.c

ffmpeg/libavcodec/i386/dsputil_mmx_avg.h

ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h

ffmpeg/libavcodec/i386/fdct_mmx.c

ffmpeg/libavcodec/i386/fft_sse.c

ffmpeg/libavcodec/i386/idct_mmx.c

ffmpeg/libavcodec/i386/mmx.h

ffmpeg/libavcodec/i386/motion_est_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx.c

ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c

ffmpeg/libavcodec/i386/simple_idct_mmx.c

ffmpeg/libavcodec/imgconvert.c

ffmpeg/libavcodec/imgconvert_template.h

ffmpeg/libavcodec/imgresample.c

ffmpeg/libavcodec/indeo3.c

ffmpeg/libavcodec/indeo3data.h

ffmpeg/libavcodec/interplayvideo.c

ffmpeg/libavcodec/jfdctfst.c

ffmpeg/libavcodec/jfdctint.c

ffmpeg/libavcodec/jrevdct.c

ffmpeg/libavcodec/mace.c

ffmpeg/libavcodec/mdct.c

ffmpeg/libavcodec/mdec.c

ffmpeg/libavcodec/mjpeg.c

ffmpeg/libavcodec/mlib/dsputil_mlib.c

ffmpeg/libavcodec/motion_est.c

ffmpeg/libavcodec/motion_est_template.c

ffmpeg/libavcodec/motion_test.c

ffmpeg/libavcodec/mp3lameaudio.c

ffmpeg/libavcodec/mpeg12.c

ffmpeg/libavcodec/mpeg12data.h

ffmpeg/libavcodec/mpeg4data.h

ffmpeg/libavcodec/mpegaudio.c

ffmpeg/libavcodec/mpegaudio.h

ffmpeg/libavcodec/mpegaudiodec.c

ffmpeg/libavcodec/mpegaudiodectab.h

ffmpeg/libavcodec/mpegaudiotab.h

ffmpeg/libavcodec/mpegvideo.c

ffmpeg/libavcodec/mpegvideo.h

ffmpeg/libavcodec/msmpeg4.c

ffmpeg/libavcodec/msmpeg4data.h

ffmpeg/libavcodec/oggvorbis.c

ffmpeg/libavcodec/pcm.c

ffmpeg/libavcodec/ppc/dsputil_altivec.c

ffmpeg/libavcodec/ppc/dsputil_altivec.h

ffmpeg/libavcodec/ppc/dsputil_ppc.c

ffmpeg/libavcodec/ppc/dsputil_ppc.h

ffmpeg/libavcodec/ppc/fft_altivec.c

ffmpeg/libavcodec/ppc/gcc_fixes.h

ffmpeg/libavcodec/ppc/gmc_altivec.c

ffmpeg/libavcodec/ppc/idct_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_altivec.c

ffmpeg/libavcodec/ppc/mpegvideo_ppc.c

ffmpeg/libavcodec/ps2/dsputil_mmi.c

ffmpeg/libavcodec/ps2/idct_mmi.c

ffmpeg/libavcodec/ps2/mmi.h

ffmpeg/libavcodec/ps2/mpegvideo_mmi.c

ffmpeg/libavcodec/ra144.c

ffmpeg/libavcodec/ra144.h

ffmpeg/libavcodec/ra288.c

ffmpeg/libavcodec/ra288.h

ffmpeg/libavcodec/ratecontrol.c

ffmpeg/libavcodec/raw.c

ffmpeg/libavcodec/resample.c

ffmpeg/libavcodec/roqvideo.c

ffmpeg/libavcodec/rv10.c

ffmpeg/libavcodec/sh4/dsputil_align.c

ffmpeg/libavcodec/sh4/dsputil_sh4.c

ffmpeg/libavcodec/sh4/idct_sh4.c

ffmpeg/libavcodec/sh4/qpel.c

ffmpeg/libavcodec/simple_idct.c

ffmpeg/libavcodec/simple_idct.h

ffmpeg/libavcodec/svq1.c

ffmpeg/libavcodec/svq1_cb.h

ffmpeg/libavcodec/svq1_vlc.h

ffmpeg/libavcodec/svq3.c

ffmpeg/libavcodec/utils.c

ffmpeg/libavcodec/vcr1.c

ffmpeg/libavcodec/vp3.c

ffmpeg/libavcodec/vp3data.h

ffmpeg/libavcodec/wmadata.h

ffmpeg/libavcodec/wmadec.c

ffmpeg/libavcodec/wmv2.c

ffmpeg/libavcodec/xan.c

ffmpeg/libavcodec/xvmcvideo.c

ffmpeg/libavformat/4xm.c

ffmpeg/libavformat/Makefile

ffmpeg/libavformat/allformats.c

ffmpeg/libavformat/amr.c

ffmpeg/libavformat/asf.c

ffmpeg/libavformat/au.c

ffmpeg/libavformat/audio.c

ffmpeg/libavformat/avformat.h

ffmpeg/libavformat/avi.h

ffmpeg/libavformat/avidec.c

ffmpeg/libavformat/avienc.c

ffmpeg/libavformat/avio.c

ffmpeg/libavformat/avio.h

ffmpeg/libavformat/aviobuf.c

ffmpeg/libavformat/barpainet.h

ffmpeg/libavformat/beosaudio.cpp

ffmpeg/libavformat/crc.c

ffmpeg/libavformat/cutils.c

ffmpeg/libavformat/dv.c

ffmpeg/libavformat/dv1394.c

ffmpeg/libavformat/dv1394.h

ffmpeg/libavformat/ffm.c

ffmpeg/libavformat/file.c

ffmpeg/libavformat/flvdec.c

ffmpeg/libavformat/flvenc.c

ffmpeg/libavformat/framehook.c

ffmpeg/libavformat/framehook.h

ffmpeg/libavformat/gif.c

ffmpeg/libavformat/gifdec.c

ffmpeg/libavformat/grab.c

ffmpeg/libavformat/http.c

ffmpeg/libavformat/idroq.c

ffmpeg/libavformat/ipmovie.c

ffmpeg/libavformat/mov.c

ffmpeg/libavformat/movenc.c

ffmpeg/libavformat/mp3.c

ffmpeg/libavformat/mpeg.c

ffmpeg/libavformat/mpegts.c

ffmpeg/libavformat/mpegts.h

ffmpeg/libavformat/mpegtsenc.c

ffmpeg/libavformat/mpjpeg.c

ffmpeg/libavformat/nut.c

ffmpeg/libavformat/ogg.c

ffmpeg/libavformat/os_support.c

ffmpeg/libavformat/os_support.h

ffmpeg/libavformat/psxstr.c

ffmpeg/libavformat/raw.c

ffmpeg/libavformat/rm.c

ffmpeg/libavformat/rtp.c

ffmpeg/libavformat/rtp.h

ffmpeg/libavformat/rtpproto.c

ffmpeg/libavformat/rtsp.c

ffmpeg/libavformat/rtsp.h

ffmpeg/libavformat/rtspcodes.h

ffmpeg/libavformat/swf.c

ffmpeg/libavformat/tcp.c

ffmpeg/libavformat/udp.c

ffmpeg/libavformat/utils.c

ffmpeg/libavformat/wav.c

ffmpeg/libavformat/wc3movie.c

ffmpeg/libavformat/yuv4mpeg.c

ffmpeg/output_example.c

ffmpeg/tests/Makefile

ffmpeg/tests/audiogen.c

ffmpeg/tests/dsptest.c

ffmpeg/tests/ffmpeg.regression.ref

ffmpeg/tests/ffserver.regression.ref

ffmpeg/tests/lena.pnm

ffmpeg/tests/libav.regression.ref

ffmpeg/tests/regression.sh

ffmpeg/tests/rotozoom.c

ffmpeg/tests/rotozoom.regression.ref

ffmpeg/tests/server-regression.sh

ffmpeg/tests/test.conf

ffmpeg/tests/tiny_psnr.c

ffmpeg/tests/videogen.c

ffmpeg/vhook/Makefile

ffmpeg/vhook/drawtext.c *

ffmpeg/vhook/fish.c

ffmpeg/vhook/imlib2.c

ffmpeg/vhook/null.c

ffmpeg/vhook/ppm.c

install-sh

missing

src/Makefile.am

src/Makefile.in

src/app_data.h

src/capture.c

src/capture.h

src/codecs.h

src/colors.c

src/colors.h

src/control.h

src/frame.c

src/frame.h

src/job.c

src/job.h

src/led_meter.c

src/led_meter.h

src/main.c

src/options.c

src/xtoffmpeg.c

src/xtoffmpeg.h

src/xtoxwd.c

src/xtoxwd.h

Show diffs side-by-side

added added

removed removed

ffmpeg/libpostproc/postprocess_altivec_template.c

* This file is part of FFmpeg.

* FFmpeg is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

* FFmpeg is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with FFmpeg; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#include "avutil.h"

/*
 * AVV(...) wraps a comma-separated list of values in the delimiter chosen
 * by the build configuration: parentheses when CONFIG_DARWIN is defined,
 * braces otherwise.
 * NOTE(review): presumably used to spell AltiVec vector literals portably
 * across compilers -- confirm against the call sites.
 */
#if defined(CONFIG_DARWIN)
#define AVV(elems...) (elems)
#else
#define AVV(elems...) {elems}
#endif

/*
 * ALTIVEC_TRANSPOSE_8x8_SHORT(src_a, ..., src_h)
 *
 * Rearranges the eight arguments in place with three rounds of
 * vec_mergeh / vec_mergel:
 *   round 1 pairs each input with the one four positions later (a/e, b/f, c/g, d/h);
 *   round 2 pairs the round-1 results the same way (A1/E1, B1/F1, ...);
 *   round 3 pairs the round-2 results and writes them back into src_a..src_h.
 * As the name indicates, this is meant to transpose an 8x8 block of
 * 16-bit elements held as eight vectors.
 *
 * All eight arguments must be modifiable lvalues of the same type; the
 * temporaries take their type from src_a via __typeof__. Each argument is
 * evaluated more than once, so pass plain variables only.
 *
 * The definition must stay contiguous: every line except the last ends in a
 * backslash and no blank line may appear between them, otherwise the
 * preprocessor ends the macro early.
 */
#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
    do {                                                          \
        __typeof__(src_a) tempA1, tempB1, tempC1, tempD1;         \
        __typeof__(src_a) tempE1, tempF1, tempG1, tempH1;         \
        __typeof__(src_a) tempA2, tempB2, tempC2, tempD2;         \
        __typeof__(src_a) tempE2, tempF2, tempG2, tempH2;         \
        /* round 1: merge inputs that are four apart */           \
        tempA1 = vec_mergeh (src_a, src_e);                       \
        tempB1 = vec_mergel (src_a, src_e);                       \
        tempC1 = vec_mergeh (src_b, src_f);                       \
        tempD1 = vec_mergel (src_b, src_f);                       \
        tempE1 = vec_mergeh (src_c, src_g);                       \
        tempF1 = vec_mergel (src_c, src_g);                       \
        tempG1 = vec_mergeh (src_d, src_h);                       \
        tempH1 = vec_mergel (src_d, src_h);                       \
        /* round 2: merge round-1 results that are four apart */  \
        tempA2 = vec_mergeh (tempA1, tempE1);                     \
        tempB2 = vec_mergel (tempA1, tempE1);                     \
        tempC2 = vec_mergeh (tempB1, tempF1);                     \
        tempD2 = vec_mergel (tempB1, tempF1);                     \
        tempE2 = vec_mergeh (tempC1, tempG1);                     \
        tempF2 = vec_mergel (tempC1, tempG1);                     \
        tempG2 = vec_mergeh (tempD1, tempH1);                     \
        tempH2 = vec_mergel (tempD1, tempH1);                     \
        /* round 3: final merge, stored back into the arguments */\
        src_a = vec_mergeh (tempA2, tempE2);                      \
        src_b = vec_mergel (tempA2, tempE2);                      \
        src_c = vec_mergeh (tempB2, tempF2);                      \
        src_d = vec_mergel (tempB2, tempF2);                      \
        src_e = vec_mergeh (tempC2, tempG2);                      \
        src_f = vec_mergel (tempC2, tempG2);                      \
        src_g = vec_mergeh (tempD2, tempH2);                      \
        src_h = vec_mergel (tempD2, tempH2);                      \
    } while (0)

/**
 * Classify the 8-pixel-wide column block that starts four lines below src.
 *
 * Eight lines are read starting at src + 4 * stride; the first eight bytes
 * of each line are interleaved with zero bytes to form 16-bit lanes.
 * For every pair of adjacent lines the per-lane test
 *     (unsigned)(line[i] - line[i + 1] + dcOffset) < dcThreshold
 * is evaluated and the number of passing lanes is accumulated in numEq.
 *
 * @param src    pointer to the pixel data (any alignment)
 * @param stride distance in bytes between two lines
 * @param c      context supplying nonBQP, QP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 * @return 2 if numEq does not exceed c->ppMode.flatnessThreshold;
 *         otherwise 0 if any lane of the line-pair difference plus 2 * QP,
 *         compared as unsigned, exceeds 4 * QP, and 1 if none does.
 */
static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0, unfortunately
      this is not always true.
    */
    DECLARE_ALIGNED(16, short, data[8]);
    int numEq;
    uint8_t *src2 = src;
    vector signed short v_dcOffset;
    vector signed short v2QP;
    vector unsigned short v4QP;
    vector unsigned short v_dcThreshold;
    /* Despite its name, properStride is non-zero when stride is NOT a
       multiple of 16. */
    const int properStride = (stride % 16);
    const int srcAlign = ((unsigned long)src2 % 16);
    const int two_vectors = ((srcAlign > 8) || properStride) ? 1 : 0;
    const vector signed int zero = vec_splat_s32(0);
    const vector signed short mask = vec_splat_s16(1);
    vector signed int v_numEq = vec_splat_s32(0);

    /* Only data[0..3] are written; each one is splatted across a vector. */
    data[0] = ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    data[1] = data[0] * 2 + 1;
    data[2] = c->QP * 2;
    data[3] = c->QP * 4;
    vector signed short v_data = vec_ld(0, data);

    v_dcOffset = vec_splat(v_data, 0);
    v_dcThreshold = (vector unsigned short)vec_splat(v_data, 1);
    v2QP = vec_splat(v_data, 2);
    v4QP = (vector unsigned short)vec_splat(v_data, 3);

    src2 += stride * 4;

    vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3,
                        v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;

/* Unaligned load of line i. The second 16-byte vector is only fetched when
   two_vectors is set; otherwise it is seeded with the first vector so that
   vec_perm never consumes an indeterminate value. */
#define LOAD_LINE(i)                                                    \
    const int j##i = i * stride;                                        \
    vector unsigned char perm##i = vec_lvsl(j##i, src2);                \
    const vector unsigned char v_srcA1##i = vec_ld(j##i, src2);         \
    vector unsigned char v_srcA2##i = v_srcA1##i;                       \
    if (two_vectors) {                                                  \
        v_srcA2##i = vec_ld(j##i + 16, src2);                           \
    }                                                                   \
    const vector unsigned char v_srcA##i =                              \
        vec_perm(v_srcA1##i, v_srcA2##i, perm##i);                      \
    v_srcAss##i =                                                       \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)v_srcA##i)

/* Single aligned load of line i, no permutation. */
#define LOAD_LINE_ALIGNED(i)                                            \
    const int j##i = i * stride;                                        \
    const vector unsigned char v_srcA##i = vec_ld(j##i, src2);          \
    v_srcAss##i =                                                       \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)v_srcA##i)

    // Special-casing the aligned case is worthwhile, as all calls from
    // the (transposed) horizontal deblocks will be aligned, in addition
    // to the naturally aligned vertical deblocks.
    // NOTE(review): this condition is true when BOTH stride % 16 and
    // src % 16 are non-zero, which looks inverted with respect to the
    // comment above -- confirm against the callers before changing it.
    if (properStride && srcAlign) {
        LOAD_LINE_ALIGNED(0);
        LOAD_LINE_ALIGNED(1);
        LOAD_LINE_ALIGNED(2);
        LOAD_LINE_ALIGNED(3);
        LOAD_LINE_ALIGNED(4);
        LOAD_LINE_ALIGNED(5);
        LOAD_LINE_ALIGNED(6);
        LOAD_LINE_ALIGNED(7);
    } else {
        LOAD_LINE(0);
        LOAD_LINE(1);
        LOAD_LINE(2);
        LOAD_LINE(3);
        LOAD_LINE(4);
        LOAD_LINE(5);
        LOAD_LINE(6);
        LOAD_LINE(7);
    }
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED

/* Count the lanes in which line i and line j pass the dcOffset /
   dcThreshold test and add that count to v_numEq. */
#define ITER(i, j)                                                      \
    const vector signed short v_diff##i =                               \
        vec_sub(v_srcAss##i, v_srcAss##j);                              \
    const vector signed short v_sum##i =                                \
        vec_add(v_diff##i, v_dcOffset);                                 \
    const vector signed short v_comp##i =                               \
        (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
                                       v_dcThreshold);                  \
    const vector signed short v_part##i = vec_and(mask, v_comp##i);     \
    v_numEq = vec_sum4s(v_part##i, v_numEq)

    ITER(0, 1);
    ITER(1, 2);
    ITER(2, 3);
    ITER(3, 4);
    ITER(4, 5);
    ITER(5, 6);
    ITER(6, 7);
#undef ITER

    /* Horizontal sum: the total ends up in element 3, which is splatted
       and stored to the scalar numEq. */
    v_numEq = vec_sums(v_numEq, zero);

    v_numEq = vec_splat(v_numEq, 3);
    vec_ste(v_numEq, 0, &numEq);

    if (numEq > c->ppMode.flatnessThreshold) {
        const vector unsigned char mmoP1 = (const vector unsigned char)
            AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
                0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
        const vector unsigned char mmoP2 = (const vector unsigned char)
            AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
                0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
        const vector unsigned char mmoP = (const vector unsigned char)
            vec_lvsl(8, (unsigned char*)0);

        /* Gather selected lanes of lines 0/2/4/6 and of lines 5/7/1/3
           into one vector each, then compare their difference. */
        vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
        vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
        vector signed short mmoL = vec_perm(mmoL1, mmoL2, mmoP);
        vector signed short mmoR1 = vec_perm(v_srcAss5, v_srcAss7, mmoP1);
        vector signed short mmoR2 = vec_perm(v_srcAss1, v_srcAss3, mmoP2);
        vector signed short mmoR = vec_perm(mmoR1, mmoR2, mmoP);
        vector signed short mmoDiff = vec_sub(mmoL, mmoR);
        vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);

        if (vec_any_gt(mmoSum, v4QP))
            return 0;
        else
            return 1;
    }
    else return 2;
}

196

197

/**
 * Vertical low-pass filter over an 8-pixel-wide column.
 *
 * Ten lines are loaded starting at src + 3 * stride. Lines 1 to 8 of that
 * window are replaced by a weighted sum of their neighbours, shifted right
 * by 4. Above the window the first line is used in place of the missing
 * neighbours when |line0 - line1| < QP, otherwise the second line; below
 * it the last line is used when |line8 - line9| < QP, otherwise the one
 * before it.
 *
 * @param src    pointer to the pixel data (any alignment)
 * @param stride distance in bytes between two lines
 * @param c      context supplying QP
 */
static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0, unfortunately
      this is not always true. Quite a lot of load/stores
      can be removed by assuming proper alignment of
      src & stride :-(
    */
    uint8_t *src2 = src;
    const vector signed int zero = vec_splat_s32(0);
    /* Despite its name, properStride is non-zero when stride is NOT a
       multiple of 16. */
    const int properStride = (stride % 16);
    const int srcAlign = ((unsigned long)src2 % 16);
    DECLARE_ALIGNED(16, short, qp[8]);
    qp[0] = c->QP;
    vector signed short vqp = vec_ld(0, qp);
    vqp = vec_splat(vqp, 0);

    src2 += stride*3;

    vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
    vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
    vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
    vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;

/* Unaligned load of line i: keeps both raw vectors (vbA/vbB) for the
   later read-modify-write store, the permuted bytes in vbT and the
   16-bit lanes in vb. */
#define LOAD_LINE(i)                                                    \
    const vector unsigned char perml##i =                               \
        vec_lvsl(i * stride, src2);                                     \
    vbA##i = vec_ld(i * stride, src2);                                  \
    vbB##i = vec_ld(i * stride + 16, src2);                             \
    vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                        \
    vb##i =                                                             \
        (vector signed short)vec_mergeh((vector unsigned char)zero,     \
                                        (vector unsigned char)vbT##i)

/* Single aligned load of line i; vbA/vbB are not filled in. */
#define LOAD_LINE_ALIGNED(i)                                            \
    const int j##i = i * stride;                                        \
    vbT##i = vec_ld(j##i, src2);                                        \
    vb##i =                                                             \
        (vector signed short)vec_mergeh((vector signed char)zero,       \
                                        (vector signed char)vbT##i)

    // Special-casing the aligned case is worthwhile, as all calls from
    // the (transposed) horizontal deblocks will be aligned, in addition
    // to the naturally aligned vertical deblocks.
    // NOTE(review): this condition is true when BOTH stride % 16 and
    // src % 16 are non-zero, which looks inverted with respect to the
    // comment above -- confirm against the callers before changing it.
    if (properStride && srcAlign) {
        LOAD_LINE_ALIGNED(0);
        LOAD_LINE_ALIGNED(1);
        LOAD_LINE_ALIGNED(2);
        LOAD_LINE_ALIGNED(3);
        LOAD_LINE_ALIGNED(4);
        LOAD_LINE_ALIGNED(5);
        LOAD_LINE_ALIGNED(6);
        LOAD_LINE_ALIGNED(7);
        LOAD_LINE_ALIGNED(8);
        LOAD_LINE_ALIGNED(9);
    } else {
        LOAD_LINE(0);
        LOAD_LINE(1);
        LOAD_LINE(2);
        LOAD_LINE(3);
        LOAD_LINE(4);
        LOAD_LINE(5);
        LOAD_LINE(6);
        LOAD_LINE(7);
        LOAD_LINE(8);
        LOAD_LINE(9);
    }
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED

    const vector unsigned short v_2 = vec_splat_u16(2);
    const vector unsigned short v_4 = vec_splat_u16(4);

    /* Pick the edge values used in place of lines outside the window. */
    const vector signed short v_diff01 = vec_sub(vb0, vb1);
    const vector unsigned short v_cmp01 =
        (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp);
    const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01);
    const vector signed short v_diff89 = vec_sub(vb8, vb9);
    const vector unsigned short v_cmp89 =
        (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
    const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);

    /* Running sums: each v_sumsBn is derived from the previous one by
       removing one line and adding another. */
    const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
    const vector signed short temp02 = vec_add(vb2, vb3);
    const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
    const vector signed short v_sumsB0 = vec_add(temp02, temp03);

    const vector signed short temp11 = vec_sub(v_sumsB0, v_first);
    const vector signed short v_sumsB1 = vec_add(temp11, vb4);

    const vector signed short temp21 = vec_sub(v_sumsB1, v_first);
    const vector signed short v_sumsB2 = vec_add(temp21, vb5);

    const vector signed short temp31 = vec_sub(v_sumsB2, v_first);
    const vector signed short v_sumsB3 = vec_add(temp31, vb6);

    const vector signed short temp41 = vec_sub(v_sumsB3, v_first);
    const vector signed short v_sumsB4 = vec_add(temp41, vb7);

    const vector signed short temp51 = vec_sub(v_sumsB4, vb1);
    const vector signed short v_sumsB5 = vec_add(temp51, vb8);

    const vector signed short temp61 = vec_sub(v_sumsB5, vb2);
    const vector signed short v_sumsB6 = vec_add(temp61, v_last);

    const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
    const vector signed short v_sumsB7 = vec_add(temp71, v_last);

    const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
    const vector signed short v_sumsB8 = vec_add(temp81, v_last);

    const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
    const vector signed short v_sumsB9 = vec_add(temp91, v_last);

/* vr<j> = (sumsB<i> + sumsB<k> + 2 * vb<j>) >> 4 */
#define COMPUTE_VR(i, j, k)                                             \
    const vector signed short temps1##i =                               \
        vec_add(v_sumsB##i, v_sumsB##k);                                \
    const vector signed short temps2##i =                               \
        vec_mladd(vb##j, (vector signed short)v_2, temps1##i);          \
    const vector signed short vr##j = vec_sra(temps2##i, v_4)

    COMPUTE_VR(0, 1, 2);
    COMPUTE_VR(1, 2, 3);
    COMPUTE_VR(2, 3, 4);
    COMPUTE_VR(3, 4, 5);
    COMPUTE_VR(4, 5, 6);
    COMPUTE_VR(5, 6, 7);
    COMPUTE_VR(6, 7, 8);
    COMPUTE_VR(7, 8, 9);

    const vector signed char neg1 = vec_splat_s8(-1);
    /* Keeps bytes 0-7 of the first operand and bytes 8-15 of the second. */
    const vector unsigned char permHH = (const vector unsigned char)
        AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

/* Unaligned store of line i: pack the result to bytes, keep the original
   upper eight bytes, rotate into place and merge with the two vectors
   that were loaded for this line before writing both back. */
#define PACK_AND_STORE(i)                                               \
    const vector unsigned char perms##i =                               \
        vec_lvsr(i * stride, src2);                                     \
    const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                   \
    const vector unsigned char vg##i =                                  \
        vec_perm(vf##i, vbT##i, permHH);                                \
    const vector unsigned char mask##i =                                \
        vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
    const vector unsigned char vg2##i =                                 \
        vec_perm(vg##i, vg##i, perms##i);                               \
    const vector unsigned char svA##i =                                 \
        vec_sel(vbA##i, vg2##i, mask##i);                               \
    const vector unsigned char svB##i =                                 \
        vec_sel(vg2##i, vbB##i, mask##i);                               \
    vec_st(svA##i, i * stride, src2);                                   \
    vec_st(svB##i, i * stride + 16, src2)

/* Aligned store of line i: a single vector write. */
#define PACK_AND_STORE_ALIGNED(i)                                       \
    const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                   \
    const vector unsigned char vg##i =                                  \
        vec_perm(vf##i, vbT##i, permHH);                                \
    vec_st(vg##i, i * stride, src2)

    // Special-casing the aligned case is worthwhile, as all calls from
    // the (transposed) horizontal deblocks will be aligned, in addition
    // to the naturally aligned vertical deblocks.
    // Must use the same condition as the load above: the unaligned store
    // depends on vbA/vbB, which only the unaligned load fills in.
    if (properStride && srcAlign) {
        PACK_AND_STORE_ALIGNED(1);
        PACK_AND_STORE_ALIGNED(2);
        PACK_AND_STORE_ALIGNED(3);
        PACK_AND_STORE_ALIGNED(4);
        PACK_AND_STORE_ALIGNED(5);
        PACK_AND_STORE_ALIGNED(6);
        PACK_AND_STORE_ALIGNED(7);
        PACK_AND_STORE_ALIGNED(8);
    } else {
        PACK_AND_STORE(1);
        PACK_AND_STORE(2);
        PACK_AND_STORE(3);
        PACK_AND_STORE(4);
        PACK_AND_STORE(5);
        PACK_AND_STORE(6);
        PACK_AND_STORE(7);
        PACK_AND_STORE(8);
    }
#undef PACK_AND_STORE
#undef PACK_AND_STORE_ALIGNED
}

382

383

384

385

/**
 * Default vertical deblocking filter over an 8-pixel-wide column.
 *
 * Lines 1 to 8 relative to src + 3 * stride are loaded. A correction d is
 * derived from the "middle", "left" and "right" energies of those lines,
 * clamped against half the difference of lines 4 and 5, and applied only
 * in lanes where |middle energy| < 8 * QP. Line 4 has d subtracted and
 * line 5 has d added; only those two lines are written back.
 *
 * @param src    pointer to the pixel data (any alignment)
 * @param stride distance in bytes between two lines
 * @param c      context supplying QP
 */
static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0, unfortunately
      this is not always true. Quite a lot of load/stores
      can be removed by assuming proper alignment of
      src & stride :-(
    */
    uint8_t *src2 = src;
    const vector signed int zero = vec_splat_s32(0);
    DECLARE_ALIGNED(16, short, qp[8]);
    qp[0] = 8*c->QP;
    vector signed short vqp = vec_ld(0, qp);
    vqp = vec_splat(vqp, 0);

/* Unaligned load of line i: raw vectors in vbA/vbB (reused by STORE),
   permuted bytes in vbT, 16-bit lanes in vb. */
#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, src2);                                     \
    const vector unsigned char vbA##i =                                 \
        vec_ld(i * stride, src2);                                       \
    const vector unsigned char vbB##i =                                 \
        vec_ld(i * stride + 16, src2);                                  \
    const vector unsigned char vbT##i =                                 \
        vec_perm(vbA##i, vbB##i, perm##i);                              \
    const vector signed short vb##i =                                   \
        (vector signed short)vec_mergeh((vector unsigned char)zero,     \
                                        (vector unsigned char)vbT##i)

    src2 += stride*3;

    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
#undef LOAD_LINE

    const vector signed short v_1 = vec_splat_s16(1);
    const vector signed short v_2 = vec_splat_s16(2);
    const vector signed short v_5 = vec_splat_s16(5);
    const vector signed short v_32 = vec_sl(v_1,
                                            (vector unsigned short)v_5);
    /* middle energy */
    const vector signed short l3minusl6 = vec_sub(vb3, vb6);
    const vector signed short l5minusl4 = vec_sub(vb5, vb4);
    const vector signed short twotimes_l3minusl6 = vec_mladd(v_2, l3minusl6, (vector signed short)zero);
    const vector signed short mE = vec_mladd(v_5, l5minusl4, twotimes_l3minusl6);
    const vector signed short absmE = vec_abs(mE);
    /* left & right energy */
    const vector signed short l1minusl4 = vec_sub(vb1, vb4);
    const vector signed short l3minusl2 = vec_sub(vb3, vb2);
    const vector signed short l5minusl8 = vec_sub(vb5, vb8);
    const vector signed short l7minusl6 = vec_sub(vb7, vb6);
    const vector signed short twotimes_l1minusl4 = vec_mladd(v_2, l1minusl4, (vector signed short)zero);
    const vector signed short twotimes_l5minusl8 = vec_mladd(v_2, l5minusl8, (vector signed short)zero);
    const vector signed short lE = vec_mladd(v_5, l3minusl2, twotimes_l1minusl4);
    const vector signed short rE = vec_mladd(v_5, l7minusl6, twotimes_l5minusl8);
    /* d */
    const vector signed short ddiff = vec_sub(absmE,
                                              vec_min(vec_abs(lE),
                                                      vec_abs(rE)));
    const vector signed short ddiffclamp = vec_max(ddiff, (vector signed short)zero);
    const vector signed short dtimes64 = vec_mladd(v_5, ddiffclamp, v_32);
    const vector signed short d = vec_sra(dtimes64, vec_splat_u16(6));
    const vector signed short minusd = vec_sub((vector signed short)zero, d);
    /* Select d where -mE > 0, and -d elsewhere. */
    const vector signed short finald = vec_sel(minusd,
                                               d,
                                               vec_cmpgt(vec_sub((vector signed short)zero, mE),
                                                         (vector signed short)zero));
    /* q */
    const vector signed short qtimes2 = vec_sub(vb4, vb5);
    /* for a shift right to behave like /2, we need to add one
       to all negative integers */
    const vector signed short rounddown = vec_sel((vector signed short)zero,
                                                  v_1,
                                                  vec_cmplt(qtimes2, (vector signed short)zero));
    const vector signed short q = vec_sra(vec_add(qtimes2, rounddown), vec_splat_u16(1));
    /* clamp */
    const vector signed short dclamp_P1 = vec_max((vector signed short)zero, finald);
    const vector signed short dclamp_P = vec_min(dclamp_P1, q);
    const vector signed short dclamp_N1 = vec_min((vector signed short)zero, finald);
    const vector signed short dclamp_N = vec_max(dclamp_N1, q);

    const vector signed short dclampedfinal = vec_sel(dclamp_N,
                                                      dclamp_P,
                                                      vec_cmpgt(q, (vector signed short)zero));
    const vector signed short dornotd = vec_sel((vector signed short)zero,
                                                dclampedfinal,
                                                vec_cmplt(absmE, vqp));
    /* add/subtract to l4 and l5 */
    const vector signed short vb4minusd = vec_sub(vb4, dornotd);
    const vector signed short vb5plusd = vec_add(vb5, dornotd);
    /* finally, stores */
    const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero);
    const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero);

    const vector signed char neg1 = vec_splat_s8(-1);
    /* Keeps bytes 0-7 of the first operand and bytes 8-15 of the second. */
    const vector unsigned char permHH = (const vector unsigned char)
        AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

/* Unaligned read-modify-write store of st<i> into line i. */
#define STORE(i)                                                        \
    const vector unsigned char perms##i =                               \
        vec_lvsr(i * stride, src2);                                     \
    const vector unsigned char vg##i =                                  \
        vec_perm(st##i, vbT##i, permHH);                                \
    const vector unsigned char mask##i =                                \
        vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
    const vector unsigned char vg2##i =                                 \
        vec_perm(vg##i, vg##i, perms##i);                               \
    const vector unsigned char svA##i =                                 \
        vec_sel(vbA##i, vg2##i, mask##i);                               \
    const vector unsigned char svB##i =                                 \
        vec_sel(vg2##i, vbB##i, mask##i);                               \
    vec_st(svA##i, i * stride, src2);                                   \
    vec_st(svB##i, i * stride + 16, src2)

    STORE(4);
    STORE(5);
}

508

509

/**
 * Deringing filter for one block.
 *
 * Ten lines are loaded starting at src. The minimum and maximum of bytes
 * 1-8 of lines 1-8 are computed; if (max - min) is below deringThreshold
 * the function returns without writing anything. Otherwise every pixel is
 * compared with the min/max average, the per-line comparison bits are
 * combined into the mask array S[], and each pixel selected by S[] is
 * replaced by a weighted 3x3 average that is limited to within
 * QP/2 + 1 of the original value. Lines 1-8 are then written back.
 *
 * @param src    pointer to the pixel data (any alignment)
 * @param stride distance in bytes between two lines
 * @param c      context supplying QP
 */
static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0, unfortunately
      this is not always true. Quite a lot of load/stores
      can be removed by assuming proper alignment of
      src & stride :-(
    */
    uint8_t *srcCopy = src;
    DECLARE_ALIGNED(16, uint8_t, dt[16]);
    const vector signed int zero = vec_splat_s32(0);
    vector unsigned char v_dt;
    dt[0] = deringThreshold;
    v_dt = vec_splat(vec_ld(0, dt), 0);

/* Unaligned load of line i. sA/sB keep the two raw vectors for the final
   store; src<i> holds the 16 bytes starting at the line's address. */
#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, srcCopy);                                  \
    vector unsigned char sA##i = vec_ld(i * stride, srcCopy);           \
    vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy);      \
    vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)

    LOAD_LINE(0);
    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
    LOAD_LINE(9);
#undef LOAD_LINE

    vector unsigned char v_avg;
    {
        /* Packs bytes 1-8 of two lines into one vector. */
        const vector unsigned char trunc_perm = (vector unsigned char)
            AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
                0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
        const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
        const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
        const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
        const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);

/* Reduce the four packed vectors with vec_<op> (min or max) and leave
   the result in v_<op>. */
#define EXTRACT(op) do {                                                \
        const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \
        const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \
        const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \
        const vector unsigned char s##op##_8h = vec_mergeh(s##op##_6, s##op##_6); \
        const vector unsigned char s##op##_8l = vec_mergel(s##op##_6, s##op##_6); \
        const vector unsigned char s##op##_9 = vec_##op(s##op##_8h, s##op##_8l); \
        const vector unsigned char s##op##_9h = vec_mergeh(s##op##_9, s##op##_9); \
        const vector unsigned char s##op##_9l = vec_mergel(s##op##_9, s##op##_9); \
        const vector unsigned char s##op##_10 = vec_##op(s##op##_9h, s##op##_9l); \
        const vector unsigned char s##op##_10h = vec_mergeh(s##op##_10, s##op##_10); \
        const vector unsigned char s##op##_10l = vec_mergel(s##op##_10, s##op##_10); \
        const vector unsigned char s##op##_11 = vec_##op(s##op##_10h, s##op##_10l); \
        const vector unsigned char s##op##_11h = vec_mergeh(s##op##_11, s##op##_11); \
        const vector unsigned char s##op##_11l = vec_mergel(s##op##_11, s##op##_11); \
        v_##op = vec_##op(s##op##_11h, s##op##_11l); } while (0)

        vector unsigned char v_min;
        vector unsigned char v_max;
        EXTRACT(min);
        EXTRACT(max);
#undef EXTRACT

        /* Not enough contrast: leave the block untouched. */
        if (vec_all_lt(vec_sub(v_max, v_min), v_dt))
            return;

        v_avg = vec_avg(v_min, v_max);
    }

    DECLARE_ALIGNED(16, signed int, S[8]);
    {
        const vector unsigned short mask1 = (vector unsigned short)
            AVV(0x0001, 0x0002, 0x0004, 0x0008,
                0x0010, 0x0020, 0x0040, 0x0080);
        const vector unsigned short mask2 = (vector unsigned short)
            AVV(0x0100, 0x0200, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000);

        const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
        const vector unsigned int vuint32_1 = vec_splat_u32(1);

/* sum<i> gets one bit per pixel of line i (pixels 0-9) that is greater
   than v_avg; the total lands in element 3 after vec_sums. */
#define COMPARE(i)                                                      \
        vector signed int sum##i;                                       \
        do {                                                            \
            const vector unsigned char cmp##i =                         \
                (vector unsigned char)vec_cmpgt(src##i, v_avg);         \
            const vector unsigned short cmpHi##i =                      \
                (vector unsigned short)vec_mergeh(cmp##i, cmp##i);      \
            const vector unsigned short cmpLi##i =                      \
                (vector unsigned short)vec_mergel(cmp##i, cmp##i);      \
            const vector signed short cmpHf##i =                        \
                (vector signed short)vec_and(cmpHi##i, mask1);          \
            const vector signed short cmpLf##i =                        \
                (vector signed short)vec_and(cmpLi##i, mask2);          \
            const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \
            const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
            sum##i = vec_sums(sumq##i, zero); } while (0)

        COMPARE(0);
        COMPARE(1);
        COMPARE(2);
        COMPARE(3);
        COMPARE(4);
        COMPARE(5);
        COMPARE(6);
        COMPARE(7);
        COMPARE(8);
        COMPARE(9);
#undef COMPARE

        vector signed int sumA2;
        vector signed int sumB2;
        {
            /* Gather the per-line bit masks: lines 0-3 in sumA,
               lines 4-7 in sumB, lines 8-9 in sumC. */
            const vector signed int sump02 = vec_mergel(sum0, sum2);
            const vector signed int sump13 = vec_mergel(sum1, sum3);
            const vector signed int sumA = vec_mergel(sump02, sump13);

            const vector signed int sump46 = vec_mergel(sum4, sum6);
            const vector signed int sump57 = vec_mergel(sum5, sum7);
            const vector signed int sumB = vec_mergel(sump46, sump57);

            const vector signed int sump8A = vec_mergel(sum8, zero);
            const vector signed int sump9B = vec_mergel(sum9, zero);
            const vector signed int sumC = vec_mergel(sump8A, sump9B);

            /* Put the complemented mask 16 bits above the mask itself. */
            const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);
            const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);
            const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);
            const vector signed int t2A = vec_or(sumA, tA);
            const vector signed int t2B = vec_or(sumB, tB);
            const vector signed int t2C = vec_or(sumC, tC);
            /* Keep a bit only if both horizontal neighbours are set too. */
            const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
                                                  vec_sl(t2A, vuint32_1));
            const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
                                                  vec_sl(t2B, vuint32_1));
            const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
                                                  vec_sl(t2C, vuint32_1));
            const vector signed int yA = vec_and(t2A, t3A);
            const vector signed int yB = vec_and(t2B, t3B);
            const vector signed int yC = vec_and(t2C, t3C);

            /* AND each line with the next two lines (shifted by 4 and 8
               bytes across the A/B/C vectors). */
            const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);
            const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);
            const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);
            const vector signed int sumAd8 = vec_perm(yA, yB, strangeperm2);
            const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
            const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
            const vector signed int sumAp = vec_and(yA,
                                                    vec_and(sumAd4,sumAd8));
            const vector signed int sumBp = vec_and(yB,
                                                    vec_and(sumBd4,sumBd8));
            sumA2 = vec_or(sumAp,
                           vec_sra(sumAp,
                                   vuint32_16));
            sumB2 = vec_or(sumBp,
                           vec_sra(sumBp,
                                   vuint32_16));
        }
        vec_st(sumA2, 0, S);
        vec_st(sumB2, 16, S);
    }

    /* I'm not sure the following is actually faster
       than straight, unvectorized C code :-( */

    DECLARE_ALIGNED(16, int, tQP2[4]);
    tQP2[0]= c->QP/2 + 1;
    vector signed int vQP2 = vec_ld(0, tQP2);
    vQP2 = vec_splat(vQP2, 0);
    const vector signed int vsint32_8 = vec_splat_s32(8);
    const vector unsigned int vuint32_4 = vec_splat_u32(4);

    const vector unsigned char permA1 = (vector unsigned char)
        AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
            0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
    const vector unsigned char permA2 = (vector unsigned char)
        AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
            0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
    const vector unsigned char permA1inc = (vector unsigned char)
        AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    const vector unsigned char permA2inc = (vector unsigned char)
        AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    /* 3x3 weights (1 2 1 / 2 4 2 / 1 2 1) applied by vec_msum. */
    const vector unsigned char magic = (vector unsigned char)
        AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    const vector unsigned char extractPerm = (vector unsigned char)
        AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
            0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
    const vector unsigned char extractPermInc = (vector unsigned char)
        AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
    const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
    const vector unsigned char tenRight = (vector unsigned char)
        AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    const vector unsigned char eightLeft = (vector unsigned char)
        AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);


/* Per-row working copies of the permutation vectors; F2 advances them by
   one pixel on every invocation. */
#define F_INIT(i)                                       \
    vector unsigned char tenRightM##i = tenRight;       \
    vector unsigned char permA1M##i = permA1;           \
    vector unsigned char permA2M##i = permA2;           \
    vector unsigned char extractPermM##i = extractPerm

/* Filter pixel l+1 of line j (lines i and k are its neighbours) if the
   matching bit of S[i] is set, then step the permutation vectors. */
#define F2(i, j, k, l)                                                  \
    if (S[i] & (1 << (l+1))) {                                          \
        const vector unsigned char a_##j##_A##l =                       \
            vec_perm(src##i, src##j, permA1M##i);                       \
        const vector unsigned char a_##j##_B##l =                       \
            vec_perm(a_##j##_A##l, src##k, permA2M##i);                 \
        const vector signed int a_##j##_sump##l =                       \
            (vector signed int)vec_msum(a_##j##_B##l, magic,            \
                                        (vector unsigned int)zero);     \
        vector signed int F_##j##_##l =                                 \
            vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);    \
        F_##j##_##l = vec_splat(F_##j##_##l, 3);                        \
        const vector signed int p_##j##_##l =                           \
            (vector signed int)vec_perm(src##j,                         \
                                        (vector unsigned char)zero,     \
                                        extractPermM##i);               \
        const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \
        const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
        vector signed int newpm_##j##_##l;                              \
        if (vec_all_lt(sum_##j##_##l, F_##j##_##l))                     \
            newpm_##j##_##l = sum_##j##_##l;                            \
        else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))               \
            newpm_##j##_##l = diff_##j##_##l;                           \
        else newpm_##j##_##l = F_##j##_##l;                             \
        const vector unsigned char newpm2_##j##_##l =                   \
            vec_splat((vector unsigned char)newpm_##j##_##l, 15);       \
        const vector unsigned char mask##j##l = vec_add(identity,       \
                                                        tenRightM##i);  \
        src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);        \
    }                                                                   \
    permA1M##i = vec_add(permA1M##i, permA1inc);                        \
    permA2M##i = vec_add(permA2M##i, permA2inc);                        \
    tenRightM##i = vec_sro(tenRightM##i, eightLeft);                    \
    extractPermM##i = vec_add(extractPermM##i, extractPermInc)

/* Process the eight candidate pixels of line j. */
#define ITER(i, j, k)                           \
    F_INIT(i);                                  \
    F2(i, j, k, 0);                             \
    F2(i, j, k, 1);                             \
    F2(i, j, k, 2);                             \
    F2(i, j, k, 3);                             \
    F2(i, j, k, 4);                             \
    F2(i, j, k, 5);                             \
    F2(i, j, k, 6);                             \
    F2(i, j, k, 7)

    ITER(0, 1, 2);
    ITER(1, 2, 3);
    ITER(2, 3, 4);
    ITER(3, 4, 5);
    ITER(4, 5, 6);
    ITER(5, 6, 7);
    ITER(6, 7, 8);
    ITER(7, 8, 9);

    const vector signed char neg1 = vec_splat_s8(-1);

/* Unaligned read-modify-write store of src<i> back to line i. */
#define STORE_LINE(i)                                   \
    const vector unsigned char permST##i =              \
        vec_lvsr(i * stride, srcCopy);                  \
    const vector unsigned char maskST##i =              \
        vec_perm((vector unsigned char)zero,            \
                 (vector unsigned char)neg1, permST##i); \
    src##i = vec_perm(src##i ,src##i, permST##i);       \
    sA##i= vec_sel(sA##i, src##i, maskST##i);           \
    sB##i= vec_sel(src##i, sB##i, maskST##i);           \
    vec_st(sA##i, i * stride, srcCopy);                 \
    vec_st(sB##i, i * stride + 16, srcCopy)

    STORE_LINE(1);
    STORE_LINE(2);
    STORE_LINE(3);
    STORE_LINE(4);
    STORE_LINE(5);
    STORE_LINE(6);
    STORE_LINE(7);
    STORE_LINE(8);

#undef STORE_LINE
#undef ITER
#undef F2
}

804

805

/* These three names simply forward to the plain C implementations. */
#define doHorizLowPass_altivec(a...) doHorizLowPass_C(a)
#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)

808

809

static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,

810

uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)

811

{

812

const vector signed int zero = vec_splat_s32(0);

813

const vector signed short vsint16_1 = vec_splat_s16(1);

814

vector signed int v_dp = zero;

815

vector signed int v_sysdp = zero;

816

int d, sysd, i;

817

818

tempBluredPast[127]= maxNoise[0];

819

tempBluredPast[128]= maxNoise[1];

820

tempBluredPast[129]= maxNoise[2];

821

822

#define LOAD_LINE(src, i) \

823

824

vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \

825

const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \

826

const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \

827

const vector unsigned char v_##src##A##i = \

828

vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \

829

vector signed short v_##src##Ass##i = \

830

(vector signed short)vec_mergeh((vector signed char)zero, \

831

(vector signed char)v_##src##A##i)

832

833

LOAD_LINE(src, 0);

834

LOAD_LINE(src, 1);

835

LOAD_LINE(src, 2);

836

LOAD_LINE(src, 3);

837

LOAD_LINE(src, 4);

838

LOAD_LINE(src, 5);

839

LOAD_LINE(src, 6);

840

LOAD_LINE(src, 7);

841

842

LOAD_LINE(tempBlured, 0);

843

LOAD_LINE(tempBlured, 1);

844

LOAD_LINE(tempBlured, 2);

845

LOAD_LINE(tempBlured, 3);

846

LOAD_LINE(tempBlured, 4);

847

LOAD_LINE(tempBlured, 5);

848

LOAD_LINE(tempBlured, 6);

849

LOAD_LINE(tempBlured, 7);

850

#undef LOAD_LINE

851

852

#define ACCUMULATE_DIFFS(i) \

853

vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \

854

v_srcAss##i); \

855

v_dp = vec_msums(v_d##i, v_d##i, v_dp); \

856

v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)

857

858

ACCUMULATE_DIFFS(0);

859

ACCUMULATE_DIFFS(1);

860

ACCUMULATE_DIFFS(2);

861

ACCUMULATE_DIFFS(3);

862

ACCUMULATE_DIFFS(4);

863

ACCUMULATE_DIFFS(5);

864

ACCUMULATE_DIFFS(6);

865

ACCUMULATE_DIFFS(7);

866

#undef ACCUMULATE_DIFFS

867

868

v_dp = vec_sums(v_dp, zero);

869

v_sysdp = vec_sums(v_sysdp, zero);

870

871

v_dp = vec_splat(v_dp, 3);

872

v_sysdp = vec_splat(v_sysdp, 3);

873

874

vec_ste(v_dp, 0, &d);

875

vec_ste(v_sysdp, 0, &sysd);

876

877

i = d;

878

d = (4*d

879

+(*(tempBluredPast-256))

880

+(*(tempBluredPast-1))+ (*(tempBluredPast+1))

881

+(*(tempBluredPast+256))

882

+4)>>3;

883

884

*tempBluredPast=i;

885

886

if (d > maxNoise[1]) {

887

if (d < maxNoise[2]) {

888

#define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i);

889

890

OP(0);

891

OP(1);

892

OP(2);

893

OP(3);

894

OP(4);

895

OP(5);

896

OP(6);

897

OP(7);

898

#undef OP

899

} else {

900

#define OP(i) v_tempBluredAss##i = v_srcAss##i;

901

902

OP(0);

903

OP(1);

904

OP(2);

905

OP(3);

906

OP(4);

907

OP(5);

908

OP(6);

909

OP(7);

910

#undef OP

911

}

912

} else {

913

if (d < maxNoise[0]) {

914

const vector signed short vsint16_7 = vec_splat_s16(7);

915

const vector signed short vsint16_4 = vec_splat_s16(4);

916

const vector unsigned short vuint16_3 = vec_splat_u16(3);

917

918

#define OP(i) \

919

const vector signed short v_temp##i = \

920

vec_mladd(v_tempBluredAss##i, \

921

vsint16_7, v_srcAss##i); \

922

const vector signed short v_temp2##i = \

923

vec_add(v_temp##i, vsint16_4); \

924

v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)

925

926

OP(0);

927

OP(1);

928

OP(2);

929

OP(3);

930

OP(4);

931

OP(5);

932

OP(6);

933

OP(7);

934

#undef OP

935

} else {

936

const vector signed short vsint16_3 = vec_splat_s16(3);

937

const vector signed short vsint16_2 = vec_splat_s16(2);

938

939

#define OP(i) \

940

const vector signed short v_temp##i = \

941

vec_mladd(v_tempBluredAss##i, \

942

vsint16_3, v_srcAss##i); \

943

const vector signed short v_temp2##i = \

944

vec_add(v_temp##i, vsint16_2); \

945

v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)

946

947

OP(0);

948

OP(1);

949

OP(2);

950

OP(3);

951

OP(4);

952

OP(5);

953

OP(6);

954

OP(7);

955

#undef OP

956

}

957

}

958

959

const vector signed char neg1 = vec_splat_s8(-1);

960

const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,

961

0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

962

963

#define PACK_AND_STORE(src, i) \

964

const vector unsigned char perms##src##i = \

965

vec_lvsr(i * stride, src); \

966

const vector unsigned char vf##src##i = \

967

vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \

968

const vector unsigned char vg##src##i = \

969

vec_perm(vf##src##i, v_##src##A##i, permHH); \

970

const vector unsigned char mask##src##i = \

971

vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \

972

const vector unsigned char vg2##src##i = \

973

vec_perm(vg##src##i, vg##src##i, perms##src##i); \

974

const vector unsigned char svA##src##i = \

975

vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \

976

const vector unsigned char svB##src##i = \

977

vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \

978

vec_st(svA##src##i, i * stride, src); \

979

vec_st(svB##src##i, i * stride + 16, src)

980

981

PACK_AND_STORE(src, 0);

982

PACK_AND_STORE(src, 1);

983

PACK_AND_STORE(src, 2);

984

PACK_AND_STORE(src, 3);

985

PACK_AND_STORE(src, 4);

986

PACK_AND_STORE(src, 5);

987

PACK_AND_STORE(src, 6);

988

PACK_AND_STORE(src, 7);

989

PACK_AND_STORE(tempBlured, 0);

990

PACK_AND_STORE(tempBlured, 1);

991

PACK_AND_STORE(tempBlured, 2);

992

PACK_AND_STORE(tempBlured, 3);

993

PACK_AND_STORE(tempBlured, 4);

994

PACK_AND_STORE(tempBlured, 5);

995

PACK_AND_STORE(tempBlured, 6);

996

PACK_AND_STORE(tempBlured, 7);

997

#undef PACK_AND_STORE

998

}

999

1000

static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {

1001

const vector unsigned char zero = vec_splat_u8(0);

1002

1003

#define LOAD_DOUBLE_LINE(i, j) \

1004

vector unsigned char perm1##i = vec_lvsl(i * stride, src); \

1005

vector unsigned char perm2##i = vec_lvsl(j * stride, src); \

1006

vector unsigned char srcA##i = vec_ld(i * stride, src); \

1007

vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \

1008

vector unsigned char srcC##i = vec_ld(j * stride, src); \

1009

vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \

1010

vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \

1011

vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)

1012

1013

LOAD_DOUBLE_LINE(0, 1);

1014

LOAD_DOUBLE_LINE(2, 3);

1015

LOAD_DOUBLE_LINE(4, 5);

1016

LOAD_DOUBLE_LINE(6, 7);

1017

#undef LOAD_DOUBLE_LINE

1018

1019

vector unsigned char tempA = vec_mergeh(src0, zero);

1020

vector unsigned char tempB = vec_mergel(src0, zero);

1021

vector unsigned char tempC = vec_mergeh(src1, zero);

1022

vector unsigned char tempD = vec_mergel(src1, zero);

1023

vector unsigned char tempE = vec_mergeh(src2, zero);

1024

vector unsigned char tempF = vec_mergel(src2, zero);

1025

vector unsigned char tempG = vec_mergeh(src3, zero);

1026

vector unsigned char tempH = vec_mergel(src3, zero);

1027

vector unsigned char tempI = vec_mergeh(src4, zero);

1028

vector unsigned char tempJ = vec_mergel(src4, zero);

1029

vector unsigned char tempK = vec_mergeh(src5, zero);

1030

vector unsigned char tempL = vec_mergel(src5, zero);

1031

vector unsigned char tempM = vec_mergeh(src6, zero);

1032

vector unsigned char tempN = vec_mergel(src6, zero);

1033

vector unsigned char tempO = vec_mergeh(src7, zero);

1034

vector unsigned char tempP = vec_mergel(src7, zero);

1035

1036

vector unsigned char temp0 = vec_mergeh(tempA, tempI);

1037

vector unsigned char temp1 = vec_mergel(tempA, tempI);

1038

vector unsigned char temp2 = vec_mergeh(tempB, tempJ);

1039

vector unsigned char temp3 = vec_mergel(tempB, tempJ);

1040

vector unsigned char temp4 = vec_mergeh(tempC, tempK);

1041

vector unsigned char temp5 = vec_mergel(tempC, tempK);

1042

vector unsigned char temp6 = vec_mergeh(tempD, tempL);

1043

vector unsigned char temp7 = vec_mergel(tempD, tempL);

1044

vector unsigned char temp8 = vec_mergeh(tempE, tempM);

1045

vector unsigned char temp9 = vec_mergel(tempE, tempM);

1046

vector unsigned char temp10 = vec_mergeh(tempF, tempN);

1047

vector unsigned char temp11 = vec_mergel(tempF, tempN);

1048

vector unsigned char temp12 = vec_mergeh(tempG, tempO);

1049

vector unsigned char temp13 = vec_mergel(tempG, tempO);

1050

vector unsigned char temp14 = vec_mergeh(tempH, tempP);

1051

vector unsigned char temp15 = vec_mergel(tempH, tempP);

1052

1053

tempA = vec_mergeh(temp0, temp8);

1054

tempB = vec_mergel(temp0, temp8);

1055

tempC = vec_mergeh(temp1, temp9);

1056

tempD = vec_mergel(temp1, temp9);

1057

tempE = vec_mergeh(temp2, temp10);

1058

tempF = vec_mergel(temp2, temp10);

1059

tempG = vec_mergeh(temp3, temp11);

1060

tempH = vec_mergel(temp3, temp11);

1061

tempI = vec_mergeh(temp4, temp12);

1062

tempJ = vec_mergel(temp4, temp12);

1063

tempK = vec_mergeh(temp5, temp13);

1064

tempL = vec_mergel(temp5, temp13);

1065

tempM = vec_mergeh(temp6, temp14);

1066

tempN = vec_mergel(temp6, temp14);

1067

tempO = vec_mergeh(temp7, temp15);

1068

tempP = vec_mergel(temp7, temp15);

1069

1070

temp0 = vec_mergeh(tempA, tempI);

1071

temp1 = vec_mergel(tempA, tempI);

1072

temp2 = vec_mergeh(tempB, tempJ);

1073

temp3 = vec_mergel(tempB, tempJ);

1074

temp4 = vec_mergeh(tempC, tempK);

1075

temp5 = vec_mergel(tempC, tempK);

1076

temp6 = vec_mergeh(tempD, tempL);

1077

temp7 = vec_mergel(tempD, tempL);

1078

temp8 = vec_mergeh(tempE, tempM);

1079

temp9 = vec_mergel(tempE, tempM);

1080

temp10 = vec_mergeh(tempF, tempN);

1081

temp11 = vec_mergel(tempF, tempN);

1082

temp12 = vec_mergeh(tempG, tempO);

1083

temp13 = vec_mergel(tempG, tempO);

1084

temp14 = vec_mergeh(tempH, tempP);

1085

temp15 = vec_mergel(tempH, tempP);

1086

1087

vec_st(temp0, 0, dst);

1088

vec_st(temp1, 16, dst);

1089

vec_st(temp2, 32, dst);

1090

vec_st(temp3, 48, dst);

1091

vec_st(temp4, 64, dst);

1092

vec_st(temp5, 80, dst);

1093

vec_st(temp6, 96, dst);

1094

vec_st(temp7, 112, dst);

1095

vec_st(temp8, 128, dst);

1096

vec_st(temp9, 144, dst);

1097

vec_st(temp10, 160, dst);

1098

vec_st(temp11, 176, dst);

1099

vec_st(temp12, 192, dst);

1100

vec_st(temp13, 208, dst);

1101

vec_st(temp14, 224, dst);

1102

vec_st(temp15, 240, dst);

1103

}

1104

1105

static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {

1106

const vector unsigned char zero = vec_splat_u8(0);

1107

1108

#define LOAD_DOUBLE_LINE(i, j) \

1109

vector unsigned char src##i = vec_ld(i * 16, src); \

1110

vector unsigned char src##j = vec_ld(j * 16, src)

1111

1112

LOAD_DOUBLE_LINE(0, 1);

1113

LOAD_DOUBLE_LINE(2, 3);

1114

LOAD_DOUBLE_LINE(4, 5);

1115

LOAD_DOUBLE_LINE(6, 7);

1116

LOAD_DOUBLE_LINE(8, 9);

1117

LOAD_DOUBLE_LINE(10, 11);

1118

LOAD_DOUBLE_LINE(12, 13);

1119

LOAD_DOUBLE_LINE(14, 15);

1120

#undef LOAD_DOUBLE_LINE

1121

1122

vector unsigned char tempA = vec_mergeh(src0, src8);

1123

vector unsigned char tempB;

1124

vector unsigned char tempC = vec_mergeh(src1, src9);

1125

vector unsigned char tempD;

1126

vector unsigned char tempE = vec_mergeh(src2, src10);

1127

vector unsigned char tempG = vec_mergeh(src3, src11);

1128

vector unsigned char tempI = vec_mergeh(src4, src12);

1129

vector unsigned char tempJ;

1130

vector unsigned char tempK = vec_mergeh(src5, src13);

1131

vector unsigned char tempL;

1132

vector unsigned char tempM = vec_mergeh(src6, src14);

1133

vector unsigned char tempO = vec_mergeh(src7, src15);

1134

1135

vector unsigned char temp0 = vec_mergeh(tempA, tempI);

1136

vector unsigned char temp1 = vec_mergel(tempA, tempI);

1137

vector unsigned char temp2;

1138

vector unsigned char temp3;

1139

vector unsigned char temp4 = vec_mergeh(tempC, tempK);

1140

vector unsigned char temp5 = vec_mergel(tempC, tempK);

1141

vector unsigned char temp6;

1142

vector unsigned char temp7;

1143

vector unsigned char temp8 = vec_mergeh(tempE, tempM);

1144

vector unsigned char temp9 = vec_mergel(tempE, tempM);

1145

vector unsigned char temp12 = vec_mergeh(tempG, tempO);

1146

vector unsigned char temp13 = vec_mergel(tempG, tempO);

1147

1148

tempA = vec_mergeh(temp0, temp8);

1149

tempB = vec_mergel(temp0, temp8);

1150

tempC = vec_mergeh(temp1, temp9);

1151

tempD = vec_mergel(temp1, temp9);

1152

tempI = vec_mergeh(temp4, temp12);

1153

tempJ = vec_mergel(temp4, temp12);

1154

tempK = vec_mergeh(temp5, temp13);

1155

tempL = vec_mergel(temp5, temp13);

1156

1157

temp0 = vec_mergeh(tempA, tempI);

1158

temp1 = vec_mergel(tempA, tempI);

1159

temp2 = vec_mergeh(tempB, tempJ);

1160

temp3 = vec_mergel(tempB, tempJ);

1161

temp4 = vec_mergeh(tempC, tempK);

1162

temp5 = vec_mergel(tempC, tempK);

1163

temp6 = vec_mergeh(tempD, tempL);

1164

temp7 = vec_mergel(tempD, tempL);

1165

1166

1167

const vector signed char neg1 = vec_splat_s8(-1);

1168

#define STORE_DOUBLE_LINE(i, j) \

1169

vector unsigned char dstA##i = vec_ld(i * stride, dst); \

1170

vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \

1171

vector unsigned char dstA##j = vec_ld(j * stride, dst); \

1172

vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \

1173

vector unsigned char align##i = vec_lvsr(i * stride, dst); \

1174

vector unsigned char align##j = vec_lvsr(j * stride, dst); \

1175

vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \

1176

vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \

1177

vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \

1178

vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \

1179

vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \

1180

vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \

1181

vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \

1182

vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \

1183

vec_st(dstAF##i, i * stride, dst); \

1184

vec_st(dstBF##i, i * stride + 16, dst); \

1185

vec_st(dstAF##j, j * stride, dst); \

1186

vec_st(dstBF##j, j * stride + 16, dst)

1187

1188

STORE_DOUBLE_LINE(0,1);

1189

STORE_DOUBLE_LINE(2,3);

1190

STORE_DOUBLE_LINE(4,5);

1191

STORE_DOUBLE_LINE(6,7);

1192

}

Older »