~ubuntu-branches/ubuntu/vivid/rawstudio/vivid

Viewing changes to plugins/denoise/complexfilter-x86.cpp

Committer: Bazaar Package Importer
Author(s): Bernd Zeimetz
Date: 2011-07-28 17:36:32 UTC
mfrom: (2.1.11 upstream)
Revision ID: james.westby@ubuntu.com-20110728173632-5czluz9ye3c83zc5

Tags: 2.0-1

* [3750b2cf] Merge commit 'upstream/2.0'
* [63637468] Removing Patch, not necessary anymore.
* [2fb580dc] Add new build-dependencies.
* [c57d953b] Run dh_autoreconf due to patches in configure.in
* [13febe39] Add patch to remove the libssl requirement.
* [5ae773fe] Replace libjpeg62-dev by libjpeg8-dev :)
* [1969d755] Don't build static libraries.
* [7cfe0a2e] Add a patch to fix the plugin directory path.
  As plugins are shared libraries, they need to go into /usr/lib,
  not into /usr/share.
  Thanks to Andrew McMillan
* [c1d0d9dd] Don't install .la files for all plugins and libraries.

files added:
.version

config.guess

config.sub

debian/patches/0001-Remove-libssl-requirement-from-configure.patch

debian/patches/0002-Fix-the-plugin-directory-path.patch

gettext.h

librawstudio

librawstudio/Makefile.am

librawstudio/Makefile.in

librawstudio/conf_interface.c

librawstudio/conf_interface.h

librawstudio/lens_fix.xml

librawstudio/rawstudio-2.0.pc.in

librawstudio/rawstudio.h

librawstudio/rs-1d-function.c

librawstudio/rs-1d-function.h

librawstudio/rs-color-space-icc.c

librawstudio/rs-color-space-icc.h

librawstudio/rs-color-space-selector.c

librawstudio/rs-color-space-selector.h

librawstudio/rs-color-space.c

librawstudio/rs-color-space.h

librawstudio/rs-color.c

librawstudio/rs-color.h

librawstudio/rs-curve.c

librawstudio/rs-curve.h

librawstudio/rs-dcp-file.c

librawstudio/rs-dcp-file.h

librawstudio/rs-debug.c

librawstudio/rs-debug.h

librawstudio/rs-exif.cc

librawstudio/rs-exif.h

librawstudio/rs-filetypes.c

librawstudio/rs-filetypes.h

librawstudio/rs-filter-param.c

librawstudio/rs-filter-param.h

librawstudio/rs-filter-request.c

librawstudio/rs-filter-request.h

librawstudio/rs-filter-response.c

librawstudio/rs-filter-response.h

librawstudio/rs-filter.c

librawstudio/rs-filter.h

librawstudio/rs-gui-functions.c

librawstudio/rs-gui-functions.h

librawstudio/rs-huesat-map.c

librawstudio/rs-huesat-map.h

librawstudio/rs-icc-profile.c

librawstudio/rs-icc-profile.h

librawstudio/rs-image.c

librawstudio/rs-image.h

librawstudio/rs-image16.c

librawstudio/rs-image16.h

librawstudio/rs-io-job-checksum.c

librawstudio/rs-io-job-checksum.h

librawstudio/rs-io-job-metadata.c

librawstudio/rs-io-job-metadata.h

librawstudio/rs-io-job-prefetch.c

librawstudio/rs-io-job-prefetch.h

librawstudio/rs-io-job-tagging.c

librawstudio/rs-io-job-tagging.h

librawstudio/rs-io-job.c

librawstudio/rs-io-job.h

librawstudio/rs-io.c

librawstudio/rs-io.h

librawstudio/rs-job-queue.c

librawstudio/rs-job-queue.h

librawstudio/rs-lens-db-editor.c

librawstudio/rs-lens-db-editor.h

librawstudio/rs-lens-db.c

librawstudio/rs-lens-db.h

librawstudio/rs-lens-fix.c

librawstudio/rs-lens-fix.h

librawstudio/rs-lens.c

librawstudio/rs-lens.h

librawstudio/rs-library.c

librawstudio/rs-library.h

librawstudio/rs-macros.h

librawstudio/rs-math.c

librawstudio/rs-math.h

librawstudio/rs-metadata.c

librawstudio/rs-metadata.h

librawstudio/rs-output.c

librawstudio/rs-output.h

librawstudio/rs-plugin-manager.c

librawstudio/rs-plugin-manager.h

librawstudio/rs-plugin.c

librawstudio/rs-plugin.h

librawstudio/rs-profile-camera.c

librawstudio/rs-profile-camera.h

librawstudio/rs-profile-factory-model.h

librawstudio/rs-profile-factory.c

librawstudio/rs-profile-factory.h

librawstudio/rs-profile-selector.c

librawstudio/rs-profile-selector.h

librawstudio/rs-rawfile.c

librawstudio/rs-rawfile.h

librawstudio/rs-settings.c

librawstudio/rs-settings.h

librawstudio/rs-spline.c

librawstudio/rs-spline.h

librawstudio/rs-stock.c

librawstudio/rs-stock.h

librawstudio/rs-tiff-ifd-entry.c

librawstudio/rs-tiff-ifd-entry.h

librawstudio/rs-tiff-ifd.c

librawstudio/rs-tiff-ifd.h

librawstudio/rs-tiff.c

librawstudio/rs-tiff.h

librawstudio/rs-types.h

librawstudio/rs-utils.c

librawstudio/rs-utils.h

librawstudio/x86-cpu.h

ltmain.sh

pixmaps/camera-photo.png

pixmaps/cursor-color-picker.png

pixmaps/cursor-crop.png

pixmaps/cursor-rotate.png

pixmaps/tool-color-picker.png

pixmaps/tool-crop.png

pixmaps/tool-rotate.png

plugins

plugins/Makefile.am

plugins/Makefile.in

plugins/cache

plugins/cache/Makefile.am

plugins/cache/Makefile.in

plugins/cache/cache.c

plugins/colorspace-adobergb

plugins/colorspace-adobergb/Makefile.am

plugins/colorspace-adobergb/Makefile.in

plugins/colorspace-adobergb/colorspace-adobergb.c

plugins/colorspace-prophoto

plugins/colorspace-prophoto/Makefile.am

plugins/colorspace-prophoto/Makefile.in

plugins/colorspace-prophoto/colorspace-prophoto.c

plugins/colorspace-srgb

plugins/colorspace-srgb/Makefile.am

plugins/colorspace-srgb/Makefile.in

plugins/colorspace-srgb/colorspace-srgb.c

plugins/colorspace-transform

plugins/colorspace-transform/Makefile.am

plugins/colorspace-transform/Makefile.in

plugins/colorspace-transform/colorspace_transform.c

plugins/colorspace-transform/colorspace_transform.h

plugins/colorspace-transform/colorspace_transform_sse2.c

plugins/colorspace-transform/rs-cmm.c

plugins/colorspace-transform/rs-cmm.h

plugins/crop

plugins/crop/Makefile.am

plugins/crop/Makefile.in

plugins/crop/crop.c

plugins/dcp

plugins/dcp/Makefile.am

plugins/dcp/Makefile.in

plugins/dcp/adobe-camera-raw-tone.c

plugins/dcp/adobe-camera-raw-tone.h

plugins/dcp/dcp-sse2.c

plugins/dcp/dcp-sse4.c

plugins/dcp/dcp.c

plugins/dcp/dcp.h

plugins/demosaic

plugins/demosaic/Makefile.am

plugins/demosaic/Makefile.in

plugins/demosaic/demosaic.c

plugins/denoise

plugins/denoise/Makefile.am

plugins/denoise/Makefile.in

plugins/denoise/complexblock.cpp

plugins/denoise/complexblock.h

plugins/denoise/complexfilter-x86.cpp

plugins/denoise/complexfilter.cpp

plugins/denoise/complexfilter.h

plugins/denoise/denoise.c

plugins/denoise/denoiseinterface.h

plugins/denoise/denoisethread.cpp

plugins/denoise/denoisethread.h

plugins/denoise/fftdenoiser.cpp

plugins/denoise/fftdenoiser.h

plugins/denoise/fftdenoiseryuv.cpp

plugins/denoise/fftdenoiseryuv.h

plugins/denoise/fftwindow.cpp

plugins/denoise/fftwindow.h

plugins/denoise/floatimageplane.cpp

plugins/denoise/floatimageplane.h

plugins/denoise/floatplanarimage-x86.cpp

plugins/denoise/floatplanarimage.cpp

plugins/denoise/floatplanarimage.h

plugins/denoise/jobqueue.cpp

plugins/denoise/jobqueue.h

plugins/denoise/planarimageslice.cpp

plugins/denoise/planarimageslice.h

plugins/exposure-mask

plugins/exposure-mask/Makefile.am

plugins/exposure-mask/Makefile.in

plugins/exposure-mask/exposure-mask.c

plugins/fuji-rotate

plugins/fuji-rotate/Makefile.am

plugins/fuji-rotate/Makefile.in

plugins/fuji-rotate/fuji-rotate.c

plugins/input-file

plugins/input-file/Makefile.am

plugins/input-file/Makefile.in

plugins/input-file/input-file.c

plugins/input-image16

plugins/input-image16/Makefile.am

plugins/input-image16/Makefile.in

plugins/input-image16/input-image16.c

plugins/lensfun

plugins/lensfun/Makefile.am

plugins/lensfun/Makefile.in

plugins/lensfun/lensfun-sse2.c

plugins/lensfun/lensfun-version.c

plugins/lensfun/lensfun-version.h

plugins/lensfun/lensfun.c

plugins/load-dcraw

plugins/load-dcraw/Makefile.am

plugins/load-dcraw/Makefile.in

plugins/load-dcraw/dcraw.cc

plugins/load-dcraw/dcraw.h

plugins/load-dcraw/dcraw_api.cc

plugins/load-dcraw/dcraw_api.h

plugins/load-dcraw/dcrawloader.c

plugins/load-dcraw/mmap-hack.c

plugins/load-dcraw/mmap-hack.h

plugins/load-gdk

plugins/load-gdk/Makefile.am

plugins/load-gdk/Makefile.in

plugins/load-gdk/exiv2-colorspace.cpp

plugins/load-gdk/exiv2-colorspace.h

plugins/load-gdk/load-gdk.c

plugins/load-rawspeed

plugins/load-rawspeed/Makefile.am

plugins/load-rawspeed/Makefile.in

plugins/load-rawspeed/data

plugins/load-rawspeed/data/cameras.xml

plugins/load-rawspeed/rawspeed

plugins/load-rawspeed/rawspeed/ArwDecoder.cpp

plugins/load-rawspeed/rawspeed/ArwDecoder.h

plugins/load-rawspeed/rawspeed/BitPumpJPEG.cpp

plugins/load-rawspeed/rawspeed/BitPumpJPEG.h

plugins/load-rawspeed/rawspeed/BitPumpMSB.cpp

plugins/load-rawspeed/rawspeed/BitPumpMSB.h

plugins/load-rawspeed/rawspeed/BitPumpMSB32.cpp

plugins/load-rawspeed/rawspeed/BitPumpMSB32.h

plugins/load-rawspeed/rawspeed/BitPumpPlain.cpp

plugins/load-rawspeed/rawspeed/BitPumpPlain.h

plugins/load-rawspeed/rawspeed/BlackArea.cpp

plugins/load-rawspeed/rawspeed/BlackArea.h

plugins/load-rawspeed/rawspeed/ByteStream.cpp

plugins/load-rawspeed/rawspeed/ByteStream.h

plugins/load-rawspeed/rawspeed/ByteStreamSwap.cpp

plugins/load-rawspeed/rawspeed/ByteStreamSwap.h

plugins/load-rawspeed/rawspeed/Camera.cpp

plugins/load-rawspeed/rawspeed/Camera.h

plugins/load-rawspeed/rawspeed/CameraMetaData.cpp

plugins/load-rawspeed/rawspeed/CameraMetaData.h

plugins/load-rawspeed/rawspeed/CameraMetadataException.cpp

plugins/load-rawspeed/rawspeed/CameraMetadataException.h

plugins/load-rawspeed/rawspeed/ColorFilterArray.cpp

plugins/load-rawspeed/rawspeed/ColorFilterArray.h

plugins/load-rawspeed/rawspeed/Common.cpp

plugins/load-rawspeed/rawspeed/Common.h

plugins/load-rawspeed/rawspeed/Cr2Decoder.cpp

plugins/load-rawspeed/rawspeed/Cr2Decoder.h

plugins/load-rawspeed/rawspeed/DngDecoder.cpp

plugins/load-rawspeed/rawspeed/DngDecoder.h

plugins/load-rawspeed/rawspeed/DngDecoderSlices.cpp

plugins/load-rawspeed/rawspeed/DngDecoderSlices.h

plugins/load-rawspeed/rawspeed/FileIOException.cpp

plugins/load-rawspeed/rawspeed/FileIOException.h

plugins/load-rawspeed/rawspeed/FileMap.cpp

plugins/load-rawspeed/rawspeed/FileMap.h

plugins/load-rawspeed/rawspeed/FileReader.cpp

plugins/load-rawspeed/rawspeed/FileReader.h

plugins/load-rawspeed/rawspeed/IOException.cpp

plugins/load-rawspeed/rawspeed/IOException.h

plugins/load-rawspeed/rawspeed/LJpegDecompressor.cpp

plugins/load-rawspeed/rawspeed/LJpegDecompressor.h

plugins/load-rawspeed/rawspeed/LJpegPlain.cpp

plugins/load-rawspeed/rawspeed/LJpegPlain.h

plugins/load-rawspeed/rawspeed/NefDecoder.cpp

plugins/load-rawspeed/rawspeed/NefDecoder.h

plugins/load-rawspeed/rawspeed/NikonDecompressor.cpp

plugins/load-rawspeed/rawspeed/NikonDecompressor.h

plugins/load-rawspeed/rawspeed/OrfDecoder.cpp

plugins/load-rawspeed/rawspeed/OrfDecoder.h

plugins/load-rawspeed/rawspeed/PefDecoder.cpp

plugins/load-rawspeed/rawspeed/PefDecoder.h

plugins/load-rawspeed/rawspeed/PentaxDecompressor.cpp

plugins/load-rawspeed/rawspeed/PentaxDecompressor.h

plugins/load-rawspeed/rawspeed/Point.h

plugins/load-rawspeed/rawspeed/RawDecoder.cpp

plugins/load-rawspeed/rawspeed/RawDecoder.h

plugins/load-rawspeed/rawspeed/RawDecoderException.cpp

plugins/load-rawspeed/rawspeed/RawDecoderException.h

plugins/load-rawspeed/rawspeed/RawImage.cpp

plugins/load-rawspeed/rawspeed/RawImage.h

plugins/load-rawspeed/rawspeed/RawImageDataFloat.cpp

plugins/load-rawspeed/rawspeed/RawImageDataU16.cpp

plugins/load-rawspeed/rawspeed/Rw2Decoder.cpp

plugins/load-rawspeed/rawspeed/Rw2Decoder.h

plugins/load-rawspeed/rawspeed/SrwDecoder.cpp

plugins/load-rawspeed/rawspeed/SrwDecoder.h

plugins/load-rawspeed/rawspeed/StdAfx.cpp

plugins/load-rawspeed/rawspeed/StdAfx.h

plugins/load-rawspeed/rawspeed/TiffEntry.cpp

plugins/load-rawspeed/rawspeed/TiffEntry.h

plugins/load-rawspeed/rawspeed/TiffEntryBE.cpp

plugins/load-rawspeed/rawspeed/TiffEntryBE.h

plugins/load-rawspeed/rawspeed/TiffIFD.cpp

plugins/load-rawspeed/rawspeed/TiffIFD.h

plugins/load-rawspeed/rawspeed/TiffIFDBE.cpp

plugins/load-rawspeed/rawspeed/TiffIFDBE.h

plugins/load-rawspeed/rawspeed/TiffParser.cpp

plugins/load-rawspeed/rawspeed/TiffParser.h

plugins/load-rawspeed/rawspeed/TiffParserException.cpp

plugins/load-rawspeed/rawspeed/TiffParserException.h

plugins/load-rawspeed/rawspeed/TiffParserHeaderless.cpp

plugins/load-rawspeed/rawspeed/TiffParserHeaderless.h

plugins/load-rawspeed/rawspeed/TiffParserOlympus.cpp

plugins/load-rawspeed/rawspeed/TiffParserOlympus.h

plugins/load-rawspeed/rawspeed/TiffTag.h

plugins/load-rawspeed/rawstudio-plugin-api.cpp

plugins/load-rawspeed/rawstudio-plugin-api.h

plugins/load-rawspeed/rawstudio-plugin.c

plugins/meta-ciff

plugins/meta-ciff/Makefile.am

plugins/meta-ciff/Makefile.in

plugins/meta-ciff/ciff-meta.c

plugins/meta-exiv2

plugins/meta-exiv2/Makefile.am

plugins/meta-exiv2/Makefile.in

plugins/meta-exiv2/exiv2-meta.c

plugins/meta-exiv2/exiv2-metadata.cpp

plugins/meta-exiv2/exiv2-metadata.h

plugins/meta-mrw

plugins/meta-mrw/Makefile.am

plugins/meta-mrw/Makefile.in

plugins/meta-mrw/mrw-meta.c

plugins/meta-raf

plugins/meta-raf/Makefile.am

plugins/meta-raf/Makefile.in

plugins/meta-raf/raf-meta.c

plugins/meta-tiff

plugins/meta-tiff/Makefile.am

plugins/meta-tiff/Makefile.in

plugins/meta-tiff/tiff-meta.c

plugins/meta-x3f

plugins/meta-x3f/Makefile.am

plugins/meta-x3f/Makefile.in

plugins/meta-x3f/x3f-meta.c

plugins/output-facebook

plugins/output-facebook/Makefile.am

plugins/output-facebook/Makefile.in

plugins/output-facebook/facebook-logo.svg

plugins/output-facebook/output-facebook.c

plugins/output-facebook/output-facebook.h

plugins/output-facebook/rs-facebook-client-param.c

plugins/output-facebook/rs-facebook-client-param.h

plugins/output-facebook/rs-facebook-client.c

plugins/output-facebook/rs-facebook-client.h

plugins/output-flickr

plugins/output-flickr/Makefile.am

plugins/output-flickr/Makefile.in

plugins/output-flickr/flickr-logo.svg

plugins/output-flickr/output-flickr.c

plugins/output-flickr/output-flickr.h

plugins/output-jpegfile

plugins/output-jpegfile/Makefile.am

plugins/output-jpegfile/Makefile.in

plugins/output-jpegfile/output-jpegfile.c

plugins/output-picasa

plugins/output-picasa/Makefile.am

plugins/output-picasa/Makefile.in

plugins/output-picasa/output-picasa.c

plugins/output-picasa/picasa-logo.svg

plugins/output-picasa/rs-picasa-client.c

plugins/output-picasa/rs-picasa-client.h

plugins/output-pngfile

plugins/output-pngfile/Makefile.am

plugins/output-pngfile/Makefile.in

plugins/output-pngfile/output-pngfile.c

plugins/output-tifffile

plugins/output-tifffile/Makefile.am

plugins/output-tifffile/Makefile.in

plugins/output-tifffile/output-tifffile.c

plugins/resample

plugins/resample/Makefile.am

plugins/resample/Makefile.in

plugins/resample/resample-sse2.c

plugins/resample/resample.c

plugins/rotate

plugins/rotate/Makefile.am

plugins/rotate/Makefile.in

plugins/rotate/rotate.c

po/LINGUAS

po/hu.gmo

po/hu.po

po/ja.gmo

po/ja.po

profiles

profiles/AGFAPHOTO_DC-833m-simple.dcp

profiles/Apple_QuickTake-simple.dcp

profiles/CASIO_EX-S20-simple.dcp

profiles/CASIO_EX-Z750-simple.dcp

profiles/CINE-simple.dcp

profiles/CINE_650-simple.dcp

profiles/CINE_660-simple.dcp

profiles/Canon_EOS-1D-simple.dcp

profiles/Canon_EOS-1DS-simple.dcp

profiles/Canon_EOS-1D_Mark_II-simple.dcp

profiles/Canon_EOS-1D_Mark_III-simple.dcp

profiles/Canon_EOS-1D_Mark_II_N-simple.dcp

profiles/Canon_EOS-1D_Mark_IV-simple.dcp

profiles/Canon_EOS-1Ds_Mark_II-simple.dcp

profiles/Canon_EOS-1Ds_Mark_III-simple.dcp

profiles/Canon_EOS-simple.dcp

profiles/Canon_EOS_1000D-simple.dcp

profiles/Canon_EOS_10D-simple.dcp

profiles/Canon_EOS_1100D-simple.dcp

profiles/Canon_EOS_20D-simple.dcp

profiles/Canon_EOS_20Da-simple.dcp

profiles/Canon_EOS_300D-simple.dcp

profiles/Canon_EOS_30D-simple.dcp

profiles/Canon_EOS_350D-simple.dcp

profiles/Canon_EOS_400D-simple.dcp

profiles/Canon_EOS_40D-simple.dcp

profiles/Canon_EOS_450D-simple.dcp

profiles/Canon_EOS_500D-simple.dcp

profiles/Canon_EOS_50D-simple.dcp

profiles/Canon_EOS_550D-simple.dcp

profiles/Canon_EOS_5D-simple.dcp

profiles/Canon_EOS_5D_Mark_II-simple.dcp

profiles/Canon_EOS_600D-simple.dcp

profiles/Canon_EOS_60D-simple.dcp

profiles/Canon_EOS_7D-simple.dcp

profiles/Canon_EOS_D2000-simple.dcp

profiles/Canon_EOS_D30-simple.dcp

profiles/Canon_EOS_D60-simple.dcp

profiles/Canon_EOS_D6000-simple.dcp

profiles/Canon_PowerShot_A470-simple.dcp

profiles/Canon_PowerShot_A5-simple.dcp

profiles/Canon_PowerShot_A50-simple.dcp

profiles/Canon_PowerShot_A530-simple.dcp

profiles/Canon_PowerShot_A610-simple.dcp

profiles/Canon_PowerShot_A620-simple.dcp

profiles/Canon_PowerShot_A630-simple.dcp

profiles/Canon_PowerShot_A640-simple.dcp

profiles/Canon_PowerShot_A650-simple.dcp

profiles/Canon_PowerShot_A720-simple.dcp

profiles/Canon_PowerShot_G1-simple.dcp

profiles/Canon_PowerShot_G10-simple.dcp

profiles/Canon_PowerShot_G11-simple.dcp

profiles/Canon_PowerShot_G12-simple.dcp

profiles/Canon_PowerShot_G2-simple.dcp

profiles/Canon_PowerShot_G3-simple.dcp

profiles/Canon_PowerShot_G5-simple.dcp

profiles/Canon_PowerShot_G6-simple.dcp

profiles/Canon_PowerShot_G9-simple.dcp

profiles/Canon_PowerShot_Pro1-simple.dcp

profiles/Canon_PowerShot_Pro70-simple.dcp

profiles/Canon_PowerShot_Pro90-simple.dcp

profiles/Canon_PowerShot_S30-simple.dcp

profiles/Canon_PowerShot_S3_IS-simple.dcp

profiles/Canon_PowerShot_S40-simple.dcp

profiles/Canon_PowerShot_S45-simple.dcp

profiles/Canon_PowerShot_S50-simple.dcp

profiles/Canon_PowerShot_S60-simple.dcp

profiles/Canon_PowerShot_S70-simple.dcp

profiles/Canon_PowerShot_S90-simple.dcp

profiles/Canon_PowerShot_S95-simple.dcp

profiles/Canon_PowerShot_SX110_IS-simple.dcp

profiles/Canon_PowerShot_SX1_IS-simple.dcp

profiles/Contax_N_Digital-simple.dcp

profiles/EPSON_R-D1-simple.dcp

profiles/FUJIFILM_FinePix_E550-simple.dcp

profiles/FUJIFILM_FinePix_E900-simple.dcp

profiles/FUJIFILM_FinePix_F7-simple.dcp

profiles/FUJIFILM_FinePix_F8-simple.dcp

profiles/FUJIFILM_FinePix_S100FS-simple.dcp

profiles/FUJIFILM_FinePix_S20Pro-simple.dcp

profiles/FUJIFILM_FinePix_S2Pro-simple.dcp

profiles/FUJIFILM_FinePix_S3Pro-simple.dcp

profiles/FUJIFILM_FinePix_S5000-simple.dcp

profiles/FUJIFILM_FinePix_S5100-simple.dcp

profiles/FUJIFILM_FinePix_S5200-simple.dcp

profiles/FUJIFILM_FinePix_S5500-simple.dcp

profiles/FUJIFILM_FinePix_S5600-simple.dcp

profiles/FUJIFILM_FinePix_S5Pro-simple.dcp

profiles/FUJIFILM_FinePix_S6-simple.dcp

profiles/FUJIFILM_FinePix_S6000fd-simple.dcp

profiles/FUJIFILM_FinePix_S7000-simple.dcp

profiles/FUJIFILM_FinePix_S9000-simple.dcp

profiles/FUJIFILM_FinePix_S9100-simple.dcp

profiles/FUJIFILM_FinePix_S9500-simple.dcp

profiles/FUJIFILM_FinePix_S9600-simple.dcp

profiles/FUJIFILM_IS-1-simple.dcp

profiles/FUJIFILM_IS_Pro-simple.dcp

profiles/Fuji_HS10-simple.dcp

profiles/Imacon_Ixpress-simple.dcp

profiles/KODAK_DCS420-simple.dcp

profiles/KODAK_DCS460-simple.dcp

profiles/KODAK_EASYSHARE_Z1015-simple.dcp

profiles/KODAK_EOSDCS1-simple.dcp

profiles/KODAK_EOSDCS3B-simple.dcp

profiles/KODAK_EasyShare_Z980-simple.dcp

profiles/KODAK_NC2000-simple.dcp

profiles/KODAK_P712-simple.dcp

profiles/KODAK_P850-simple.dcp

profiles/KODAK_P880-simple.dcp

profiles/Kodak_DCS315C-simple.dcp

profiles/Kodak_DCS330C-simple.dcp

profiles/Kodak_DCS520C-simple.dcp

profiles/Kodak_DCS560C-simple.dcp

profiles/Kodak_DCS620C-simple.dcp

profiles/Kodak_DCS620X-simple.dcp

profiles/Kodak_DCS660C-simple.dcp

profiles/Kodak_DCS720X-simple.dcp

profiles/Kodak_DCS760C-simple.dcp

profiles/Kodak_DCS_Pro_14-simple.dcp

profiles/Kodak_DCS_Pro_14nx-simple.dcp

profiles/Kodak_DCS_Pro_SLR-simple.dcp

profiles/Kodak_ProBack-simple.dcp

profiles/Kodak_ProBack645-simple.dcp

profiles/Konica_Minolta_DiMAGE_A2-simple.dcp

profiles/Konica_Minolta_DiMAGE_A200-simple.dcp

profiles/Konica_Minolta_Maxxum_5D-simple.dcp

profiles/Konica_Minolta_Maxxum_7D-simple.dcp

profiles/Leaf-simple.dcp

profiles/Leaf_Aptus_54S-simple.dcp

profiles/Leaf_Aptus_65-simple.dcp

profiles/Leaf_Aptus_75-simple.dcp

profiles/Leaf_CMost-simple.dcp

profiles/Leaf_Valeo_6-simple.dcp

profiles/Leica-M8-simple.dcp

profiles/Leica-S2-simple.dcp

profiles/Leica-X1-native.dcp

profiles/MINOLTA_DYNAX_5-simple.dcp

profiles/MINOLTA_DYNAX_7-simple.dcp

profiles/MOTOROLA_PIXL-simple.dcp

profiles/Makefile.am

profiles/Makefile.in

profiles/Mamiya_ZD-simple.dcp

profiles/Micron_2010-simple.dcp

profiles/Minolta_DiMAGE_5-simple.dcp

profiles/Minolta_DiMAGE_7-simple.dcp

profiles/Minolta_DiMAGE_7Hi-simple.dcp

profiles/Minolta_DiMAGE_7i-simple.dcp

profiles/Minolta_DiMAGE_A1-simple.dcp

profiles/Minolta_DiMAGE_Z2-simple.dcp

profiles/NIKON_COOLPIX_P6000-simple.dcp

profiles/NIKON_COOLPIX_P7000-simple.dcp

profiles/NIKON_D1-simple.dcp

profiles/NIKON_D100-simple.dcp

profiles/NIKON_D1H-simple.dcp

profiles/NIKON_D1X-simple.dcp

profiles/NIKON_D200-simple.dcp

profiles/NIKON_D2H-simple.dcp

profiles/NIKON_D2X-simple.dcp

profiles/NIKON_D3-simple.dcp

profiles/NIKON_D300-simple.dcp

profiles/NIKON_D3000-simple.dcp

profiles/NIKON_D3100-simple.dcp

profiles/NIKON_D3S-simple.dcp

profiles/NIKON_D3X-simple.dcp

profiles/NIKON_D40-simple.dcp

profiles/NIKON_D40X-simple.dcp

profiles/NIKON_D50-simple.dcp

profiles/NIKON_D5000-simple.dcp

profiles/NIKON_D60-simple.dcp

profiles/NIKON_D70-simple.dcp

profiles/NIKON_D700-simple.dcp

profiles/NIKON_D7000-simple.dcp

profiles/NIKON_D70s-simple.dcp

profiles/NIKON_D80-simple.dcp

profiles/NIKON_D90-simple.dcp

profiles/NIKON_E2100-simple.dcp

profiles/NIKON_E2500-simple.dcp

profiles/NIKON_E4300-simple.dcp

profiles/NIKON_E4500-simple.dcp

profiles/NIKON_E5000-simple.dcp

profiles/NIKON_E5400-simple.dcp

profiles/NIKON_E5700-simple.dcp

profiles/NIKON_E8400-simple.dcp

profiles/NIKON_E8700-simple.dcp

profiles/NIKON_E8800-simple.dcp

profiles/NIKON_E950-simple.dcp

profiles/NIKON_E995-simple.dcp

profiles/Neutral-With-Tonecurve-AdobeRGB.dcp

profiles/Neutral-With-Tonecurve-Prophoto.dcp

profiles/Neutral-With-Tonecurve-sRGB.dcp

profiles/OLYMPUS_C5050-simple.dcp

profiles/OLYMPUS_C5060-simple.dcp

profiles/OLYMPUS_C70-simple.dcp

profiles/OLYMPUS_C7070-simple.dcp

profiles/OLYMPUS_C80-simple.dcp

profiles/OLYMPUS_E-1-simple.dcp

profiles/OLYMPUS_E-10-simple.dcp

profiles/OLYMPUS_E-20-simple.dcp

profiles/OLYMPUS_E-3-simple.dcp

profiles/OLYMPUS_E-30-simple.dcp

profiles/OLYMPUS_E-300-simple.dcp

profiles/OLYMPUS_E-330-simple.dcp

profiles/OLYMPUS_E-400-simple.dcp

profiles/OLYMPUS_E-410-simple.dcp

profiles/OLYMPUS_E-420-simple.dcp

profiles/OLYMPUS_E-450-simple.dcp

profiles/OLYMPUS_E-5-simple.dcp

profiles/OLYMPUS_E-500-simple.dcp

profiles/OLYMPUS_E-510-simple.dcp

profiles/OLYMPUS_E-520-simple.dcp

profiles/OLYMPUS_E-620-simple.dcp

profiles/OLYMPUS_E-P1-simple.dcp

profiles/OLYMPUS_E-PL1-simple.dcp

profiles/OLYMPUS_E-PL1s-simple.dcp

profiles/OLYMPUS_E-PL2-simple.dcp

profiles/OLYMPUS_SP3-simple.dcp

profiles/OLYMPUS_SP350-simple.dcp

profiles/OLYMPUS_SP500UZ-simple.dcp

profiles/OLYMPUS_SP510UZ-simple.dcp

profiles/OLYMPUS_SP550UZ-simple.dcp

profiles/OLYMPUS_SP560UZ-simple.dcp

profiles/OLYMPUS_SP570UZ-simple.dcp

profiles/OLYMPUS_XZ-1-simple.dcp

profiles/PENTAX_645D-simple.dcp

profiles/PENTAX_K-5-simple.dcp

profiles/PENTAX_K-7-simple.dcp

profiles/PENTAX_K-m-simple.dcp

profiles/PENTAX_K-r-simple.dcp

profiles/PENTAX_K-x-simple.dcp

profiles/PENTAX_K1-simple.dcp

profiles/PENTAX_K10D-simple.dcp

profiles/PENTAX_K2000-simple.dcp

profiles/PENTAX_K200D-simple.dcp

profiles/PENTAX_K20D-simple.dcp

profiles/Panasonic_DMC-FX150-simple.dcp

profiles/Panasonic_DMC-FZ18-simple.dcp

profiles/Panasonic_DMC-FZ28-simple.dcp

profiles/Panasonic_DMC-FZ30-simple.dcp

profiles/Panasonic_DMC-FZ35-simple.dcp

profiles/Panasonic_DMC-FZ50-simple.dcp

profiles/Panasonic_DMC-FZ8-simple.dcp

profiles/Panasonic_DMC-G1-simple.dcp

profiles/Panasonic_DMC-G10-simple.dcp

profiles/Panasonic_DMC-G2-simple.dcp

profiles/Panasonic_DMC-GF1-simple.dcp

profiles/Panasonic_DMC-GF2-simple.dcp

profiles/Panasonic_DMC-GH1-simple.dcp

profiles/Panasonic_DMC-GH2-simple.dcp

profiles/Panasonic_DMC-L1-simple.dcp

profiles/Panasonic_DMC-L10-simple.dcp

profiles/Panasonic_DMC-LC1-simple.dcp

profiles/Panasonic_DMC-LX1-simple.dcp

profiles/Panasonic_DMC-LX2-simple.dcp

profiles/Panasonic_DMC-LX3-simple.dcp

profiles/Panasonic_FZ100-simple.dcp

profiles/Panasonic_FZ40-simple.dcp

profiles/Panasonic_LX5-simple.dcp

profiles/Phase_One_H_20-simple.dcp

profiles/Phase_One_P65-simple.dcp

profiles/Phase_One_P_2-simple.dcp

profiles/Phase_One_P_30-simple.dcp

profiles/Phase_One_P_45-simple.dcp

profiles/Ricoh_A12_28mm-simple.dcp

profiles/Ricoh_A12_50mm-simple.dcp

profiles/Ricoh_Digital_3-simple.dcp

profiles/Ricoh_GX200-simple.dcp

profiles/Ricoh_S10_24-72mm-simple.dcp

profiles/SAMSUNG_EX1-simple.dcp

profiles/SAMSUNG_GX-1-simple.dcp

profiles/SAMSUNG_GX-1L-simple.dcp

profiles/SAMSUNG_GX-1S-simple.dcp

profiles/SAMSUNG_GX10-simple.dcp

profiles/SAMSUNG_GX20-simple.dcp

profiles/SAMSUNG_NX10-simple.dcp

profiles/SAMSUNG_NX100-simple.dcp

profiles/SAMSUNG_NX11-simple.dcp

profiles/SAMSUNG_NX5-simple.dcp

profiles/SAMSUNG_S85-simple.dcp

profiles/SAMSUNG_WB2000-simple.dcp

profiles/SONY_DSC-F828-simple.dcp

profiles/SONY_DSC-R1-simple.dcp

profiles/SONY_DSC-V3-simple.dcp

profiles/SONY_DSLR-A100-simple.dcp

profiles/SONY_DSLR-A200-simple.dcp

profiles/SONY_DSLR-A230-simple.dcp

profiles/SONY_DSLR-A290-simple.dcp

profiles/SONY_DSLR-A300-simple.dcp

profiles/SONY_DSLR-A330-simple.dcp

profiles/SONY_DSLR-A350-simple.dcp

profiles/SONY_DSLR-A380-simple.dcp

profiles/SONY_DSLR-A390-simple.dcp

profiles/SONY_DSLR-A450-simple.dcp

profiles/SONY_DSLR-A5-simple.dcp

profiles/SONY_DSLR-A500-simple.dcp

profiles/SONY_DSLR-A550-simple.dcp

profiles/SONY_DSLR-A560-simple.dcp

profiles/SONY_DSLR-A580-simple.dcp

profiles/SONY_DSLR-A700-simple.dcp

profiles/SONY_DSLR-A850-simple.dcp

profiles/SONY_DSLR-A900-simple.dcp

profiles/SONY_NEX-3-simple.dcp

profiles/SONY_NEX-5-simple.dcp

profiles/SONY_SLTA33-simple.dcp

profiles/SONY_SLTA55V-simple.dcp

profiles/Sinar-simple.dcp

profiles/compatibleWithAdobeRGB1998-linear.icc

profiles/compatibleWithAdobeRGB1998.icc

profiles/generic_camera_profile.icc

profiles/prophoto-linear.icc

profiles/prophoto.icc

profiles/rawstudio-cameras.xml

profiles/sRGB-linear.icc

profiles/sRGB.icc

profiles/simple-src

profiles/simple-src/AGFAPHOTO_DC-833m-simple.xml

profiles/simple-src/Apple_QuickTake-simple.xml

profiles/simple-src/CASIO_EX-S20-simple.xml

profiles/simple-src/CASIO_EX-Z750-simple.xml

profiles/simple-src/CINE-simple.xml

profiles/simple-src/CINE_650-simple.xml

profiles/simple-src/CINE_660-simple.xml

profiles/simple-src/Canon_EOS-1D-simple.xml

profiles/simple-src/Canon_EOS-1DS-simple.xml

profiles/simple-src/Canon_EOS-1D_Mark_II-simple.xml

profiles/simple-src/Canon_EOS-1D_Mark_III-simple.xml

profiles/simple-src/Canon_EOS-1D_Mark_II_N-simple.xml

profiles/simple-src/Canon_EOS-1D_Mark_IV-simple.xml

profiles/simple-src/Canon_EOS-1Ds_Mark_II-simple.xml

profiles/simple-src/Canon_EOS-1Ds_Mark_III-simple.xml

profiles/simple-src/Canon_EOS-simple.xml

profiles/simple-src/Canon_EOS_1000D-simple.xml

profiles/simple-src/Canon_EOS_10D-simple.xml

profiles/simple-src/Canon_EOS_1100D-simple.xml

profiles/simple-src/Canon_EOS_20D-simple.xml

profiles/simple-src/Canon_EOS_20Da-simple.xml

profiles/simple-src/Canon_EOS_300D-simple.xml

profiles/simple-src/Canon_EOS_30D-simple.xml

profiles/simple-src/Canon_EOS_350D-simple.xml

profiles/simple-src/Canon_EOS_400D-simple.xml

profiles/simple-src/Canon_EOS_40D-simple.xml

profiles/simple-src/Canon_EOS_450D-simple.xml

profiles/simple-src/Canon_EOS_500D-simple.xml

profiles/simple-src/Canon_EOS_50D-simple.xml

profiles/simple-src/Canon_EOS_550D-simple.xml

profiles/simple-src/Canon_EOS_5D-simple.xml

profiles/simple-src/Canon_EOS_5D_Mark_II-simple.xml

profiles/simple-src/Canon_EOS_600D-simple.xml

profiles/simple-src/Canon_EOS_60D-simple.xml

profiles/simple-src/Canon_EOS_7D-simple.xml

profiles/simple-src/Canon_EOS_D2000-simple.xml

profiles/simple-src/Canon_EOS_D30-simple.xml

profiles/simple-src/Canon_EOS_D60-simple.xml

profiles/simple-src/Canon_EOS_D6000-simple.xml

profiles/simple-src/Canon_PowerShot_A470-simple.xml

profiles/simple-src/Canon_PowerShot_A5-simple.xml

profiles/simple-src/Canon_PowerShot_A50-simple.xml

profiles/simple-src/Canon_PowerShot_A530-simple.xml

profiles/simple-src/Canon_PowerShot_A610-simple.xml

profiles/simple-src/Canon_PowerShot_A620-simple.xml

profiles/simple-src/Canon_PowerShot_A630-simple.xml

profiles/simple-src/Canon_PowerShot_A640-simple.xml

profiles/simple-src/Canon_PowerShot_A650-simple.xml

profiles/simple-src/Canon_PowerShot_A720-simple.xml

profiles/simple-src/Canon_PowerShot_G1-simple.xml

profiles/simple-src/Canon_PowerShot_G10-simple.xml

profiles/simple-src/Canon_PowerShot_G11-simple.xml

profiles/simple-src/Canon_PowerShot_G12-simple.xml

profiles/simple-src/Canon_PowerShot_G2-simple.xml

profiles/simple-src/Canon_PowerShot_G3-simple.xml

profiles/simple-src/Canon_PowerShot_G5-simple.xml

profiles/simple-src/Canon_PowerShot_G6-simple.xml

profiles/simple-src/Canon_PowerShot_G9-simple.xml

profiles/simple-src/Canon_PowerShot_Pro1-simple.xml

profiles/simple-src/Canon_PowerShot_Pro70-simple.xml

profiles/simple-src/Canon_PowerShot_Pro90-simple.xml

profiles/simple-src/Canon_PowerShot_S30-simple.xml

profiles/simple-src/Canon_PowerShot_S3_IS-simple.xml

profiles/simple-src/Canon_PowerShot_S40-simple.xml

profiles/simple-src/Canon_PowerShot_S45-simple.xml

profiles/simple-src/Canon_PowerShot_S50-simple.xml

profiles/simple-src/Canon_PowerShot_S60-simple.xml

profiles/simple-src/Canon_PowerShot_S70-simple.xml

profiles/simple-src/Canon_PowerShot_S90-simple.xml

profiles/simple-src/Canon_PowerShot_S95-simple.xml

profiles/simple-src/Canon_PowerShot_SX110_IS-simple.xml

profiles/simple-src/Canon_PowerShot_SX1_IS-simple.xml

profiles/simple-src/Contax_N_Digital-simple.xml

profiles/simple-src/EPSON_R-D1-simple.xml

profiles/simple-src/FUJIFILM_FinePix_E550-simple.xml

profiles/simple-src/FUJIFILM_FinePix_E900-simple.xml

profiles/simple-src/FUJIFILM_FinePix_F7-simple.xml

profiles/simple-src/FUJIFILM_FinePix_F8-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S100FS-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S20Pro-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S2Pro-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S3Pro-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5000-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5100-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5200-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5500-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5600-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S5Pro-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S6-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S6000fd-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S7000-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S9000-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S9100-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S9500-simple.xml

profiles/simple-src/FUJIFILM_FinePix_S9600-simple.xml

profiles/simple-src/FUJIFILM_IS-1-simple.xml

profiles/simple-src/FUJIFILM_IS_Pro-simple.xml

profiles/simple-src/Fuji_HS10-simple.xml

profiles/simple-src/Imacon_Ixpress-simple.xml

profiles/simple-src/KODAK_DCS420-simple.xml

profiles/simple-src/KODAK_DCS460-simple.xml

profiles/simple-src/KODAK_EASYSHARE_Z1015-simple.xml

profiles/simple-src/KODAK_EOSDCS1-simple.xml

profiles/simple-src/KODAK_EOSDCS3B-simple.xml

profiles/simple-src/KODAK_EasyShare_Z980-simple.xml

profiles/simple-src/KODAK_NC2000-simple.xml

profiles/simple-src/KODAK_P712-simple.xml

profiles/simple-src/KODAK_P850-simple.xml

profiles/simple-src/KODAK_P880-simple.xml

profiles/simple-src/Kodak_DCS315C-simple.xml

profiles/simple-src/Kodak_DCS330C-simple.xml

profiles/simple-src/Kodak_DCS520C-simple.xml

profiles/simple-src/Kodak_DCS560C-simple.xml

profiles/simple-src/Kodak_DCS620C-simple.xml

profiles/simple-src/Kodak_DCS620X-simple.xml

profiles/simple-src/Kodak_DCS660C-simple.xml

profiles/simple-src/Kodak_DCS720X-simple.xml

profiles/simple-src/Kodak_DCS760C-simple.xml

profiles/simple-src/Kodak_DCS_Pro_14-simple.xml

profiles/simple-src/Kodak_DCS_Pro_14nx-simple.xml

profiles/simple-src/Kodak_DCS_Pro_SLR-simple.xml

profiles/simple-src/Kodak_ProBack-simple.xml

profiles/simple-src/Kodak_ProBack645-simple.xml

profiles/simple-src/Konica_Minolta_DiMAGE_A2-simple.xml

profiles/simple-src/Konica_Minolta_DiMAGE_A200-simple.xml

profiles/simple-src/Konica_Minolta_Maxxum_5D-simple.xml

profiles/simple-src/Konica_Minolta_Maxxum_7D-simple.xml

profiles/simple-src/Leaf-simple.xml

profiles/simple-src/Leaf_Aptus_54S-simple.xml

profiles/simple-src/Leaf_Aptus_65-simple.xml

profiles/simple-src/Leaf_Aptus_75-simple.xml

profiles/simple-src/Leaf_CMost-simple.xml

profiles/simple-src/Leaf_Valeo_6-simple.xml

profiles/simple-src/Leica-M8-simple.xml

profiles/simple-src/Leica-S2-simple.xml

profiles/simple-src/Leica-X1-native.xml

profiles/simple-src/MINOLTA_DYNAX_5-simple.xml

profiles/simple-src/MINOLTA_DYNAX_7-simple.xml

profiles/simple-src/MOTOROLA_PIXL-simple.xml

profiles/simple-src/Mamiya_ZD-simple.xml

profiles/simple-src/Micron_2010-simple.xml

profiles/simple-src/Minolta_DiMAGE_5-simple.xml

profiles/simple-src/Minolta_DiMAGE_7-simple.xml

profiles/simple-src/Minolta_DiMAGE_7Hi-simple.xml

profiles/simple-src/Minolta_DiMAGE_7i-simple.xml

profiles/simple-src/Minolta_DiMAGE_A1-simple.xml

profiles/simple-src/Minolta_DiMAGE_Z2-simple.xml

profiles/simple-src/NIKON_COOLPIX_P6000-simple.xml

profiles/simple-src/NIKON_COOLPIX_P7000-simple.xml

profiles/simple-src/NIKON_D1-simple.xml

profiles/simple-src/NIKON_D100-simple.xml

profiles/simple-src/NIKON_D1H-simple.xml

profiles/simple-src/NIKON_D1X-simple.xml

profiles/simple-src/NIKON_D200-simple.xml

profiles/simple-src/NIKON_D2H-simple.xml

profiles/simple-src/NIKON_D2X-simple.xml

profiles/simple-src/NIKON_D3-simple.xml

profiles/simple-src/NIKON_D300-simple.xml

profiles/simple-src/NIKON_D3000-simple.xml

profiles/simple-src/NIKON_D3100-simple.xml

profiles/simple-src/NIKON_D3S-simple.xml

profiles/simple-src/NIKON_D3X-simple.xml

profiles/simple-src/NIKON_D40-simple.xml

profiles/simple-src/NIKON_D40X-simple.xml

profiles/simple-src/NIKON_D50-simple.xml

profiles/simple-src/NIKON_D5000-simple.xml

profiles/simple-src/NIKON_D60-simple.xml

profiles/simple-src/NIKON_D70-simple.xml

profiles/simple-src/NIKON_D700-simple.xml

profiles/simple-src/NIKON_D7000-simple.xml

profiles/simple-src/NIKON_D70s-simple.xml

profiles/simple-src/NIKON_D80-simple.xml

profiles/simple-src/NIKON_D90-simple.xml

profiles/simple-src/NIKON_E2100-simple.xml

profiles/simple-src/NIKON_E2500-simple.xml

profiles/simple-src/NIKON_E4300-simple.xml

profiles/simple-src/NIKON_E4500-simple.xml

profiles/simple-src/NIKON_E5000-simple.xml

profiles/simple-src/NIKON_E5400-simple.xml

profiles/simple-src/NIKON_E5700-simple.xml

profiles/simple-src/NIKON_E8400-simple.xml

profiles/simple-src/NIKON_E8700-simple.xml

profiles/simple-src/NIKON_E8800-simple.xml

profiles/simple-src/NIKON_E950-simple.xml

profiles/simple-src/NIKON_E995-simple.xml

profiles/simple-src/Neutral-With-Tonecurve-AdobeRGB.xml

profiles/simple-src/Neutral-With-Tonecurve-Prophoto.xml

profiles/simple-src/Neutral-With-Tonecurve-sRGB.xml

profiles/simple-src/OLYMPUS_C5050-simple.xml

profiles/simple-src/OLYMPUS_C5060-simple.xml

profiles/simple-src/OLYMPUS_C70-simple.xml

profiles/simple-src/OLYMPUS_C7070-simple.xml

profiles/simple-src/OLYMPUS_C80-simple.xml

profiles/simple-src/OLYMPUS_E-1-simple.xml

profiles/simple-src/OLYMPUS_E-10-simple.xml

profiles/simple-src/OLYMPUS_E-20-simple.xml

profiles/simple-src/OLYMPUS_E-3-simple.xml

profiles/simple-src/OLYMPUS_E-30-simple.xml

profiles/simple-src/OLYMPUS_E-300-simple.xml

profiles/simple-src/OLYMPUS_E-330-simple.xml

profiles/simple-src/OLYMPUS_E-400-simple.xml

profiles/simple-src/OLYMPUS_E-410-simple.xml

profiles/simple-src/OLYMPUS_E-420-simple.xml

profiles/simple-src/OLYMPUS_E-450-simple.xml

profiles/simple-src/OLYMPUS_E-5-simple.xml

profiles/simple-src/OLYMPUS_E-500-simple.xml

profiles/simple-src/OLYMPUS_E-510-simple.xml

profiles/simple-src/OLYMPUS_E-520-simple.xml

profiles/simple-src/OLYMPUS_E-620-simple.xml

profiles/simple-src/OLYMPUS_E-P1-simple.xml

profiles/simple-src/OLYMPUS_E-PL1-simple.xml

profiles/simple-src/OLYMPUS_E-PL1s-simple.xml

profiles/simple-src/OLYMPUS_E-PL2-simple.xml

profiles/simple-src/OLYMPUS_SP3-simple.xml

profiles/simple-src/OLYMPUS_SP350-simple.xml

profiles/simple-src/OLYMPUS_SP500UZ-simple.xml

profiles/simple-src/OLYMPUS_SP510UZ-simple.xml

profiles/simple-src/OLYMPUS_SP550UZ-simple.xml

profiles/simple-src/OLYMPUS_SP560UZ-simple.xml

profiles/simple-src/OLYMPUS_SP570UZ-simple.xml

profiles/simple-src/OLYMPUS_XZ-1-simple.xml

profiles/simple-src/PENTAX_645D-simple.xml

profiles/simple-src/PENTAX_K-5-simple.xml

profiles/simple-src/PENTAX_K-7-simple.xml

profiles/simple-src/PENTAX_K-m-simple.xml

profiles/simple-src/PENTAX_K-r-simple.xml

profiles/simple-src/PENTAX_K-x-simple.xml

profiles/simple-src/PENTAX_K1-simple.xml

profiles/simple-src/PENTAX_K10D-simple.xml

profiles/simple-src/PENTAX_K2000-simple.xml

profiles/simple-src/PENTAX_K200D-simple.xml

profiles/simple-src/PENTAX_K20D-simple.xml

profiles/simple-src/Panasonic_DMC-FX150-simple.xml

profiles/simple-src/Panasonic_DMC-FZ18-simple.xml

profiles/simple-src/Panasonic_DMC-FZ28-simple.xml

profiles/simple-src/Panasonic_DMC-FZ30-simple.xml

profiles/simple-src/Panasonic_DMC-FZ35-simple.xml

profiles/simple-src/Panasonic_DMC-FZ50-simple.xml

profiles/simple-src/Panasonic_DMC-FZ8-simple.xml

profiles/simple-src/Panasonic_DMC-G1-simple.xml

profiles/simple-src/Panasonic_DMC-G10-simple.xml

profiles/simple-src/Panasonic_DMC-G2-simple.xml

profiles/simple-src/Panasonic_DMC-GF1-simple.xml

profiles/simple-src/Panasonic_DMC-GF2-simple.xml

profiles/simple-src/Panasonic_DMC-GH1-simple.xml

profiles/simple-src/Panasonic_DMC-GH2-simple.xml

profiles/simple-src/Panasonic_DMC-L1-simple.xml

profiles/simple-src/Panasonic_DMC-L10-simple.xml

profiles/simple-src/Panasonic_DMC-LC1-simple.xml

profiles/simple-src/Panasonic_DMC-LX1-simple.xml

profiles/simple-src/Panasonic_DMC-LX2-simple.xml

profiles/simple-src/Panasonic_DMC-LX3-simple.xml

profiles/simple-src/Panasonic_FZ100-simple.xml

profiles/simple-src/Panasonic_FZ40-simple.xml

profiles/simple-src/Panasonic_LX5-simple.xml

profiles/simple-src/Phase_One_H_20-simple.xml

profiles/simple-src/Phase_One_P65-simple.xml

profiles/simple-src/Phase_One_P_2-simple.xml

profiles/simple-src/Phase_One_P_30-simple.xml

profiles/simple-src/Phase_One_P_45-simple.xml

profiles/simple-src/Ricoh_A12_28mm-simple.xml

profiles/simple-src/Ricoh_A12_50mm-simple.xml

profiles/simple-src/Ricoh_Digital_3-simple.xml

profiles/simple-src/Ricoh_GX200-simple.xml

profiles/simple-src/Ricoh_S10_24-72mm-simple.xml

profiles/simple-src/SAMSUNG_EX1-simple.xml

profiles/simple-src/SAMSUNG_GX-1-simple.xml

profiles/simple-src/SAMSUNG_GX-1L-simple.xml

profiles/simple-src/SAMSUNG_GX-1S-simple.xml

profiles/simple-src/SAMSUNG_GX10-simple.xml

profiles/simple-src/SAMSUNG_GX20-simple.xml

profiles/simple-src/SAMSUNG_NX10-simple.xml

profiles/simple-src/SAMSUNG_NX100-simple.xml

profiles/simple-src/SAMSUNG_NX11-simple.xml

profiles/simple-src/SAMSUNG_NX5-simple.xml

profiles/simple-src/SAMSUNG_S85-simple.xml

profiles/simple-src/SAMSUNG_WB2000-simple.xml

profiles/simple-src/SONY_DSC-F828-simple.xml

profiles/simple-src/SONY_DSC-R1-simple.xml

profiles/simple-src/SONY_DSC-V3-simple.xml

profiles/simple-src/SONY_DSLR-A100-simple.xml

profiles/simple-src/SONY_DSLR-A200-simple.xml

profiles/simple-src/SONY_DSLR-A230-simple.xml

profiles/simple-src/SONY_DSLR-A290-simple.xml

profiles/simple-src/SONY_DSLR-A300-simple.xml

profiles/simple-src/SONY_DSLR-A330-simple.xml

profiles/simple-src/SONY_DSLR-A350-simple.xml

profiles/simple-src/SONY_DSLR-A380-simple.xml

profiles/simple-src/SONY_DSLR-A390-simple.xml

profiles/simple-src/SONY_DSLR-A450-simple.xml

profiles/simple-src/SONY_DSLR-A5-simple.xml

profiles/simple-src/SONY_DSLR-A500-simple.xml

profiles/simple-src/SONY_DSLR-A550-simple.xml

profiles/simple-src/SONY_DSLR-A560-simple.xml

profiles/simple-src/SONY_DSLR-A580-simple.xml

profiles/simple-src/SONY_DSLR-A700-simple.xml

profiles/simple-src/SONY_DSLR-A850-simple.xml

profiles/simple-src/SONY_DSLR-A900-simple.xml

profiles/simple-src/SONY_NEX-3-simple.xml

profiles/simple-src/SONY_NEX-5-simple.xml

profiles/simple-src/SONY_SLTA33-simple.xml

profiles/simple-src/SONY_SLTA55V-simple.xml

profiles/simple-src/Sinar-simple.xml

src/application.c

src/application.h

src/rs-camera-db.c

src/rs-camera-db.h

src/rs-loupe.c

src/rs-loupe.h

src/rs-navigator.c

src/rs-navigator.h

src/rs-save-dialog.c

src/rs-save-dialog.h

src/rs-tag-gui.c

src/rs-tag-gui.h

src/rs-tethered-shooting.c

src/rs-tethered-shooting.h

src/rs-toolbox.c

src/rs-toolbox.h

files removed:
debian/patches/dcraw.cc-ftbfs-fix

src/adobe-coeff.c

src/adobe-coeff.h

src/arch-generic.c

src/arch-x86.c

src/ciff-meta.c

src/ciff-meta.h

src/color.h

src/conf_interface.c

src/conf_interface.h

src/dcraw.cc

src/dcraw.h

src/dcraw_api.cc

src/dcraw_api.h

src/gettext.h

src/gtk-save-dialog.c

src/gtk-save-dialog.h

src/mmap-hack.c

src/mmap-hack.h

src/mrw-meta.c

src/mrw-meta.h

src/raf-meta.c

src/raf-meta.h

src/rawfile.c

src/rawfile.h

src/rawstudio.c

src/rawstudio.h

src/rs-arch.h

src/rs-cms.c

src/rs-cms.h

src/rs-color-transform.c

src/rs-color-transform.h

src/rs-curve.c

src/rs-curve.h

src/rs-exif.cc

src/rs-exif.h

src/rs-filetypes.c

src/rs-filetypes.h

src/rs-image.c

src/rs-image.h

src/rs-job.c

src/rs-job.h

src/rs-jpeg.c

src/rs-jpeg.h

src/rs-math.c

src/rs-math.h

src/rs-metadata.c

src/rs-metadata.h

src/rs-preload.c

src/rs-preload.h

src/rs-settings.c

src/rs-settings.h

src/rs-spline.c

src/rs-spline.h

src/rs-utils.c

src/rs-utils.h

src/tiff-meta.c

src/tiff-meta.h

src/toolbox.c

src/toolbox.h

src/x3f-meta.c

src/x3f-meta.h

src/x86_cpu.h

files modified:
AUTHORS

INSTALL

Makefile.am

Makefile.in

NEWS

README

TODO

aclocal.m4

autogen.sh

config.h.in

configure

configure.in

debian/changelog

debian/control

debian/patches/series

debian/rules

depcomp

install-sh

missing

pixmaps/Makefile.am

pixmaps/Makefile.in

po/Makefile.in.in

po/POTFILES.in

po/ca.gmo

po/ca.po

po/cs.gmo

po/cs.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/en.gmo

po/en.po

po/es.gmo

po/es.po

po/fi.gmo

po/fi.po

po/fr.gmo

po/fr.po

po/it.gmo

po/it.po

po/nb.gmo

po/nb.po

po/nl.gmo

po/nl.po

po/pl.gmo

po/pl.po

po/pt_BR.gmo

po/pt_BR.po

po/rawstudio.pot

po/ru.gmo

po/ru.po

po/sv.gmo

po/sv.po

src/Makefile.am

src/Makefile.in

src/filename.c

src/filename.h

src/gtk-helper.c

src/gtk-helper.h

src/gtk-interface.c

src/gtk-interface.h

src/gtk-progress.c

src/gtk-progress.h

src/rs-actions.c

src/rs-actions.h

src/rs-batch.c

src/rs-batch.h

src/rs-cache.c

src/rs-cache.h

src/rs-dir-selector.c

src/rs-dir-selector.h

src/rs-external-editor.c

src/rs-external-editor.h

src/rs-histogram.c

src/rs-histogram.h

src/rs-photo.c

src/rs-photo.h

src/rs-pixbuf.c

src/rs-pixbuf.h

src/rs-preview-widget.c

src/rs-preview-widget.h

src/rs-store.c

src/rs-store.h

src/rs-tiff.c

src/rs-tiff.h

src/ui.xml

Show diffs side-by-side

added added

removed removed

plugins/denoise/complexfilter-x86.cpp

* * Anders Kvist <akv@lnxbx.dk> and Klaus Post <klauspost@gmail.com>

* This program is free software; you can redistribute it and/or

* modify it under the terms of the GNU General Public License

* as published by the Free Software Foundation; either version 2

* of the License, or (at your option) any later version.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#include "complexfilter.h"

#include <math.h>

#include "fftwindow.h"

namespace RawStudio {

namespace FFTFilter {

#if defined (__i386__) || defined (__x86_64__)

#if defined (__x86_64__)

/*
 * Sharpen-only pass with grid correction (x86-64, SSE3).
 *
 * For every complex sample x of the block this computes, in place:
 *
 *   gridcorrection = gridfraction * gridsample[x]
 *   re/im          = outcur[x] - gridcorrection
 *   psd            = re*re + im*im + 1e-15
 *   sfact          = 1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax /
 *                        ((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )
 *   outcur[x]      = re/im * sfact + gridcorrection
 *
 * The division and the square root are done with rcpps / rsqrtps, i.e. with
 * the hardware approximations, not with exact divps / sqrtps.
 *
 * All memory accesses use movaps, so block->complex, grid->complex, the
 * sharpen window line and block->temp->data must be 16-byte aligned.
 *
 * The loops test the counter only after the body has run and step by 8 (or 4)
 * samples. NOTE(review): this presumes bw*bh is a positive multiple of 4 -
 * confirm against the callers.
 *
 * Inline asm contract:
 *  - every operand is declared "+r" because the asm advances the pointers and
 *    decrements the counter; "temp" is never written but is kept read-write so
 *    the positional numbering %0..%4 stays as documented below,
 *  - "memory" is clobbered because outcur is written through a register,
 *  - numeric local labels ("1:" / "1b") are used so the statement can be
 *    duplicated by the compiler without producing duplicate symbols.
 */
void DeGridComplexFilter::processSharpenOnlySSE3(ComplexBlock* block) {
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.
  float *wsharpen = sharpenWindow->getLine(0);

  // Broadcast each constant into four consecutive floats (one xmm register each).
  for (int i = 0; i < 4; i++) {
    temp[i+0] = 1e-15f;                   // byte offset 0
    temp[i+4] = gridfraction;             // byte offset 16
    temp[i+8] = sigmaSquaredSharpenMin;   // byte offset 32
    temp[i+12] = sigmaSquaredSharpenMax;  // byte offset 48
    temp[i+16] = 1.0f;                    // byte offset 64
  }
  int size = bw*bh;
  if ((size & 7) == 0) {  // TODO: Benchmark on non-vm platform - slower under VMWARE
    // Two interleaved streams of 4 complex samples each (8 samples per iteration).
    // Stream A uses xmm0-7, stream B uses xmm8-13 and xmm15; xmm14 holds gridfraction.
    asm volatile
    (
      "movaps 16(%1),%%xmm14\n"     // Load gridfraction into xmm14
      "1:\n"
      "movaps (%2), %%xmm0\n"       // A: in r0i0 r1i1
      "movaps 16(%2), %%xmm1\n"     // A: in r2i2 r3i3
      "movaps 32(%2), %%xmm8\n"     // B: in r0i0 r1i1
      "movaps 48(%2), %%xmm9\n"     // B: in r2i2 r3i3
      "movaps (%3), %%xmm4\n"       // A: grid r0i0 r1i1
      "movaps 16(%3), %%xmm5\n"     // A: grid r2i2 r3i3
      "movaps 32(%3), %%xmm12\n"    // B: grid r0i0 r1i1
      "movaps 48(%3), %%xmm13\n"    // B: grid r2i2 r3i3
      "mulps %%xmm14, %%xmm4\n"     // A: gridcorrection = gridfraction*gridsample[x]
      "mulps %%xmm14, %%xmm5\n"
      "mulps %%xmm14, %%xmm12\n"    // B: gridcorrection
      "mulps %%xmm14, %%xmm13\n"
      "movaps %%xmm4, %%xmm2\n"     // A: keep gridcorrection in xmm2/xmm3
      "movaps %%xmm5, %%xmm3\n"
      "movaps %%xmm12, %%xmm10\n"   // B: keep gridcorrection in xmm10/xmm11
      "movaps %%xmm13, %%xmm11\n"
      "subps %%xmm4, %%xmm0\n"      // A: re/im = outcur[x] - gridcorrection
      "subps %%xmm5, %%xmm1\n"
      "subps %%xmm12, %%xmm8\n"     // B: re/im = outcur[x] - gridcorrection
      "subps %%xmm13, %%xmm9\n"
      "movaps %%xmm0, %%xmm4\n"     // A: copy re/im, xmm0 & xmm1 retained
      "movaps %%xmm1, %%xmm5\n"
      "movaps %%xmm8, %%xmm12\n"    // B: copy re/im, xmm8 & xmm9 retained
      "movaps %%xmm9, %%xmm13\n"
      "movaps (%1), %%xmm6\n"       // Move 1e-15 into xmm6
      "mulps %%xmm4, %%xmm4\n"      // A: r0i0 r1i1 squared
      "mulps %%xmm5, %%xmm5\n"      // A: r2i2 r3i3 squared
      "mulps %%xmm12, %%xmm12\n"    // B: r0i0 r1i1 squared
      "mulps %%xmm13, %%xmm13\n"    // B: r2i2 r3i3 squared
      "haddps %%xmm5, %%xmm4\n"     // A: r0+i0 r1+i1 r2+i2 r3+i3 (all squared) (SSE3!)
      "haddps %%xmm13, %%xmm12\n"   // B: same
      "addps %%xmm6, %%xmm4\n"      // A: add 1e-15 (xmm4: psd for 4 samples)
      "addps %%xmm6, %%xmm12\n"     // B: add 1e-15 (xmm12: psd for 4 samples)
      "movaps 48(%1), %%xmm7\n"     // Move sigmaSquaredSharpenMax into xmm7
      // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;
      "movaps 32(%1), %%xmm6\n"     // Move sigmaSquaredSharpenMin into xmm6
      "movaps %%xmm4, %%xmm5\n"     // A: copy psd into xmm5
      "movaps %%xmm12, %%xmm13\n"   // B: copy psd into xmm13
      "movaps %%xmm7, %%xmm15\n"    // B: copy of sigmaSquaredSharpenMax
      "addps %%xmm7, %%xmm4\n"      // A: xmm4 = psd + sigmaSquaredSharpenMax
      "addps %%xmm7, %%xmm12\n"     // B: xmm12 = psd + sigmaSquaredSharpenMax
      "mulps %%xmm5, %%xmm7\n"      // A: xmm7 = psd*sigmaSquaredSharpenMax
      "mulps %%xmm13, %%xmm15\n"    // B: xmm15 = psd*sigmaSquaredSharpenMax
      "addps %%xmm6, %%xmm5\n"      // A: xmm5 = psd + sigmaSquaredSharpenMin
      "addps %%xmm6, %%xmm13\n"     // B: xmm13 = psd + sigmaSquaredSharpenMin, xmm6 free
      "mulps %%xmm4, %%xmm5\n"      // A: (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax), xmm4 free
      "mulps %%xmm12, %%xmm13\n"    // B: same, xmm12 free
      "rcpps %%xmm5, %%xmm5\n"      // A: approx. 1 / denominator
      "movaps (%4), %%xmm6\n"       // A: load wsharpen[0..3]
      "rcpps %%xmm13, %%xmm13\n"    // B: approx. 1 / denominator
      "movaps 16(%4), %%xmm4\n"     // B: load wsharpen[4..7]
      "mulps %%xmm5, %%xmm7\n"      // A: psd*max / denominator, xmm5 free
      "mulps %%xmm13, %%xmm15\n"    // B: psd*max / denominator, xmm13 free
      "movaps 64(%1), %%xmm5\n"     // Load "1.0"
      "rsqrtps %%xmm7, %%xmm7\n"    // A: approx. 1.0 / sqrt(..)
      "rsqrtps %%xmm15, %%xmm15\n"  // B: approx. 1.0 / sqrt(..)
      "rcpps %%xmm7, %%xmm7\n"      // A: approx. sqrt(..)
      "rcpps %%xmm15, %%xmm15\n"    // B: approx. sqrt(..)
      "mulps %%xmm6, %%xmm7\n"      // A: multiply wsharpen
      "mulps %%xmm4, %%xmm15\n"     // B: multiply wsharpen
      "addps %%xmm5, %%xmm7\n"      // A: + 1.0, xmm7 = sfact
      "addps %%xmm5, %%xmm15\n"     // B: + 1.0, xmm15 = sfact
      "movaps %%xmm7, %%xmm5\n"
      "movaps %%xmm15, %%xmm13\n"
      "unpcklps %%xmm7, %%xmm7\n"   // A: sfact0 sfact0 sfact1 sfact1
      "unpckhps %%xmm5, %%xmm5\n"   // A: sfact2 sfact2 sfact3 sfact3
      "unpcklps %%xmm15, %%xmm15\n" // B: sfact0 sfact0 sfact1 sfact1
      "unpckhps %%xmm13, %%xmm13\n" // B: sfact2 sfact2 sfact3 sfact3
      "mulps %%xmm7, %%xmm0\n"      // A: re+im *= sfact
      "mulps %%xmm5, %%xmm1\n"
      "mulps %%xmm15, %%xmm8\n"     // B: re+im *= sfact
      "mulps %%xmm13, %%xmm9\n"
      "addps %%xmm2, %%xmm0\n"      // A: add gridcorrection
      "addps %%xmm3, %%xmm1\n"
      "addps %%xmm10, %%xmm8\n"     // B: add gridcorrection
      "addps %%xmm11, %%xmm9\n"
      "movaps %%xmm0, (%2)\n"       // Store
      "movaps %%xmm1, 16(%2)\n"
      "movaps %%xmm8, 32(%2)\n"
      "movaps %%xmm9, 48(%2)\n"
      "sub $8, %0\n"                // size -= 8 samples
      "add $64, %2\n"               // outcur += 64 bytes
      "add $64, %3\n"               // gridsample += 64 bytes
      "add $32, %4\n"               // wsharpen += 32 bytes
      "cmp $0, %0\n"
      "jg 1b\n"
      : "+r" (size), "+r" (temp), "+r" (outcur), "+r" (gridsample), "+r" (wsharpen)
      /*     %0            %1            %2              %3                 %4     */
      : /* no input-only operands */
      : "memory", "cc",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
    );
  } else {
    // 4 complex samples per iteration; all constants stay resident in xmm11-15.
    asm volatile
    (
      "movaps (%1), %%xmm11\n"      // 1e-15
      "movaps 16(%1), %%xmm12\n"    // gridfraction
      "movaps 32(%1), %%xmm13\n"    // sigmaSquaredSharpenMin
      "movaps 48(%1), %%xmm14\n"    // sigmaSquaredSharpenMax
      "movaps 64(%1), %%xmm15\n"    // 1.0
      "1:\n"
      "movaps (%2), %%xmm0\n"       // in r0i0 r1i1
      "movaps 16(%2), %%xmm1\n"     // in r2i2 r3i3
      "movaps (%3), %%xmm4\n"       // grid r0i0 r1i1
      "movaps 16(%3), %%xmm5\n"     // grid r2i2 r3i3
      "mulps %%xmm12, %%xmm4\n"     // gridcorrection = gridfraction*gridsample[x]
      "mulps %%xmm12, %%xmm5\n"
      "movaps %%xmm4, %%xmm2\n"     // keep gridcorrection in xmm2/xmm3
      "movaps %%xmm5, %%xmm3\n"
      "subps %%xmm4, %%xmm0\n"      // re/im = outcur[x] - gridcorrection
      "subps %%xmm5, %%xmm1\n"
      "movaps %%xmm0, %%xmm4\n"     // copy re/im, xmm0 & xmm1 retained
      "movaps %%xmm1, %%xmm5\n"
      "mulps %%xmm4, %%xmm4\n"      // r0i0 r1i1 squared
      "mulps %%xmm5, %%xmm5\n"      // r2i2 r3i3 squared
      "haddps %%xmm5, %%xmm4\n"     // r0+i0 r1+i1 r2+i2 r3+i3 (all squared) (SSE3!) - xmm5 free
      "addps %%xmm11, %%xmm4\n"     // add 1e-15 (xmm4: psd for all 4 samples)
      "movaps %%xmm14, %%xmm7\n"    // Move sigmaSquaredSharpenMax into xmm7
      // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;
      "movaps %%xmm4, %%xmm5\n"     // Copy psd into xmm5
      "addps %%xmm7, %%xmm4\n"      // xmm4 = psd + sigmaSquaredSharpenMax
      "mulps %%xmm5, %%xmm7\n"      // xmm7 = psd*sigmaSquaredSharpenMax
      "addps %%xmm13, %%xmm5\n"     // xmm5 = psd + sigmaSquaredSharpenMin
      "mulps %%xmm4, %%xmm5\n"      // (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax), xmm4 free
      "movaps (%4), %%xmm6\n"       // load wsharpen[0..3]
      "rcpps %%xmm5, %%xmm5\n"      // approx. 1 / denominator
      "mulps %%xmm5, %%xmm7\n"      // psd*max / denominator, xmm5 free
      "rsqrtps %%xmm7, %%xmm7\n"    // approx. 1.0 / sqrt(..)
      "rcpps %%xmm7, %%xmm7\n"      // approx. sqrt(..)
      "mulps %%xmm6, %%xmm7\n"      // multiply wsharpen
      "addps %%xmm15, %%xmm7\n"     // + 1.0, xmm7 = sfact
      "movaps %%xmm7, %%xmm5\n"
      "unpcklps %%xmm7, %%xmm7\n"   // sfact0 sfact0 sfact1 sfact1
      "unpckhps %%xmm5, %%xmm5\n"   // sfact2 sfact2 sfact3 sfact3
      "mulps %%xmm7, %%xmm0\n"      // re+im *= sfact
      "mulps %%xmm5, %%xmm1\n"
      "addps %%xmm2, %%xmm0\n"      // add gridcorrection
      "addps %%xmm3, %%xmm1\n"
      "movaps %%xmm0, (%2)\n"       // Store
      "movaps %%xmm1, 16(%2)\n"
      "sub $4, %0\n"                // size -= 4 samples
      "add $32, %2\n"               // outcur += 32 bytes
      "add $32, %3\n"               // gridsample += 32 bytes
      "add $16, %4\n"               // wsharpen += 16 bytes
      "cmp $0, %0\n"
      "jg 1b\n"
      : "+r" (size), "+r" (temp), "+r" (outcur), "+r" (gridsample), "+r" (wsharpen)
      /*     %0            %1            %2              %3                 %4     */
      : /* no input-only operands */
      : "memory", "cc",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
        "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
    );
  }
}

210

211

#else // 32 bits

212

/*
 * Sharpen-only pass with grid correction (32-bit x86, SSE3).
 *
 * For every complex sample x of the block this computes, in place:
 *
 *   gridcorrection = gridfraction * gridsample[x]
 *   re/im          = outcur[x] - gridcorrection
 *   psd            = re*re + im*im + 1e-15
 *   sfact          = 1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax /
 *                        ((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )
 *   outcur[x]      = re/im * sfact + gridcorrection
 *
 * Only xmm0-xmm7 are available here, so the constants are re-read from the
 * temp area inside the loop. Division and square root use the rcpps / rsqrtps
 * approximations. All memory operands are accessed with aligned instructions,
 * so every buffer must be 16-byte aligned.
 *
 * The loop tests the counter only after the body has run and steps by 4
 * samples. NOTE(review): this presumes bw*bh is a positive multiple of 4 -
 * confirm against the callers.
 *
 * Inline asm contract:
 *  - every operand is declared "+r" because the asm advances the pointers and
 *    decrements the counter; "temp" is never written but is kept read-write so
 *    the positional numbering %0..%4 stays as documented below,
 *  - "memory" is clobbered because outcur is written through a register,
 *  - a numeric local label ("1:" / "1b") is used so the statement can be
 *    duplicated by the compiler without producing duplicate symbols.
 */
void DeGridComplexFilter::processSharpenOnlySSE3(ComplexBlock* block) {
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.
  float *wsharpen = sharpenWindow->getLine(0);

  // Broadcast each constant into four consecutive floats (one xmm register each).
  for (int i = 0; i < 4; i++) {
    temp[i+0] = 1e-15f;                   // byte offset 0
    temp[i+4] = gridfraction;             // byte offset 16
    temp[i+8] = sigmaSquaredSharpenMin;   // byte offset 32
    temp[i+12] = sigmaSquaredSharpenMax;  // byte offset 48
    temp[i+16] = 1.0f;                    // byte offset 64
  }
  int size = bw*bh;
  asm volatile
  (
    "1:\n"
    "movaps 16(%1),%%xmm6\n"      // Load gridfraction into xmm6
    "movaps (%2), %%xmm0\n"       // in r0i0 r1i1
    "movaps 16(%2), %%xmm1\n"     // in r2i2 r3i3
    "movaps (%3), %%xmm4\n"       // grid r0i0 r1i1
    "movaps 16(%3), %%xmm5\n"     // grid r2i2 r3i3
    "mulps %%xmm6, %%xmm4\n"      // gridcorrection = gridfraction*gridsample[x]
    "mulps %%xmm6, %%xmm5\n"
    "movaps %%xmm4, %%xmm2\n"     // keep gridcorrection in xmm2/xmm3
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"      // re/im = outcur[x] - gridcorrection
    "subps %%xmm5, %%xmm1\n"
    "movaps %%xmm0, %%xmm4\n"     // copy re/im, xmm0 & xmm1 retained
    "movaps %%xmm1, %%xmm5\n"
    "mulps %%xmm4, %%xmm4\n"      // r0i0 r1i1 squared
    "mulps %%xmm5, %%xmm5\n"      // r2i2 r3i3 squared
    "movaps 32(%1), %%xmm6\n"     // Move sigmaSquaredSharpenMin into xmm6
    "haddps %%xmm5, %%xmm4\n"     // r0+i0 r1+i1 r2+i2 r3+i3 (all squared) (SSE3!) - xmm5 free
    "addps (%1), %%xmm4\n"        // add 1e-15 (xmm4: psd for all 4 samples)
    "movaps 48(%1), %%xmm7\n"     // Move sigmaSquaredSharpenMax into xmm7
    // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;
    "movaps %%xmm4, %%xmm5\n"     // Copy psd into xmm5
    "addps %%xmm7, %%xmm4\n"      // xmm4 = psd + sigmaSquaredSharpenMax
    "mulps %%xmm5, %%xmm7\n"      // xmm7 = psd*sigmaSquaredSharpenMax
    "addps %%xmm6, %%xmm5\n"      // xmm5 = psd + sigmaSquaredSharpenMin, xmm6 free
    "mulps %%xmm4, %%xmm5\n"      // (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax), xmm4 free
    "movaps (%4), %%xmm6\n"       // load wsharpen[0..3]
    "rcpps %%xmm5, %%xmm5\n"      // approx. 1 / denominator
    "mulps %%xmm5, %%xmm7\n"      // psd*max / denominator, xmm5 free
    "movaps 64(%1), %%xmm5\n"     // Load "1.0"
    "rsqrtps %%xmm7, %%xmm7\n"    // approx. 1.0 / sqrt(..)
    "rcpps %%xmm7, %%xmm7\n"      // approx. sqrt(..)
    "mulps %%xmm6, %%xmm7\n"      // multiply wsharpen
    "addps %%xmm5, %%xmm7\n"      // + 1.0, xmm7 = sfact
    "movaps %%xmm7, %%xmm5\n"
    "unpcklps %%xmm7, %%xmm7\n"   // sfact0 sfact0 sfact1 sfact1
    "unpckhps %%xmm5, %%xmm5\n"   // sfact2 sfact2 sfact3 sfact3
    "mulps %%xmm7, %%xmm0\n"      // re+im *= sfact
    "mulps %%xmm5, %%xmm1\n"
    "addps %%xmm2, %%xmm0\n"      // add gridcorrection
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%2)\n"       // Store
    "movaps %%xmm1, 16(%2)\n"
    "sub $4, %0\n"                // size -= 4 samples
    "add $32, %2\n"               // outcur += 32 bytes
    "add $32, %3\n"               // gridsample += 32 bytes
    "add $16, %4\n"               // wsharpen += 16 bytes
    "cmp $0, %0\n"
    "jg 1b\n"
    : "+r" (size), "+r" (temp), "+r" (outcur), "+r" (gridsample), "+r" (wsharpen)
    /*     %0            %1            %2              %3                 %4     */
    : /* no input-only operands */
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}

288

#endif

289

/*
 * Sharpen-only pass with grid correction (plain SSE, no SSE3).
 *
 * For every complex sample x of the block this computes, in place:
 *
 *   gridcorrection = gridfraction * gridsample[x]
 *   re/im          = outcur[x] - gridcorrection
 *   psd            = re*re + im*im + 1e-15
 *   sfact          = 1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax /
 *                        ((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )
 *   outcur[x]      = re/im * sfact + gridcorrection
 *
 * The horizontal add of the SSE3 variants is replaced by two shufps that
 * separate the squared real and imaginary parts, followed by addps.
 * Division and square root use the rcpps / rsqrtps approximations. All memory
 * operands are accessed with aligned instructions, so every buffer must be
 * 16-byte aligned.
 *
 * The loop tests the counter only after the body has run and steps by 4
 * samples. NOTE(review): this presumes bw*bh is a positive multiple of 4 -
 * confirm against the callers.
 *
 * Inline asm contract:
 *  - every operand is declared "+r" because the asm advances the pointers and
 *    decrements the counter; "temp" is never written but is kept read-write so
 *    the positional numbering %0..%4 stays as documented below,
 *  - "memory" is clobbered because outcur is written through a register,
 *  - a numeric local label ("1:" / "1b") is used so the statement can be
 *    duplicated by the compiler without producing duplicate symbols.
 */
void DeGridComplexFilter::processSharpenOnlySSE(ComplexBlock* block) {
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.
  float *wsharpen = sharpenWindow->getLine(0);

  // Broadcast each constant into four consecutive floats (one xmm register each).
  for (int i = 0; i < 4; i++) {
    temp[i+0] = 1e-15f;                   // byte offset 0
    temp[i+4] = gridfraction;             // byte offset 16
    temp[i+8] = sigmaSquaredSharpenMin;   // byte offset 32
    temp[i+12] = sigmaSquaredSharpenMax;  // byte offset 48
    temp[i+16] = 1.0f;                    // byte offset 64
  }
  int size = bw*bh;
  asm volatile
  (
    "1:\n"
    "movaps 16(%1),%%xmm6\n"         // Load gridfraction into xmm6
    "movaps (%2), %%xmm0\n"          // in r0i0 r1i1
    "movaps 16(%2), %%xmm1\n"        // in r2i2 r3i3
    "movaps (%3), %%xmm4\n"          // grid r0i0 r1i1
    "movaps 16(%3), %%xmm5\n"        // grid r2i2 r3i3
    "mulps %%xmm6, %%xmm4\n"         // gridcorrection = gridfraction*gridsample[x]
    "mulps %%xmm6, %%xmm5\n"
    "movaps %%xmm4, %%xmm2\n"        // keep gridcorrection in xmm2/xmm3
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"         // re/im = outcur[x] - gridcorrection
    "subps %%xmm5, %%xmm1\n"
    "movaps %%xmm0, %%xmm4\n"        // copy re/im, xmm0 & xmm1 retained
    "movaps %%xmm1, %%xmm5\n"
    "mulps %%xmm4, %%xmm4\n"         // r0i0 r1i1 squared
    "mulps %%xmm5, %%xmm5\n"         // r2i2 r3i3 squared
    "movaps %%xmm4, %%xmm7\n"
    "shufps $136, %%xmm5, %%xmm4\n"  // xmm4 = r0 r1 r2 r3 (squared) [10 00 10 00 = 136]
    "shufps $221, %%xmm5, %%xmm7\n"  // xmm7 = i0 i1 i2 i3 (squared) [11 01 11 01 = 221]
    "movaps 32(%1), %%xmm6\n"        // Move sigmaSquaredSharpenMin into xmm6
    "addps %%xmm7, %%xmm4\n"         // xmm4 = re*re + im*im
    "movaps 48(%1), %%xmm7\n"        // Move sigmaSquaredSharpenMax into xmm7
    "addps (%1), %%xmm4\n"           // add 1e-15 (xmm4: psd for all 4 samples)
    // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;
    "movaps %%xmm4, %%xmm5\n"        // Copy psd into xmm5
    "addps %%xmm7, %%xmm4\n"         // xmm4 = psd + sigmaSquaredSharpenMax
    "mulps %%xmm5, %%xmm7\n"         // xmm7 = psd*sigmaSquaredSharpenMax
    "addps %%xmm6, %%xmm5\n"         // xmm5 = psd + sigmaSquaredSharpenMin, xmm6 free
    "mulps %%xmm4, %%xmm5\n"         // (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax), xmm4 free
    "movaps (%4), %%xmm6\n"          // load wsharpen[0..3]
    "rcpps %%xmm5, %%xmm5\n"         // approx. 1 / denominator
    "mulps %%xmm5, %%xmm7\n"         // psd*max / denominator, xmm5 free
    "movaps 64(%1), %%xmm5\n"        // Load "1.0"
    "rsqrtps %%xmm7, %%xmm7\n"       // approx. 1.0 / sqrt(..)
    "rcpps %%xmm7, %%xmm7\n"         // approx. sqrt(..)
    "mulps %%xmm6, %%xmm7\n"         // multiply wsharpen
    "addps %%xmm5, %%xmm7\n"         // + 1.0, xmm7 = sfact
    "movaps %%xmm7, %%xmm5\n"
    "unpcklps %%xmm7, %%xmm7\n"      // sfact0 sfact0 sfact1 sfact1
    "unpckhps %%xmm5, %%xmm5\n"      // sfact2 sfact2 sfact3 sfact3
    "mulps %%xmm7, %%xmm0\n"         // re+im *= sfact
    "mulps %%xmm5, %%xmm1\n"
    "addps %%xmm2, %%xmm0\n"         // add gridcorrection
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%2)\n"          // Store
    "movaps %%xmm1, 16(%2)\n"
    "sub $4, %0\n"                   // size -= 4 samples
    "add $32, %2\n"                  // outcur += 32 bytes
    "add $32, %3\n"                  // gridsample += 32 bytes
    "add $16, %4\n"                  // wsharpen += 16 bytes
    "cmp $0, %0\n"
    "jg 1b\n"
    : "+r" (size), "+r" (temp), "+r" (outcur), "+r" (gridsample), "+r" (wsharpen)
    /*     %0            %1            %2              %3                 %4     */
    : /* no input-only operands */
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}

369

#if defined (__x86_64__)

370

void ComplexWienerFilterDeGrid::processSharpen_SSE3( ComplexBlock* block )

371

{

372

fftwf_complex* outcur = block->complex;

373

fftwf_complex* gridsample = grid->complex;

374

float gridfraction = degrid*outcur[0][0]/gridsample[0][0];

375

float* temp = block->temp->data; // Get aligned temp area, at least 256 bytes, only used by this thread.

376

float *wsharpen = sharpenWindow->getLine(0);

377

378

for (int i = 0; i < 4; i++) {

379

temp[i+0] = 1e-15f; // 0

380

temp[i+4] = gridfraction; // 16

381

temp[i+8] = sigmaSquaredSharpenMin; // 32

382

temp[i+12] = sigmaSquaredSharpenMax; // 48

383

temp[i+16] = 1.0f; // 64

384

temp[i+20] = sigmaSquaredNoiseNormed; // 80

385

temp[i+24] = lowlimit; // 96

386

}

387

int size = bw*bh;

388

asm volatile

389

(

390

"movaps (%1), %%xmm15\n" // 1e-15f

391

"movaps 16(%1),%%xmm14\n" // Load gridfraction into xmm14

392

"movaps 32(%1), %%xmm10\n" //xmm10 sigmaSquaredSharpenMin

393

"movaps 48(%1), %%xmm11\n" // Move sigmaSquaredSharpenMax into xmm11

394

"movaps 64(%1), %%xmm9\n" // Load "1.0"

395

"movaps 80(%1), %%xmm13\n" //sigmaSquaredNoiseNormed in xmm13

396

"movaps 96(%1), %%xmm12\n" // xmm12 = lowlimit

397

"loop_wienerdegridsharpen_sse3:\n"

398

"movaps (%2), %%xmm0\n" // in r0i0 r1i1

399

"movaps 16(%2), %%xmm1\n" //in r2i2 r3i3

400

"movaps (%3), %%xmm4\n" // grid r0i0 r1i1

401

"movaps 16(%3), %%xmm5\n" // grid r2i2 r3i3

402

403

"mulps %%xmm14, %%xmm4\n" //grid r0*gf i0*gf r1*gf i1*gf (xmm4: gridcorrection0 + 1)

404

"mulps %%xmm14, %%xmm5\n" //grid r2*gf i2*gf r3*gf i3*gf (gridfraction*gridsample[x])

405

"movaps %%xmm4, %%xmm2\n" // maintain gridcorrection in memory

406

"movaps %%xmm5, %%xmm3\n"

407

"subps %%xmm4, %%xmm0\n" // re0 im0 re1 im1 (re = outcur[x][0] - gridcorrection0;, etc) (xmm0 - xmm4)

408

"subps %%xmm5, %%xmm1\n" // re2 im2 re3 im3 -

409

"movaps %%xmm0, %%xmm4\n" // copy re0+im0 ... into xmm4 and 5, xmm0 & 1 retained

410

"movaps %%xmm1, %%xmm5\n"

411

412

"mulps %%xmm4, %%xmm4\n" //r0i0 r1i1 squared

413

"mulps %%xmm5, %%xmm5\n" //r2i2 r3i3 squared

414

"haddps %%xmm5, %%xmm4\n" //r0+i0 r1+i1 r2+i2 r3+i3 r4+i4 (all squared) (SSE3!) - xmm 5 free

415

"addps %%xmm15, %%xmm4\n" // add 1e-15 (xmm4: psd for all 4 pixels)

416

417

//WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

418

419

"movaps %%xmm4, %%xmm6\n" // Copy psd into xmm6

420

"rcpps %%xmm4, %%xmm7\n" // xmm7: (1 / psd)

421

"subps %%xmm13, %%xmm6\n" // xmm6 (psd) - xmm5 (ssnn) xmm5 free

422

"mulps %%xmm7, %%xmm6\n" // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd

423

"maxps %%xmm12, %%xmm6\n" // xmm6 = Wienerfactor = MAX(xmm6, lowlimit)

424

425

// float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;

426

"movaps %%xmm11, %%xmm7\n" // Move sigmaSquaredSharpenMax into xmm7

427

"movaps %%xmm4, %%xmm5\n" // Copy psd into xmm5

428

"addps %%xmm11, %%xmm4\n" // xmm4 = psd + sigmaSquaredSharpenMax

429

"mulps %%xmm5, %%xmm7\n" // xmm7 = psd*sigmaSquaredSharpenMax

430

"addps %%xmm10, %%xmm5\n" //xmm5 = psd + sigmaSquaredSharpenMin //xmm6 free

431

432

"mulps %%xmm4, %%xmm5\n" // (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax) xmm4 free

433

"rcpps %%xmm5, %%xmm5\n" // 1 / (psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax) (stall)

434

"mulps %%xmm5, %%xmm7\n" // psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) - xmm5 free

435

"rsqrtps %%xmm7, %%xmm7\n" // 1 / sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax))

436

"rcpps %%xmm7, %%xmm7\n" // sqrt(...)

437

"mulps (%4), %%xmm7\n" // multiply wsharpen

438

"addps %%xmm9, %%xmm7\n" // + 1.0 xmm7 = sfact

439

"mulps %%xmm6, %%xmm7\n" // *= Wienerfactor

440

"movaps %%xmm7, %%xmm5\n"

441

"unpcklps %%xmm7, %%xmm7\n" // unpack low to xmm7

442

"unpckhps %%xmm5, %%xmm5\n" // unpack high to xmm5

443

444

"mulps %%xmm7, %%xmm0\n" // re+im *= sfact

445

"mulps %%xmm5, %%xmm1\n" // re+im *= sfact

446

"addps %%xmm2, %%xmm0\n" // add gridcorrection

447

"addps %%xmm3, %%xmm1\n" // add gridcorrection

448

"movaps %%xmm0, (%2)\n" // Store

449

"movaps %%xmm1, 16(%2)\n" // Store

450

"sub $4, %0\n" // size -=4

451

"add $32, %2\n" // outcur+=32

452

"add $32, %3\n" // gridsample+=32

453

"add $16, %4\n" // wsharpen+=16

454

"cmp $0, %0\n"

455

"jg loop_wienerdegridsharpen_sse3\n"

456

: /* no output registers */

457

: "r" (size), "r" (temp), "r" (outcur), "r" (gridsample), "r"(wsharpen)

458

: /* %0 %1 %2 %3 %4 */

459

);

460

}

461

462

#else // 32 bits

463

464

/**
 * Wiener-filters, de-grids and sharpens one block of complex FFT coefficients
 * in place (SSE3 variant for 32-bit x86, limited to xmm0-xmm7).
 *
 * Per coefficient x the loop computes, using the approximate rcpps/rsqrtps
 * instructions instead of an exact division and square root:
 *
 *   corr      = gridfraction * gridsample[x]
 *   (re,im)   = outcur[x] - corr
 *   psd       = re*re + im*im + 1e-15
 *   wiener    = MAX((psd - sigmaSquaredNoiseNormed) / psd, lowlimit)
 *   sfact     = 1 + wsharpen[x] * sqrt(psd*sigmaSquaredSharpenMax /
 *                   ((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)))
 *   outcur[x] = (re,im) * wiener * sfact + corr
 *
 * Four coefficients are handled per pass and the loop is bottom-tested, so at
 * least four are always processed and bw*bh is effectively rounded up to a
 * multiple of four. Every memory access uses movaps, so block->complex,
 * grid->complex, the sharpen window line and the temp area must all be
 * 16-byte aligned.
 *
 * @param block Block whose `complex` data is filtered in place; its `temp`
 *              area receives the broadcast constants read by the loop.
 */
void ComplexWienerFilterDeGrid::processSharpen_SSE3( ComplexBlock* block )
{
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.
  float *wsharpen = sharpenWindow->getLine(0);

  // Broadcast every scalar into four lanes so it can be used as a packed operand.
  for (int i = 0; i < 4; i++) {
    temp[i+0]  = 1e-15f;                   // byte offset 0
    temp[i+4]  = gridfraction;             // byte offset 16
    temp[i+8]  = sigmaSquaredSharpenMin;   // byte offset 32
    temp[i+12] = sigmaSquaredSharpenMax;   // byte offset 48
    temp[i+16] = 1.0f;                     // byte offset 64
    temp[i+20] = sigmaSquaredNoiseNormed;  // byte offset 80
    temp[i+24] = lowlimit;                 // byte offset 96
  }
  int size = bw*bh;

  // The loop counter and the three walking pointers are changed by the asm, so
  // they are read-write ("+r") operands. The label is a local numeric label so
  // the statement can be duplicated by the compiler without symbol clashes.
  asm volatile
  (
    "1:\n"
    "movaps 16(%[temp]), %%xmm6\n"     // xmm6 = gridfraction
    "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
    "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
    "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
    "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3

    "mulps %%xmm6, %%xmm4\n"           // gridcorrection for coefficients 0,1
    "mulps %%xmm6, %%xmm5\n"           // gridcorrection for coefficients 2,3
    "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
    "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
    "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
    "movaps %%xmm1, %%xmm5\n"

    "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
    "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
    "haddps %%xmm5, %%xmm4\n"          // re^2+im^2 for coefficients 0..3 (SSE3) - xmm5 free
    "addps (%[temp]), %%xmm4\n"        // + 1e-15 -> xmm4 = psd for all 4 coefficients

    // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

    "movaps 80(%[temp]), %%xmm5\n"     // xmm5 = sigmaSquaredNoiseNormed
    "movaps %%xmm4, %%xmm6\n"          // xmm6 = psd
    "rcpps %%xmm4, %%xmm7\n"           // xmm7 ~= 1 / psd
    "subps %%xmm5, %%xmm6\n"           // xmm6 = psd - sigmaSquaredNoiseNormed
    "movaps 96(%[temp]), %%xmm5\n"     // xmm5 = lowlimit
    "mulps %%xmm7, %%xmm6\n"           // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd
    "maxps %%xmm5, %%xmm6\n"           // xmm6 = WienerFactor = MAX(xmm6, lowlimit)

    // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;

    "movaps 48(%[temp]), %%xmm7\n"     // xmm7 = sigmaSquaredSharpenMax
    "movaps %%xmm4, %%xmm5\n"          // xmm5 = psd
    "addps %%xmm7, %%xmm4\n"           // xmm4 = psd + sigmaSquaredSharpenMax
    "mulps %%xmm5, %%xmm7\n"           // xmm7 = psd*sigmaSquaredSharpenMax
    "addps 32(%[temp]), %%xmm5\n"      // xmm5 = psd + sigmaSquaredSharpenMin

    "mulps %%xmm4, %%xmm5\n"           // xmm5 = (psd + sMin)*(psd + sMax) - xmm4 free
    "rcpps %%xmm5, %%xmm5\n"           // xmm5 ~= 1 / ((psd + sMin)*(psd + sMax))
    "mulps %%xmm5, %%xmm7\n"           // xmm7 = psd*sMax / ((psd + sMin)*(psd + sMax))
    "movaps 64(%[temp]), %%xmm5\n"     // xmm5 = 1.0
    "rsqrtps %%xmm7, %%xmm7\n"         // xmm7 ~= 1 / sqrt(...)
    "rcpps %%xmm7, %%xmm7\n"           // xmm7 ~= sqrt(...)
    "mulps (%[wsh]), %%xmm7\n"         // * wsharpen[x..x+3]
    "addps %%xmm5, %%xmm7\n"           // + 1.0 -> xmm7 = sfact
    "mulps %%xmm6, %%xmm7\n"           // *= WienerFactor
    "movaps %%xmm7, %%xmm5\n"
    "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
    "unpckhps %%xmm5, %%xmm5\n"        // xmm5 = f2 f2 f3 f3

    "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= factor
    "mulps %%xmm5, %%xmm1\n"           // re,im of coefficients 2,3 *= factor
    "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%[out])\n"        // store in place
    "movaps %%xmm1, 16(%[out])\n"
    "sub $4, %[size]\n"                // size -= 4 coefficients
    "add $32, %[out]\n"                // outcur += 4 complex (32 bytes)
    "add $32, %[grid]\n"               // gridsample += 4 complex (32 bytes)
    "add $16, %[wsh]\n"                // wsharpen += 4 floats (16 bytes)
    "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
    "jg 1b\n"
    : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample), [wsh] "+r" (wsharpen)
    : [temp] "r" (temp)
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}

552

#endif

553

554

/**
 * Wiener-filters, de-grids and sharpens one block of complex FFT coefficients
 * in place (plain SSE variant: the horizontal add is built from two shufps
 * and an addps instead of the SSE3 haddps).
 *
 * Per coefficient x the loop computes, using the approximate rcpps/rsqrtps
 * instructions instead of an exact division and square root:
 *
 *   corr      = gridfraction * gridsample[x]
 *   (re,im)   = outcur[x] - corr
 *   psd       = re*re + im*im + 1e-15
 *   wiener    = MAX((psd - sigmaSquaredNoiseNormed) / psd, lowlimit)
 *   sfact     = 1 + wsharpen[x] * sqrt(psd*sigmaSquaredSharpenMax /
 *                   ((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)))
 *   outcur[x] = (re,im) * wiener * sfact + corr
 *
 * Four coefficients are handled per pass and the loop is bottom-tested, so at
 * least four are always processed and bw*bh is effectively rounded up to a
 * multiple of four. Every memory access uses movaps, so block->complex,
 * grid->complex, the sharpen window line and the temp area must all be
 * 16-byte aligned.
 *
 * @param block Block whose `complex` data is filtered in place; its `temp`
 *              area receives the broadcast constants read by the loop.
 */
void ComplexWienerFilterDeGrid::processSharpen_SSE( ComplexBlock* block )
{
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.
  float *wsharpen = sharpenWindow->getLine(0);

  // Broadcast every scalar into four lanes so it can be used as a packed operand.
  for (int i = 0; i < 4; i++) {
    temp[i+0]  = 1e-15f;                   // byte offset 0
    temp[i+4]  = gridfraction;             // byte offset 16
    temp[i+8]  = sigmaSquaredSharpenMin;   // byte offset 32
    temp[i+12] = sigmaSquaredSharpenMax;   // byte offset 48
    temp[i+16] = 1.0f;                     // byte offset 64
    temp[i+20] = sigmaSquaredNoiseNormed;  // byte offset 80
    temp[i+24] = lowlimit;                 // byte offset 96
  }
  int size = bw*bh;

  // The loop counter and the three walking pointers are changed by the asm, so
  // they are read-write ("+r") operands. The label is a local numeric label so
  // the statement can be duplicated by the compiler without symbol clashes.
  asm volatile
  (
    "1:\n"
    "movaps 16(%[temp]), %%xmm6\n"     // xmm6 = gridfraction
    "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
    "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
    "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
    "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3

    "mulps %%xmm6, %%xmm4\n"           // gridcorrection for coefficients 0,1
    "mulps %%xmm6, %%xmm5\n"           // gridcorrection for coefficients 2,3
    "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
    "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
    "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
    "movaps %%xmm1, %%xmm5\n"

    "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
    "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
    "movaps %%xmm4, %%xmm7\n"
    "shufps $136, %%xmm5, %%xmm4\n"    // xmm4 = re0^2 re1^2 re2^2 re3^2 [10 00 10 00 = 136]
    "shufps $221, %%xmm5, %%xmm7\n"    // xmm7 = im0^2 im1^2 im2^2 im3^2 [11 01 11 01 = 221]
    "addps %%xmm7, %%xmm4\n"           // re^2+im^2 for coefficients 0..3

    "addps (%[temp]), %%xmm4\n"        // + 1e-15 -> xmm4 = psd for all 4 coefficients

    // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

    "movaps 80(%[temp]), %%xmm5\n"     // xmm5 = sigmaSquaredNoiseNormed
    "movaps %%xmm4, %%xmm6\n"          // xmm6 = psd
    "rcpps %%xmm4, %%xmm7\n"           // xmm7 ~= 1 / psd
    "subps %%xmm5, %%xmm6\n"           // xmm6 = psd - sigmaSquaredNoiseNormed
    "movaps 96(%[temp]), %%xmm5\n"     // xmm5 = lowlimit
    "mulps %%xmm7, %%xmm6\n"           // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd
    "maxps %%xmm5, %%xmm6\n"           // xmm6 = WienerFactor = MAX(xmm6, lowlimit)

    // float sfact = (1 + wsharpen[x]*sqrt( psd*sigmaSquaredSharpenMax/((psd + sigmaSquaredSharpenMin)*(psd + sigmaSquaredSharpenMax)) )) ;

    "movaps 48(%[temp]), %%xmm7\n"     // xmm7 = sigmaSquaredSharpenMax
    "movaps %%xmm4, %%xmm5\n"          // xmm5 = psd
    "addps %%xmm7, %%xmm4\n"           // xmm4 = psd + sigmaSquaredSharpenMax
    "mulps %%xmm5, %%xmm7\n"           // xmm7 = psd*sigmaSquaredSharpenMax
    "addps 32(%[temp]), %%xmm5\n"      // xmm5 = psd + sigmaSquaredSharpenMin

    "mulps %%xmm4, %%xmm5\n"           // xmm5 = (psd + sMin)*(psd + sMax) - xmm4 free
    "rcpps %%xmm5, %%xmm5\n"           // xmm5 ~= 1 / ((psd + sMin)*(psd + sMax))
    "mulps %%xmm5, %%xmm7\n"           // xmm7 = psd*sMax / ((psd + sMin)*(psd + sMax))
    "movaps 64(%[temp]), %%xmm5\n"     // xmm5 = 1.0
    "rsqrtps %%xmm7, %%xmm7\n"         // xmm7 ~= 1 / sqrt(...)
    "rcpps %%xmm7, %%xmm7\n"           // xmm7 ~= sqrt(...)
    "mulps (%[wsh]), %%xmm7\n"         // * wsharpen[x..x+3]
    "addps %%xmm5, %%xmm7\n"           // + 1.0 -> xmm7 = sfact
    "mulps %%xmm6, %%xmm7\n"           // *= WienerFactor
    "movaps %%xmm7, %%xmm5\n"
    "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
    "unpckhps %%xmm5, %%xmm5\n"        // xmm5 = f2 f2 f3 f3

    "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= factor
    "mulps %%xmm5, %%xmm1\n"           // re,im of coefficients 2,3 *= factor
    "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%[out])\n"        // store in place
    "movaps %%xmm1, 16(%[out])\n"
    "sub $4, %[size]\n"                // size -= 4 coefficients
    "add $32, %[out]\n"                // outcur += 4 complex (32 bytes)
    "add $32, %[grid]\n"               // gridsample += 4 complex (32 bytes)
    "add $16, %[wsh]\n"                // wsharpen += 4 floats (16 bytes)
    "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
    "jg 1b\n"
    : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample), [wsh] "+r" (wsharpen)
    : [temp] "r" (temp)
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );

}

648

649

/**
 * Wiener-filters and de-grids one block of complex FFT coefficients in place,
 * without sharpening (plain SSE variant: the horizontal add is built from two
 * shufps and an addps instead of the SSE3 haddps).
 *
 * Per coefficient x the loop computes, using the approximate rcpps
 * instruction instead of an exact division:
 *
 *   corr      = gridfraction * gridsample[x]
 *   (re,im)   = outcur[x] - corr
 *   psd       = re*re + im*im + 1e-15
 *   wiener    = MAX((psd - sigmaSquaredNoiseNormed) / psd, lowlimit)
 *   outcur[x] = (re,im) * wiener + corr
 *
 * Four coefficients are handled per pass and the loop is bottom-tested, so at
 * least four are always processed and bw*bh is effectively rounded up to a
 * multiple of four. Every memory access uses movaps, so block->complex,
 * grid->complex and the temp area must all be 16-byte aligned.
 *
 * @param block Block whose `complex` data is filtered in place; its `temp`
 *              area receives the broadcast constants read by the loop.
 */
void ComplexWienerFilterDeGrid::processNoSharpen_SSE( ComplexBlock* block )
{
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.

  // Broadcast every scalar into four lanes so it can be used as a packed operand.
  for (int i = 0; i < 4; i++) {
    temp[i+0]  = 1e-15f;                   // byte offset 0
    temp[i+4]  = gridfraction;             // byte offset 16
    temp[i+8]  = sigmaSquaredNoiseNormed;  // byte offset 32
    temp[i+12] = lowlimit;                 // byte offset 48
  }
  int size = bw*bh;

  // The loop counter and the two walking pointers are changed by the asm, so
  // they are read-write ("+r") operands. The label is a local numeric label so
  // the statement can be duplicated by the compiler without symbol clashes.
  asm volatile
  (
    "1:\n"
    "movaps 16(%[temp]), %%xmm6\n"     // xmm6 = gridfraction
    "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
    "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
    "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
    "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3

    "mulps %%xmm6, %%xmm4\n"           // gridcorrection for coefficients 0,1
    "mulps %%xmm6, %%xmm5\n"           // gridcorrection for coefficients 2,3
    "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
    "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
    "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
    "movaps %%xmm1, %%xmm5\n"

    "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
    "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
    "movaps %%xmm4, %%xmm7\n"
    "shufps $136, %%xmm5, %%xmm4\n"    // xmm4 = re0^2 re1^2 re2^2 re3^2 [10 00 10 00 = 136]
    "shufps $221, %%xmm5, %%xmm7\n"    // xmm7 = im0^2 im1^2 im2^2 im3^2 [11 01 11 01 = 221]
    "addps %%xmm7, %%xmm4\n"           // re^2+im^2 for coefficients 0..3

    "addps (%[temp]), %%xmm4\n"        // + 1e-15 -> xmm4 = psd for all 4 coefficients

    // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

    "movaps 32(%[temp]), %%xmm5\n"     // xmm5 = sigmaSquaredNoiseNormed
    "movaps %%xmm4, %%xmm6\n"          // xmm6 = psd
    "rcpps %%xmm4, %%xmm7\n"           // xmm7 ~= 1 / psd
    "subps %%xmm5, %%xmm6\n"           // xmm6 = psd - sigmaSquaredNoiseNormed
    "movaps 48(%[temp]), %%xmm5\n"     // xmm5 = lowlimit
    "mulps %%xmm7, %%xmm6\n"           // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd
    "maxps %%xmm6, %%xmm5\n"           // xmm5 = WienerFactor = MAX(xmm6, lowlimit)

    "movaps %%xmm5, %%xmm7\n"
    "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
    "unpckhps %%xmm5, %%xmm5\n"        // xmm5 = f2 f2 f3 f3

    "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= WienerFactor
    "mulps %%xmm5, %%xmm1\n"           // re,im of coefficients 2,3 *= WienerFactor
    "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%[out])\n"        // store in place
    "movaps %%xmm1, 16(%[out])\n"
    "sub $4, %[size]\n"                // size -= 4 coefficients
    "add $32, %[out]\n"                // outcur += 4 complex (32 bytes)
    "add $32, %[grid]\n"               // gridsample += 4 complex (32 bytes)
    "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
    "jg 1b\n"
    : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample)
    : [temp] "r" (temp)
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );

}

721

#if defined (__x86_64__)

722

/**
 * Wiener-filters and de-grids one block of complex FFT coefficients in place,
 * without sharpening (SSE3 variant for x86-64, using xmm8-xmm15 as well).
 *
 * Per coefficient x the loops compute, using the approximate rcpps
 * instruction instead of an exact division:
 *
 *   corr      = gridfraction * gridsample[x]
 *   (re,im)   = outcur[x] - corr
 *   psd       = re*re + im*im + 1e-15
 *   wiener    = MAX((psd - sigmaSquaredNoiseNormed) / psd, lowlimit)
 *   outcur[x] = (re,im) * wiener + corr
 *
 * When bw*bh is a multiple of eight, an unrolled loop handles eight
 * coefficients per pass; otherwise a loop handling four per pass is used.
 * Both loops are bottom-tested, so one pass is always executed, and the
 * four-wide loop effectively rounds bw*bh up to a multiple of four. Every
 * memory access uses movaps, so block->complex, grid->complex and the temp
 * area must all be 16-byte aligned.
 *
 * @param block Block whose `complex` data is filtered in place; its `temp`
 *              area receives the broadcast constants read by the loops.
 */
void ComplexWienerFilterDeGrid::processNoSharpen_SSE3( ComplexBlock* block )
{
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.

  // Broadcast every scalar into four lanes so it can be used as a packed operand.
  for (int i = 0; i < 4; i++) {
    temp[i+0]  = 1e-15f;                   // byte offset 0
    temp[i+4]  = gridfraction;             // byte offset 16
    temp[i+8]  = sigmaSquaredNoiseNormed;  // byte offset 32
    temp[i+12] = lowlimit;                 // byte offset 48
  }
  int size = bw*bh;

  // In both statements the loop counter and the two walking pointers are
  // changed by the asm, so they are read-write ("+r") operands. Labels are
  // local numeric labels so the statements can be duplicated by the compiler
  // without symbol clashes.
  if ((size & 7) == 0) {  //TODO: Bench me to see if I'm faster
    asm volatile
    (
      "movaps (%[temp]), %%xmm14\n"      // xmm14 = 1e-15
      "movaps 16(%[temp]), %%xmm15\n"    // xmm15 = gridfraction
      "1:\n"
      "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
      "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
      "movaps 32(%[out]), %%xmm8\n"      // in r4 i4 r5 i5
      "movaps 48(%[out]), %%xmm9\n"      // in r6 i6 r7 i7
      "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
      "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3
      "movaps 32(%[grid]), %%xmm10\n"    // grid r4 i4 r5 i5
      "movaps 48(%[grid]), %%xmm11\n"    // grid r6 i6 r7 i7

      "mulps %%xmm15, %%xmm4\n"          // gridcorrection for coefficients 0,1
      "mulps %%xmm15, %%xmm5\n"          // gridcorrection for coefficients 2,3
      "mulps %%xmm15, %%xmm10\n"         // gridcorrection for coefficients 4,5
      "mulps %%xmm15, %%xmm11\n"         // gridcorrection for coefficients 6,7
      "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
      "movaps %%xmm5, %%xmm3\n"
      "movaps %%xmm10, %%xmm12\n"        // keep gridcorrection in xmm12/xmm13 for the final add
      "movaps %%xmm11, %%xmm13\n"
      "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
      "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
      "subps %%xmm10, %%xmm8\n"          // re4 im4 re5 im5
      "subps %%xmm11, %%xmm9\n"          // re6 im6 re7 im7
      "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
      "movaps %%xmm1, %%xmm5\n"
      "movaps %%xmm8, %%xmm10\n"         // copies to square; xmm8/xmm9 are retained
      "movaps %%xmm9, %%xmm11\n"

      "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
      "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
      "mulps %%xmm10, %%xmm10\n"         // re4^2 im4^2 re5^2 im5^2
      "mulps %%xmm11, %%xmm11\n"         // re6^2 im6^2 re7^2 im7^2
      "haddps %%xmm5, %%xmm4\n"          // re^2+im^2 for coefficients 0..3 (SSE3) - xmm5 free
      "haddps %%xmm11, %%xmm10\n"        // re^2+im^2 for coefficients 4..7 (SSE3) - xmm11 free

      "addps %%xmm14, %%xmm4\n"          // + 1e-15 -> xmm4 = psd for coefficients 0..3
      "addps %%xmm14, %%xmm10\n"         // + 1e-15 -> xmm10 = psd for coefficients 4..7

      // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

      "movaps %%xmm4, %%xmm5\n"          // xmm5 = psd (0..3)
      "movaps %%xmm10, %%xmm11\n"        // xmm11 = psd (4..7)

      "rcpps %%xmm4, %%xmm4\n"           // xmm4 ~= 1 / psd (0..3)
      "movaps 32(%[temp]), %%xmm7\n"     // xmm7 = sigmaSquaredNoiseNormed
      "rcpps %%xmm10, %%xmm10\n"         // xmm10 ~= 1 / psd (4..7)
      "subps %%xmm7, %%xmm5\n"           // xmm5 = psd - sigmaSquaredNoiseNormed
      "subps %%xmm7, %%xmm11\n"          // xmm11 = psd - sigmaSquaredNoiseNormed
      "movaps 48(%[temp]), %%xmm7\n"     // xmm7 = lowlimit
      "mulps %%xmm4, %%xmm5\n"           // xmm5 = (psd - sigmaSquaredNoiseNormed)/psd
      "mulps %%xmm10, %%xmm11\n"         // xmm11 = (psd - sigmaSquaredNoiseNormed)/psd
      "maxps %%xmm7, %%xmm5\n"           // xmm5 = WienerFactor (0..3) = MAX(xmm5, lowlimit)
      "maxps %%xmm7, %%xmm11\n"          // xmm11 = WienerFactor (4..7) = MAX(xmm11, lowlimit)

      "movaps %%xmm5, %%xmm7\n"
      "movaps %%xmm11, %%xmm10\n"
      "unpckhps %%xmm5, %%xmm5\n"        // xmm5 = f2 f2 f3 f3
      "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
      "unpckhps %%xmm11, %%xmm11\n"      // xmm11 = f6 f6 f7 f7
      "unpcklps %%xmm10, %%xmm10\n"      // xmm10 = f4 f4 f5 f5

      "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= WienerFactor
      "mulps %%xmm5, %%xmm1\n"           // re,im of coefficients 2,3 *= WienerFactor
      "mulps %%xmm10, %%xmm8\n"          // re,im of coefficients 4,5 *= WienerFactor
      "mulps %%xmm11, %%xmm9\n"          // re,im of coefficients 6,7 *= WienerFactor
      "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
      "addps %%xmm3, %%xmm1\n"
      "addps %%xmm12, %%xmm8\n"
      "addps %%xmm13, %%xmm9\n"
      "movaps %%xmm0, (%[out])\n"        // store in place
      "movaps %%xmm1, 16(%[out])\n"
      "movaps %%xmm8, 32(%[out])\n"
      "movaps %%xmm9, 48(%[out])\n"
      "sub $8, %[size]\n"                // size -= 8 coefficients
      "add $64, %[out]\n"                // outcur += 8 complex (64 bytes)
      "add $64, %[grid]\n"               // gridsample += 8 complex (64 bytes)
      "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
      "jg 1b\n"
      : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample)
      : [temp] "r" (temp)
      : "memory", "cc",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7",
        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
    );
  } else {
    asm volatile
    (
      "movaps (%[temp]), %%xmm14\n"      // xmm14 = 1e-15
      "movaps 16(%[temp]), %%xmm15\n"    // xmm15 = gridfraction
      "movaps 32(%[temp]), %%xmm13\n"    // xmm13 = sigmaSquaredNoiseNormed
      "movaps 48(%[temp]), %%xmm12\n"    // xmm12 = lowlimit
      "1:\n"
      "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
      "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
      "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
      "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3

      "mulps %%xmm15, %%xmm4\n"          // gridcorrection for coefficients 0,1
      "mulps %%xmm15, %%xmm5\n"          // gridcorrection for coefficients 2,3
      "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
      "movaps %%xmm5, %%xmm3\n"
      "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
      "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
      "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
      "movaps %%xmm1, %%xmm5\n"

      "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
      "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
      "haddps %%xmm5, %%xmm4\n"          // re^2+im^2 for coefficients 0..3 (SSE3) - xmm5 free

      "addps %%xmm14, %%xmm4\n"          // + 1e-15 -> xmm4 = psd for all 4 coefficients

      // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

      "movaps %%xmm4, %%xmm6\n"          // xmm6 = psd
      "rcpps %%xmm4, %%xmm7\n"           // xmm7 ~= 1 / psd
      "subps %%xmm13, %%xmm6\n"          // xmm6 = psd - sigmaSquaredNoiseNormed
      "mulps %%xmm7, %%xmm6\n"           // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd
      "maxps %%xmm12, %%xmm6\n"          // xmm6 = WienerFactor = MAX(xmm6, lowlimit)

      "movaps %%xmm6, %%xmm7\n"
      "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
      "unpckhps %%xmm6, %%xmm6\n"        // xmm6 = f2 f2 f3 f3

      "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= WienerFactor
      "mulps %%xmm6, %%xmm1\n"           // re,im of coefficients 2,3 *= WienerFactor
      "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
      "addps %%xmm3, %%xmm1\n"
      "movaps %%xmm0, (%[out])\n"        // store in place
      "movaps %%xmm1, 16(%[out])\n"
      "sub $4, %[size]\n"                // size -= 4 coefficients
      "add $32, %[out]\n"                // outcur += 4 complex (32 bytes)
      "add $32, %[grid]\n"               // gridsample += 4 complex (32 bytes)
      "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
      "jg 1b\n"
      : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample)
      : [temp] "r" (temp)
      : "memory", "cc",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
        "xmm12", "xmm13", "xmm14", "xmm15"
    );
  }
}

879

880

#else // 32 bits

881

/**
 * Wiener-filters and de-grids one block of complex FFT coefficients in place,
 * without sharpening (SSE3 variant for 32-bit x86, limited to xmm0-xmm7).
 *
 * Per coefficient x the loop computes, using the approximate rcpps
 * instruction instead of an exact division:
 *
 *   corr      = gridfraction * gridsample[x]
 *   (re,im)   = outcur[x] - corr
 *   psd       = re*re + im*im + 1e-15
 *   wiener    = MAX((psd - sigmaSquaredNoiseNormed) / psd, lowlimit)
 *   outcur[x] = (re,im) * wiener + corr
 *
 * Four coefficients are handled per pass and the loop is bottom-tested, so at
 * least four are always processed and bw*bh is effectively rounded up to a
 * multiple of four. Every memory access uses movaps, so block->complex,
 * grid->complex and the temp area must all be 16-byte aligned.
 *
 * @param block Block whose `complex` data is filtered in place; its `temp`
 *              area receives the broadcast constants read by the loop.
 */
void ComplexWienerFilterDeGrid::processNoSharpen_SSE3( ComplexBlock* block )
{
  fftwf_complex* outcur = block->complex;
  fftwf_complex* gridsample = grid->complex;
  float gridfraction = degrid*outcur[0][0]/gridsample[0][0];
  float* temp = block->temp->data;   // Get aligned temp area, at least 256 bytes, only used by this thread.

  // Broadcast every scalar into four lanes so it can be used as a packed operand.
  for (int i = 0; i < 4; i++) {
    temp[i+0]  = 1e-15f;                   // byte offset 0
    temp[i+4]  = gridfraction;             // byte offset 16
    temp[i+8]  = sigmaSquaredNoiseNormed;  // byte offset 32
    temp[i+12] = lowlimit;                 // byte offset 48
  }
  int size = bw*bh;

  // The loop counter and the two walking pointers are changed by the asm, so
  // they are read-write ("+r") operands. The label is a local numeric label so
  // the statement can be duplicated by the compiler without symbol clashes.
  asm volatile
  (
    "1:\n"
    "movaps 16(%[temp]), %%xmm6\n"     // xmm6 = gridfraction
    "movaps (%[out]), %%xmm0\n"        // in r0 i0 r1 i1
    "movaps 16(%[out]), %%xmm1\n"      // in r2 i2 r3 i3
    "movaps (%[grid]), %%xmm4\n"       // grid r0 i0 r1 i1
    "movaps 16(%[grid]), %%xmm5\n"     // grid r2 i2 r3 i3

    "mulps %%xmm6, %%xmm4\n"           // gridcorrection for coefficients 0,1
    "mulps %%xmm6, %%xmm5\n"           // gridcorrection for coefficients 2,3
    "movaps %%xmm4, %%xmm2\n"          // keep gridcorrection in xmm2/xmm3 for the final add
    "movaps %%xmm5, %%xmm3\n"
    "subps %%xmm4, %%xmm0\n"           // re0 im0 re1 im1 (input - gridcorrection)
    "subps %%xmm5, %%xmm1\n"           // re2 im2 re3 im3
    "movaps %%xmm0, %%xmm4\n"          // copies to square; xmm0/xmm1 are retained
    "movaps %%xmm1, %%xmm5\n"

    "mulps %%xmm4, %%xmm4\n"           // re0^2 im0^2 re1^2 im1^2
    "mulps %%xmm5, %%xmm5\n"           // re2^2 im2^2 re3^2 im3^2
    "haddps %%xmm5, %%xmm4\n"          // re^2+im^2 for coefficients 0..3 (SSE3) - xmm5 free

    "addps (%[temp]), %%xmm4\n"        // + 1e-15 -> xmm4 = psd for all 4 coefficients

    // WienerFactor = MAX((psd - sigmaSquaredNoiseNormed)/psd, lowlimit); // limited Wiener filter

    "movaps 32(%[temp]), %%xmm5\n"     // xmm5 = sigmaSquaredNoiseNormed
    "movaps %%xmm4, %%xmm6\n"          // xmm6 = psd
    "rcpps %%xmm4, %%xmm7\n"           // xmm7 ~= 1 / psd
    "subps %%xmm5, %%xmm6\n"           // xmm6 = psd - sigmaSquaredNoiseNormed
    "movaps 48(%[temp]), %%xmm5\n"     // xmm5 = lowlimit
    "mulps %%xmm7, %%xmm6\n"           // xmm6 = (psd - sigmaSquaredNoiseNormed)/psd
    "maxps %%xmm6, %%xmm5\n"           // xmm5 = WienerFactor = MAX(xmm6, lowlimit)

    "movaps %%xmm5, %%xmm7\n"
    "unpcklps %%xmm7, %%xmm7\n"        // xmm7 = f0 f0 f1 f1
    "unpckhps %%xmm5, %%xmm5\n"        // xmm5 = f2 f2 f3 f3

    "mulps %%xmm7, %%xmm0\n"           // re,im of coefficients 0,1 *= WienerFactor
    "mulps %%xmm5, %%xmm1\n"           // re,im of coefficients 2,3 *= WienerFactor
    "addps %%xmm2, %%xmm0\n"           // add gridcorrection back
    "addps %%xmm3, %%xmm1\n"
    "movaps %%xmm0, (%[out])\n"        // store in place
    "movaps %%xmm1, 16(%[out])\n"
    "sub $4, %[size]\n"                // size -= 4 coefficients
    "add $32, %[out]\n"                // outcur += 4 complex (32 bytes)
    "add $32, %[grid]\n"               // gridsample += 4 complex (32 bytes)
    "cmp $0, %[size]\n"                // the adds above overwrote the flags set by sub
    "jg 1b\n"
    : [size] "+r" (size), [out] "+r" (outcur), [grid] "+r" (gridsample)
    : [temp] "r" (temp)
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}

949

#endif

950

951

#endif // defined (__i386__) || defined (__x86_64__)

952

953

}}// namespace RawStudio::FFTFilter

Older »