~ubuntu-branches/ubuntu/maverick/ntop/maverick

« back to all changes in this revision

Viewing changes to gdchart0.94c/gd-1.8.3/libpng-1.0.8/pnggccrd.c

Committer: Bazaar Package Importer
Author(s): Dennis Schoen
Date: 2002-04-12 11:38:47 UTC
Revision ID: james.westby@ubuntu.com-20020412113847-4k4yydw0pzybc6g8

Tags: upstream-2.0.0

Import upstream version 2.0.0

files added:

gdchart0.94c

gdchart0.94c/Makefile.in

gdchart0.94c/Makefile.mingw

gdchart0.94c/acconfig.h

gdchart0.94c/acinclude.m4

gdchart0.94c/aclocal.m4

gdchart0.94c/buildAll.sh

gdchart0.94c/config.guess

gdchart0.94c/config.sub

gdchart0.94c/configure

gdchart0.94c/configure.in

gdchart0.94c/gd-1.8.3

gdchart0.94c/gd-1.8.3/Makefile

gdchart0.94c/gd-1.8.3/Makefile.mingw

gdchart0.94c/gd-1.8.3/Makefile.nt

gdchart0.94c/gd-1.8.3/bdftogd

gdchart0.94c/gd-1.8.3/bwtest.png

gdchart0.94c/gd-1.8.3/demoin.png

gdchart0.94c/gd-1.8.3/demoout.png

gdchart0.94c/gd-1.8.3/gd.c

gdchart0.94c/gd-1.8.3/gd.h

gdchart0.94c/gd-1.8.3/gd2copypal.c

gdchart0.94c/gd-1.8.3/gd2time.c

gdchart0.94c/gd-1.8.3/gd2topng.c

gdchart0.94c/gd-1.8.3/gd_gd.c

gdchart0.94c/gd-1.8.3/gd_gd2.c

gdchart0.94c/gd-1.8.3/gd_io.c

gdchart0.94c/gd-1.8.3/gd_io.h

gdchart0.94c/gd-1.8.3/gd_io_dp.c

gdchart0.94c/gd-1.8.3/gd_io_file.c

gdchart0.94c/gd-1.8.3/gd_io_ss.c

gdchart0.94c/gd-1.8.3/gd_jpeg.c

gdchart0.94c/gd-1.8.3/gd_png.c

gdchart0.94c/gd-1.8.3/gd_ss.c

gdchart0.94c/gd-1.8.3/gd_wbmp.c

gdchart0.94c/gd-1.8.3/gdcache.c

gdchart0.94c/gd-1.8.3/gdcache.h

gdchart0.94c/gd-1.8.3/gddemo.c

gdchart0.94c/gd-1.8.3/gdfontg.c

gdchart0.94c/gd-1.8.3/gdfontg.h

gdchart0.94c/gd-1.8.3/gdfontl.c

gdchart0.94c/gd-1.8.3/gdfontl.h

gdchart0.94c/gd-1.8.3/gdfontmb.c

gdchart0.94c/gd-1.8.3/gdfontmb.h

gdchart0.94c/gd-1.8.3/gdfonts.c

gdchart0.94c/gd-1.8.3/gdfonts.h

gdchart0.94c/gd-1.8.3/gdfontt.c

gdchart0.94c/gd-1.8.3/gdfontt.h

gdchart0.94c/gd-1.8.3/gdkanji.c

gdchart0.94c/gd-1.8.3/gdparttopng.c

gdchart0.94c/gd-1.8.3/gdtables.c

gdchart0.94c/gd-1.8.3/gdtest.c

gdchart0.94c/gd-1.8.3/gdtestttf.c

gdchart0.94c/gd-1.8.3/gdtopng.c

gdchart0.94c/gd-1.8.3/gdttf.c

gdchart0.94c/gd-1.8.3/gdxpm.c

gdchart0.94c/gd-1.8.3/index.html

gdchart0.94c/gd-1.8.3/install-item

gdchart0.94c/gd-1.8.3/jisx0208.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8

gdchart0.94c/gd-1.8.3/libpng-1.0.8/ANNOUNCE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/CHANGES

gdchart0.94c/gd-1.8.3/libpng-1.0.8/INSTALL

gdchart0.94c/gd-1.8.3/libpng-1.0.8/KNOWNBUG

gdchart0.94c/gd-1.8.3/libpng-1.0.8/LICENSE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/README

gdchart0.94c/gd-1.8.3/libpng-1.0.8/TODO

gdchart0.94c/gd-1.8.3/libpng-1.0.8/Y2KINFO

gdchart0.94c/gd-1.8.3/libpng-1.0.8/configure

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/LICENSE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/Makefile.unx

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/Makefile.w32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/README

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/README.w32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/makevms.com

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/readpng.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/readpng.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/readpng2.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/readpng2.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/rpng-win.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/rpng-x.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/rpng2-win.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/rpng2-x.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/toucan.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/wpng.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/writepng.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/gregbook/writepng.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/msvctest

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/msvctest/README.txt

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/msvctest/msvctest.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/msvctest/msvctest.dsw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/README

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/makefile.std

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/makefile.tc3

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/makevms.com

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/png2pnm.bat

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/png2pnm.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/png2pnm.sh

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/pngminus.bat

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/pngminus.sh

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/pnm2png.bat

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/pnm2png.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngminus/pnm2png.sh

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/README

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn0g01.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn0g02.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn0g04.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn0g08.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn0g16.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn2c08.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn2c16.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn3p01.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn3p02.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn3p04.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn3p08.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn4a08.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn4a16.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn6a08.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/pngsuite/basn6a16.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/PngFile.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/PngFile.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/README.txt

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.dsw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.ico

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/VisualPng.rc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/cexcept.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/contrib/visupng/resource.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/example.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/libpng.3

gdchart0.94c/gd-1.8.3/libpng-1.0.8/libpng.txt

gdchart0.94c/gd-1.8.3/libpng-1.0.8/libpngpf.3

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/README.txt

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/libpng.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/libpng.dsw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/png.rc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/png32ms.def

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/zlib.def

gdchart0.94c/gd-1.8.3/libpng-1.0.8/msvc/zlib.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/png.5

gdchart0.94c/gd-1.8.3/libpng-1.0.8/png.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/png.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngasmrd.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngbar.jpg

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngbar.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngconf.h

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngerror.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pnggccrd.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngget.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngmem.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngnow.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngpread.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngread.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngrio.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngrtran.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngrutil.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngset.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngtest.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngtest.png

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngtrans.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngvcrd.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngwio.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngwrite.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngwtran.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pngwutil.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/SCOPTIONS.ppc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/descrip.mms

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/libpng.icc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.acorn

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.amiga

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.atari

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.bc32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.bd32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.beos

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.bor

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.cygwin

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.darwin

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.dec

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.dj2

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.gcc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.gcmmx

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.hpux

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.ibmc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.intel

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.knr

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.linux

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.mingw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.mips

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.msc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.os2

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.sco

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.sggcc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.sgi

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.solaris

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.std

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.sunos

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.tc3

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.vcawin32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.vcwin32

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makefile.watcom

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/makevms.com

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/pngdef.pas

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/pngos2.def

gdchart0.94c/gd-1.8.3/libpng-1.0.8/scripts/smakefile.ppc

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/READMEE.WCE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/READMEJ.WCE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/READMEZE.WCE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/READMEZJ.WCE

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/lpngce.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/lpngce.dsw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/png32ce.def

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/pngtest.dsp

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/pngtest.dsw

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/winmain.c

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/zlib.diff

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/zlibce.def

gdchart0.94c/gd-1.8.3/libpng-1.0.8/wince/zlibce.dsp

gdchart0.94c/gd-1.8.3/mathmake.c

gdchart0.94c/gd-1.8.3/pngtogd.c

gdchart0.94c/gd-1.8.3/pngtogd2.c

gdchart0.94c/gd-1.8.3/readme.txt

gdchart0.94c/gd-1.8.3/test

gdchart0.94c/gd-1.8.3/test/gdtest.gd2

gdchart0.94c/gd-1.8.3/test/gdtest.png

gdchart0.94c/gd-1.8.3/test/gdtest_200_300_150_100.png

gdchart0.94c/gd-1.8.3/test/gdtest_merge.png

gdchart0.94c/gd-1.8.3/test/gdtest_wbmp_to_png.png

gdchart0.94c/gd-1.8.3/wbmp.c

gdchart0.94c/gd-1.8.3/wbmp.h

gdchart0.94c/gd-1.8.3/webpng.c

gdchart0.94c/gdc.c

gdchart0.94c/gdc.h

gdchart0.94c/gdc_pie.c

gdchart0.94c/gdchart.c

gdchart0.94c/gdchart.h

gdchart0.94c/gdcpie.h

gdchart0.94c/gifencode.c

gdchart0.94c/gifencode.h

gdchart0.94c/install-sh

gdchart0.94c/libtool

gdchart0.94c/ltconfig

gdchart0.94c/ltmain.sh

gdchart0.94c/price_conv.c

gdchart0.94c/zlib-1.1.3

gdchart0.94c/zlib-1.1.3/ChangeLog

gdchart0.94c/zlib-1.1.3/FAQ

gdchart0.94c/zlib-1.1.3/INDEX

gdchart0.94c/zlib-1.1.3/Make_vms.com

gdchart0.94c/zlib-1.1.3/Makefile

gdchart0.94c/zlib-1.1.3/Makefile.in

gdchart0.94c/zlib-1.1.3/Makefile.mingw

gdchart0.94c/zlib-1.1.3/Makefile.riscos

gdchart0.94c/zlib-1.1.3/README

gdchart0.94c/zlib-1.1.3/adler32.c

gdchart0.94c/zlib-1.1.3/algorithm.txt

gdchart0.94c/zlib-1.1.3/amiga

gdchart0.94c/zlib-1.1.3/amiga/Makefile.pup

gdchart0.94c/zlib-1.1.3/amiga/Makefile.sas

gdchart0.94c/zlib-1.1.3/compress.c

gdchart0.94c/zlib-1.1.3/configure

gdchart0.94c/zlib-1.1.3/contrib

gdchart0.94c/zlib-1.1.3/contrib/README.contrib

gdchart0.94c/zlib-1.1.3/contrib/asm386

gdchart0.94c/zlib-1.1.3/contrib/asm386/gvmat32.asm

gdchart0.94c/zlib-1.1.3/contrib/asm386/gvmat32c.c

gdchart0.94c/zlib-1.1.3/contrib/asm386/mkgvmt32.bat

gdchart0.94c/zlib-1.1.3/contrib/asm386/zlibvc.def

gdchart0.94c/zlib-1.1.3/contrib/asm386/zlibvc.dsp

gdchart0.94c/zlib-1.1.3/contrib/asm386/zlibvc.dsw

gdchart0.94c/zlib-1.1.3/contrib/asm586

gdchart0.94c/zlib-1.1.3/contrib/asm586/README.586

gdchart0.94c/zlib-1.1.3/contrib/asm586/match.S

gdchart0.94c/zlib-1.1.3/contrib/asm686

gdchart0.94c/zlib-1.1.3/contrib/asm686/README.686

gdchart0.94c/zlib-1.1.3/contrib/asm686/match.S

gdchart0.94c/zlib-1.1.3/contrib/delphi

gdchart0.94c/zlib-1.1.3/contrib/delphi/zlib.mak

gdchart0.94c/zlib-1.1.3/contrib/delphi/zlibdef.pas

gdchart0.94c/zlib-1.1.3/contrib/delphi2

gdchart0.94c/zlib-1.1.3/contrib/delphi2/d_zlib.bpr

gdchart0.94c/zlib-1.1.3/contrib/delphi2/d_zlib.cpp

gdchart0.94c/zlib-1.1.3/contrib/delphi2/readme.txt

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib.bpg

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib.bpr

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib.cpp

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib.pas

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib32.bpr

gdchart0.94c/zlib-1.1.3/contrib/delphi2/zlib32.cpp

gdchart0.94c/zlib-1.1.3/contrib/iostream

gdchart0.94c/zlib-1.1.3/contrib/iostream/test.cpp

gdchart0.94c/zlib-1.1.3/contrib/iostream/zfstream.cpp

gdchart0.94c/zlib-1.1.3/contrib/iostream/zfstream.h

gdchart0.94c/zlib-1.1.3/contrib/iostream2

gdchart0.94c/zlib-1.1.3/contrib/iostream2/zstream.h

gdchart0.94c/zlib-1.1.3/contrib/iostream2/zstream_test.cpp

gdchart0.94c/zlib-1.1.3/contrib/minizip

gdchart0.94c/zlib-1.1.3/contrib/minizip/ChangeLogUnzip

gdchart0.94c/zlib-1.1.3/contrib/minizip/Makefile

gdchart0.94c/zlib-1.1.3/contrib/minizip/miniunz.c

gdchart0.94c/zlib-1.1.3/contrib/minizip/minizip.c

gdchart0.94c/zlib-1.1.3/contrib/minizip/readme.txt

gdchart0.94c/zlib-1.1.3/contrib/minizip/unzip.c

gdchart0.94c/zlib-1.1.3/contrib/minizip/unzip.def

gdchart0.94c/zlib-1.1.3/contrib/minizip/unzip.h

gdchart0.94c/zlib-1.1.3/contrib/minizip/zip.c

gdchart0.94c/zlib-1.1.3/contrib/minizip/zip.def

gdchart0.94c/zlib-1.1.3/contrib/minizip/zip.h

gdchart0.94c/zlib-1.1.3/contrib/minizip/zlibvc.def

gdchart0.94c/zlib-1.1.3/contrib/minizip/zlibvc.dsp

gdchart0.94c/zlib-1.1.3/contrib/minizip/zlibvc.dsw

gdchart0.94c/zlib-1.1.3/contrib/untgz

gdchart0.94c/zlib-1.1.3/contrib/untgz/Makefile

gdchart0.94c/zlib-1.1.3/contrib/untgz/makefile.w32

gdchart0.94c/zlib-1.1.3/contrib/untgz/untgz.c

gdchart0.94c/zlib-1.1.3/contrib/visual-basic.txt

gdchart0.94c/zlib-1.1.3/crc32.c

gdchart0.94c/zlib-1.1.3/deflate.c

gdchart0.94c/zlib-1.1.3/deflate.h

gdchart0.94c/zlib-1.1.3/descrip.mms

gdchart0.94c/zlib-1.1.3/example.c

gdchart0.94c/zlib-1.1.3/gzio.c

gdchart0.94c/zlib-1.1.3/infblock.c

gdchart0.94c/zlib-1.1.3/infblock.h

gdchart0.94c/zlib-1.1.3/infcodes.c

gdchart0.94c/zlib-1.1.3/infcodes.h

gdchart0.94c/zlib-1.1.3/inffast.c

gdchart0.94c/zlib-1.1.3/inffast.h

gdchart0.94c/zlib-1.1.3/inffixed.h

gdchart0.94c/zlib-1.1.3/inflate.c

gdchart0.94c/zlib-1.1.3/inftrees.c

gdchart0.94c/zlib-1.1.3/inftrees.h

gdchart0.94c/zlib-1.1.3/infutil.c

gdchart0.94c/zlib-1.1.3/infutil.h

gdchart0.94c/zlib-1.1.3/maketree.c

gdchart0.94c/zlib-1.1.3/minigzip.c

gdchart0.94c/zlib-1.1.3/msdos

gdchart0.94c/zlib-1.1.3/msdos/Makefile.b32

gdchart0.94c/zlib-1.1.3/msdos/Makefile.bor

gdchart0.94c/zlib-1.1.3/msdos/Makefile.dj2

gdchart0.94c/zlib-1.1.3/msdos/Makefile.emx

gdchart0.94c/zlib-1.1.3/msdos/Makefile.msc

gdchart0.94c/zlib-1.1.3/msdos/Makefile.tc

gdchart0.94c/zlib-1.1.3/msdos/Makefile.w32

gdchart0.94c/zlib-1.1.3/msdos/Makefile.wat

gdchart0.94c/zlib-1.1.3/msdos/zlib.def

gdchart0.94c/zlib-1.1.3/msdos/zlib.rc

gdchart0.94c/zlib-1.1.3/nt

gdchart0.94c/zlib-1.1.3/nt/Makefile.emx

gdchart0.94c/zlib-1.1.3/nt/Makefile.gcc

gdchart0.94c/zlib-1.1.3/nt/Makefile.nt

gdchart0.94c/zlib-1.1.3/nt/zlib.dnt

gdchart0.94c/zlib-1.1.3/os2

gdchart0.94c/zlib-1.1.3/os2/Makefile.os2

gdchart0.94c/zlib-1.1.3/os2/zlib.def

gdchart0.94c/zlib-1.1.3/trees.c

gdchart0.94c/zlib-1.1.3/trees.h

gdchart0.94c/zlib-1.1.3/uncompr.c

gdchart0.94c/zlib-1.1.3/zconf.h

gdchart0.94c/zlib-1.1.3/zlib.3

gdchart0.94c/zlib-1.1.3/zlib.h

gdchart0.94c/zlib-1.1.3/zutil.c

gdchart0.94c/zlib-1.1.3/zutil.h

ntop

ntop/AUTHORS

ntop/CONTENTS

ntop/COPYING

ntop/ChangeLog

ntop/INSTALL

ntop/MANIFESTO

ntop/Makefile.am

ntop/Makefile.in

ntop/Makefile.mingw

ntop/NEWS

ntop/PORTING

ntop/README

ntop/SUPPORT_NTOP.txt

ntop/THANKS

ntop/acconfig.h

ntop/acinclude.m4

ntop/acinclude.m4.in

ntop/aclocal.m4

ntop/address.c

ntop/admin.c

ntop/autogen.sh

ntop/config.guess

ntop/config.h.in

ntop/config.sub

ntop/configure

ntop/configure.in

ntop/dataFormat.c

ntop/database

ntop/database/FILES

ntop/database/Makefile

ntop/database/MsAccessDB

ntop/database/MsAccessDB/ntop.mdb

ntop/database/MySQLServer.java

ntop/database/ODBCServer.java

ntop/database/mySQLdefs.txt

ntop/database/mySQLserver.pl

ntop/database/pg_SQLdefs.txt

ntop/database/startClient

ntop/docs

ntop/docs/BUILD-MinGW.txt

ntop/docs/BUILD-NTOP.txt

ntop/docs/FAQ

ntop/docs/FILES

ntop/docs/HACKING

ntop/docs/INSTALL

ntop/docs/KNOWN_BUGS

ntop/docs/README

ntop/docs/README.SSL

ntop/docs/README.Suse

ntop/docs/RMON

ntop/docs/RMON/README.RMON

ntop/docs/THREADS-FAQ

ntop/docs/TODO

ntop/docs/database

ntop/docs/database/README

ntop/docs/database/README.mySQL

ntop/emitter.c

ntop/event.c

ntop/globals-core.c

ntop/globals-core.h

ntop/globals-report.c

ntop/globals-report.h

ntop/graph.c

ntop/hash.c

ntop/html

ntop/html/About.html

ntop/html/Admin.html

ntop/html/Copyright.html

ntop/html/Corner.gif

ntop/html/DataRcvd.html

ntop/html/DataSent.html

ntop/html/IPProtocols.html

ntop/html/IPTraffic.html

ntop/html/Risk_high.gif

ntop/html/Risk_low.gif

ntop/html/Risk_medium.gif

ntop/html/Risk_unknown.gif

ntop/html/Stats.html

ntop/html/arrow_down.gif

ntop/html/arrow_up.gif

ntop/html/bar.gif

ntop/html/bg_line.gif

ntop/html/black.gif

ntop/html/bottom_corner.gif

ntop/html/bulb.gif

ntop/html/card.gif

ntop/html/child.gif

ntop/html/clear.gif

ntop/html/collapsed.gif

ntop/html/copyright.gif

ntop/html/deleteURL.gif

ntop/html/deleteUser.gif

ntop/html/dns.gif

ntop/html/error.gif

ntop/html/expanded.gif

ntop/html/gauge.jpg

ntop/html/gray_bg.gif

ntop/html/help.html

ntop/html/index.html

ntop/html/index_inner.html

ntop/html/index_left.html

ntop/html/index_top.html

ntop/html/info.gif

ntop/html/mail.gif

ntop/html/major.gif

ntop/html/menubar.png

ntop/html/menuline.png

ntop/html/minor.gif

ntop/html/modifyUser.gif

ntop/html/multihomed.gif

ntop/html/ntop.gif

ntop/html/ntop.html

ntop/html/printer.gif

ntop/html/question.gif

ntop/html/router.gif

ntop/html/statsicons

ntop/html/statsicons/flags

ntop/html/statsicons/flags/ad.gif

ntop/html/statsicons/flags/ae.gif

ntop/html/statsicons/flags/af.gif

ntop/html/statsicons/flags/ag.gif

ntop/html/statsicons/flags/ai.gif

ntop/html/statsicons/flags/al.gif

ntop/html/statsicons/flags/am.gif

ntop/html/statsicons/flags/an.gif

ntop/html/statsicons/flags/ao.gif

ntop/html/statsicons/flags/aq.gif

ntop/html/statsicons/flags/ar.gif

ntop/html/statsicons/flags/as.gif

ntop/html/statsicons/flags/at.gif

ntop/html/statsicons/flags/au.gif

ntop/html/statsicons/flags/aw.gif

ntop/html/statsicons/flags/az.gif

ntop/html/statsicons/flags/ba.gif

ntop/html/statsicons/flags/bb.gif

ntop/html/statsicons/flags/bd.gif

ntop/html/statsicons/flags/be.gif

ntop/html/statsicons/flags/bf.gif

ntop/html/statsicons/flags/bg.gif

ntop/html/statsicons/flags/bh.gif

ntop/html/statsicons/flags/bi.gif

ntop/html/statsicons/flags/bj.gif

ntop/html/statsicons/flags/bm.gif

ntop/html/statsicons/flags/bn.gif

ntop/html/statsicons/flags/bo.gif

ntop/html/statsicons/flags/br.gif

ntop/html/statsicons/flags/bs.gif

ntop/html/statsicons/flags/bt.gif

ntop/html/statsicons/flags/bv.gif

ntop/html/statsicons/flags/bw.gif

ntop/html/statsicons/flags/by.gif

ntop/html/statsicons/flags/bz.gif

ntop/html/statsicons/flags/ca.gif

ntop/html/statsicons/flags/cc.gif

ntop/html/statsicons/flags/cd.gif

ntop/html/statsicons/flags/cf.gif

ntop/html/statsicons/flags/cg.gif

ntop/html/statsicons/flags/ch.gif

ntop/html/statsicons/flags/ci.gif

ntop/html/statsicons/flags/ck.gif

ntop/html/statsicons/flags/cl.gif

ntop/html/statsicons/flags/cm.gif

ntop/html/statsicons/flags/cn.gif

ntop/html/statsicons/flags/co.gif

ntop/html/statsicons/flags/com.gif

ntop/html/statsicons/flags/cr.gif

ntop/html/statsicons/flags/cu.gif

ntop/html/statsicons/flags/cv.gif

ntop/html/statsicons/flags/cx.gif

ntop/html/statsicons/flags/cy.gif

ntop/html/statsicons/flags/cz.gif

ntop/html/statsicons/flags/de.gif

ntop/html/statsicons/flags/dj.gif

ntop/html/statsicons/flags/dk.gif

ntop/html/statsicons/flags/dm.gif

ntop/html/statsicons/flags/do.gif

ntop/html/statsicons/flags/dz.gif

ntop/html/statsicons/flags/ec.gif

ntop/html/statsicons/flags/edu.gif

ntop/html/statsicons/flags/ee.gif

ntop/html/statsicons/flags/eg.gif

ntop/html/statsicons/flags/eh.gif

ntop/html/statsicons/flags/es.gif

ntop/html/statsicons/flags/et.gif

ntop/html/statsicons/flags/fi.gif

ntop/html/statsicons/flags/fj.gif

ntop/html/statsicons/flags/fk.gif

ntop/html/statsicons/flags/fm.gif

ntop/html/statsicons/flags/fo.gif

ntop/html/statsicons/flags/fr.gif

ntop/html/statsicons/flags/fx.gif

ntop/html/statsicons/flags/ga.gif

ntop/html/statsicons/flags/gb.gif

ntop/html/statsicons/flags/gd.gif

ntop/html/statsicons/flags/ge.gif

ntop/html/statsicons/flags/gf.gif

ntop/html/statsicons/flags/gh.gif

ntop/html/statsicons/flags/gi.gif

ntop/html/statsicons/flags/gl.gif

ntop/html/statsicons/flags/gm.gif

ntop/html/statsicons/flags/gn.gif

ntop/html/statsicons/flags/gov.gif

ntop/html/statsicons/flags/gp.gif

ntop/html/statsicons/flags/gq.gif

ntop/html/statsicons/flags/gr.gif

ntop/html/statsicons/flags/gt.gif

ntop/html/statsicons/flags/gu.gif

ntop/html/statsicons/flags/gw.gif

ntop/html/statsicons/flags/gy.gif

ntop/html/statsicons/flags/hk.gif

ntop/html/statsicons/flags/hm.gif

ntop/html/statsicons/flags/hn.gif

ntop/html/statsicons/flags/hr.gif

ntop/html/statsicons/flags/ht.gif

ntop/html/statsicons/flags/hu.gif

ntop/html/statsicons/flags/id.gif

ntop/html/statsicons/flags/ie.gif

ntop/html/statsicons/flags/il.gif

ntop/html/statsicons/flags/in.gif

ntop/html/statsicons/flags/int.gif

ntop/html/statsicons/flags/io.gif

ntop/html/statsicons/flags/iq.gif

ntop/html/statsicons/flags/ir.gif

ntop/html/statsicons/flags/is.gif

ntop/html/statsicons/flags/it.gif

ntop/html/statsicons/flags/jm.gif

ntop/html/statsicons/flags/jo.gif

ntop/html/statsicons/flags/jp.gif

ntop/html/statsicons/flags/ke.gif

ntop/html/statsicons/flags/kg.gif

ntop/html/statsicons/flags/kh.gif

ntop/html/statsicons/flags/ki.gif

ntop/html/statsicons/flags/km.gif

ntop/html/statsicons/flags/kn.gif

ntop/html/statsicons/flags/kp.gif

ntop/html/statsicons/flags/kr.gif

ntop/html/statsicons/flags/kw.gif

ntop/html/statsicons/flags/ky.gif

ntop/html/statsicons/flags/kz.gif

ntop/html/statsicons/flags/la.gif

ntop/html/statsicons/flags/lb.gif

ntop/html/statsicons/flags/lc.gif

ntop/html/statsicons/flags/li.gif

ntop/html/statsicons/flags/lk.gif

ntop/html/statsicons/flags/localhost.gif

ntop/html/statsicons/flags/lr.gif

ntop/html/statsicons/flags/ls.gif

ntop/html/statsicons/flags/lt.gif

ntop/html/statsicons/flags/lu.gif

ntop/html/statsicons/flags/lv.gif

ntop/html/statsicons/flags/ly.gif

ntop/html/statsicons/flags/ma.gif

ntop/html/statsicons/flags/mc.gif

ntop/html/statsicons/flags/md.gif

ntop/html/statsicons/flags/mil.gif

ntop/html/statsicons/flags/mk.gif

ntop/html/statsicons/flags/mq.gif

ntop/html/statsicons/flags/mr.gif

ntop/html/statsicons/flags/mt.gif

ntop/html/statsicons/flags/mu.gif

ntop/html/statsicons/flags/mx.gif

ntop/html/statsicons/flags/my.gif

ntop/html/statsicons/flags/na.gif

ntop/html/statsicons/flags/ne.gif

ntop/html/statsicons/flags/net.gif

ntop/html/statsicons/flags/ng.gif

ntop/html/statsicons/flags/ni.gif

ntop/html/statsicons/flags/nl.gif

ntop/html/statsicons/flags/no.gif

ntop/html/statsicons/flags/np.gif

ntop/html/statsicons/flags/nu.gif

ntop/html/statsicons/flags/nz.gif

ntop/html/statsicons/flags/om.gif

ntop/html/statsicons/flags/org.gif

ntop/html/statsicons/flags/pa.gif

ntop/html/statsicons/flags/pe.gif

ntop/html/statsicons/flags/pf.gif

ntop/html/statsicons/flags/ph.gif

ntop/html/statsicons/flags/pk.gif

ntop/html/statsicons/flags/pl.gif

ntop/html/statsicons/flags/pr.gif

ntop/html/statsicons/flags/pt.gif

ntop/html/statsicons/flags/py.gif

ntop/html/statsicons/flags/qa.gif

ntop/html/statsicons/flags/ro.gif

ntop/html/statsicons/flags/ru.gif

ntop/html/statsicons/flags/rw.gif

ntop/html/statsicons/flags/sa.gif

ntop/html/statsicons/flags/sb.gif

ntop/html/statsicons/flags/se.gif

ntop/html/statsicons/flags/sg.gif

ntop/html/statsicons/flags/si.gif

ntop/html/statsicons/flags/sk.gif

ntop/html/statsicons/flags/su.gif

ntop/html/statsicons/flags/sy.gif

ntop/html/statsicons/flags/sz.gif

ntop/html/statsicons/flags/tc.gif

ntop/html/statsicons/flags/td.gif

ntop/html/statsicons/flags/tf.gif

ntop/html/statsicons/flags/tg.gif

ntop/html/statsicons/flags/th.gif

ntop/html/statsicons/flags/tk.gif

ntop/html/statsicons/flags/tm.gif

ntop/html/statsicons/flags/tn.gif

ntop/html/statsicons/flags/to.gif

ntop/html/statsicons/flags/tr.gif

ntop/html/statsicons/flags/tt.gif

ntop/html/statsicons/flags/tw.gif

ntop/html/statsicons/flags/tz.gif

ntop/html/statsicons/flags/ua.gif

ntop/html/statsicons/flags/uk.gif

ntop/html/statsicons/flags/us.gif

ntop/html/statsicons/flags/uy.gif

ntop/html/statsicons/flags/uz.gif

ntop/html/statsicons/flags/va.gif

ntop/html/statsicons/flags/vc.gif

ntop/html/statsicons/flags/ve.gif

ntop/html/statsicons/flags/vg.gif

ntop/html/statsicons/flags/vi.gif

ntop/html/statsicons/flags/vn.gif

ntop/html/statsicons/flags/wf.gif

ntop/html/statsicons/flags/ws.gif

ntop/html/statsicons/flags/ye.gif

ntop/html/statsicons/flags/yu.gif

ntop/html/statsicons/flags/za.gif

ntop/html/statsicons/flags/ze.gif

ntop/html/statsicons/flags/zm.gif

ntop/html/statsicons/flags/zw.gif

ntop/html/statsicons/os

ntop/html/statsicons/os/aix.gif

ntop/html/statsicons/os/amiga.gif

ntop/html/statsicons/os/be.gif

ntop/html/statsicons/os/bsd.gif

ntop/html/statsicons/os/hp.gif

ntop/html/statsicons/os/irix.gif

ntop/html/statsicons/os/linux.gif

ntop/html/statsicons/os/mac.gif

ntop/html/statsicons/os/novell.gif

ntop/html/statsicons/os/os2.gif

ntop/html/statsicons/os/sun.gif

ntop/html/statsicons/os/windows.gif

ntop/html/style.css

ntop/html/top.html

ntop/html/url.gif

ntop/html/user.gif

ntop/html/vgauge.jpg

ntop/html/warning.gif

ntop/html/wheel.gif

ntop/html/white_bg.gif

ntop/http.c

ntop/initialize.c

ntop/install-sh

ntop/installer

ntop/installer/MacOSX

ntop/installer/MacOSX/License.txt

ntop/installer/MacOSX/ReadMe.txt

ntop/installer/MacOSX/ntop.package.pmsp

ntop/intop

ntop/intop/FILES

ntop/intop/Makefile.am

ntop/intop/Makefile.in

ntop/intop/TODO

ntop/intop/argv.c

ntop/intop/arp.c

ntop/intop/bind.c

ntop/intop/close.c

ntop/intop/command.c

ntop/intop/dump.c

ntop/intop/filter.c

ntop/intop/find.c

ntop/intop/getopt.c

ntop/intop/getopt.h

ntop/intop/gnuline.c

ntop/intop/hash.c

ntop/intop/hosts.c

ntop/intop/info.c

ntop/intop/init.c

ntop/intop/intop.1

ntop/intop/intop.c

ntop/intop/intop.h

ntop/intop/lsdev.c

ntop/intop/nbt.c

ntop/intop/nettable.c

ntop/intop/open.c

ntop/intop/pcapmore.c

ntop/intop/prompt.c

ntop/intop/sniff.c

ntop/intop/status.c

ntop/intop/swap.c

ntop/intop/top.c

ntop/intop/uptime.c

ntop/leaks.c

ntop/leaks.h

ntop/libtool

ntop/libtool.m4.in

ntop/logger.c

ntop/ltconfig

ntop/ltmain.sh

ntop/main.c

ntop/missing

ntop/mkinstalldirs

ntop/mysql.c

ntop/netflow.c

ntop/ntop-cert.pem

ntop/ntop-config.in

ntop/ntop-rules.8

ntop/ntop.8

ntop/ntop.c

ntop/ntop.h

ntop/ntop.html

ntop/ntop.txt

ntop/ntop_darwin.c

ntop/ntop_darwin.h

ntop/ntop_win32.c

ntop/ntop_win32.h

ntop/packages

ntop/packages/MacOSX

ntop/packages/MacOSX/License.txt

ntop/packages/MacOSX/ReadMe.txt

ntop/packages/MacOSX/ntop.package.pmsp

ntop/packages/suse

ntop/packages/suse/ntop.spec

ntop/packages/suse/ntopd

ntop/packages/suse/rc.config.ntopd

ntop/pbuf.c

ntop/plugin.c

ntop/pluginSkeleton.c

ntop/plugins

ntop/plugins/FILES

ntop/plugins/Makefile.am

ntop/plugins/Makefile.in

ntop/plugins/icmpPlugin.c

ntop/plugins/lastSeenPlugin.c

ntop/plugins/nfsPlugin.c

ntop/plugins/pep

ntop/plugins/pep/Makefile.am

ntop/plugins/pep/Makefile.in

ntop/plugins/pep/README

ntop/plugins/pep/arps.pl

ntop/plugins/pep/available.pl

ntop/plugins/pep/hosts.pl

ntop/plugins/pep/nbs.pl

ntop/plugins/pep/pep.c

ntop/plugins/pep/traffic.pl

ntop/plugins/rmon.h

ntop/plugins/rmonPlugin.c

ntop/plugins/wapPlugin.c

ntop/protocols.c

ntop/qsort.c

ntop/regex.c

ntop/regex.h

ntop/report.c

ntop/reportUtils.c

ntop/rules.c

ntop/rules.h

ntop/rules.sample

ntop/sql.c

ntop/ssl.c

ntop/stamp-h.in

ntop/term.c

ntop/traffic.c

ntop/util.c

ntop/utils

ntop/utils/Makefile

ntop/utils/README

ntop/utils/addMacAddress

ntop/utils/addMacAddress.c

ntop/utils/addMacAddressFile.pl

ntop/utils/sampleMACfile.txt

ntop/vendor.c

ntop/vendortable.h

ntop/vt.sed

ntop/webInterface.c

ntop/www

ntop/www/FILES

ntop/www/PHP

ntop/www/PHP/doSearch.php

ntop/www/PHP/ifStats.php

ntop/www/PHP/index.html

ntop/www/PHP/sessions.php

ntop/www/Perl

ntop/www/Perl/dumpFlat.pl

ntop/www/Perl/mapper.pl

ntop/www/Perl/nClient

ntop/www/Perl/remoteClient.pl

ntop/www/README

ntop/www/RRD

ntop/www/RRD/README

ntop/www/RRD/index.html

ntop/www/RRD/nwUsage.pl

ntop/www/RRD/protoUsage.pl

Show diffs side-by-side

added added

removed removed

gdchart0.94c/gd-1.8.3/libpng-1.0.8/pnggccrd.c

/* pnggccrd.c - mixed C/assembler version of utilities to read a PNG file

* For Intel x86 CPU (Pentium-MMX or later) and GNU C compiler.

* See http://www.intel.com/drg/pentiumII/appnotes/916/916.htm

* and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm

* for Intel's performance analysis of the MMX vs. non-MMX code.

* libpng version 1.0.8 - July 24, 2000

* For conditions of distribution and use, see copyright notice in png.h

* Based on MSVC code contributed by Nirav Chhatrapati, Intel Corp., 1998.

* Interface to libpng contributed by Gilles Vollant, 1999.

* GNU C port by Greg Roelofs, 1999.

* Lines 2350-4300 converted in place with intel2gas 1.3.1:

* intel2gas -mdI pnggccrd.c.partially-msvc -o pnggccrd.c

* and then cleaned up by hand. See http://hermes.terminal.at/intel2gas/ .

* NOTE: A sufficiently recent version of GNU as (or as.exe under DOS/Windows)

* is required to assemble the newer MMX instructions such as movq.

* For djgpp, see

* ftp://ftp.simtel.net/pub/simtelnet/gnu/djgpp/v2gnu/bnu281b.zip

* (or a later version in the same directory). For Linux, check your

* distribution's web site(s) or try these links:

* http://rufus.w3.org/linux/RPM/binutils.html

* http://www.debian.org/Packages/stable/devel/binutils.html

* ftp://ftp.slackware.com/pub/linux/slackware/slackware/slakware/d1/

* binutils.tgz

* For other platforms, see the main GNU site:

* ftp://ftp.gnu.org/pub/gnu/binutils/

* Version 2.5.2l.15 is definitely too old...

* NOTES (mostly by Greg Roelofs)

* =====

* 19991006:

* - fixed sign error in post-MMX cleanup code (16- & 32-bit cases)

* 19991007:

* - additional optimizations (possible or definite):

* x [DONE] write MMX code for 64-bit case (pixel_bytes == 8) [not tested]

* - write MMX code for 48-bit case (pixel_bytes == 6)

* - figure out what's up with 24-bit case (pixel_bytes == 3):

* why subtract 8 from width_mmx in the pass 4/5 case?

* (only width_mmx case)

* x [DONE] replace pixel_bytes within each block with the true

* constant value (or are compilers smart enough to do that?)

* - rewrite all MMX interlacing code so it's aligned with

* the *beginning* of the row buffer, not the end. This

* would not only allow one to eliminate half of the memory

* writes for odd passes (i.e., pass == odd), it may also

* eliminate some unaligned-data-access exceptions (assuming

* there's a penalty for not aligning 64-bit accesses on

* 64-bit boundaries). The only catch is that the "leftover"

* pixel(s) at the end of the row would have to be saved,

* but there are enough unused MMX registers in every case,

* so this is not a problem. A further benefit is that the

* post-MMX cleanup code (C code) in at least some of the

* cases could be done within the assembler block.

* x [DONE] the "v3 v2 v1 v0 v7 v6 v5 v4" comments are confusing,

* inconsistent, and don't match the MMX Programmer's Reference

* Manual conventions anyway. They should be changed to

* "b7 b6 b5 b4 b3 b2 b1 b0," where b0 indicates the byte that

* was lowest in memory (e.g., corresponding to a left pixel)

* and b7 is the byte that was highest (e.g., a right pixel).

* 19991016:

* - Brennan's Guide notwithstanding, gcc under Linux does *not*

* want globals prefixed by underscores when referencing them--

* i.e., if the variable is const4, then refer to it as const4,

* not _const4. This seems to be a djgpp-specific requirement.

* Also, such variables apparently *must* be declared outside

* of functions; neither static nor automatic variables work if

* defined within the scope of a single function, but both

* static and truly global (multi-module) variables work fine.

* 19991023:

* - fixed png_combine_row() non-MMX replication bug (odd passes only?)

* - switched from string-concatenation-with-macros to cleaner method of

* renaming global variables for djgpp--i.e., always use prefixes in

* inlined assembler code (== strings) and conditionally rename the

* variables, not the other way around. Hence _const4, _mask8_0, etc.

* 19991024:

* - fixed mmxsupport()/png_do_interlace() first-row bug

* This one was severely weird: even though mmxsupport() doesn't touch

100

* ebx (where "row" pointer was stored), it nevertheless managed to zero

101

* the register (even in static/non-fPIC code--see below), which in turn

102

* caused png_do_interlace() to return prematurely on the first row of

103

* interlaced images (i.e., without expanding the interlaced pixels).

104

* Inspection of the generated assembly code didn't turn up any clues,

105

* although it did point at a minor optimization (i.e., get rid of

106

* mmx_supported_local variable and just use eax). Possibly the CPUID

107

* instruction is more destructive than it looks? (Not yet checked.)

108

* - "info gcc" was next to useless, so compared fPIC and non-fPIC assembly

109

* listings... Apparently register spillage has to do with ebx, since

110

* it's used to index the global offset table. Commenting it out of the

111

* input-reg lists in png_combine_row() eliminated compiler barfage, so

112

* ifdef'd with __PIC__ macro: if defined, use a global for unmask

113

114

* 19991107:

115

* - verified CPUID clobberage: 12-char string constant ("GenuineIntel",

116

* "AuthenticAMD", etc.) placed in EBX:ECX:EDX. Still need to polish.

117

118

* 19991120:

119

* - made "diff" variable (now "_dif") global to simplify conversion of

120

* filtering routines (running out of regs, sigh). "diff" is still used

121

* in interlacing routines, however.

122

* - fixed up both versions of mmxsupport() (ORIG_THAT_USED_TO_CLOBBER_EBX

123

* macro determines which is used); original not yet tested.

124

125

* 20000213:

126

* - When compiling with gcc, be sure to use -fomit-frame-pointer

127

128

* 20000319:

129

* - fixed a register-name typo in png_do_read_interlace(), default (MMX) case,

130

* pass == 4 or 5, that caused visible corruption of interlaced images

131

132

* 20000623:

133

* - Various problems were reported with gcc 2.95.2 in the Cygwin environment,

134

* many of the form "forbidden register 0 (ax) was spilled for class AREG."

135

* This is explained at http://gcc.gnu.org/fom_serv/cache/23.html, and

136

* Chuck Wilson supplied a patch involving dummy output registers. See

137

* http://sourceforge.net/bugs/?func=detailbug&bug_id=108741&group_id=5624

138

* for the original (anonymous) SourceForge bug report.

139

140

* 20000706:

141

* - Chuck Wilson passed along these remaining gcc 2.95.2 errors:

142

* pnggccrd.c: In function `png_combine_row':

143

* pnggccrd.c:525: more than 10 operands in `asm'

144

* pnggccrd.c:669: more than 10 operands in `asm'

145

* pnggccrd.c:828: more than 10 operands in `asm'

146

* pnggccrd.c:994: more than 10 operands in `asm'

147

* pnggccrd.c:1177: more than 10 operands in `asm'

148

* They are all the same problem and can be worked around by using the

149

* global _unmask variable unconditionally, not just in the -fPIC case.

150

* Apparently earlier versions of gcc also have the problem with more than

151

* 10 operands; they just don't report it. Much strangeness ensues, etc.
*/

152

153

154

#define PNG_INTERNAL

155

#include "png.h"

156

157

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGGCCRD)

158

159

// Probes the CPU for MMX support.  Only the prototype is visible in this
// part of the file; png_combine_row() below calls it once and caches the
// result in mmx_supported, treating any nonzero value as "MMX available".
// NOTE(review): the change notes at the top of the file say the probe uses
// the CPUID instruction -- confirm against the definition.
int mmxsupport(void);

160

161

// Cached result of mmxsupport().  The initial value 2 is a "not yet probed"
// sentinel: png_combine_row() calls mmxsupport() and overwrites this the
// first time it sees 2, after which the value is tested as a boolean.
static int mmx_supported = 2;

162

163

// Per-pass lookup tables, indexed by png_ptr->pass (seven entries each).
// They are only defined here when PNG_USE_LOCAL_ARRAYS is set; the inline
// comments in png_combine_row() indicate they mirror tables of the same
// names in png.c.
#ifdef PNG_USE_LOCAL_ARRAYS

164

// Starting offset for each pass; png_combine_row() scales it by the pixel
// size in bytes to get the first byte to copy in the non-MMX path.
static const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};

165

// NOTE(review): presumably the pixel step between samples of each pass --
// its use is not visible in the section reviewed; confirm.
static const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};

166

// NOTE(review): presumably the per-pass replication width -- its use is
// not visible in the section reviewed; confirm.
static const int png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};

167

#endif

168

169

// djgpp, Win32, and Cygwin add their own underscores to global variables,

170

// so define them without:

171

// The inline assembly in this file always names these globals with a
// leading underscore (e.g. "movd _unmask, %%mm7").  The toolchains tested
// here already prepend an underscore to C symbol names, so on them the C
// identifiers are renamed to the un-prefixed spelling; the emitted symbol
// then matches the name used in the assembly strings.
#if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__)

172

# define _unmask unmask

173

# define _const4 const4

174

# define _const6 const6

175

# define _mask8_0 mask8_0

176

# define _mask16_1 mask16_1

177

# define _mask16_0 mask16_0

178

# define _mask24_2 mask24_2

179

# define _mask24_1 mask24_1

180

# define _mask24_0 mask24_0

181

# define _mask32_3 mask32_3

182

# define _mask32_2 mask32_2

183

# define _mask32_1 mask32_1

184

# define _mask32_0 mask32_0

185

# define _mask48_5 mask48_5

186

# define _mask48_4 mask48_4

187

# define _mask48_3 mask48_3

188

# define _mask48_2 mask48_2

189

# define _mask48_1 mask48_1

190

# define _mask48_0 mask48_0

191

# define _FullLength FullLength

192

# define _MMXLength MMXLength

193

# define _dif dif

194

#endif

195

196

/* These constants are used in the inlined MMX assembly code.

197

Ignore gcc's "At top level: defined but not used" warnings. */

198

199

/* GRR 20000706: originally _unmask was needed only when compiling with -fPIC,

200

* since that case uses the %ebx register for indexing the Global Offset Table

201

* and there were no other registers available. But gcc 2.95 and later emit

202

* "more than 10 operands in `asm'" errors when %ebx is used to preload unmask

203

* in the non-PIC case, so we'll just use the global unconditionally now.
*/

204

205

// Complement of the caller's mask: png_combine_row() stores ~mask here and
// the MMX code loads it with "movd _unmask, %%mm7".  It is a file-scope
// global rather than an asm input operand (see the GRR 20000706 note).
static int _unmask;

206

207

// Bit-selector tables for the MMX paths of png_combine_row().  Every byte
// holds the single mask bit that governs the pixel that byte belongs to
// (lowest byte = first byte in memory; the first pixel maps to bit 0x80).
// The code broadcasts the low byte of _unmask into all eight bytes of a
// register, ANDs it with one of these quadwords and compares the result
// with zero (pcmpeqb), which yields 0xFF in every byte that is to be
// copied from the source row and 0x00 in every byte that keeps the
// destination value.
// The numeric suffix is the quadword index within one group of 8 pixels:
// _maskNN_0 is applied at byte offset 0, _maskNN_1 at offset 8, and so on.

// 8 bits per pixel: one quadword covers 8 pixels.
static unsigned long long _mask8_0 = 0x0102040810204080LL;

208

209

// 16 bits per pixel: two quadwords cover 8 pixels.
static unsigned long long _mask16_1 = 0x0101020204040808LL;

210

static unsigned long long _mask16_0 = 0x1010202040408080LL;

211

212

// 24 bits per pixel: three quadwords cover 8 pixels.
static unsigned long long _mask24_2 = 0x0101010202020404LL;

213

static unsigned long long _mask24_1 = 0x0408080810101020LL;

214

static unsigned long long _mask24_0 = 0x2020404040808080LL;

215

216

// 32 bits per pixel: four quadwords cover 8 pixels.
static unsigned long long _mask32_3 = 0x0101010102020202LL;

217

static unsigned long long _mask32_2 = 0x0404040408080808LL;

218

static unsigned long long _mask32_1 = 0x1010101020202020LL;

219

static unsigned long long _mask32_0 = 0x4040404080808080LL;

220

221

// 48 bits per pixel: six quadwords cover 8 pixels.
static unsigned long long _mask48_5 = 0x0101010101010202LL;

222

static unsigned long long _mask48_4 = 0x0202020204040404LL;

223

static unsigned long long _mask48_3 = 0x0404080808080808LL;

224

static unsigned long long _mask48_2 = 0x1010101010102020LL;

225

static unsigned long long _mask48_1 = 0x2020202040404040LL;

226

static unsigned long long _mask48_0 = 0x4040808080808080LL;

227

228

// NOTE(review): _const4 and _const6 are not referenced in the section of
// the file reviewed here; presumably they are byte-select masks for later
// MMX routines -- confirm against their users.
static unsigned long long _const4 = 0x0000000000FFFFFFLL;

229

//static unsigned long long _const5 = 0x000000FFFFFF0000LL; // NOT USED

230

static unsigned long long _const6 = 0x00000000000000FFLL;

231

232

// These are used in the row-filter routines and should/would be local

233

// variables if not for gcc addressing limitations.

234

235

// NOTE(review): the routines that read and write these three globals lie
// outside the section reviewed here, so their exact meaning is not
// confirmed.  The change notes at the top of the file record that "_dif"
// was made global because the filter routines were running out of registers.
static png_uint_32 _FullLength;

236

static png_uint_32 _MMXLength;

237

static int _dif;

238

239

240

// Forward declaration only; the definition is not in the section reviewed.
// NOTE(review): the "_c" suffix suggests a plain-C row-filter routine used
// alongside the MMX code -- confirm against the definition.
void /* PRIVATE */

241

png_read_filter_row_c(png_structp png_ptr, png_row_infop row_info,

242

png_bytep row, png_bytep prev_row, int filter);

243

244

245

#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)

246

247

/* Combines the row recently read in with the previous row.

248

This routine takes care of alpha and transparency if requested.

249

This routine also handles the two methods of progressive display

250

of interlaced images, depending on the mask value.

251

The mask value describes which pixels are to be combined with

252

the row. The pattern always repeats every 8 pixels, so just 8

253

bits are needed. A one indicates the pixel is to be combined; a

254

zero indicates the pixel is to be skipped. This is in addition

255

to any alpha or transparency value associated with the pixel.

256

If you want all pixels to be combined, pass 0xff (255) in mask. */

257

258

/* Use this routine for the x86 platform - it uses a faster MMX routine

259

if the machine supports MMX. */

260

261

void /* PRIVATE */

262

png_combine_row(png_structp png_ptr, png_bytep row, int mask)

263

{

264

png_debug(1,"in png_combine_row_asm\n");

265

266

if (mmx_supported == 2)

267

mmx_supported = mmxsupport();

268

269

270

fprintf(stderr, "GRR DEBUG: png_combine_row() pixel_depth = %d, mask = 0x%02x, unmask = 0x%02x\n", png_ptr->row_info.pixel_depth, mask, ~mask);

271

fflush(stderr);

272

273

if (mask == 0xff)

274

{

275

png_memcpy(row, png_ptr->row_buf + 1,

276

(png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));

277

}

278

/* GRR: add "else if (mask == 0)" case?

279

* or does png_combine_row() not even get called in that case? */

280

else

281

{

282

switch (png_ptr->row_info.pixel_depth)

283

{

284

case 1: // png_ptr->row_info.pixel_depth

285

{

286

png_bytep sp;

287

png_bytep dp;

288

int s_inc, s_start, s_end;

289

int m;

290

int shift;

291

png_uint_32 i;

292

293

sp = png_ptr->row_buf + 1;

294

dp = row;

295

m = 0x80;

296

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

297

if (png_ptr->transformations & PNG_PACKSWAP)

298

{

299

s_start = 0;

300

s_end = 7;

301

s_inc = 1;

302

}

303

else

304

#endif

305

{

306

s_start = 7;

307

s_end = 0;

308

s_inc = -1;

309

}

310

311

shift = s_start;

312

313

for (i = 0; i < png_ptr->width; i++)

314

{

315

if (m & mask)

316

{

317

int value;

318

319

value = (*sp >> shift) & 0x1;

320

*dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);

321

*dp |= (png_byte)(value << shift);

322

}

323

324

if (shift == s_end)

325

{

326

shift = s_start;

327

sp++;

328

dp++;

329

}

330

else

331

shift += s_inc;

332

333

if (m == 1)

334

m = 0x80;

335

else

336

m >>= 1;

337

}

338

break;

339

}

340

341

case 2: // png_ptr->row_info.pixel_depth

342

{

343

png_bytep sp;

344

png_bytep dp;

345

int s_start, s_end, s_inc;

346

int m;

347

int shift;

348

png_uint_32 i;

349

int value;

350

351

sp = png_ptr->row_buf + 1;

352

dp = row;

353

m = 0x80;

354

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

355

if (png_ptr->transformations & PNG_PACKSWAP)

356

{

357

s_start = 0;

358

s_end = 6;

359

s_inc = 2;

360

}

361

else

362

#endif

363

{

364

s_start = 6;

365

s_end = 0;

366

s_inc = -2;

367

}

368

369

shift = s_start;

370

371

for (i = 0; i < png_ptr->width; i++)

372

{

373

if (m & mask)

374

{

375

value = (*sp >> shift) & 0x3;

376

*dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);

377

*dp |= (png_byte)(value << shift);

378

}

379

380

if (shift == s_end)

381

{

382

shift = s_start;

383

sp++;

384

dp++;

385

}

386

else

387

shift += s_inc;

388

if (m == 1)

389

m = 0x80;

390

else

391

m >>= 1;

392

}

393

break;

394

}

395

396

case 4: // png_ptr->row_info.pixel_depth

397

{

398

png_bytep sp;

399

png_bytep dp;

400

int s_start, s_end, s_inc;

401

int m;

402

int shift;

403

png_uint_32 i;

404

int value;

405

406

sp = png_ptr->row_buf + 1;

407

dp = row;

408

m = 0x80;

409

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

410

if (png_ptr->transformations & PNG_PACKSWAP)

411

{

412

s_start = 0;

413

s_end = 4;

414

s_inc = 4;

415

}

416

else

417

#endif

418

{

419

s_start = 4;

420

s_end = 0;

421

s_inc = -4;

422

}

423

shift = s_start;

424

425

for (i = 0; i < png_ptr->width; i++)

426

{

427

if (m & mask)

428

{

429

value = (*sp >> shift) & 0xf;

430

*dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);

431

*dp |= (png_byte)(value << shift);

432

}

433

434

if (shift == s_end)

435

{

436

shift = s_start;

437

sp++;

438

dp++;

439

}

440

else

441

shift += s_inc;

442

if (m == 1)

443

m = 0x80;

444

else

445

m >>= 1;

446

}

447

break;

448

}

449

450

case 8: // png_ptr->row_info.pixel_depth

451

{

452

png_bytep srcptr;

453

png_bytep dstptr;

454

455

if (mmx_supported)

456

{

457

png_uint_32 len;

458

int diff;

459

int dummy_value_a; // fix 'forbidden register spilled' error

460

int dummy_value_d;

461

int dummy_value_c;

462

int dummy_value_S;

463

int dummy_value_D;

464

_unmask = ~mask; // global variable for -fPIC version

465

srcptr = png_ptr->row_buf + 1;

466

dstptr = row;

467

len = png_ptr->width &~7; // reduce to multiple of 8

468

diff = png_ptr->width & 7; // amount lost

469

470

__asm__ __volatile__ (

471

"movd _unmask, %%mm7 \n\t" // load bit pattern

472

"psubb %%mm6, %%mm6 \n\t" // zero mm6

473

"punpcklbw %%mm7, %%mm7 \n\t"

474

"punpcklwd %%mm7, %%mm7 \n\t"

475

"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks

476

477

"movq _mask8_0, %%mm0 \n\t"

478

"pand %%mm7, %%mm0 \n\t" // nonzero if keep byte

479

"pcmpeqb %%mm6, %%mm0 \n\t" // zeros->1s, v versa

480

481

// preload "movl len, %%ecx \n\t" // load length of line

482

// preload "movl srcptr, %%esi \n\t" // load source

483

// preload "movl dstptr, %%edi \n\t" // load dest

484

485

"cmpl $0, %%ecx \n\t" // len == 0 ?

486

"je mainloop8end \n\t"

487

488

"mainloop8: \n\t"

489

"movq (%%esi), %%mm4 \n\t" // *srcptr

490

"pand %%mm0, %%mm4 \n\t"

491

"movq %%mm0, %%mm6 \n\t"

492

"pandn (%%edi), %%mm6 \n\t" // *dstptr

493

"por %%mm6, %%mm4 \n\t"

494

"movq %%mm4, (%%edi) \n\t"

495

"addl $8, %%esi \n\t" // inc by 8 bytes processed

496

"addl $8, %%edi \n\t"

497

"subl $8, %%ecx \n\t" // dec by 8 pixels processed

498

"ja mainloop8 \n\t"

499

500

"mainloop8end: \n\t"

501

// preload "movl diff, %%ecx \n\t" // (diff is in eax)

502

"movl %%eax, %%ecx \n\t"

503

"cmpl $0, %%ecx \n\t"

504

"jz end8 \n\t"

505

// preload "movl mask, %%edx \n\t"

506

"sall $24, %%edx \n\t" // make low byte, high byte

507

508

"secondloop8: \n\t"

509

"sall %%edx \n\t" // move high bit to CF

510

"jnc skip8 \n\t" // if CF = 0

511

"movb (%%esi), %%al \n\t"

512

"movb %%al, (%%edi) \n\t"

513

514

"skip8: \n\t"

515

"incl %%esi \n\t"

516

"incl %%edi \n\t"

517

"decl %%ecx \n\t"

518

"jnz secondloop8 \n\t"

519

520

"end8: \n\t"

521

"EMMS \n\t" // DONE

522

523

: "=a" (dummy_value_a), // output regs (dummy)

524

"=d" (dummy_value_d),

525

"=c" (dummy_value_c),

526

"=S" (dummy_value_S),

527

"=D" (dummy_value_D)

528

529

: "3" (srcptr), // esi // input regs

530

"4" (dstptr), // edi

531

"0" (diff), // eax

532

// was (unmask) "b" RESERVED // ebx // Global Offset Table idx

533

"2" (len), // ecx

534

"1" (mask) // edx

535

536

// : // clobber list

537

#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */

538

: "%mm0", "%mm4", "%mm6", "%mm7"

539

#endif

540

);

541

}

542

else /* mmx _not supported - Use modified C routine */

543

{

544

545

png_uint_32 initial_val = png_pass_start[png_ptr->pass];

546

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

547

548

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

549

550

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

551

552

553

srcptr = png_ptr->row_buf + 1 + initial_val;

554

dstptr = row + initial_val;

555

556

for (i = initial_val; i < final_val; i += stride)

557

{

558

png_memcpy(dstptr, srcptr, rep_bytes);

559

srcptr += stride;

560

dstptr += stride;

561

}

562

} /* end of else */

563

564

break;

565

} // end 8 bpp

566

567

case 16: // png_ptr->row_info.pixel_depth

568

{

569

png_bytep srcptr;

570

png_bytep dstptr;

571

572

if (mmx_supported)

573

{

574

png_uint_32 len;

575

int diff;

576

int dummy_value_a; // fix 'forbidden register spilled' error

577

int dummy_value_d;

578

int dummy_value_c;

579

int dummy_value_S;

580

int dummy_value_D;

581

_unmask = ~mask; // global variable for -fPIC version

582

srcptr = png_ptr->row_buf + 1;

583

dstptr = row;

584

len = png_ptr->width &~7; // reduce to multiple of 8

585

diff = png_ptr->width & 7; // amount lost

586

587

__asm__ __volatile__ (

588

"movd _unmask, %%mm7 \n\t" // load bit pattern

589

"psubb %%mm6, %%mm6 \n\t" // zero mm6

590

"punpcklbw %%mm7, %%mm7 \n\t"

591

"punpcklwd %%mm7, %%mm7 \n\t"

592

"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks

593

594

"movq _mask16_0, %%mm0 \n\t"

595

"movq _mask16_1, %%mm1 \n\t"

596

597

"pand %%mm7, %%mm0 \n\t"

598

"pand %%mm7, %%mm1 \n\t"

599

600

"pcmpeqb %%mm6, %%mm0 \n\t"

601

"pcmpeqb %%mm6, %%mm1 \n\t"

602

603

// preload "movl len, %%ecx \n\t" // load length of line

604

// preload "movl srcptr, %%esi \n\t" // load source

605

// preload "movl dstptr, %%edi \n\t" // load dest

606

607

"cmpl $0, %%ecx \n\t"

608

"jz mainloop16end \n\t"

609

610

"mainloop16: \n\t"

611

"movq (%%esi), %%mm4 \n\t"

612

"pand %%mm0, %%mm4 \n\t"

613

"movq %%mm0, %%mm6 \n\t"

614

"movq (%%edi), %%mm7 \n\t"

615

"pandn %%mm7, %%mm6 \n\t"

616

"por %%mm6, %%mm4 \n\t"

617

"movq %%mm4, (%%edi) \n\t"

618

619

"movq 8(%%esi), %%mm5 \n\t"

620

"pand %%mm1, %%mm5 \n\t"

621

"movq %%mm1, %%mm7 \n\t"

622

"movq 8(%%edi), %%mm6 \n\t"

623

"pandn %%mm6, %%mm7 \n\t"

624

"por %%mm7, %%mm5 \n\t"

625

"movq %%mm5, 8(%%edi) \n\t"

626

627

"addl $16, %%esi \n\t" // inc by 16 bytes processed

628

"addl $16, %%edi \n\t"

629

"subl $8, %%ecx \n\t" // dec by 8 pixels processed

630

"ja mainloop16 \n\t"

631

632

"mainloop16end: \n\t"

633

// preload "movl diff, %%ecx \n\t" // (diff is in eax)

634

"movl %%eax, %%ecx \n\t"

635

"cmpl $0, %%ecx \n\t"

636

"jz end16 \n\t"

637

// preload "movl mask, %%edx \n\t"

638

"sall $24, %%edx \n\t" // make low byte, high byte

639

640

"secondloop16: \n\t"

641

"sall %%edx \n\t" // move high bit to CF

642

"jnc skip16 \n\t" // if CF = 0

643

"movw (%%esi), %%ax \n\t"

644

"movw %%ax, (%%edi) \n\t"

645

646

"skip16: \n\t"

647

"addl $2, %%esi \n\t"

648

"addl $2, %%edi \n\t"

649

"decl %%ecx \n\t"

650

"jnz secondloop16 \n\t"

651

652

"end16: \n\t"

653

"EMMS \n\t" // DONE

654

655

: "=a" (dummy_value_a), // output regs (dummy)

656

"=d" (dummy_value_d),

657

"=c" (dummy_value_c),

658

"=S" (dummy_value_S),

659

"=D" (dummy_value_D)

660

661

: "3" (srcptr), // esi // input regs

662

"4" (dstptr), // edi

663

"0" (diff), // eax

664

// was (unmask) "b" RESERVED // ebx // Global Offset Table idx

665

"2" (len), // ecx

666

"1" (mask) // edx

667

668

// : // clobber list

669

#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */

670

: "%mm0", "%mm1",

671

"%mm4", "%mm5", "%mm6", "%mm7"

672

#endif

673

);

674

}

675

else /* mmx _not supported - Use modified C routine */

676

{

677

678

png_uint_32 initial_val = 2 * png_pass_start[png_ptr->pass];

679

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

680

681

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

682

683

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

684

685

686

srcptr = png_ptr->row_buf + 1 + initial_val;

687

dstptr = row + initial_val;

688

689

for (i = initial_val; i < final_val; i += stride)

690

{

691

png_memcpy(dstptr, srcptr, rep_bytes);

692

srcptr += stride;

693

dstptr += stride;

694

}

695

} /* end of else */

696

697

break;

698

} // end 16 bpp

699

700

case 24: // png_ptr->row_info.pixel_depth

701

{

702

png_bytep srcptr;

703

png_bytep dstptr;

704

705

if (mmx_supported)

706

{

707

png_uint_32 len;

708

int diff;

709

int dummy_value_a; // fix 'forbidden register spilled' error

710

int dummy_value_d;

711

int dummy_value_c;

712

int dummy_value_S;

713

int dummy_value_D;

714

_unmask = ~mask; // global variable for -fPIC version

715

srcptr = png_ptr->row_buf + 1;

716

dstptr = row;

717

len = png_ptr->width &~7; // reduce to multiple of 8

718

diff = png_ptr->width & 7; // amount lost

719

720

__asm__ __volatile__ (

721

"movd _unmask, %%mm7 \n\t" // load bit pattern

722

"psubb %%mm6, %%mm6 \n\t" // zero mm6

723

"punpcklbw %%mm7, %%mm7 \n\t"

724

"punpcklwd %%mm7, %%mm7 \n\t"

725

"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks

726

727

"movq _mask24_0, %%mm0 \n\t"

728

"movq _mask24_1, %%mm1 \n\t"

729

"movq _mask24_2, %%mm2 \n\t"

730

731

"pand %%mm7, %%mm0 \n\t"

732

"pand %%mm7, %%mm1 \n\t"

733

"pand %%mm7, %%mm2 \n\t"

734

735

"pcmpeqb %%mm6, %%mm0 \n\t"

736

"pcmpeqb %%mm6, %%mm1 \n\t"

737

"pcmpeqb %%mm6, %%mm2 \n\t"

738

739

// preload "movl len, %%ecx \n\t" // load length of line

740

// preload "movl srcptr, %%esi \n\t" // load source

741

// preload "movl dstptr, %%edi \n\t" // load dest

742

743

"cmpl $0, %%ecx \n\t"

744

"jz mainloop24end \n\t"

745

746

"mainloop24: \n\t"

747

"movq (%%esi), %%mm4 \n\t"

748

"pand %%mm0, %%mm4 \n\t"

749

"movq %%mm0, %%mm6 \n\t"

750

"movq (%%edi), %%mm7 \n\t"

751

"pandn %%mm7, %%mm6 \n\t"

752

"por %%mm6, %%mm4 \n\t"

753

"movq %%mm4, (%%edi) \n\t"

754

755

"movq 8(%%esi), %%mm5 \n\t"

756

"pand %%mm1, %%mm5 \n\t"

757

"movq %%mm1, %%mm7 \n\t"

758

"movq 8(%%edi), %%mm6 \n\t"

759

"pandn %%mm6, %%mm7 \n\t"

760

"por %%mm7, %%mm5 \n\t"

761

"movq %%mm5, 8(%%edi) \n\t"

762

763

"movq 16(%%esi), %%mm6 \n\t"

764

"pand %%mm2, %%mm6 \n\t"

765

"movq %%mm2, %%mm4 \n\t"

766

"movq 16(%%edi), %%mm7 \n\t"

767

"pandn %%mm7, %%mm4 \n\t"

768

"por %%mm4, %%mm6 \n\t"

769

"movq %%mm6, 16(%%edi) \n\t"

770

771

"addl $24, %%esi \n\t" // inc by 24 bytes processed

772

"addl $24, %%edi \n\t"

773

"subl $8, %%ecx \n\t" // dec by 8 pixels processed

774

775

"ja mainloop24 \n\t"

776

777

"mainloop24end: \n\t"

778

// preload "movl diff, %%ecx \n\t" // (diff is in eax)

779

"movl %%eax, %%ecx \n\t"

780

"cmpl $0, %%ecx \n\t"

781

"jz end24 \n\t"

782

// preload "movl mask, %%edx \n\t"

783

"sall $24, %%edx \n\t" // make low byte, high byte

784

785

"secondloop24: \n\t"

786

"sall %%edx \n\t" // move high bit to CF

787

"jnc skip24 \n\t" // if CF = 0

788

"movw (%%esi), %%ax \n\t"

789

"movw %%ax, (%%edi) \n\t"

790

"xorl %%eax, %%eax \n\t"

791

"movb 2(%%esi), %%al \n\t"

792

"movb %%al, 2(%%edi) \n\t"

793

794

"skip24: \n\t"

795

"addl $3, %%esi \n\t"

796

"addl $3, %%edi \n\t"

797

"decl %%ecx \n\t"

798

"jnz secondloop24 \n\t"

799

800

"end24: \n\t"

801

"EMMS \n\t" // DONE

802

803

: "=a" (dummy_value_a), // output regs (dummy)

804

"=d" (dummy_value_d),

805

"=c" (dummy_value_c),

806

"=S" (dummy_value_S),

807

"=D" (dummy_value_D)

808

809

: "3" (srcptr), // esi // input regs

810

"4" (dstptr), // edi

811

"0" (diff), // eax

812

// was (unmask) "b" RESERVED // ebx // Global Offset Table idx

813

"2" (len), // ecx

814

"1" (mask) // edx

815

816

// : // clobber list

817

#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */

818

: "%mm0", "%mm1", "%mm2",

819

"%mm4", "%mm5", "%mm6", "%mm7"

820

#endif

821

);

822

}

823

else /* mmx _not supported - Use modified C routine */

824

{

825

826

png_uint_32 initial_val = 3 * png_pass_start[png_ptr->pass];

827

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

828

829

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

830

831

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

832

833

834

srcptr = png_ptr->row_buf + 1 + initial_val;

835

dstptr = row + initial_val;

836

837

for (i = initial_val; i < final_val; i += stride)

838

{

839

png_memcpy(dstptr, srcptr, rep_bytes);

840

srcptr += stride;

841

dstptr += stride;

842

}

843

} /* end of else */

844

845

break;

846

} // end 24 bpp

847

848

case 32: // png_ptr->row_info.pixel_depth

849

{

850

png_bytep srcptr;

851

png_bytep dstptr;

852

853

if (mmx_supported)

854

{

855

png_uint_32 len;

856

int diff;

857

int dummy_value_a; // fix 'forbidden register spilled' error

858

int dummy_value_d;

859

int dummy_value_c;

860

int dummy_value_S;

861

int dummy_value_D;

862

_unmask = ~mask; // global variable for -fPIC version

863

srcptr = png_ptr->row_buf + 1;

864

dstptr = row;

865

len = png_ptr->width &~7; // reduce to multiple of 8

866

diff = png_ptr->width & 7; // amount lost

867

868

__asm__ __volatile__ (

869

"movd _unmask, %%mm7 \n\t" // load bit pattern

870

"psubb %%mm6, %%mm6 \n\t" // zero mm6

871

"punpcklbw %%mm7, %%mm7 \n\t"

872

"punpcklwd %%mm7, %%mm7 \n\t"

873

"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks

874

875

"movq _mask32_0, %%mm0 \n\t"

876

"movq _mask32_1, %%mm1 \n\t"

877

"movq _mask32_2, %%mm2 \n\t"

878

"movq _mask32_3, %%mm3 \n\t"

879

880

"pand %%mm7, %%mm0 \n\t"

881

"pand %%mm7, %%mm1 \n\t"

882

"pand %%mm7, %%mm2 \n\t"

883

"pand %%mm7, %%mm3 \n\t"

884

885

"pcmpeqb %%mm6, %%mm0 \n\t"

886

"pcmpeqb %%mm6, %%mm1 \n\t"

887

"pcmpeqb %%mm6, %%mm2 \n\t"

888

"pcmpeqb %%mm6, %%mm3 \n\t"

889

890

// preload "movl len, %%ecx \n\t" // load length of line

891

// preload "movl srcptr, %%esi \n\t" // load source

892

// preload "movl dstptr, %%edi \n\t" // load dest

893

894

"cmpl $0, %%ecx \n\t" // lcr

895

"jz mainloop32end \n\t"

896

897

"mainloop32: \n\t"

898

"movq (%%esi), %%mm4 \n\t"

899

"pand %%mm0, %%mm4 \n\t"

900

"movq %%mm0, %%mm6 \n\t"

901

"movq (%%edi), %%mm7 \n\t"

902

"pandn %%mm7, %%mm6 \n\t"

903

"por %%mm6, %%mm4 \n\t"

904

"movq %%mm4, (%%edi) \n\t"

905

906

"movq 8(%%esi), %%mm5 \n\t"

907

"pand %%mm1, %%mm5 \n\t"

908

"movq %%mm1, %%mm7 \n\t"

909

"movq 8(%%edi), %%mm6 \n\t"

910

"pandn %%mm6, %%mm7 \n\t"

911

"por %%mm7, %%mm5 \n\t"

912

"movq %%mm5, 8(%%edi) \n\t"

913

914

"movq 16(%%esi), %%mm6 \n\t"

915

"pand %%mm2, %%mm6 \n\t"

916

"movq %%mm2, %%mm4 \n\t"

917

"movq 16(%%edi), %%mm7 \n\t"

918

"pandn %%mm7, %%mm4 \n\t"

919

"por %%mm4, %%mm6 \n\t"

920

"movq %%mm6, 16(%%edi) \n\t"

921

922

"movq 24(%%esi), %%mm7 \n\t"

923

"pand %%mm3, %%mm7 \n\t"

924

"movq %%mm3, %%mm5 \n\t"

925

"movq 24(%%edi), %%mm4 \n\t"

926

"pandn %%mm4, %%mm5 \n\t"

927

"por %%mm5, %%mm7 \n\t"

928

"movq %%mm7, 24(%%edi) \n\t"

929

930

"addl $32, %%esi \n\t" // inc by 32 bytes processed

931

"addl $32, %%edi \n\t"

932

"subl $8, %%ecx \n\t" // dec by 8 pixels processed

933

"ja mainloop32 \n\t"

934

935

"mainloop32end: \n\t"

936

// preload "movl diff, %%ecx \n\t" // (diff is in eax)

937

"movl %%eax, %%ecx \n\t"

938

"cmpl $0, %%ecx \n\t"

939

"jz end32 \n\t"

940

// preload "movl mask, %%edx \n\t"

941

"sall $24, %%edx \n\t" // low byte => high byte

942

943

"secondloop32: \n\t"

944

"sall %%edx \n\t" // move high bit to CF

945

"jnc skip32 \n\t" // if CF = 0

946

"movl (%%esi), %%eax \n\t"

947

"movl %%eax, (%%edi) \n\t"

948

949

"skip32: \n\t"

950

"addl $4, %%esi \n\t"

951

"addl $4, %%edi \n\t"

952

"decl %%ecx \n\t"

953

"jnz secondloop32 \n\t"

954

955

"end32: \n\t"

956

"EMMS \n\t" // DONE

957

958

: "=a" (dummy_value_a), // output regs (dummy)

959

"=d" (dummy_value_d),

960

"=c" (dummy_value_c),

961

"=S" (dummy_value_S),

962

"=D" (dummy_value_D)

963

964

: "3" (srcptr), // esi // input regs

965

"4" (dstptr), // edi

966

"0" (diff), // eax

967

// was (unmask) "b" RESERVED // ebx // Global Offset Table idx

968

"2" (len), // ecx

969

"1" (mask) // edx

970

971

// : // clobber list

972

#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */

973

: "%mm0", "%mm1", "%mm2", "%mm3",

974

"%mm4", "%mm5", "%mm6", "%mm7"

975

#endif

976

);

977

}

978

else /* mmx _not supported - Use modified C routine */

979

{

980

981

png_uint_32 initial_val = 4 * png_pass_start[png_ptr->pass];

982

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

983

984

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

985

986

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

987

988

989

srcptr = png_ptr->row_buf + 1 + initial_val;

990

dstptr = row + initial_val;

991

992

for (i = initial_val; i < final_val; i += stride)

993

{

994

png_memcpy(dstptr, srcptr, rep_bytes);

995

srcptr += stride;

996

dstptr += stride;

997

}

998

} /* end of else */

999

1000

break;

1001

} // end 32 bpp

1002

1003

case 48: // png_ptr->row_info.pixel_depth

1004

{

1005

png_bytep srcptr;

1006

png_bytep dstptr;

1007

1008

if (mmx_supported)

1009

{

1010

png_uint_32 len;

1011

int diff;

1012

int dummy_value_a; // fix 'forbidden register spilled' error

1013

int dummy_value_d;

1014

int dummy_value_c;

1015

int dummy_value_S;

1016

int dummy_value_D;

1017

_unmask = ~mask; // global variable for -fPIC version

1018

srcptr = png_ptr->row_buf + 1;

1019

dstptr = row;

1020

len = png_ptr->width &~7; // reduce to multiple of 8

1021

diff = png_ptr->width & 7; // amount lost

1022

1023

__asm__ __volatile__ (

1024

"movd _unmask, %%mm7 \n\t" // load bit pattern

1025

"psubb %%mm6, %%mm6 \n\t" // zero mm6

1026

"punpcklbw %%mm7, %%mm7 \n\t"

1027

"punpcklwd %%mm7, %%mm7 \n\t"

1028

"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks

1029

1030

"movq _mask48_0, %%mm0 \n\t"

1031

"movq _mask48_1, %%mm1 \n\t"

1032

"movq _mask48_2, %%mm2 \n\t"

1033

"movq _mask48_3, %%mm3 \n\t"

1034

"movq _mask48_4, %%mm4 \n\t"

1035

"movq _mask48_5, %%mm5 \n\t"

1036

1037

"pand %%mm7, %%mm0 \n\t"

1038

"pand %%mm7, %%mm1 \n\t"

1039

"pand %%mm7, %%mm2 \n\t"

1040

"pand %%mm7, %%mm3 \n\t"

1041

"pand %%mm7, %%mm4 \n\t"

1042

"pand %%mm7, %%mm5 \n\t"

1043

1044

"pcmpeqb %%mm6, %%mm0 \n\t"

1045

"pcmpeqb %%mm6, %%mm1 \n\t"

1046

"pcmpeqb %%mm6, %%mm2 \n\t"

1047

"pcmpeqb %%mm6, %%mm3 \n\t"

1048

"pcmpeqb %%mm6, %%mm4 \n\t"

1049

"pcmpeqb %%mm6, %%mm5 \n\t"

1050

1051

// preload "movl len, %%ecx \n\t" // load length of line

1052

// preload "movl srcptr, %%esi \n\t" // load source

1053

// preload "movl dstptr, %%edi \n\t" // load dest

1054

1055

"cmpl $0, %%ecx \n\t"

1056

"jz mainloop48end \n\t"

1057

1058

"mainloop48: \n\t"

1059

"movq (%%esi), %%mm7 \n\t"

1060

"pand %%mm0, %%mm7 \n\t"

1061

"movq %%mm0, %%mm6 \n\t"

1062

"pandn (%%edi), %%mm6 \n\t"

1063

"por %%mm6, %%mm7 \n\t"

1064

"movq %%mm7, (%%edi) \n\t"

1065

1066

"movq 8(%%esi), %%mm6 \n\t"

1067

"pand %%mm1, %%mm6 \n\t"

1068

"movq %%mm1, %%mm7 \n\t"

1069

"pandn 8(%%edi), %%mm7 \n\t"

1070

"por %%mm7, %%mm6 \n\t"

1071

"movq %%mm6, 8(%%edi) \n\t"

1072

1073

"movq 16(%%esi), %%mm6 \n\t"

1074

"pand %%mm2, %%mm6 \n\t"

1075

"movq %%mm2, %%mm7 \n\t"

1076

"pandn 16(%%edi), %%mm7 \n\t"

1077

"por %%mm7, %%mm6 \n\t"

1078

"movq %%mm6, 16(%%edi) \n\t"

1079

1080

"movq 24(%%esi), %%mm7 \n\t"

1081

"pand %%mm3, %%mm7 \n\t"

1082

"movq %%mm3, %%mm6 \n\t"

1083

"pandn 24(%%edi), %%mm6 \n\t"

1084

"por %%mm6, %%mm7 \n\t"

1085

"movq %%mm7, 24(%%edi) \n\t"

1086

1087

"movq 32(%%esi), %%mm6 \n\t"

1088

"pand %%mm4, %%mm6 \n\t"

1089

"movq %%mm4, %%mm7 \n\t"

1090

"pandn 32(%%edi), %%mm7 \n\t"

1091

"por %%mm7, %%mm6 \n\t"

1092

"movq %%mm6, 32(%%edi) \n\t"

1093

1094

"movq 40(%%esi), %%mm7 \n\t"

1095

"pand %%mm5, %%mm7 \n\t"

1096

"movq %%mm5, %%mm6 \n\t"

1097

"pandn 40(%%edi), %%mm6 \n\t"

1098

"por %%mm6, %%mm7 \n\t"

1099

"movq %%mm7, 40(%%edi) \n\t"

1100

1101

"addl $48, %%esi \n\t" // inc by 48 bytes processed

1102

"addl $48, %%edi \n\t"

1103

"subl $8, %%ecx \n\t" // dec by 8 pixels processed

1104

1105

"ja mainloop48 \n\t"

1106

1107

"mainloop48end: \n\t"

1108

// preload "movl diff, %%ecx \n\t" // (diff is in eax)

1109

"movl %%eax, %%ecx \n\t"

1110

"cmpl $0, %%ecx \n\t"

1111

"jz end48 \n\t"

1112

// preload "movl mask, %%edx \n\t"

1113

"sall $24, %%edx \n\t" // make low byte, high byte

1114

1115

"secondloop48: \n\t"

1116

"sall %%edx \n\t" // move high bit to CF

1117

"jnc skip48 \n\t" // if CF = 0

1118

"movl (%%esi), %%eax \n\t"

1119

"movl %%eax, (%%edi) \n\t"

1120

1121

"skip48: \n\t"

1122

"addl $4, %%esi \n\t"

1123

"addl $4, %%edi \n\t"

1124

"decl %%ecx \n\t"

1125

"jnz secondloop48 \n\t"

1126

1127

"end48: \n\t"

1128

"EMMS \n\t" // DONE

1129

1130

: "=a" (dummy_value_a), // output regs (dummy)

1131

"=d" (dummy_value_d),

1132

"=c" (dummy_value_c),

1133

"=S" (dummy_value_S),

1134

"=D" (dummy_value_D)

1135

1136

: "3" (srcptr), // esi // input regs

1137

"4" (dstptr), // edi

1138

"0" (diff), // eax

1139

// was (unmask) "b" RESERVED // ebx // Global Offset Table idx

1140

"2" (len), // ecx

1141

"1" (mask) // edx

1142

1143

// : // clobber list

1144

#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */

1145

: "%mm0", "%mm1", "%mm2", "%mm3",

1146

"%mm4", "%mm5", "%mm6", "%mm7"

1147

#endif

1148

);

1149

}

1150

else /* mmx _not supported - Use modified C routine */

1151

{

1152

1153

png_uint_32 initial_val = 6 * png_pass_start[png_ptr->pass];

1154

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

1155

1156

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

1157

1158

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

1159

1160

1161

srcptr = png_ptr->row_buf + 1 + initial_val;

1162

dstptr = row + initial_val;

1163

1164

for (i = initial_val; i < final_val; i += stride)

1165

{

1166

png_memcpy(dstptr, srcptr, rep_bytes);

1167

srcptr += stride;

1168

dstptr += stride;

1169

}

1170

} /* end of else */

1171

1172

break;

1173

} // end 48 bpp

1174

1175

case 64: // png_ptr->row_info.pixel_depth

1176

{

1177

png_bytep srcptr;

1178

png_bytep dstptr;

1179

1180

png_uint_32 initial_val = 8 * png_pass_start[png_ptr->pass];

1181

// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};

1182

1183

// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

1184

1185

// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};

1186

1187

1188

srcptr = png_ptr->row_buf + 1 + initial_val;

1189

dstptr = row + initial_val;

1190

1191

for (i = initial_val; i < final_val; i += stride)

1192

{

1193

png_memcpy(dstptr, srcptr, rep_bytes);

1194

srcptr += stride;

1195

dstptr += stride;

1196

}

1197

break;

1198

} // end 64 bpp

1199

1200

default: // png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64

1201

{

1202

// this should never happen

1203

fprintf(stderr,

1204

"libpng internal error: png_ptr->row_info.pixel_depth = %d\n",

1205

png_ptr->row_info.pixel_depth);

1206

fflush(stderr);

1207

break;

1208

}

1209

} /* end switch (png_ptr->row_info.pixel_depth) */

1210

1211

} /* end if (non-trivial mask) */

1212

1213

} /* end png_combine_row() */

1214

1215

#endif /* PNG_HAVE_ASSEMBLER_COMBINE_ROW */

1216

1217

1218

1219

#if defined(PNG_READ_INTERLACING_SUPPORTED)

1220

#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)

1221

1222

/* png_do_read_interlace() is called after any 16-bit to 8-bit conversion

1223

* has taken place. [GRR: what other steps come before and/or after?] */

1224

1225

1226

void /* PRIVATE */

1227

png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,

1228

png_uint_32 transformations)

1229

{

1230

1231

fprintf(stderr, "GRR DEBUG: entering png_do_read_interlace()\n");

1232

if (row == NULL) fprintf(stderr, "GRR DEBUG: row == NULL\n");

1233

if (row_info == NULL) fprintf(stderr, "GRR DEBUG: row_info == NULL\n");

1234

fflush(stderr);

1235

1236

png_debug(1,"in png_do_read_interlace\n");

1237

1238

if (mmx_supported == 2)

1239

mmx_supported = mmxsupport();

1240

1241

{

1242

fprintf(stderr, "GRR DEBUG: calling mmxsupport()\n");

1243

fprintf(stderr, "GRR DEBUG: done with mmxsupport() (mmx_supported = %d)\n", mmx_supported);

1244

}

1245

1246

1247

1248

this one happened on first row due to weirdness with mmxsupport():

1249

if (row == NULL) fprintf(stderr, "GRR DEBUG: now row == NULL!!!\n");

1250

row was in ebx, and even though nothing touched ebx, it still got wiped...

1251

[weird side effect of CPUID instruction?]

1252

if (row_info == NULL) fprintf(stderr, "GRR DEBUG: now row_info == NULL!!!\n");

1253

1254

if (row != NULL && row_info != NULL)

1255

{

1256

png_uint_32 final_width;

1257

1258

final_width = row_info->width * png_pass_inc[pass];

1259

1260

1261

fprintf(stderr, "GRR DEBUG: png_do_read_interlace() row_info->width = %d, final_width = %d\n", row_info->width, final_width);

1262

fprintf(stderr, "GRR DEBUG: png_do_read_interlace() pixel_depth = %d\n", row_info->pixel_depth);

1263

fflush(stderr);

1264

1265

switch (row_info->pixel_depth)

1266

{

1267

case 1:

1268

{

1269

png_bytep sp, dp;

1270

int sshift, dshift;

1271

int s_start, s_end, s_inc;

1272

png_byte v;

1273

png_uint_32 i;

1274

int j;

1275

1276

sp = row + (png_size_t)((row_info->width - 1) >> 3);

1277

dp = row + (png_size_t)((final_width - 1) >> 3);

1278

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

1279

if (transformations & PNG_PACKSWAP)

1280

{

1281

sshift = (int)((row_info->width + 7) & 7);

1282

dshift = (int)((final_width + 7) & 7);

1283

s_start = 7;

1284

s_end = 0;

1285

s_inc = -1;

1286

}

1287

else

1288

#endif

1289

{

1290

sshift = 7 - (int)((row_info->width + 7) & 7);

1291

dshift = 7 - (int)((final_width + 7) & 7);

1292

s_start = 0;

1293

s_end = 7;

1294

s_inc = 1;

1295

}

1296

1297

for (i = row_info->width; i; i--)

1298

{

1299

v = (png_byte)((*sp >> sshift) & 0x1);

1300

for (j = 0; j < png_pass_inc[pass]; j++)

1301

{

1302

*dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);

1303

*dp |= (png_byte)(v << dshift);

1304

if (dshift == s_end)

1305

{

1306

dshift = s_start;

1307

dp--;

1308

}

1309

else

1310

dshift += s_inc;

1311

}

1312

if (sshift == s_end)

1313

{

1314

sshift = s_start;

1315

sp--;

1316

}

1317

else

1318

sshift += s_inc;

1319

}

1320

break;

1321

}

1322

1323

case 2:

1324

{

1325

png_bytep sp, dp;

1326

int sshift, dshift;

1327

int s_start, s_end, s_inc;

1328

png_uint_32 i;

1329

1330

sp = row + (png_size_t)((row_info->width - 1) >> 2);

1331

dp = row + (png_size_t)((final_width - 1) >> 2);

1332

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

1333

if (transformations & PNG_PACKSWAP)

1334

{

1335

sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);

1336

dshift = (png_size_t)(((final_width + 3) & 3) << 1);

1337

s_start = 6;

1338

s_end = 0;

1339

s_inc = -2;

1340

}

1341

else

1342

#endif

1343

{

1344

sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);

1345

dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);

1346

s_start = 0;

1347

s_end = 6;

1348

s_inc = 2;

1349

}

1350

1351

for (i = row_info->width; i; i--)

1352

{

1353

png_byte v;

1354

int j;

1355

1356

v = (png_byte)((*sp >> sshift) & 0x3);

1357

for (j = 0; j < png_pass_inc[pass]; j++)

1358

{

1359

*dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);

1360

*dp |= (png_byte)(v << dshift);

1361

if (dshift == s_end)

1362

{

1363

dshift = s_start;

1364

dp--;

1365

}

1366

else

1367

dshift += s_inc;

1368

}

1369

if (sshift == s_end)

1370

{

1371

sshift = s_start;

1372

sp--;

1373

}

1374

else

1375

sshift += s_inc;

1376

}

1377

break;

1378

}

1379

1380

case 4:

1381

{

1382

png_bytep sp, dp;

1383

int sshift, dshift;

1384

int s_start, s_end, s_inc;

1385

png_uint_32 i;

1386

1387

sp = row + (png_size_t)((row_info->width - 1) >> 1);

1388

dp = row + (png_size_t)((final_width - 1) >> 1);

1389

#if defined(PNG_READ_PACKSWAP_SUPPORTED)

1390

if (transformations & PNG_PACKSWAP)

1391

{

1392

sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);

1393

dshift = (png_size_t)(((final_width + 1) & 1) << 2);

1394

s_start = 4;

1395

s_end = 0;

1396

s_inc = -4;

1397

}

1398

else

1399

#endif

1400

{

1401

sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);

1402

dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);

1403

s_start = 0;

1404

s_end = 4;

1405

s_inc = 4;

1406

}

1407

1408

for (i = row_info->width; i; i--)

1409

{

1410

png_byte v;

1411

int j;

1412

1413

v = (png_byte)((*sp >> sshift) & 0xf);

1414

for (j = 0; j < png_pass_inc[pass]; j++)

1415

{

1416

*dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);

1417

*dp |= (png_byte)(v << dshift);

1418

if (dshift == s_end)

1419

{

1420

dshift = s_start;

1421

dp--;

1422

}

1423

else

1424

dshift += s_inc;

1425

}

1426

if (sshift == s_end)

1427

{

1428

sshift = s_start;

1429

sp--;

1430

}

1431

else

1432

sshift += s_inc;

1433

}

1434

break;

1435

}

1436

1437

//====================================================================

1438

1439

default: // 8-bit or larger (this is where the routine is modified)

1440

{

1441

// static unsigned long long _const4 = 0x0000000000FFFFFFLL; no good

1442

// static unsigned long long const4 = 0x0000000000FFFFFFLL; no good

1443

// unsigned long long _const4 = 0x0000000000FFFFFFLL; no good

1444

// unsigned long long const4 = 0x0000000000FFFFFFLL; no good

1445

png_bytep sptr, dp;

1446

png_uint_32 i;

1447

png_size_t pixel_bytes;

1448

int width = row_info->width;

1449

1450

pixel_bytes = (row_info->pixel_depth >> 3);

1451

1452

// point sptr at the last pixel in the pre-expanded row:

1453

sptr = row + (width - 1) * pixel_bytes;

1454

1455

// point dp at the last pixel position in the expanded row:

1456

dp = row + (final_width - 1) * pixel_bytes;

1457

1458

// New code by Nirav Chhatrapati - Intel Corporation

1459

1460

if (mmx_supported) // use MMX code if machine supports it

1461

{

1462

//--------------------------------------------------------------

1463

if (pixel_bytes == 3)

1464

{

1465

if (((pass == 0) || (pass == 1)) && width)

1466

{

1467

int dummy_value_c; // fix 'forbidden register spilled'

1468

int dummy_value_S;

1469

int dummy_value_D;

1470

__asm__ __volatile__ (

1471

"subl $21, %%edi \n\t"

1472

// (png_pass_inc[pass] - 1)*pixel_bytes

1473

1474

".loop3_pass0: \n\t"

1475

"movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0

1476

"pand _const4, %%mm0 \n\t" // z z z z z 2 1 0

1477

"movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0

1478

"psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z

1479

"movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z

1480

"psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z

1481

"psrlq $8, %%mm1 \n\t" // z z z z z z 2 1

1482

"por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z

1483

"por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1

1484

"movq %%mm0, %%mm3 \n\t" // 2 1 0 2 1 0 2 1

1485

"psllq $16, %%mm0 \n\t" // 0 2 1 0 2 1 z z

1486

"movq %%mm3, %%mm4 \n\t" // 2 1 0 2 1 0 2 1

1487

"punpckhdq %%mm0, %%mm3 \n\t" // 0 2 1 0 2 1 0 2

1488

"movq %%mm4, 16(%%edi) \n\t"

1489

"psrlq $32, %%mm0 \n\t" // z z z z 0 2 1 0

1490

"movq %%mm3, 8(%%edi) \n\t"

1491

"punpckldq %%mm4, %%mm0 \n\t" // 1 0 2 1 0 2 1 0

1492

"subl $3, %%esi \n\t"

1493

"movq %%mm0, (%%edi) \n\t"

1494

"subl $24, %%edi \n\t"

1495

"decl %%ecx \n\t"

1496

"jnz .loop3_pass0 \n\t"

1497

"EMMS \n\t" // DONE

1498

1499

: "=c" (dummy_value_c), // output regs (dummy)

1500

"=S" (dummy_value_S),

1501

"=D" (dummy_value_D)

1502

1503

: "1" (sptr), // esi // input regs

1504

"2" (dp), // edi

1505

"0" (width) // ecx

1506

// doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. _const4)

1507

1508

// : // clobber list

1509

#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */

1510

: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"

1511

#endif

1512

);

1513

}

1514

else if (((pass == 2) || (pass == 3)) && width)

1515

{

1516

int dummy_value_c; // fix 'forbidden register spilled'

1517

int dummy_value_S;

1518

int dummy_value_D;

1519

__asm__ __volatile__ (

1520

"subl $9, %%edi \n\t"

1521

// (png_pass_inc[pass] - 1)*pixel_bytes

1522

1523

".loop3_pass2: \n\t"

1524

"movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0

1525

"pand _const4, %%mm0 \n\t" // z z z z z 2 1 0

1526

"movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0

1527

"psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z

1528

"movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z

1529

"psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z

1530

"psrlq $8, %%mm1 \n\t" // z z z z z z 2 1

1531

"por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z

1532

"por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1

1533

"movq %%mm0, 4(%%edi) \n\t"

1534

"psrlq $16, %%mm0 \n\t" // z z 2 1 0 2 1 0

1535

"subl $3, %%esi \n\t"

1536

"movd %%mm0, (%%edi) \n\t"

1537

"subl $12, %%edi \n\t"

1538

"decl %%ecx \n\t"

1539

"jnz .loop3_pass2 \n\t"

1540

"EMMS \n\t" // DONE

1541

1542

: "=c" (dummy_value_c), // output regs (dummy)

1543

"=S" (dummy_value_S),

1544

"=D" (dummy_value_D)

1545

1546

: "1" (sptr), // esi // input regs

1547

"2" (dp), // edi

1548

"0" (width) // ecx

1549

1550

// : // clobber list

1551

#if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */

1552

: "%mm0", "%mm1", "%mm2"

1553

#endif

1554

);

1555

}

1556

else if (width) /* && ((pass == 4) || (pass == 5)) */

1557

{

1558

int width_mmx = ((width >> 1) << 1) - 8; // GRR: huh?

1559

if (width_mmx < 0)

1560

width_mmx = 0;

1561

width -= width_mmx; // 8 or 9 pix, 24 or 27 bytes

1562

if (width_mmx)

1563

{

1564

// png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};

1565

// sptr points at last pixel in pre-expanded row

1566

// dp points at last pixel position in expanded row

1567

int dummy_value_c; // fix 'forbidden register spilled'

1568

int dummy_value_S;

1569

int dummy_value_D;

1570

__asm__ __volatile__ (

1571

"subl $3, %%esi \n\t"

1572

"subl $9, %%edi \n\t"

1573

// (png_pass_inc[pass] + 1)*pixel_bytes

1574

1575

".loop3_pass4: \n\t"

1576

"movq (%%esi), %%mm0 \n\t" // x x 5 4 3 2 1 0

1577

"movq %%mm0, %%mm1 \n\t" // x x 5 4 3 2 1 0

1578

"movq %%mm0, %%mm2 \n\t" // x x 5 4 3 2 1 0

1579

"psllq $24, %%mm0 \n\t" // 4 3 2 1 0 z z z

1580

"pand _const4, %%mm1 \n\t" // z z z z z 2 1 0

1581

"psrlq $24, %%mm2 \n\t" // z z z x x 5 4 3

1582

"por %%mm1, %%mm0 \n\t" // 4 3 2 1 0 2 1 0

1583

"movq %%mm2, %%mm3 \n\t" // z z z x x 5 4 3

1584

"psllq $8, %%mm2 \n\t" // z z x x 5 4 3 z

1585

"movq %%mm0, (%%edi) \n\t"

1586

"psrlq $16, %%mm3 \n\t" // z z z z z x x 5

1587

"pand _const6, %%mm3 \n\t" // z z z z z z z 5

1588

"por %%mm3, %%mm2 \n\t" // z z x x 5 4 3 5

1589

"subl $6, %%esi \n\t"

1590

"movd %%mm2, 8(%%edi) \n\t"

1591

"subl $12, %%edi \n\t"

1592

"subl $2, %%ecx \n\t"

1593

"jnz .loop3_pass4 \n\t"

1594

"EMMS \n\t" // DONE

1595

1596

: "=c" (dummy_value_c), // output regs (dummy)

1597

"=S" (dummy_value_S),

1598

"=D" (dummy_value_D)

1599

1600

: "1" (sptr), // esi // input regs

1601

"2" (dp), // edi

1602

"0" (width_mmx) // ecx

1603

1604

// : // clobber list

1605

#if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */

1606

: "%mm0", "%mm1", "%mm2", "%mm3"

1607

#endif

1608

);

1609

}

1610

1611

sptr -= width_mmx*3;

1612

dp -= width_mmx*6;

1613

for (i = width; i; i--)

1614

{

1615

png_byte v[8];

1616

int j;

1617

1618

png_memcpy(v, sptr, 3);

1619

for (j = 0; j < png_pass_inc[pass]; j++)

1620

{

1621

png_memcpy(dp, v, 3);

1622

dp -= 3;

1623

}

1624

sptr -= 3;

1625

}

1626

}

1627

} /* end of pixel_bytes == 3 */

1628

1629

//--------------------------------------------------------------

1630

else if (pixel_bytes == 1)

1631

{

1632

if (((pass == 0) || (pass == 1)) && width)

1633

{

1634

int width_mmx = ((width >> 2) << 2);

1635

width -= width_mmx; // 0-3 pixels => 0-3 bytes

1636

if (width_mmx)

1637

{

1638

int dummy_value_c; // fix 'forbidden register spilled'

1639

int dummy_value_S;

1640

int dummy_value_D;

1641

__asm__ __volatile__ (

1642

"subl $3, %%esi \n\t"

1643

"subl $31, %%edi \n\t"

1644

1645

".loop1_pass0: \n\t"

1646

"movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0

1647

"movq %%mm0, %%mm1 \n\t" // x x x x 3 2 1 0

1648

"punpcklbw %%mm0, %%mm0 \n\t" // 3 3 2 2 1 1 0 0

1649

"movq %%mm0, %%mm2 \n\t" // 3 3 2 2 1 1 0 0

1650

"punpcklwd %%mm0, %%mm0 \n\t" // 1 1 1 1 0 0 0 0

1651

"movq %%mm0, %%mm3 \n\t" // 1 1 1 1 0 0 0 0

1652

"punpckldq %%mm0, %%mm0 \n\t" // 0 0 0 0 0 0 0 0

1653

"punpckhdq %%mm3, %%mm3 \n\t" // 1 1 1 1 1 1 1 1

1654

"movq %%mm0, (%%edi) \n\t"

1655

"punpckhwd %%mm2, %%mm2 \n\t" // 3 3 3 3 2 2 2 2

1656

"movq %%mm3, 8(%%edi) \n\t"

1657

"movq %%mm2, %%mm4 \n\t" // 3 3 3 3 2 2 2 2

1658

"punpckldq %%mm2, %%mm2 \n\t" // 2 2 2 2 2 2 2 2

1659

"punpckhdq %%mm4, %%mm4 \n\t" // 3 3 3 3 3 3 3 3

1660

"movq %%mm2, 16(%%edi) \n\t"

1661

"subl $4, %%esi \n\t"

1662

"movq %%mm4, 24(%%edi) \n\t"

1663

"subl $32, %%edi \n\t"

1664

"subl $4, %%ecx \n\t"

1665

"jnz .loop1_pass0 \n\t"

1666

"EMMS \n\t" // DONE

1667

1668

: "=c" (dummy_value_c), // output regs (dummy)

1669

"=S" (dummy_value_S),

1670

"=D" (dummy_value_D)

1671

1672

: "1" (sptr), // esi // input regs

1673

"2" (dp), // edi

1674

"0" (width_mmx) // ecx

1675

1676

// : // clobber list

1677

#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */

1678

: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"

1679

#endif

1680

);

1681

}

1682

1683

sptr -= width_mmx;

1684

dp -= width_mmx*8;

1685

for (i = width; i; i--)

1686

{

1687

int j;

1688

1689

/* I simplified this part in version 1.0.4e

1690

* here and in several other instances where

1691

* pixel_bytes == 1 -- GR-P

1692

1693

* Original code:

1694

1695

* png_byte v[8];

1696

* png_memcpy(v, sptr, pixel_bytes);

1697

* for (j = 0; j < png_pass_inc[pass]; j++)

1698

* {

1699

* png_memcpy(dp, v, pixel_bytes);

1700

* dp -= pixel_bytes;

1701

* }

1702

* sptr -= pixel_bytes;

1703

1704

* Replacement code is in the next three lines:

1705

1706

1707

for (j = 0; j < png_pass_inc[pass]; j++)

1708

*dp-- = *sptr;

1709

--sptr;

1710

}

1711

}

1712

else if (((pass == 2) || (pass == 3)) && width)

1713

{

1714

int width_mmx = ((width >> 2) << 2);

1715

width -= width_mmx; // 0-3 pixels => 0-3 bytes

1716

if (width_mmx)

1717

{

1718

int dummy_value_c; // fix 'forbidden register spilled'

1719

int dummy_value_S;

1720

int dummy_value_D;

1721

__asm__ __volatile__ (

1722

"subl $3, %%esi \n\t"

1723

"subl $15, %%edi \n\t"

1724

1725

".loop1_pass2: \n\t"

1726

"movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0

1727

"punpcklbw %%mm0, %%mm0 \n\t" // 3 3 2 2 1 1 0 0

1728

"movq %%mm0, %%mm1 \n\t" // 3 3 2 2 1 1 0 0

1729

"punpcklwd %%mm0, %%mm0 \n\t" // 1 1 1 1 0 0 0 0

1730

"punpckhwd %%mm1, %%mm1 \n\t" // 3 3 3 3 2 2 2 2

1731

"movq %%mm0, (%%edi) \n\t"

1732

"subl $4, %%esi \n\t"

1733

"movq %%mm1, 8(%%edi) \n\t"

1734

"subl $16, %%edi \n\t"

1735

"subl $4, %%ecx \n\t"

1736

"jnz .loop1_pass2 \n\t"

1737

"EMMS \n\t" // DONE

1738

1739

: "=c" (dummy_value_c), // output regs (dummy)

1740

"=S" (dummy_value_S),

1741

"=D" (dummy_value_D)

1742

1743

: "1" (sptr), // esi // input regs

1744

"2" (dp), // edi

1745

"0" (width_mmx) // ecx

1746

1747

// : // clobber list

1748

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

1749

: "%mm0", "%mm1"

1750

#endif

1751

);

1752

}

1753

1754

sptr -= width_mmx;

1755

dp -= width_mmx*4;

1756

for (i = width; i; i--)

1757

{

1758

int j;

1759

1760

for (j = 0; j < png_pass_inc[pass]; j++)

1761

*dp-- = *sptr;

1762

--sptr;

1763

}

1764

}

1765

else if (width) /* && ((pass == 4) || (pass == 5)) */

1766

{

1767

int width_mmx = ((width >> 3) << 3);

1768

width -= width_mmx; // 0-3 pixels => 0-3 bytes

1769

if (width_mmx)

1770

{

1771

int dummy_value_c; // fix 'forbidden register spilled'

1772

int dummy_value_S;

1773

int dummy_value_D;

1774

__asm__ __volatile__ (

1775

"subl $7, %%esi \n\t"

1776

"subl $15, %%edi \n\t"

1777

1778

".loop1_pass4: \n\t"

1779

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

1780

"movq %%mm0, %%mm1 \n\t" // 7 6 5 4 3 2 1 0

1781

"punpcklbw %%mm0, %%mm0 \n\t" // 3 3 2 2 1 1 0 0

1782

"punpckhbw %%mm1, %%mm1 \n\t" // 7 7 6 6 5 5 4 4

1783

"movq %%mm1, 8(%%edi) \n\t"

1784

"subl $8, %%esi \n\t"

1785

"movq %%mm0, (%%edi) \n\t"

1786

"subl $16, %%edi \n\t"

1787

"subl $8, %%ecx \n\t"

1788

"jnz .loop1_pass4 \n\t"

1789

"EMMS \n\t" // DONE

1790

1791

: "=c" (dummy_value_c), // output regs (none)

1792

"=S" (dummy_value_S),

1793

"=D" (dummy_value_D)

1794

1795

: "1" (sptr), // esi // input regs

1796

"2" (dp), // edi

1797

"0" (width_mmx) // ecx

1798

1799

// : // clobber list

1800

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

1801

: "%mm0", "%mm1"

1802

#endif

1803

);

1804

}

1805

1806

sptr -= width_mmx;

1807

dp -= width_mmx*2;

1808

for (i = width; i; i--)

1809

{

1810

int j;

1811

1812

for (j = 0; j < png_pass_inc[pass]; j++)

1813

*dp-- = *sptr;

1814

--sptr;

1815

}

1816

}

1817

} /* end of pixel_bytes == 1 */

1818

1819

//--------------------------------------------------------------

1820

else if (pixel_bytes == 2)

1821

{

1822

if (((pass == 0) || (pass == 1)) && width)

1823

{

1824

int width_mmx = ((width >> 1) << 1);

1825

width -= width_mmx; // 0,1 pixels => 0,2 bytes

1826

if (width_mmx)

1827

{

1828

int dummy_value_c; // fix 'forbidden register spilled'

1829

int dummy_value_S;

1830

int dummy_value_D;

1831

__asm__ __volatile__ (

1832

"subl $2, %%esi \n\t"

1833

"subl $30, %%edi \n\t"

1834

1835

".loop2_pass0: \n\t"

1836

"movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0

1837

"punpcklwd %%mm0, %%mm0 \n\t" // 3 2 3 2 1 0 1 0

1838

"movq %%mm0, %%mm1 \n\t" // 3 2 3 2 1 0 1 0

1839

"punpckldq %%mm0, %%mm0 \n\t" // 1 0 1 0 1 0 1 0

1840

"punpckhdq %%mm1, %%mm1 \n\t" // 3 2 3 2 3 2 3 2

1841

"movq %%mm0, (%%edi) \n\t"

1842

"movq %%mm0, 8(%%edi) \n\t"

1843

"movq %%mm1, 16(%%edi) \n\t"

1844

"subl $4, %%esi \n\t"

1845

"movq %%mm1, 24(%%edi) \n\t"

1846

"subl $32, %%edi \n\t"

1847

"subl $2, %%ecx \n\t"

1848

"jnz .loop2_pass0 \n\t"

1849

"EMMS \n\t" // DONE

1850

1851

: "=c" (dummy_value_c), // output regs (dummy)

1852

"=S" (dummy_value_S),

1853

"=D" (dummy_value_D)

1854

1855

: "1" (sptr), // esi // input regs

1856

"2" (dp), // edi

1857

"0" (width_mmx) // ecx

1858

1859

// : // clobber list

1860

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

1861

: "%mm0", "%mm1"

1862

#endif

1863

);

1864

}

1865

1866

sptr -= (width_mmx*2 - 2); // sign fixed

1867

dp -= (width_mmx*16 - 2); // sign fixed

1868

for (i = width; i; i--)

1869

{

1870

png_byte v[8];

1871

int j;

1872

sptr -= 2;

1873

png_memcpy(v, sptr, 2);

1874

for (j = 0; j < png_pass_inc[pass]; j++)

1875

{

1876

dp -= 2;

1877

png_memcpy(dp, v, 2);

1878

}

1879

}

1880

}

1881

else if (((pass == 2) || (pass == 3)) && width)

1882

{

1883

int width_mmx = ((width >> 1) << 1) ;

1884

width -= width_mmx; // 0,1 pixels => 0,2 bytes

1885

if (width_mmx)

1886

{

1887

int dummy_value_c; // fix 'forbidden register spilled'

1888

int dummy_value_S;

1889

int dummy_value_D;

1890

__asm__ __volatile__ (

1891

"subl $2, %%esi \n\t"

1892

"subl $14, %%edi \n\t"

1893

1894

".loop2_pass2: \n\t"

1895

"movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0

1896

"punpcklwd %%mm0, %%mm0 \n\t" // 3 2 3 2 1 0 1 0

1897

"movq %%mm0, %%mm1 \n\t" // 3 2 3 2 1 0 1 0

1898

"punpckldq %%mm0, %%mm0 \n\t" // 1 0 1 0 1 0 1 0

1899

"punpckhdq %%mm1, %%mm1 \n\t" // 3 2 3 2 3 2 3 2

1900

"movq %%mm0, (%%edi) \n\t"

1901

"subl $4, %%esi \n\t"

1902

"movq %%mm1, 8(%%edi) \n\t"

1903

"subl $16, %%edi \n\t"

1904

"subl $2, %%ecx \n\t"

1905

"jnz .loop2_pass2 \n\t"

1906

"EMMS \n\t" // DONE

1907

1908

: "=c" (dummy_value_c), // output regs (dummy)

1909

"=S" (dummy_value_S),

1910

"=D" (dummy_value_D)

1911

1912

: "1" (sptr), // esi // input regs

1913

"2" (dp), // edi

1914

"0" (width_mmx) // ecx

1915

1916

// : // clobber list

1917

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

1918

: "%mm0", "%mm1"

1919

#endif

1920

);

1921

}

1922

1923

sptr -= (width_mmx*2 - 2); // sign fixed

1924

dp -= (width_mmx*8 - 2); // sign fixed

1925

for (i = width; i; i--)

1926

{

1927

png_byte v[8];

1928

int j;

1929

sptr -= 2;

1930

png_memcpy(v, sptr, 2);

1931

for (j = 0; j < png_pass_inc[pass]; j++)

1932

{

1933

dp -= 2;

1934

png_memcpy(dp, v, 2);

1935

}

1936

}

1937

}

1938

else if (width) // pass == 4 or 5

1939

{

1940

int width_mmx = ((width >> 1) << 1) ;

1941

width -= width_mmx; // 0,1 pixels => 0,2 bytes

1942

if (width_mmx)

1943

{

1944

int dummy_value_c; // fix 'forbidden register spilled'

1945

int dummy_value_S;

1946

int dummy_value_D;

1947

__asm__ __volatile__ (

1948

"subl $2, %%esi \n\t"

1949

"subl $6, %%edi \n\t"

1950

1951

".loop2_pass4: \n\t"

1952

"movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0

1953

"punpcklwd %%mm0, %%mm0 \n\t" // 3 2 3 2 1 0 1 0

1954

"subl $4, %%esi \n\t"

1955

"movq %%mm0, (%%edi) \n\t"

1956

"subl $8, %%edi \n\t"

1957

"subl $2, %%ecx \n\t"

1958

"jnz .loop2_pass4 \n\t"

1959

"EMMS \n\t" // DONE

1960

1961

: "=c" (dummy_value_c), // output regs (dummy)

1962

"=S" (dummy_value_S),

1963

"=D" (dummy_value_D)

1964

1965

: "1" (sptr), // esi // input regs

1966

"2" (dp), // edi

1967

"0" (width_mmx) // ecx

1968

1969

// : // clobber list

1970

#if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */

1971

: "%mm0"

1972

#endif

1973

);

1974

}

1975

1976

sptr -= (width_mmx*2 - 2); // sign fixed

1977

dp -= (width_mmx*4 - 2); // sign fixed

1978

for (i = width; i; i--)

1979

{

1980

png_byte v[8];

1981

int j;

1982

sptr -= 2;

1983

png_memcpy(v, sptr, 2);

1984

for (j = 0; j < png_pass_inc[pass]; j++)

1985

{

1986

dp -= 2;

1987

png_memcpy(dp, v, 2);

1988

}

1989

}

1990

}

1991

} /* end of pixel_bytes == 2 */

1992

1993

//--------------------------------------------------------------

1994

else if (pixel_bytes == 4)

1995

{

1996

if (((pass == 0) || (pass == 1)) && width)

1997

{

1998

int width_mmx = ((width >> 1) << 1);

1999

width -= width_mmx; // 0,1 pixels => 0,4 bytes

2000

2001

fprintf(stderr, "GRR DEBUG: png_do_read_interlace() pass = %d, width_mmx = %d, width = %d\n", pass, width_mmx, width);

2002

fprintf(stderr, " sptr = 0x%08lx, dp = 0x%08lx\n", (unsigned long)sptr, (unsigned long)dp);

2003

fflush(stderr);

2004

2005

if (width_mmx)

2006

{

2007

int dummy_value_c; // fix 'forbidden register spilled'

2008

int dummy_value_S;

2009

int dummy_value_D;

2010

#ifdef GRR_DEBUG

2011

FILE *junk = fopen("junk.4bytes", "wb");

2012

if (junk)

2013

fclose(junk);

2014

#endif /* GRR_DEBUG */

2015

__asm__ __volatile__ (

2016

"subl $4, %%esi \n\t"

2017

"subl $60, %%edi \n\t"

2018

2019

".loop4_pass0: \n\t"

2020

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2021

"movq %%mm0, %%mm1 \n\t" // 7 6 5 4 3 2 1 0

2022

"punpckldq %%mm0, %%mm0 \n\t" // 3 2 1 0 3 2 1 0

2023

"punpckhdq %%mm1, %%mm1 \n\t" // 7 6 5 4 7 6 5 4

2024

"movq %%mm0, (%%edi) \n\t"

2025

"movq %%mm0, 8(%%edi) \n\t"

2026

"movq %%mm0, 16(%%edi) \n\t"

2027

"movq %%mm0, 24(%%edi) \n\t"

2028

"movq %%mm1, 32(%%edi) \n\t"

2029

"movq %%mm1, 40(%%edi) \n\t"

2030

"movq %%mm1, 48(%%edi) \n\t"

2031

"subl $8, %%esi \n\t"

2032

"movq %%mm1, 56(%%edi) \n\t"

2033

"subl $64, %%edi \n\t"

2034

"subl $2, %%ecx \n\t"

2035

"jnz .loop4_pass0 \n\t"

2036

"EMMS \n\t" // DONE

2037

2038

: "=c" (dummy_value_c), // output regs (dummy)

2039

"=S" (dummy_value_S),

2040

"=D" (dummy_value_D)

2041

2042

: "1" (sptr), // esi // input regs

2043

"2" (dp), // edi

2044

"0" (width_mmx) // ecx

2045

2046

// : // clobber list

2047

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

2048

: "%mm0", "%mm1"

2049

#endif

2050

);

2051

}

2052

2053

sptr -= (width_mmx*4 - 4); // sign fixed

2054

dp -= (width_mmx*32 - 4); // sign fixed

2055

for (i = width; i; i--)

2056

{

2057

png_byte v[8];

2058

int j;

2059

sptr -= 4;

2060

png_memcpy(v, sptr, 4);

2061

for (j = 0; j < png_pass_inc[pass]; j++)

2062

{

2063

dp -= 4;

2064

png_memcpy(dp, v, 4);

2065

}

2066

}

2067

}

2068

else if (((pass == 2) || (pass == 3)) && width)

2069

{

2070

int width_mmx = ((width >> 1) << 1);

2071

width -= width_mmx; // 0,1 pixels => 0,4 bytes

2072

if (width_mmx)

2073

{

2074

int dummy_value_c; // fix 'forbidden register spilled'

2075

int dummy_value_S;

2076

int dummy_value_D;

2077

__asm__ __volatile__ (

2078

"subl $4, %%esi \n\t"

2079

"subl $28, %%edi \n\t"

2080

2081

".loop4_pass2: \n\t"

2082

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2083

"movq %%mm0, %%mm1 \n\t" // 7 6 5 4 3 2 1 0

2084

"punpckldq %%mm0, %%mm0 \n\t" // 3 2 1 0 3 2 1 0

2085

"punpckhdq %%mm1, %%mm1 \n\t" // 7 6 5 4 7 6 5 4

2086

"movq %%mm0, (%%edi) \n\t"

2087

"movq %%mm0, 8(%%edi) \n\t"

2088

"movq %%mm1, 16(%%edi) \n\t"

2089

"movq %%mm1, 24(%%edi) \n\t"

2090

"subl $8, %%esi \n\t"

2091

"subl $32, %%edi \n\t"

2092

"subl $2, %%ecx \n\t"

2093

"jnz .loop4_pass2 \n\t"

2094

"EMMS \n\t" // DONE

2095

2096

: "=c" (dummy_value_c), // output regs (dummy)

2097

"=S" (dummy_value_S),

2098

"=D" (dummy_value_D)

2099

2100

: "1" (sptr), // esi // input regs

2101

"2" (dp), // edi

2102

"0" (width_mmx) // ecx

2103

2104

// : // clobber list

2105

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

2106

: "%mm0", "%mm1"

2107

#endif

2108

);

2109

}

2110

2111

sptr -= (width_mmx*4 - 4); // sign fixed

2112

dp -= (width_mmx*16 - 4); // sign fixed

2113

for (i = width; i; i--)

2114

{

2115

png_byte v[8];

2116

int j;

2117

sptr -= 4;

2118

png_memcpy(v, sptr, 4);

2119

for (j = 0; j < png_pass_inc[pass]; j++)

2120

{

2121

dp -= 4;

2122

png_memcpy(dp, v, 4);

2123

}

2124

}

2125

}

2126

else if (width) // pass == 4 or 5

2127

{

2128

int width_mmx = ((width >> 1) << 1) ;

2129

width -= width_mmx; // 0,1 pixels => 0,4 bytes

2130

if (width_mmx)

2131

{

2132

int dummy_value_c; // fix 'forbidden register spilled'

2133

int dummy_value_S;

2134

int dummy_value_D;

2135

__asm__ __volatile__ (

2136

"subl $4, %%esi \n\t"

2137

"subl $12, %%edi \n\t"

2138

2139

".loop4_pass4: \n\t"

2140

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2141

"movq %%mm0, %%mm1 \n\t" // 7 6 5 4 3 2 1 0

2142

"punpckldq %%mm0, %%mm0 \n\t" // 3 2 1 0 3 2 1 0

2143

"punpckhdq %%mm1, %%mm1 \n\t" // 7 6 5 4 7 6 5 4

2144

"movq %%mm0, (%%edi) \n\t"

2145

"subl $8, %%esi \n\t"

2146

"movq %%mm1, 8(%%edi) \n\t"

2147

"subl $16, %%edi \n\t"

2148

"subl $2, %%ecx \n\t"

2149

"jnz .loop4_pass4 \n\t"

2150

"EMMS \n\t" // DONE

2151

2152

: "=c" (dummy_value_c), // output regs (dummy)

2153

"=S" (dummy_value_S),

2154

"=D" (dummy_value_D)

2155

2156

: "1" (sptr), // esi // input regs

2157

"2" (dp), // edi

2158

"0" (width_mmx) // ecx

2159

2160

// : // clobber list

2161

#if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */

2162

: "%mm0", "%mm1"

2163

#endif

2164

);

2165

}

2166

2167

sptr -= (width_mmx*4 - 4); // sign fixed

2168

dp -= (width_mmx*8 - 4); // sign fixed

2169

for (i = width; i; i--)

2170

{

2171

png_byte v[8];

2172

int j;

2173

sptr -= 4;

2174

png_memcpy(v, sptr, 4);

2175

for (j = 0; j < png_pass_inc[pass]; j++)

2176

{

2177

dp -= 4;

2178

png_memcpy(dp, v, 4);

2179

}

2180

}

2181

}

2182

} /* end of pixel_bytes == 4 */

2183

2184

#define STILL_WORKING_ON_THIS

2185

#ifdef STILL_WORKING_ON_THIS // GRR: should work, but needs testing

2186

// (special 64-bit version of rpng2)

2187

2188

//--------------------------------------------------------------

2189

else if (pixel_bytes == 8)

2190

{

2191

// GRR NOTE: no need to combine passes here!

2192

if (((pass == 0) || (pass == 1)) && width)

2193

{

2194

// source is 8-byte RRGGBBAA

2195

// dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ...

2196

int dummy_value_c; // fix 'forbidden register spilled'

2197

int dummy_value_S;

2198

int dummy_value_D;

2199

#ifdef GRR_DEBUG

2200

FILE *junk = fopen("junk.8bytes", "wb");

2201

if (junk)

2202

fclose(junk);

2203

#endif /* GRR_DEBUG */

2204

__asm__ __volatile__ (

2205

"subl $56, %%edi \n\t" // start of last block

2206

2207

".loop8_pass0: \n\t"

2208

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2209

"movq %%mm0, (%%edi) \n\t"

2210

"movq %%mm0, 8(%%edi) \n\t"

2211

"movq %%mm0, 16(%%edi) \n\t"

2212

"movq %%mm0, 24(%%edi) \n\t"

2213

"movq %%mm0, 32(%%edi) \n\t"

2214

"movq %%mm0, 40(%%edi) \n\t"

2215

"movq %%mm0, 48(%%edi) \n\t"

2216

"subl $8, %%esi \n\t"

2217

"movq %%mm0, 56(%%edi) \n\t"

2218

"subl $64, %%edi \n\t"

2219

"decl %%ecx \n\t"

2220

"jnz .loop8_pass0 \n\t"

2221

"EMMS \n\t" // DONE

2222

2223

: "=c" (dummy_value_c), // output regs (dummy)

2224

"=S" (dummy_value_S),

2225

"=D" (dummy_value_D)

2226

2227

: "1" (sptr), // esi // input regs

2228

"2" (dp), // edi

2229

"0" (width) // ecx

2230

2231

// : // clobber list

2232

#if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */

2233

: "%mm0"

2234

#endif

2235

);

2236

}

2237

else if (((pass == 2) || (pass == 3)) && width)

2238

{

2239

// source is 8-byte RRGGBBAA

2240

// dest is 32-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA

2241

int width_mmx = ((width >> 1) << 1) ;

2242

width -= width_mmx;

2243

if (width_mmx)

2244

{

2245

int dummy_value_c; // fix 'forbidden register spilled'

2246

int dummy_value_S;

2247

int dummy_value_D;

2248

__asm__ __volatile__ (

2249

"subl $24, %%edi \n\t" // start of last block

2250

2251

".loop8_pass2: \n\t"

2252

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2253

"movq %%mm0, (%%edi) \n\t"

2254

"movq %%mm0, 8(%%edi) \n\t"

2255

"movq %%mm0, 16(%%edi) \n\t"

2256

"subl $8, %%esi \n\t"

2257

"movq %%mm0, 24(%%edi) \n\t"

2258

"subl $32, %%edi \n\t"

2259

"decl %%ecx \n\t"

2260

"jnz .loop8_pass2 \n\t"

2261

"EMMS \n\t" // DONE

2262

2263

: "=c" (dummy_value_c), // output regs (dummy)

2264

"=S" (dummy_value_S),

2265

"=D" (dummy_value_D)

2266

2267

: "1" (sptr), // esi // input regs

2268

"2" (dp), // edi

2269

"0" (width) // ecx

2270

2271

// : // clobber list

2272

#if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */

2273

: "%mm0"

2274

#endif

2275

);

2276

}

2277

}

2278

else if (width) // pass == 4 or 5

2279

{

2280

// source is 8-byte RRGGBBAA

2281

// dest is 16-byte RRGGBBAA RRGGBBAA

2282

int width_mmx = ((width >> 1) << 1) ;

2283

width -= width_mmx;

2284

if (width_mmx)

2285

{

2286

int dummy_value_c; // fix 'forbidden register spilled'

2287

int dummy_value_S;

2288

int dummy_value_D;

2289

__asm__ __volatile__ (

2290

"subl $8, %%edi \n\t" // start of last block

2291

2292

".loop8_pass4: \n\t"

2293

"movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0

2294

"movq %%mm0, (%%edi) \n\t"

2295

"subl $8, %%esi \n\t"

2296

"movq %%mm0, 8(%%edi) \n\t"

2297

"subl $16, %%edi \n\t"

2298

"decl %%ecx \n\t"

2299

"jnz .loop8_pass4 \n\t"

2300

"EMMS \n\t" // DONE

2301

2302

: "=c" (dummy_value_c), // output regs (dummy)

2303

"=S" (dummy_value_S),

2304

"=D" (dummy_value_D)

2305

2306

: "1" (sptr), // esi // input regs

2307

"2" (dp), // edi

2308

"0" (width) // ecx

2309

2310

// : // clobber list

2311

#if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */

2312

: "%mm0"

2313

#endif

2314

);

2315

}

2316

}

2317

2318

} /* end of pixel_bytes == 8 */

2319

2320

#endif /* STILL_WORKING_ON_THIS */

2321

2322

//--------------------------------------------------------------

2323

else if (pixel_bytes == 6)

2324

{

2325

for (i = width; i; i--)

2326

{

2327

png_byte v[8];

2328

int j;

2329

png_memcpy(v, sptr, 6);

2330

for (j = 0; j < png_pass_inc[pass]; j++)

2331

{

2332

png_memcpy(dp, v, 6);

2333

dp -= 6;

2334

}

2335

sptr -= 6;

2336

}

2337

} /* end of pixel_bytes == 6 */

2338

2339

//--------------------------------------------------------------

2340

else

2341

{

2342

for (i = width; i; i--)

2343

{

2344

png_byte v[8];

2345

int j;

2346

png_memcpy(v, sptr, pixel_bytes);

2347

for (j = 0; j < png_pass_inc[pass]; j++)

2348

{

2349

png_memcpy(dp, v, pixel_bytes);

2350

dp -= pixel_bytes;

2351

}

2352

sptr-= pixel_bytes;

2353

}

2354

}

2355

} // end of mmx_supported =========================================

2356

2357

else /* MMX not supported: use modified C code - takes advantage

2358

* of inlining of memcpy for a constant */

2359

/* GRR 19991007: does it? or should pixel_bytes in each

2360

* block be replaced with immediate value (e.g., 1)? */

2361

/* GRR 19991017: replaced with constants in each case */

2362

{

2363

if (pixel_bytes == 1)

2364

{

2365

for (i = width; i; i--)

2366

{

2367

int j;

2368

for (j = 0; j < png_pass_inc[pass]; j++)

2369

*dp-- = *sptr;

2370

--sptr;

2371

}

2372

}

2373

else if (pixel_bytes == 3)

2374

{

2375

for (i = width; i; i--)

2376

{

2377

png_byte v[8];

2378

int j;

2379

png_memcpy(v, sptr, 3);

2380

for (j = 0; j < png_pass_inc[pass]; j++)

2381

{

2382

png_memcpy(dp, v, 3);

2383

dp -= 3;

2384

}

2385

sptr -= 3;

2386

}

2387

}

2388

else if (pixel_bytes == 2)

2389

{

2390

for (i = width; i; i--)

2391

{

2392

png_byte v[8];

2393

int j;

2394

png_memcpy(v, sptr, 2);

2395

for (j = 0; j < png_pass_inc[pass]; j++)

2396

{

2397

png_memcpy(dp, v, 2);

2398

dp -= 2;

2399

}

2400

sptr -= 2;

2401

}

2402

}

2403

else if (pixel_bytes == 4)

2404

{

2405

for (i = width; i; i--)

2406

{

2407

png_byte v[8];

2408

int j;

2409

png_memcpy(v, sptr, 4);

2410

for (j = 0; j < png_pass_inc[pass]; j++)

2411

{

2412

png_memcpy(dp, v, 4);

2413

dp -= 4;

2414

}

2415

sptr -= 4;

2416

}

2417

}

2418

else if (pixel_bytes == 6)

2419

{

2420

for (i = width; i; i--)

2421

{

2422

png_byte v[8];

2423

int j;

2424

png_memcpy(v, sptr, 6);

2425

for (j = 0; j < png_pass_inc[pass]; j++)

2426

{

2427

png_memcpy(dp, v, 6);

2428

dp -= 6;

2429

}

2430

sptr -= 6;

2431

}

2432

}

2433

else if (pixel_bytes == 8)

2434

{

2435

for (i = width; i; i--)

2436

{

2437

png_byte v[8];

2438

int j;

2439

png_memcpy(v, sptr, 8);

2440

for (j = 0; j < png_pass_inc[pass]; j++)

2441

{

2442

png_memcpy(dp, v, 8);

2443

dp -= 8;

2444

}

2445

sptr -= 8;

2446

}

2447

}

2448

else // GRR: should never be reached

2449

{

2450

for (i = width; i; i--)

2451

{

2452

png_byte v[8];

2453

int j;

2454

png_memcpy(v, sptr, pixel_bytes);

2455

for (j = 0; j < png_pass_inc[pass]; j++)

2456

{

2457

png_memcpy(dp, v, pixel_bytes);

2458

dp -= pixel_bytes;

2459

}

2460

sptr -= pixel_bytes;

2461

}

2462

}

2463

2464

} /* end if (MMX not supported) */

2465

break;

2466

}

2467

} /* end switch (row_info->pixel_depth) */

2468

2469

row_info->width = final_width;

2470

row_info->rowbytes = ((final_width *

2471

(png_uint_32)row_info->pixel_depth + 7) >> 3);

2472

}

2473

2474

} /* end png_do_read_interlace() */

2475

2476

#endif /* PNG_HAVE_ASSEMBLER_READ_INTERLACE */

2477

#endif /* PNG_READ_INTERLACING_SUPPORTED */

2478

2479

2480

// These variables are utilized in the functions below. They are declared

2481

// globally here to ensure alignment on 8-byte boundaries.

2482

2483

// uAll overlays a 64-bit integer ('use', the member the filter code assigns
// and that the initializers below set) with a double ('align', which nothing
// in this part of the file accesses by name).
// NOTE(review): whether the 'double' member really yields 8-byte alignment
// depends on the compiler/ABI -- confirm before relying on it.
union uAll {

2484

long long use;

2485

double align;

2486

// 0x01 in every byte: ANDed with packed bytes to isolate each byte's bit 0
// (the "LBCarry" bits in the Average filter code).
} LBCarryMask = {0x0101010101010101LL},

2487

// 0x7f in every byte: ANDed after a 64-bit right shift by one to clear bit 7
// of each byte (the bit shifted in from the neighbouring byte).
HBClearMask = {0x7f7f7f7f7f7f7f7fLL},

2488

// No initializers here. ActiveMask, ShiftBpp and ShiftRem are assigned per
// bytes-per-pixel case in png_read_filter_row_mmx_avg; ActiveMask2 and
// ActiveMaskEnd are not referenced in this part of the file.
ActiveMask, ActiveMask2, ActiveMaskEnd, ShiftBpp, ShiftRem;

2489

2490

2491

// Optimized code for PNG Average filter decoder

2492

void /* PRIVATE */

2493

png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row,

2494

png_bytep prev_row)

2495

{

2496

int bpp;

2497

int dummy_value_c; // fix 'forbidden register 2 (cx) was spilled' error

2498

int dummy_value_S;

2499

int dummy_value_D;

2500

// int diff; GRR: global now (shortened to dif/_dif)

2501

2502

bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel

2503

_FullLength = row_info->rowbytes; // # of bytes to filter

2504

__asm__ __volatile__ (

2505

// Init address pointers and offset

2506

//GRR "movl row, %%edi \n\t" // edi ==> Avg(x)

2507

"xorl %%ebx, %%ebx \n\t" // ebx ==> x

2508

"movl %%edi, %%edx \n\t"

2509

//GRR "movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2510

//GRR "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx)

2511

"subl %%ecx, %%edx \n\t" // edx ==> Raw(x-bpp)

2512

2513

"xorl %%eax,%%eax \n\t"

2514

2515

// Compute the Raw value for the first bpp bytes

2516

// Raw(x) = Avg(x) + (Prior(x)/2)

2517

"avg_rlp: \n\t"

2518

"movb (%%esi,%%ebx,),%%al \n\t" // Load al with Prior(x)

2519

"incl %%ebx \n\t"

2520

"shrb %%al \n\t" // divide by 2

2521

"addb -1(%%edi,%%ebx,),%%al \n\t" // add Avg(x); -1 to offset inc ebx

2522

//GRR "cmpl bpp, %%ebx \n\t" // (bpp is preloaded into ecx)

2523

"cmpl %%ecx, %%ebx \n\t"

2524

"movb %%al,-1(%%edi,%%ebx,) \n\t" // write Raw(x); -1 to offset inc ebx

2525

"jb avg_rlp \n\t" // mov does not affect flags

2526

2527

// get # of bytes to alignment

2528

"movl %%edi, _dif \n\t" // take start of row

2529

"addl %%ebx, _dif \n\t" // add bpp

2530

"addl $0xf, _dif \n\t" // add 7+8 to incr past alignment bdry

2531

"andl $0xfffffff8, _dif \n\t" // mask to alignment boundary

2532

"subl %%edi, _dif \n\t" // subtract from start => value ebx at alignment

2533

"jz avg_go \n\t"

2534

2535

// fix alignment

2536

// Compute the Raw value for the bytes up to the alignment boundary

2537

// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)

2538

"xorl %%ecx, %%ecx \n\t"

2539

"avg_lp1: \n\t"

2540

"xorl %%eax, %%eax \n\t"

2541

"movb (%%esi,%%ebx,), %%cl \n\t" // load cl with Prior(x)

2542

"movb (%%edx,%%ebx,), %%al \n\t" // load al with Raw(x-bpp)

2543

"addw %%cx, %%ax \n\t"

2544

"incl %%ebx \n\t"

2545

"shrw %%ax \n\t" // divide by 2

2546

"addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset inc ebx

2547

"cmpl _dif, %%ebx \n\t" // check if at alignment boundary

2548

"movb %%al, -1(%%edi,%%ebx,) \n\t" // write Raw(x); -1 to offset inc ebx

2549

"jb avg_lp1 \n\t" // repeat until at alignment boundary

2550

2551

"avg_go: \n\t"

2552

"movl _FullLength, %%eax \n\t"

2553

"movl %%eax, %%ecx \n\t"

2554

"subl %%ebx, %%eax \n\t" // subtract alignment fix

2555

"andl $0x00000007, %%eax \n\t" // calc bytes over mult of 8

2556

"subl %%eax, %%ecx \n\t" // drop over bytes from original length

2557

"movl %%ecx, _MMXLength \n\t"

2558

2559

: "=c" (dummy_value_c), // output regs/vars here, e.g., "=m" (_MMXLength) instead of final instr

2560

"=S" (dummy_value_S),

2561

"=D" (dummy_value_D)

2562

2563

: "1" (prev_row), // esi // input regs

2564

"2" (row), // edi

2565

"0" (bpp) // ecx

2566

2567

: "%eax", "%ebx", // clobber list

2568

"%edx"

2569

// GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength) PROBABLY

2570

);

2571

2572

#ifdef GRR_GCC_MMX_CONVERTED

2573

// Now do the math for the rest of the row

2574

switch ( bpp )

2575

{

2576

case 3:

2577

{

2578

ActiveMask.use = 0x0000000000ffffff;

2579

ShiftBpp.use = 24; // == 3 * 8

2580

ShiftRem.use = 40; // == 64 - 24

2581

__asm__ (

2582

// Re-init address pointers and offset

2583

"movq $ActiveMask, %%mm7 \n\t"

2584

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2585

"movq $LBCarryMask, %%mm5 \n\t"

2586

"movl row, %%edi \n\t" // edi ==> Avg(x)

2587

"movq $HBClearMask, %%mm4 \n\t"

2588

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2589

// PRIME the pump (load the first Raw(x-bpp) data set)

2590

"movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes

2591

// (we correct position in loop below)

2592

"avg_3lp: \n\t"

2593

"movq (%%edi,%%ebx,), %%mm0 \n\t" // Load mm0 with Avg(x)

2594

// Add (Prev_row/2) to Average

2595

"movq %%mm5, %%mm3 \n\t"

2596

"psrlq $ShiftRem, %%mm2 \n\t" // Correct position Raw(x-bpp) data

2597

"movq (%%esi,%%ebx,), %%mm1 \n\t" // Load mm1 with Prior(x)

2598

"movq %%mm7, %%mm6 \n\t"

2599

"pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte

2600

"psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2

2601

"pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte

2602

"paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte

2603

// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry

2604

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2605

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2606

// lsb's were == 1 (Only valid for active group)

2607

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2608

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2609

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2610

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg

2611

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active

2612

// byte

2613

// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry

2614

"psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 3-5

2615

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2616

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2617

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2618

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2619

// lsb's were == 1 (Only valid for active group)

2620

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2621

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2622

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2623

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2624

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active

2625

// byte

2626

2627

// Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry

2628

"psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover the last two

2629

// bytes

2630

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2631

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2632

// Data only needs to be shifted once here to

2633

// get the correct x-bpp offset.

2634

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2635

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2636

// lsb's were == 1 (Only valid for active group)

2637

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2638

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2639

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2640

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2641

"addl $8, %%ebx \n\t"

2642

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active

2643

// byte

2644

// Now ready to write back to memory

2645

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

2646

// Move updated Raw(x) to use as Raw(x-bpp) for next loop

2647

"cmpl _MMXLength, %%ebx \n\t"

2648

"movq %%mm0, %%mm2 \n\t" // mov updated Raw(x) to mm2

2649

"jb avg_3lp \n\t"

2650

2651

: // output regs/vars go here, e.g.: "=m" (memory_var)

2652

2653

: "S" (prev_row), // esi // input regs

2654

"D" (row) // edi

2655

2656

: "%ebx", "%edi", "%esi" // clobber list

2657

// GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength) PROBABLY

2658

// , "%mm0", "%mm1", "%mm2", "%mm3",

2659

// "%mm4", "%mm5", "%mm6", "%mm7"

2660

);

2661

}

2662

break; // end 3 bpp

2663

2664

case 6:

2665

case 4:

2666

//case 7: // who wrote this? PNG doesn't support 5 or 7 bytes/pixel

2667

//case 5:

2668

{

2669

ActiveMask.use = 0xffffffffffffffff; // use shift below to clear

2670

// appropriate inactive bytes

2671

ShiftBpp.use = bpp << 3;

2672

ShiftRem.use = 64 - ShiftBpp.use;

2673

__asm__ (

2674

"movq $HBClearMask, %%mm4 \n\t"

2675

2676

// Re-init address pointers and offset

2677

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2678

2679

// Load ActiveMask and clear all bytes except for 1st active group

2680

"movq $ActiveMask, %%mm7 \n\t"

2681

"movl row, %%edi \n\t" // edi ==> Avg(x)

2682

"psrlq $ShiftRem, %%mm7 \n\t"

2683

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2684

"movq %%mm7, %%mm6 \n\t"

2685

"movq $LBCarryMask, %%mm5 \n\t"

2686

"psllq $ShiftBpp, %%mm6 \n\t" // Create mask for 2nd active group

2687

2688

// PRIME the pump (load the first Raw(x-bpp) data set

2689

"movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes

2690

// (we correct position in loop below)

2691

"avg_4lp: \n\t"

2692

"movq (%%edi,%%ebx,), %%mm0 \n\t"

2693

"psrlq $ShiftRem, %%mm2 \n\t" // shift data to position correctly

2694

"movq (%%esi,%%ebx,), %%mm1 \n\t"

2695

// Add (Prev_row/2) to Average

2696

"movq %%mm5, %%mm3 \n\t"

2697

"pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte

2698

"psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2

2699

"pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte

2700

"paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte

2701

// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry

2702

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2703

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2704

// lsb's were == 1 (Only valid for active group)

2705

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2706

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2707

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2708

"pand %%mm7, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg

2709

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active

2710

// byte

2711

// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry

2712

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2713

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2714

"addl $8, %%ebx \n\t"

2715

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2716

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2717

// lsb's were == 1 (Only valid for active group)

2718

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2719

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2720

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2721

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2722

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active

2723

// byte

2724

"cmpl _MMXLength, %%ebx \n\t"

2725

// Now ready to write back to memory

2726

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

2727

// Prep Raw(x-bpp) for next loop

2728

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2729

"jb avg_4lp \n\t"

2730

2731

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2732

2733

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2734

2735

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

2736

);

2737

}

2738

break; // end 4,6 bpp

2739

2740

case 2:

2741

{

2742

ActiveMask.use = 0x000000000000ffff;

2743

ShiftBpp.use = 24; // == 3 * 8

2744

ShiftRem.use = 40; // == 64 - 24

2745

__asm__ (

2746

// Load ActiveMask

2747

"movq $ActiveMask, %%mm7 \n\t"

2748

// Re-init address pointers and offset

2749

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2750

"movq $LBCarryMask, %%mm5 \n\t"

2751

"movl row, %%edi \n\t" // edi ==> Avg(x)

2752

"movq $HBClearMask, %%mm4 \n\t"

2753

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2754

// PRIME the pump (load the first Raw(x-bpp) data set

2755

"movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes

2756

// (we correct position in loop below)

2757

"avg_2lp: \n\t"

2758

"movq (%%edi,%%ebx,), %%mm0 \n\t"

2759

"psllq $ShiftRem, %%mm2 \n\t" // shift data to position correctly

2760

"movq (%%esi,%%ebx,), %%mm1 \n\t"

2761

// Add (Prev_row/2) to Average

2762

"movq %%mm5, %%mm3 \n\t"

2763

"pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte

2764

"psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2

2765

"pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte

2766

"movq %%mm7, %%mm6 \n\t"

2767

"paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte

2768

// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry

2769

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2770

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2771

// lsb's were == 1 (Only valid for active group)

2772

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2773

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2774

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2775

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg

2776

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte

2777

// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry

2778

"psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 2 & 3

2779

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2780

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2781

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2782

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2783

// lsb's were == 1 (Only valid for active group)

2784

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2785

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2786

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2787

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2788

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte

2789

2790

// Add rdd active group (Raw(x-bpp)/2) to Average with LBCarry

2791

"psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 4 & 5

2792

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2793

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2794

// Data only needs to be shifted once here to

2795

// get the correct x-bpp offset.

2796

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2797

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2798

// lsb's were == 1 (Only valid for active group)

2799

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2800

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2801

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2802

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2803

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte

2804

2805

// Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry

2806

"psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 6 & 7

2807

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2808

"psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly

2809

// Data only needs to be shifted once here to

2810

// get the correct x-bpp offset.

2811

"addl $8, %%ebx \n\t"

2812

"movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys

2813

"pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both

2814

// lsb's were == 1 (Only valid for active group)

2815

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2816

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2817

"paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte

2818

"pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg

2819

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte

2820

2821

"cmpl _MMXLength, %%ebx \n\t"

2822

// Now ready to write back to memory

2823

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

2824

// Prep Raw(x-bpp) for next loop

2825

"movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2

2826

"jb avg_2lp \n\t"

2827

2828

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2829

2830

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2831

2832

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

2833

);

2834

}

2835

break; // end 2 bpp

2836

2837

case 1:

2838

{

2839

__asm__ (

2840

// Re-init address pointers and offset

2841

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2842

"movl row, %%edi \n\t" // edi ==> Avg(x)

2843

"cmpl _FullLength, %%ebx \n\t" // Test if offset at end of array

2844

"jnb avg_1end \n\t"

2845

// Do Paeth decode for remaining bytes

2846

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2847

"movl %%edi, %%edx \n\t"

2848

"xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below

2849

"subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp)

2850

"avg_1lp: \n\t"

2851

// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)

2852

"xorl %%eax, %%eax \n\t"

2853

"movb (%%esi,%%ebx,), %%cl \n\t" // load cl with Prior(x)

2854

"movb (%%edx,%%ebx,), %%al \n\t" // load al with Raw(x-bpp)

2855

"addw %%cx, %%ax \n\t"

2856

"incl %%ebx \n\t"

2857

"shrw %%ax \n\t" // divide by 2

2858

"addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx

2859

"cmpl _FullLength, %%ebx \n\t" // Check if at end of array

2860

"movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x);

2861

// mov does not affect flags; -1 to offset inc ebx

2862

"jb avg_1lp \n\t"

2863

"avg_1end: \n\t"

2864

2865

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2866

2867

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2868

2869

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

2870

);

2871

}

2872

return; // end 1 bpp

2873

2874

case 8:

2875

{

2876

__asm__ (

2877

// Re-init address pointers and offset

2878

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2879

"movq $LBCarryMask, %%mm5 \n\t"

2880

"movl row, %%edi \n\t" // edi ==> Avg(x)

2881

"movq $HBClearMask, %%mm4 \n\t"

2882

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2883

// PRIME the pump (load the first Raw(x-bpp) data set

2884

"movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes

2885

// (NO NEED to correct position in loop below)

2886

"avg_8lp: \n\t"

2887

"movq (%%edi,%%ebx,), %%mm0 \n\t"

2888

"movq %%mm5, %%mm3 \n\t"

2889

"movq (%%esi,%%ebx,), %%mm1 \n\t"

2890

"addl $8, %%ebx \n\t"

2891

"pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte

2892

"psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2

2893

"pand %%mm2, %%mm3 \n\t" // get LBCarrys for each byte where both

2894

// lsb's were == 1

2895

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2896

"pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte

2897

"paddb %%mm3, %%mm0 \n\t" // add LBCarrys to Avg for each byte

2898

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2899

"paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte

2900

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) to Avg for each byte

2901

"cmpl _MMXLength, %%ebx \n\t"

2902

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

2903

"movq %%mm0, %%mm2 \n\t" // reuse as Raw(x-bpp)

2904

"jb avg_8lp \n\t"

2905

2906

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2907

2908

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2909

2910

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" // CHECKASM: clobber list

2911

);

2912

}

2913

break; // end 8 bpp

2914

2915

default: // bpp greater than 8 (!= 1,2,3,4,6,8)

2916

{

2917

2918

GRR: PRINT ERROR HERE: SHOULD NEVER BE REACHED (unless smaller than 1?)

2919

2920

__asm__ (

2921

"movq $LBCarryMask, %%mm5 \n\t"

2922

// Re-init address pointers and offset

2923

"movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary

2924

"movl row, %%edi \n\t" // edi ==> Avg(x)

2925

"movq $HBClearMask, %%mm4 \n\t"

2926

"movl %%edi, %%edx \n\t"

2927

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2928

"subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp)

2929

"avg_Alp: \n\t"

2930

"movq (%%edi,%%ebx,), %%mm0 \n\t"

2931

"movq %%mm5, %%mm3 \n\t"

2932

"movq (%%esi,%%ebx,), %%mm1 \n\t"

2933

"pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte

2934

"movq (%%edx,%%ebx,), %%mm2 \n\t"

2935

"psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2

2936

"pand %%mm2, %%mm3 \n\t" // get LBCarrys for each byte where both

2937

// lsb's were == 1

2938

"psrlq $1, %%mm2 \n\t" // divide raw bytes by 2

2939

"pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte

2940

"paddb %%mm3, %%mm0 \n\t" // add LBCarrys to Avg for each byte

2941

"pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte

2942

"paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte

2943

"addl $8, %%ebx \n\t"

2944

"paddb %%mm2, %%mm0 \n\t" // add (Raw/2) to Avg for each byte

2945

"cmpl _MMXLength, %%ebx \n\t"

2946

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

2947

"jb avg_Alp \n\t"

2948

2949

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2950

2951

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2952

2953

: "%ebx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" // CHECKASM: clobber list

2954

);

2955

}

2956

break;

2957

} // end switch ( bpp )

2958

2959

__asm__ (

2960

// MMX acceleration complete now do clean-up

2961

// Check if any remaining bytes left to decode

2962

"movl _MMXLength, %%ebx \n\t" // ebx ==> x = offset bytes remaining after MMX

2963

"movl row, %%edi \n\t" // edi ==> Avg(x)

2964

"cmpl _FullLength, %%ebx \n\t" // Test if offset at end of array

2965

"jnb avg_end \n\t"

2966

// Do Avg decode for remaining bytes

2967

"movl prev_row, %%esi \n\t" // esi ==> Prior(x)

2968

"movl %%edi, %%edx \n\t"

2969

"xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below

2970

"subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp)

2971

"avg_lp2: \n\t"

2972

// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)

2973

"xorl %%eax, %%eax \n\t"

2974

"movb (%%esi,%%ebx,), %%cl \n\t" // load cl with Prior(x)

2975

"movb (%%edx,%%ebx,), %%al \n\t" // load al with Raw(x-bpp)

2976

"addw %%cx, %%ax \n\t"

2977

"incl %%ebx \n\t"

2978

"shrw %%ax \n\t" // divide by 2

2979

"addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx

2980

"cmpl _FullLength, %%ebx \n\t" // Check if at end of array

2981

"movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x);

2982

// mov does not affect flags; -1 to offset inc ebx

2983

"jb avg_lp2 \n\t"

2984

"avg_end: \n\t"

2985

"emms \n\t" // End MMX instructions; prep for possible FP instrs.

2986

2987

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

2988

2989

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

2990

2991

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

2992

);

2993

#endif /* GRR_GCC_MMX_CONVERTED */

2994

}

2995

2996

// Optimized code for PNG Paeth filter decoder

2997

void /* PRIVATE */

2998

png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,

2999

png_bytep prev_row)

3000

{

3001

#ifdef GRR_GCC_MMX_CONVERTED

3002

int bpp;

3003

int patemp, pbtemp, pctemp;

3004

3005

bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel

3006

_FullLength = row_info->rowbytes; // # of bytes to filter

3007

__asm__ (

3008

"xorl %%ebx, %%ebx \n\t" // ebx ==> x offset

3009

"movl row, %%edi \n\t"

3010

"xorl %%edx, %%edx \n\t" // edx ==> x-bpp offset

3011

"movl prev_row, %%esi \n\t"

3012

"xorl %%eax, %%eax \n\t"

3013

3014

// Compute the Raw value for the first bpp bytes

3015

// Note: the formula works out to be always

3016

// Raw(x) = Paeth(x) + Prior(x) where x < bpp

3017

"paeth_rlp: \n\t"

3018

"movb (%%edi,%%ebx,), %%al \n\t"

3019

"addb (%%esi,%%ebx,), %%al \n\t"

3020

"incl %%ebx \n\t"

3021

"cmpl bpp, %%ebx \n\t"

3022

"movb %%al, -1(%%edi,%%ebx,) \n\t"

3023

"jb paeth_rlp \n\t"

3024

// get # of bytes to alignment

3025

"movl %%edi, _dif \n\t" // take start of row

3026

"addl %%ebx, _dif \n\t" // add bpp

3027

"xorl %%ecx, %%ecx \n\t"

3028

"addl $0xf, _dif \n\t" // add 7 + 8 to incr past alignment boundary

3029

"andl $0xfffffff8, _dif \n\t" // mask to alignment boundary

3030

"subl %%edi, _dif \n\t" // subtract from start ==> value ebx at alignment

3031

"jz paeth_go \n\t"

3032

// fix alignment

3033

"paeth_lp1: \n\t"

3034

"xorl %%eax, %%eax \n\t"

3035

// pav = p - a = (a + b - c) - a = b - c

3036

"movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al

3037

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3038

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3039

"movl %%eax, patemp \n\t" // Save pav for later use

3040

"xorl %%eax, %%eax \n\t"

3041

// pbv = p - b = (a + b - c) - b = a - c

3042

"movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al

3043

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3044

"movl %%eax, %%ecx \n\t"

3045

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3046

"addl patemp, %%eax \n\t" // pcv = pav + pbv

3047

// pc = abs(pcv)

3048

"testl $0x80000000, %%eax \n\t"

3049

"jz paeth_pca \n\t"

3050

"negl %%eax \n\t" // reverse sign of neg values

3051

"paeth_pca: \n\t"

3052

"movl %%eax, pctemp \n\t" // save pc for later use

3053

// pb = abs(pbv)

3054

"testl $0x80000000, %%ecx \n\t"

3055

"jz paeth_pba \n\t"

3056

"negl %%ecx \n\t" // reverse sign of neg values

3057

"paeth_pba: \n\t"

3058

"movl %%ecx, pbtemp \n\t" // save pb for later use

3059

// pa = abs(pav)

3060

"movl patemp, %%eax \n\t"

3061

"testl $0x80000000, %%eax \n\t"

3062

"jz paeth_paa \n\t"

3063

"negl %%eax \n\t" // reverse sign of neg values

3064

"paeth_paa: \n\t"

3065

"movl %%eax, patemp \n\t" // save pa for later use

3066

// test if pa <= pb

3067

"cmpl %%ecx, %%eax \n\t"

3068

"jna paeth_abb \n\t"

3069

// pa > pb; now test if pb <= pc

3070

"cmpl pctemp, %%ecx \n\t"

3071

"jna paeth_bbc \n\t"

3072

// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3073

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3074

"jmp paeth_paeth \n\t"

3075

"paeth_bbc: \n\t"

3076

// pb <= pc; Raw(x) = Paeth(x) + Prior(x)

3077

"movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl

3078

"jmp paeth_paeth \n\t"

3079

"paeth_abb: \n\t"

3080

// pa <= pb; now test if pa <= pc

3081

"cmpl pctemp, %%eax \n\t"

3082

"jna paeth_abc \n\t"

3083

// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3084

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3085

"jmp paeth_paeth \n\t"

3086

"paeth_abc: \n\t"

3087

// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)

3088

"movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl

3089

"paeth_paeth: \n\t"

3090

"incl %%ebx \n\t"

3091

"incl %%edx \n\t"

3092

// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256

3093

"addb %%cl, -1(%%edi,%%ebx,) \n\t"

3094

"cmpl _dif, %%ebx \n\t"

3095

"jb paeth_lp1 \n\t"

3096

"paeth_go: \n\t"

3097

"movl _FullLength, %%ecx \n\t"

3098

"movl %%ecx, %%eax \n\t"

3099

"subl %%ebx, %%eax \n\t" // subtract alignment fix

3100

"andl $0x00000007, %%eax \n\t" // calc bytes over mult of 8

3101

"subl %%eax, %%ecx \n\t" // drop over bytes from original length

3102

"movl %%ecx, _MMXLength \n\t"

3103

3104

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3105

3106

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3107

3108

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

3109

);

3110

3111

// Now do the math for the rest of the row

3112

switch ( bpp )

3113

{

3114

case 3:

3115

{

3116

ActiveMask.use = 0x0000000000ffffff;

3117

ActiveMaskEnd.use = 0xffff000000000000;

3118

ShiftBpp.use = 24; // == bpp(3) * 8

3119

ShiftRem.use = 40; // == 64 - 24

3120

__asm__ (

3121

"movl _dif, %%ebx \n\t"

3122

"movl row, %%edi \n\t"

3123

"movl prev_row, %%esi \n\t"

3124

"pxor %%mm0, %%mm0 \n\t"

3125

// PRIME the pump (load the first Raw(x-bpp) data set)

3126

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

3127

"paeth_3lp: \n\t"

3128

"psrlq $ShiftRem, %%mm1 \n\t" // shift last 3 bytes to 1st 3 bytes

3129

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3130

"punpcklbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a

3131

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // Prep c=Prior(x-bpp) bytes

3132

"punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3133

"psrlq $ShiftRem, %%mm3 \n\t" // shift last 3 bytes to 1st 3 bytes

3134

// pav = p - a = (a + b - c) - a = b - c

3135

"movq %%mm2, %%mm4 \n\t"

3136

"punpcklbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3137

// pbv = p - b = (a + b - c) - b = a - c

3138

"movq %%mm1, %%mm5 \n\t"

3139

"psubw %%mm3, %%mm4 \n\t"

3140

"pxor %%mm7, %%mm7 \n\t"

3141

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3142

"movq %%mm4, %%mm6 \n\t"

3143

"psubw %%mm3, %%mm5 \n\t"

3144

3145

// pa = abs(p-a) = abs(pav)

3146

// pb = abs(p-b) = abs(pbv)

3147

// pc = abs(p-c) = abs(pcv)

3148

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3149

"paddw %%mm5, %%mm6 \n\t"

3150

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3151

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3152

"psubw %%mm0, %%mm4 \n\t"

3153

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3154

"psubw %%mm0, %%mm4 \n\t"

3155

"psubw %%mm7, %%mm5 \n\t"

3156

"pxor %%mm0, %%mm0 \n\t"

3157

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3158

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3159

"psubw %%mm7, %%mm5 \n\t"

3160

"psubw %%mm0, %%mm6 \n\t"

3161

// test pa <= pb

3162

"movq %%mm4, %%mm7 \n\t"

3163

"psubw %%mm0, %%mm6 \n\t"

3164

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3165

"movq %%mm7, %%mm0 \n\t"

3166

// use mm7 mask to merge pa & pb

3167

"pand %%mm7, %%mm5 \n\t"

3168

// use mm0 mask copy to merge a & b

3169

"pand %%mm0, %%mm2 \n\t"

3170

"pandn %%mm4, %%mm7 \n\t"

3171

"pandn %%mm1, %%mm0 \n\t"

3172

"paddw %%mm5, %%mm7 \n\t"

3173

"paddw %%mm2, %%mm0 \n\t"

3174

// test ((pa <= pb)? pa:pb) <= pc

3175

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3176

"pxor %%mm1, %%mm1 \n\t"

3177

"pand %%mm7, %%mm3 \n\t"

3178

"pandn %%mm0, %%mm7 \n\t"

3179

"paddw %%mm3, %%mm7 \n\t"

3180

"pxor %%mm0, %%mm0 \n\t"

3181

"packuswb %%mm1, %%mm7 \n\t"

3182

"movq (%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp)

3183

"pand $ActiveMask, %%mm7 \n\t"

3184

"movq %%mm3, %%mm2 \n\t" // load b=Prior(x) step 1

3185

"paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)

3186

"punpcklbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3187

"movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value

3188

"movq %%mm7, %%mm1 \n\t" // Now mm1 will be used as Raw(x-bpp)

3189

// Now do Paeth for 2nd set of bytes (3-5)

3190

"psrlq $ShiftBpp, %%mm2 \n\t" // load b=Prior(x) step 2

3191

"punpcklbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a

3192

"pxor %%mm7, %%mm7 \n\t"

3193

"punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3194

// pbv = p - b = (a + b - c) - b = a - c

3195

"movq %%mm1, %%mm5 \n\t"

3196

// pav = p - a = (a + b - c) - a = b - c

3197

"movq %%mm2, %%mm4 \n\t"

3198

"psubw %%mm3, %%mm5 \n\t"

3199

"psubw %%mm3, %%mm4 \n\t"

3200

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) =

3201

// pav + pbv = pbv + pav

3202

"movq %%mm5, %%mm6 \n\t"

3203

"paddw %%mm4, %%mm6 \n\t"

3204

3205

// pa = abs(p-a) = abs(pav)

3206

// pb = abs(p-b) = abs(pbv)

3207

// pc = abs(p-c) = abs(pcv)

3208

"pcmpgtw %%mm5, %%mm0 \n\t" // Create mask pbv bytes < 0

3209

"pcmpgtw %%mm4, %%mm7 \n\t" // Create mask pav bytes < 0

3210

"pand %%mm5, %%mm0 \n\t" // Only pbv bytes < 0 in mm0

3211

"pand %%mm4, %%mm7 \n\t" // Only pav bytes < 0 in mm7

3212

"psubw %%mm0, %%mm5 \n\t"

3213

"psubw %%mm7, %%mm4 \n\t"

3214

"psubw %%mm0, %%mm5 \n\t"

3215

"psubw %%mm7, %%mm4 \n\t"

3216

"pxor %%mm0, %%mm0 \n\t"

3217

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3218

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3219

"psubw %%mm0, %%mm6 \n\t"

3220

// test pa <= pb

3221

"movq %%mm4, %%mm7 \n\t"

3222

"psubw %%mm0, %%mm6 \n\t"

3223

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3224

"movq %%mm7, %%mm0 \n\t"

3225

// use mm7 mask to merge pa & pb

3226

"pand %%mm7, %%mm5 \n\t"

3227

// use mm0 mask copy to merge a & b

3228

"pand %%mm0, %%mm2 \n\t"

3229

"pandn %%mm4, %%mm7 \n\t"

3230

"pandn %%mm1, %%mm0 \n\t"

3231

"paddw %%mm5, %%mm7 \n\t"

3232

"paddw %%mm2, %%mm0 \n\t"

3233

// test ((pa <= pb)? pa:pb) <= pc

3234

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3235

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3236

"pand %%mm7, %%mm3 \n\t"

3237

"pandn %%mm0, %%mm7 \n\t"

3238

"pxor %%mm1, %%mm1 \n\t"

3239

"paddw %%mm3, %%mm7 \n\t"

3240

"pxor %%mm0, %%mm0 \n\t"

3241

"packuswb %%mm1, %%mm7 \n\t"

3242

"movq %%mm2, %%mm3 \n\t" // load c=Prior(x-bpp) step 1

3243

"pand $ActiveMask, %%mm7 \n\t"

3244

"punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3245

"psllq $ShiftBpp, %%mm7 \n\t" // Shift bytes to 2nd group of 3 bytes

3246

// pav = p - a = (a + b - c) - a = b - c

3247

"movq %%mm2, %%mm4 \n\t"

3248

"paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)

3249

"psllq $ShiftBpp, %%mm3 \n\t" // load c=Prior(x-bpp) step 2

3250

"movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value

3251

"movq %%mm7, %%mm1 \n\t"

3252

"punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3253

"psllq $ShiftBpp, %%mm1 \n\t" // Shift bytes

3254

// Now mm1 will be used as Raw(x-bpp)

3255

// Now do Paeth for 3rd, and final, set of bytes (6-7)

3256

"pxor %%mm7, %%mm7 \n\t"

3257

"punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a

3258

"psubw %%mm3, %%mm4 \n\t"

3259

// pbv = p - b = (a + b - c) - b = a - c

3260

"movq %%mm1, %%mm5 \n\t"

3261

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3262

"movq %%mm4, %%mm6 \n\t"

3263

"psubw %%mm3, %%mm5 \n\t"

3264

"pxor %%mm0, %%mm0 \n\t"

3265

"paddw %%mm5, %%mm6 \n\t"

3266

3267

// pa = abs(p-a) = abs(pav)

3268

// pb = abs(p-b) = abs(pbv)

3269

// pc = abs(p-c) = abs(pcv)

3270

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3271

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3272

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3273

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3274

"psubw %%mm0, %%mm4 \n\t"

3275

"psubw %%mm7, %%mm5 \n\t"

3276

"psubw %%mm0, %%mm4 \n\t"

3277

"psubw %%mm7, %%mm5 \n\t"

3278

"pxor %%mm0, %%mm0 \n\t"

3279

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3280

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3281

"psubw %%mm0, %%mm6 \n\t"

3282

// test pa <= pb

3283

"movq %%mm4, %%mm7 \n\t"

3284

"psubw %%mm0, %%mm6 \n\t"

3285

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3286

"movq %%mm7, %%mm0 \n\t"

3287

// use mm0 mask copy to merge a & b

3288

"pand %%mm0, %%mm2 \n\t"

3289

// use mm7 mask to merge pa & pb

3290

"pand %%mm7, %%mm5 \n\t"

3291

"pandn %%mm1, %%mm0 \n\t"

3292

"pandn %%mm4, %%mm7 \n\t"

3293

"paddw %%mm2, %%mm0 \n\t"

3294

"paddw %%mm5, %%mm7 \n\t"

3295

// test ((pa <= pb)? pa:pb) <= pc

3296

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3297

"pand %%mm7, %%mm3 \n\t"

3298

"pandn %%mm0, %%mm7 \n\t"

3299

"paddw %%mm3, %%mm7 \n\t"

3300

"pxor %%mm1, %%mm1 \n\t"

3301

"packuswb %%mm7, %%mm1 \n\t"

3302

// Step ebx to next set of 8 bytes and repeat loop til done

3303

"addl $8, %%ebx \n\t"

3304

"pand $ActiveMaskEnd, %%mm1 \n\t"

3305

"paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)

3306

3307

"cmpl _MMXLength, %%ebx \n\t"

3308

"pxor %%mm0, %%mm0 \n\t" // pxor does not affect flags

3309

"movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value

3310

// mm1 will be used as Raw(x-bpp) next loop

3311

// mm3 ready to be used as Prior(x-bpp) next loop

3312

"jb paeth_3lp \n\t"

3313

3314

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3315

3316

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3317

3318

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

3319

);

3320

}

3321

break;

3322

3323

case 6:

3324

//case 7: // GRR BOGUS

3325

//case 5: // GRR BOGUS

3326

{

3327

ActiveMask.use = 0x00000000ffffffff;

3328

ActiveMask2.use = 0xffffffff00000000;

3329

ShiftBpp.use = bpp << 3; // == bpp * 8

3330

ShiftRem.use = 64 - ShiftBpp.use;

3331

__asm__ (

3332

"movl _dif, %%ebx \n\t"

3333

"movl row, %%edi \n\t"

3334

"movl prev_row, %%esi \n\t"

3335

// PRIME the pump (load the first Raw(x-bpp) data set)

3336

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

3337

"pxor %%mm0, %%mm0 \n\t"

3338

"paeth_6lp: \n\t"

3339

// Must shift to position Raw(x-bpp) data

3340

"psrlq $ShiftRem, %%mm1 \n\t"

3341

// Do first set of 4 bytes

3342

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes

3343

"punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a

3344

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3345

"punpcklbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b

3346

// Must shift to position Prior(x-bpp) data

3347

"psrlq $ShiftRem, %%mm3 \n\t"

3348

// pav = p - a = (a + b - c) - a = b - c

3349

"movq %%mm2, %%mm4 \n\t"

3350

"punpcklbw %%mm0, %%mm3 \n\t" // Unpack Low bytes of c

3351

// pbv = p - b = (a + b - c) - b = a - c

3352

"movq %%mm1, %%mm5 \n\t"

3353

"psubw %%mm3, %%mm4 \n\t"

3354

"pxor %%mm7, %%mm7 \n\t"

3355

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3356

"movq %%mm4, %%mm6 \n\t"

3357

"psubw %%mm3, %%mm5 \n\t"

3358

// pa = abs(p-a) = abs(pav)

3359

// pb = abs(p-b) = abs(pbv)

3360

// pc = abs(p-c) = abs(pcv)

3361

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3362

"paddw %%mm5, %%mm6 \n\t"

3363

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3364

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3365

"psubw %%mm0, %%mm4 \n\t"

3366

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3367

"psubw %%mm0, %%mm4 \n\t"

3368

"psubw %%mm7, %%mm5 \n\t"

3369

"pxor %%mm0, %%mm0 \n\t"

3370

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3371

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3372

"psubw %%mm7, %%mm5 \n\t"

3373

"psubw %%mm0, %%mm6 \n\t"

3374

// test pa <= pb

3375

"movq %%mm4, %%mm7 \n\t"

3376

"psubw %%mm0, %%mm6 \n\t"

3377

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3378

"movq %%mm7, %%mm0 \n\t"

3379

// use mm7 mask to merge pa & pb

3380

"pand %%mm7, %%mm5 \n\t"

3381

// use mm0 mask copy to merge a & b

3382

"pand %%mm0, %%mm2 \n\t"

3383

"pandn %%mm4, %%mm7 \n\t"

3384

"pandn %%mm1, %%mm0 \n\t"

3385

"paddw %%mm5, %%mm7 \n\t"

3386

"paddw %%mm2, %%mm0 \n\t"

3387

// test ((pa <= pb)? pa:pb) <= pc

3388

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3389

"pxor %%mm1, %%mm1 \n\t"

3390

"pand %%mm7, %%mm3 \n\t"

3391

"pandn %%mm0, %%mm7 \n\t"

3392

"paddw %%mm3, %%mm7 \n\t"

3393

"pxor %%mm0, %%mm0 \n\t"

3394

"packuswb %%mm1, %%mm7 \n\t"

3395

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp)

3396

"pand $ActiveMask, %%mm7 \n\t"

3397

"psrlq $ShiftRem, %%mm3 \n\t"

3398

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) step 1

3399

"paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)

3400

"movq %%mm2, %%mm6 \n\t"

3401

"movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value

3402

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

3403

"psllq $ShiftBpp, %%mm6 \n\t"

3404

"movq %%mm7, %%mm5 \n\t"

3405

"psrlq $ShiftRem, %%mm1 \n\t"

3406

"por %%mm6, %%mm3 \n\t"

3407

"psllq $ShiftBpp, %%mm5 \n\t"

3408

"punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3409

"por %%mm5, %%mm1 \n\t"

3410

// Do second set of 4 bytes

3411

"punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3412

"punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a

3413

// pav = p - a = (a + b - c) - a = b - c

3414

"movq %%mm2, %%mm4 \n\t"

3415

// pbv = p - b = (a + b - c) - b = a - c

3416

"movq %%mm1, %%mm5 \n\t"

3417

"psubw %%mm3, %%mm4 \n\t"

3418

"pxor %%mm7, %%mm7 \n\t"

3419

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3420

"movq %%mm4, %%mm6 \n\t"

3421

"psubw %%mm3, %%mm5 \n\t"

3422

// pa = abs(p-a) = abs(pav)

3423

// pb = abs(p-b) = abs(pbv)

3424

// pc = abs(p-c) = abs(pcv)

3425

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3426

"paddw %%mm5, %%mm6 \n\t"

3427

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3428

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3429

"psubw %%mm0, %%mm4 \n\t"

3430

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3431

"psubw %%mm0, %%mm4 \n\t"

3432

"psubw %%mm7, %%mm5 \n\t"

3433

"pxor %%mm0, %%mm0 \n\t"

3434

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3435

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3436

"psubw %%mm7, %%mm5 \n\t"

3437

"psubw %%mm0, %%mm6 \n\t"

3438

// test pa <= pb

3439

"movq %%mm4, %%mm7 \n\t"

3440

"psubw %%mm0, %%mm6 \n\t"

3441

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3442

"movq %%mm7, %%mm0 \n\t"

3443

// use mm7 mask to merge pa & pb

3444

"pand %%mm7, %%mm5 \n\t"

3445

// use mm0 mask copy to merge a & b

3446

"pand %%mm0, %%mm2 \n\t"

3447

"pandn %%mm4, %%mm7 \n\t"

3448

"pandn %%mm1, %%mm0 \n\t"

3449

"paddw %%mm5, %%mm7 \n\t"

3450

"paddw %%mm2, %%mm0 \n\t"

3451

// test ((pa <= pb)? pa:pb) <= pc

3452

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3453

"pxor %%mm1, %%mm1 \n\t"

3454

"pand %%mm7, %%mm3 \n\t"

3455

"pandn %%mm0, %%mm7 \n\t"

3456

"pxor %%mm1, %%mm1 \n\t"

3457

"paddw %%mm3, %%mm7 \n\t"

3458

"pxor %%mm0, %%mm0 \n\t"

3459

// Step ebx to next set of 8 bytes and repeat loop til done

3460

"addl $8, %%ebx \n\t"

3461

"packuswb %%mm7, %%mm1 \n\t"

3462

"paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)

3463

"cmpl _MMXLength, %%ebx \n\t"

3464

"movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value

3465

// mm1 will be used as Raw(x-bpp) next loop

3466

"jb paeth_6lp \n\t"

3467

3468

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3469

3470

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3471

3472

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

3473

);

3474

}

3475

break;

3476

3477

case 4:

3478

{

3479

ActiveMask.use = 0x00000000ffffffff;

3480

__asm__ (

3481

"movl _dif, %%ebx \n\t"

3482

"movl row, %%edi \n\t"

3483

"movl prev_row, %%esi \n\t"

3484

"pxor %%mm0, %%mm0 \n\t"

3485

// PRIME the pump (load the first Raw(x-bpp) data set)

3486

"movq -8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read

3487

// a=Raw(x-bpp) bytes

3488

"paeth_4lp: \n\t"

3489

// Do first set of 4 bytes

3490

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes

3491

"punpckhbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a

3492

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3493

"punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3494

// pav = p - a = (a + b - c) - a = b - c

3495

"movq %%mm2, %%mm4 \n\t"

3496

"punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3497

// pbv = p - b = (a + b - c) - b = a - c

3498

"movq %%mm1, %%mm5 \n\t"

3499

"psubw %%mm3, %%mm4 \n\t"

3500

"pxor %%mm7, %%mm7 \n\t"

3501

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3502

"movq %%mm4, %%mm6 \n\t"

3503

"psubw %%mm3, %%mm5 \n\t"

3504

// pa = abs(p-a) = abs(pav)

3505

// pb = abs(p-b) = abs(pbv)

3506

// pc = abs(p-c) = abs(pcv)

3507

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3508

"paddw %%mm5, %%mm6 \n\t"

3509

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3510

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3511

"psubw %%mm0, %%mm4 \n\t"

3512

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3513

"psubw %%mm0, %%mm4 \n\t"

3514

"psubw %%mm7, %%mm5 \n\t"

3515

"pxor %%mm0, %%mm0 \n\t"

3516

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3517

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3518

"psubw %%mm7, %%mm5 \n\t"

3519

"psubw %%mm0, %%mm6 \n\t"

3520

// test pa <= pb

3521

"movq %%mm4, %%mm7 \n\t"

3522

"psubw %%mm0, %%mm6 \n\t"

3523

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3524

"movq %%mm7, %%mm0 \n\t"

3525

// use mm7 mask to merge pa & pb

3526

"pand %%mm7, %%mm5 \n\t"

3527

// use mm0 mask copy to merge a & b

3528

"pand %%mm0, %%mm2 \n\t"

3529

"pandn %%mm4, %%mm7 \n\t"

3530

"pandn %%mm1, %%mm0 \n\t"

3531

"paddw %%mm5, %%mm7 \n\t"

3532

"paddw %%mm2, %%mm0 \n\t"

3533

// test ((pa <= pb)? pa:pb) <= pc

3534

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3535

"pxor %%mm1, %%mm1 \n\t"

3536

"pand %%mm7, %%mm3 \n\t"

3537

"pandn %%mm0, %%mm7 \n\t"

3538

"paddw %%mm3, %%mm7 \n\t"

3539

"pxor %%mm0, %%mm0 \n\t"

3540

"packuswb %%mm1, %%mm7 \n\t"

3541

"movq (%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp)

3542

"pand $ActiveMask, %%mm7 \n\t"

3543

"movq %%mm3, %%mm2 \n\t" // load b=Prior(x) step 1

3544

"paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)

3545

"punpcklbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3546

"movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value

3547

"movq %%mm7, %%mm1 \n\t" // Now mm1 will be used as Raw(x-bpp)

3548

// Do second set of 4 bytes

3549

"punpckhbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b

3550

"punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a

3551

// pav = p - a = (a + b - c) - a = b - c

3552

"movq %%mm2, %%mm4 \n\t"

3553

// pbv = p - b = (a + b - c) - b = a - c

3554

"movq %%mm1, %%mm5 \n\t"

3555

"psubw %%mm3, %%mm4 \n\t"

3556

"pxor %%mm7, %%mm7 \n\t"

3557

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3558

"movq %%mm4, %%mm6 \n\t"

3559

"psubw %%mm3, %%mm5 \n\t"

3560

// pa = abs(p-a) = abs(pav)

3561

// pb = abs(p-b) = abs(pbv)

3562

// pc = abs(p-c) = abs(pcv)

3563

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3564

"paddw %%mm5, %%mm6 \n\t"

3565

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3566

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3567

"psubw %%mm0, %%mm4 \n\t"

3568

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3569

"psubw %%mm0, %%mm4 \n\t"

3570

"psubw %%mm7, %%mm5 \n\t"

3571

"pxor %%mm0, %%mm0 \n\t"

3572

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3573

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3574

"psubw %%mm7, %%mm5 \n\t"

3575

"psubw %%mm0, %%mm6 \n\t"

3576

// test pa <= pb

3577

"movq %%mm4, %%mm7 \n\t"

3578

"psubw %%mm0, %%mm6 \n\t"

3579

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3580

"movq %%mm7, %%mm0 \n\t"

3581

// use mm7 mask to merge pa & pb

3582

"pand %%mm7, %%mm5 \n\t"

3583

// use mm0 mask copy to merge a & b

3584

"pand %%mm0, %%mm2 \n\t"

3585

"pandn %%mm4, %%mm7 \n\t"

3586

"pandn %%mm1, %%mm0 \n\t"

3587

"paddw %%mm5, %%mm7 \n\t"

3588

"paddw %%mm2, %%mm0 \n\t"

3589

// test ((pa <= pb)? pa:pb) <= pc

3590

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3591

"pxor %%mm1, %%mm1 \n\t"

3592

"pand %%mm7, %%mm3 \n\t"

3593

"pandn %%mm0, %%mm7 \n\t"

3594

"pxor %%mm1, %%mm1 \n\t"

3595

"paddw %%mm3, %%mm7 \n\t"

3596

"pxor %%mm0, %%mm0 \n\t"

3597

// Step ebx to next set of 8 bytes and repeat loop til done

3598

"addl $8, %%ebx \n\t"

3599

"packuswb %%mm7, %%mm1 \n\t"

3600

"paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)

3601

"cmpl _MMXLength, %%ebx \n\t"

3602

"movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value

3603

// mm1 will be used as Raw(x-bpp) next loop

3604

"jb paeth_4lp \n\t"

3605

3606

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3607

3608

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3609

3610

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

3611

);

3612

}

3613

break;

3614

case 8: // bpp == 8

3615

{

3616

ActiveMask.use = 0x00000000ffffffff;

3617

__asm__ (

3618

"movl _dif, %%ebx \n\t"

3619

"movl row, %%edi \n\t"

3620

"movl prev_row, %%esi \n\t"

3621

"pxor %%mm0, %%mm0 \n\t"

3622

// PRIME the pump (load the first Raw(x-bpp) data set)

3623

"movq -8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read

3624

// a=Raw(x-bpp) bytes

3625

"paeth_8lp: \n\t"

3626

// Do first set of 4 bytes

3627

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes

3628

"punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a

3629

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3630

"punpcklbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b

3631

// pav = p - a = (a + b - c) - a = b - c

3632

"movq %%mm2, %%mm4 \n\t"

3633

"punpcklbw %%mm0, %%mm3 \n\t" // Unpack Low bytes of c

3634

// pbv = p - b = (a + b - c) - b = a - c

3635

"movq %%mm1, %%mm5 \n\t"

3636

"psubw %%mm3, %%mm4 \n\t"

3637

"pxor %%mm7, %%mm7 \n\t"

3638

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3639

"movq %%mm4, %%mm6 \n\t"

3640

"psubw %%mm3, %%mm5 \n\t"

3641

// pa = abs(p-a) = abs(pav)

3642

// pb = abs(p-b) = abs(pbv)

3643

// pc = abs(p-c) = abs(pcv)

3644

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3645

"paddw %%mm5, %%mm6 \n\t"

3646

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3647

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3648

"psubw %%mm0, %%mm4 \n\t"

3649

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3650

"psubw %%mm0, %%mm4 \n\t"

3651

"psubw %%mm7, %%mm5 \n\t"

3652

"pxor %%mm0, %%mm0 \n\t"

3653

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3654

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3655

"psubw %%mm7, %%mm5 \n\t"

3656

"psubw %%mm0, %%mm6 \n\t"

3657

// test pa <= pb

3658

"movq %%mm4, %%mm7 \n\t"

3659

"psubw %%mm0, %%mm6 \n\t"

3660

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3661

"movq %%mm7, %%mm0 \n\t"

3662

// use mm7 mask to merge pa & pb

3663

"pand %%mm7, %%mm5 \n\t"

3664

// use mm0 mask copy to merge a & b

3665

"pand %%mm0, %%mm2 \n\t"

3666

"pandn %%mm4, %%mm7 \n\t"

3667

"pandn %%mm1, %%mm0 \n\t"

3668

"paddw %%mm5, %%mm7 \n\t"

3669

"paddw %%mm2, %%mm0 \n\t"

3670

// test ((pa <= pb)? pa:pb) <= pc

3671

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3672

"pxor %%mm1, %%mm1 \n\t"

3673

"pand %%mm7, %%mm3 \n\t"

3674

"pandn %%mm0, %%mm7 \n\t"

3675

"paddw %%mm3, %%mm7 \n\t"

3676

"pxor %%mm0, %%mm0 \n\t"

3677

"packuswb %%mm1, %%mm7 \n\t"

3678

"movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes

3679

"pand $ActiveMask, %%mm7 \n\t"

3680

"movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x)

3681

"paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)

3682

"punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c

3683

"movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value

3684

"movq -8(%%edi,%%ebx,), %%mm1 \n\t" // read a=Raw(x-bpp) bytes

3685

3686

// Do second set of 4 bytes

3687

"punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b

3688

"punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a

3689

// pav = p - a = (a + b - c) - a = b - c

3690

"movq %%mm2, %%mm4 \n\t"

3691

// pbv = p - b = (a + b - c) - b = a - c

3692

"movq %%mm1, %%mm5 \n\t"

3693

"psubw %%mm3, %%mm4 \n\t"

3694

"pxor %%mm7, %%mm7 \n\t"

3695

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3696

"movq %%mm4, %%mm6 \n\t"

3697

"psubw %%mm3, %%mm5 \n\t"

3698

// pa = abs(p-a) = abs(pav)

3699

// pb = abs(p-b) = abs(pbv)

3700

// pc = abs(p-c) = abs(pcv)

3701

"pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0

3702

"paddw %%mm5, %%mm6 \n\t"

3703

"pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3704

"pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0

3705

"psubw %%mm0, %%mm4 \n\t"

3706

"pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0

3707

"psubw %%mm0, %%mm4 \n\t"

3708

"psubw %%mm7, %%mm5 \n\t"

3709

"pxor %%mm0, %%mm0 \n\t"

3710

"pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0

3711

"pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7

3712

"psubw %%mm7, %%mm5 \n\t"

3713

"psubw %%mm0, %%mm6 \n\t"

3714

// test pa <= pb

3715

"movq %%mm4, %%mm7 \n\t"

3716

"psubw %%mm0, %%mm6 \n\t"

3717

"pcmpgtw %%mm5, %%mm7 \n\t" // pa > pb?

3718

"movq %%mm7, %%mm0 \n\t"

3719

// use mm7 mask to merge pa & pb

3720

"pand %%mm7, %%mm5 \n\t"

3721

// use mm0 mask copy to merge a & b

3722

"pand %%mm0, %%mm2 \n\t"

3723

"pandn %%mm4, %%mm7 \n\t"

3724

"pandn %%mm1, %%mm0 \n\t"

3725

"paddw %%mm5, %%mm7 \n\t"

3726

"paddw %%mm2, %%mm0 \n\t"

3727

// test ((pa <= pb)? pa:pb) <= pc

3728

"pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc?

3729

"pxor %%mm1, %%mm1 \n\t"

3730

"pand %%mm7, %%mm3 \n\t"

3731

"pandn %%mm0, %%mm7 \n\t"

3732

"pxor %%mm1, %%mm1 \n\t"

3733

"paddw %%mm3, %%mm7 \n\t"

3734

"pxor %%mm0, %%mm0 \n\t"

3735

// Step ex to next set of 8 bytes and repeat loop til done

3736

"addl $8, %%ebx \n\t"

3737

"packuswb %%mm7, %%mm1 \n\t"

3738

"paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)

3739

"cmpl _MMXLength, %%ebx \n\t"

3740

"movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value

3741

// mm1 will be used as Raw(x-bpp) next loop

3742

"jb paeth_8lp \n\t"

3743

3744

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3745

3746

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3747

3748

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

3749

);

3750

}

3751

break;

3752

3753

case 1: // bpp = 1

3754

case 2: // bpp = 2

3755

default: // bpp > 8

3756

{

3757

__asm__ (

3758

"movl _dif, %%ebx \n\t"

3759

"cmpl _FullLength, %%ebx \n\t"

3760

"jnb paeth_dend \n\t"

3761

"movl row, %%edi \n\t"

3762

"movl prev_row, %%esi \n\t"

3763

// Do Paeth decode for remaining bytes

3764

"movl %%ebx, %%edx \n\t"

3765

"xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below

3766

"subl bpp, %%edx \n\t" // Set edx = ebx - bpp

3767

"paeth_dlp: \n\t"

3768

"xorl %%eax, %%eax \n\t"

3769

// pav = p - a = (a + b - c) - a = b - c

3770

"movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al

3771

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3772

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3773

"movl %%eax, patemp \n\t" // Save pav for later use

3774

"xorl %%eax, %%eax \n\t"

3775

// pbv = p - b = (a + b - c) - b = a - c

3776

"movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al

3777

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3778

"movl %%eax, %%ecx \n\t"

3779

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3780

"addl patemp, %%eax \n\t" // pcv = pav + pbv

3781

// pc = abs(pcv)

3782

"testl $0x80000000, %%eax \n\t"

3783

"jz paeth_dpca \n\t"

3784

"negl %%eax \n\t" // reverse sign of neg values

3785

"paeth_dpca: \n\t"

3786

"movl %%eax, pctemp \n\t" // save pc for later use

3787

// pb = abs(pbv)

3788

"testl $0x80000000, %%ecx \n\t"

3789

"jz paeth_dpba \n\t"

3790

"negl %%ecx \n\t" // reverse sign of neg values

3791

"paeth_dpba: \n\t"

3792

"movl %%ecx, pbtemp \n\t" // save pb for later use

3793

// pa = abs(pav)

3794

"movl patemp, %%eax \n\t"

3795

"testl $0x80000000, %%eax \n\t"

3796

"jz paeth_dpaa \n\t"

3797

"negl %%eax \n\t" // reverse sign of neg values

3798

"paeth_dpaa: \n\t"

3799

"movl %%eax, patemp \n\t" // save pa for later use

3800

// test if pa <= pb

3801

"cmpl %%ecx, %%eax \n\t"

3802

"jna paeth_dabb \n\t"

3803

// pa > pb; now test if pb <= pc

3804

"cmpl pctemp, %%ecx \n\t"

3805

"jna paeth_dbbc \n\t"

3806

// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3807

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3808

"jmp paeth_dpaeth \n\t"

3809

"paeth_dbbc: \n\t"

3810

// pb <= pc; Raw(x) = Paeth(x) + Prior(x)

3811

"movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl

3812

"jmp paeth_dpaeth \n\t"

3813

"paeth_dabb: \n\t"

3814

// pa <= pb; now test if pa <= pc

3815

"cmpl pctemp, %%eax \n\t"

3816

"jna paeth_dabc \n\t"

3817

// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3818

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3819

"jmp paeth_dpaeth \n\t"

3820

"paeth_dabc: \n\t"

3821

// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)

3822

"movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl

3823

"paeth_dpaeth: \n\t"

3824

"incl %%ebx \n\t"

3825

"incl %%edx \n\t"

3826

// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256

3827

"addb %%cl, -1(%%edi,%%ebx,) \n\t"

3828

"cmpl _FullLength, %%ebx \n\t"

3829

"jb paeth_dlp \n\t"

3830

"paeth_dend: \n\t"

3831

3832

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3833

3834

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3835

3836

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

3837

);

3838

}

3839

return; // No need to go further with this one

3840

} // end switch ( bpp )

3841

__asm__ (

3842

// MMX acceleration complete now do clean-up

3843

// Check if any remaining bytes left to decode

3844

"movl _MMXLength, %%ebx \n\t"

3845

"cmpl _FullLength, %%ebx \n\t"

3846

"jnb paeth_end \n\t"

3847

"movl row, %%edi \n\t"

3848

"movl prev_row, %%esi \n\t"

3849

// Do Paeth decode for remaining bytes

3850

"movl %%ebx, %%edx \n\t"

3851

"xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below

3852

"subl bpp, %%edx \n\t" // Set edx = ebx - bpp

3853

"paeth_lp2: \n\t"

3854

"xorl %%eax, %%eax \n\t"

3855

// pav = p - a = (a + b - c) - a = b - c

3856

"movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al

3857

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3858

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3859

"movl %%eax, patemp \n\t" // Save pav for later use

3860

"xorl %%eax, %%eax \n\t"

3861

// pbv = p - b = (a + b - c) - b = a - c

3862

"movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al

3863

"subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp)

3864

"movl %%eax, %%ecx \n\t"

3865

// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv

3866

"addl patemp, %%eax \n\t" // pcv = pav + pbv

3867

// pc = abs(pcv)

3868

"testl $0x80000000, %%eax \n\t"

3869

"jz paeth_pca2 \n\t"

3870

"negl %%eax \n\t" // reverse sign of neg values

3871

"paeth_pca2: \n\t"

3872

"movl %%eax, pctemp \n\t" // save pc for later use

3873

// pb = abs(pbv)

3874

"testl $0x80000000, %%ecx \n\t"

3875

"jz paeth_pba2 \n\t"

3876

"negl %%ecx \n\t" // reverse sign of neg values

3877

"paeth_pba2: \n\t"

3878

"movl %%ecx, pbtemp \n\t" // save pb for later use

3879

// pa = abs(pav)

3880

"movl patemp, %%eax \n\t"

3881

"testl $0x80000000, %%eax \n\t"

3882

"jz paeth_paa2 \n\t"

3883

"negl %%eax \n\t" // reverse sign of neg values

3884

"paeth_paa2: \n\t"

3885

"movl %%eax, patemp \n\t" // save pa for later use

3886

// test if pa <= pb

3887

"cmpl %%ecx, %%eax \n\t"

3888

"jna paeth_abb2 \n\t"

3889

// pa > pb; now test if pb <= pc

3890

"cmpl pctemp, %%ecx \n\t"

3891

"jna paeth_bbc2 \n\t"

3892

// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3893

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3894

"jmp paeth_paeth2 \n\t"

3895

"paeth_bbc2: \n\t"

3896

// pb <= pc; Raw(x) = Paeth(x) + Prior(x)

3897

"movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl

3898

"jmp paeth_paeth2 \n\t"

3899

"paeth_abb2: \n\t"

3900

// pa <= pb; now test if pa <= pc

3901

"cmpl pctemp, %%eax \n\t"

3902

"jna paeth_abc2 \n\t"

3903

// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)

3904

"movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl

3905

"jmp paeth_paeth2 \n\t"

3906

"paeth_abc2: \n\t"

3907

// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)

3908

"movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl

3909

"paeth_paeth2: \n\t"

3910

"incl %%ebx \n\t"

3911

"incl %%edx \n\t"

3912

// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256

3913

"addb %%cl, -1(%%edi,%%ebx,) \n\t"

3914

"cmpl _FullLength, %%ebx \n\t"

3915

"jb paeth_lp2 \n\t"

3916

"paeth_end: \n\t"

3917

"emms \n\t" // End MMX instructions; prep for possible FP instrs.

3918

3919

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3920

3921

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3922

3923

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

3924

);

3925

#endif /* GRR_GCC_MMX_CONVERTED */

3926

}

3927

3928

// Optimized code for PNG Sub filter decoder

3929

void /* PRIVATE */

3930

png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)

3931

{

3932

#ifdef GRR_GCC_MMX_CONVERTED

3933

int bpp;

3934

3935

bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel

3936

_FullLength = row_info->rowbytes - bpp; // # of bytes to filter

3937

__asm__ (

3938

"movl row, %%edi \n\t"

3939

"movl %%edi, %%esi \n\t" // lp = row

3940

"addl bpp, %%edi \n\t" // rp = row + bpp

3941

"xorl %%eax, %%eax \n\t"

3942

// get # of bytes to alignment

3943

"movl %%edi, _dif \n\t" // take start of row

3944

"addl $0xf, _dif \n\t" // add 7 + 8 to incr past

3945

// alignment boundary

3946

"xorl %%ebx, %%ebx \n\t"

3947

"andl $0xfffffff8, _dif \n\t" // mask to alignment boundary

3948

"subl %%edi, _dif \n\t" // subtract from start ==> value

3949

// ebx at alignment

3950

"jz sub_go \n\t"

3951

// fix alignment

3952

"sub_lp1: \n\t"

3953

"movb (%%esi,%%ebx,), %%al \n\t"

3954

"addb %%al, (%%edi,%%ebx,) \n\t"

3955

"incl %%ebx \n\t"

3956

"cmpl _dif, %%ebx \n\t"

3957

"jb sub_lp1 \n\t"

3958

"sub_go: \n\t"

3959

"movl _FullLength, %%ecx \n\t"

3960

"movl %%ecx, %%edx \n\t"

3961

"subl %%ebx, %%edx \n\t" // subtract alignment fix

3962

"andl $0x00000007, %%edx \n\t" // calc bytes over mult of 8

3963

"subl %%edx, %%ecx \n\t" // drop over bytes from length

3964

"movl %%ecx, _MMXLength \n\t"

3965

3966

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

3967

3968

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

3969

3970

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list

3971

);

3972

3973

// Now do the math for the rest of the row

3974

switch ( bpp )

3975

{

3976

case 3:

3977

{

3978

ActiveMask.use = 0x0000ffffff000000;

3979

ShiftBpp.use = 24; // == 3 * 8

3980

ShiftRem.use = 40; // == 64 - 24

3981

__asm__ (

3982

"movl row, %%edi \n\t"

3983

"movq $ActiveMask, %%mm7 \n\t" // Load ActiveMask for 2nd active byte group

3984

"movl %%edi, %%esi \n\t" // lp = row

3985

"addl bpp, %%edi \n\t" // rp = row + bpp

3986

"movq %%mm7, %%mm6 \n\t"

3987

"movl _dif, %%ebx \n\t"

3988

"psllq $ShiftBpp, %%mm6 \n\t" // Move mask in mm6 to cover 3rd active

3989

// byte group

3990

// PRIME the pump (load the first Raw(x-bpp) data set

3991

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

3992

"sub_3lp: \n\t"

3993

"psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes

3994

// no need for mask; shift clears inactive bytes

3995

// Add 1st active group

3996

"movq (%%edi,%%ebx,), %%mm0 \n\t"

3997

"paddb %%mm1, %%mm0 \n\t"

3998

// Add 2nd active group

3999

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4000

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4001

"pand %%mm7, %%mm1 \n\t" // mask to use only 2nd active group

4002

"paddb %%mm1, %%mm0 \n\t"

4003

// Add 3rd active group

4004

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4005

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4006

"pand %%mm6, %%mm1 \n\t" // mask to use only 3rd active group

4007

"addl $8, %%ebx \n\t"

4008

"paddb %%mm1, %%mm0 \n\t"

4009

"cmpl _MMXLength, %%ebx \n\t"

4010

"movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array

4011

// Prep for doing 1st add at top of loop

4012

"movq %%mm0, %%mm1 \n\t"

4013

"jb sub_3lp \n\t"

4014

4015

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4016

4017

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4018

4019

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm6", "%mm7" // CHECKASM: clobber list

4020

);

4021

}

4022

break;

4023

4024

case 1:

4025

{

4026

// Placed here just in case this is a duplicate of the

4027

// non-MMX code for the SUB filter in png_read_filter_row above

4028

4029

// png_bytep rp;

4030

// png_bytep lp;

4031

// png_uint_32 i;

4032

// bpp = (row_info->pixel_depth + 7) >> 3;

4033

// for (i = (png_uint_32)bpp, rp = row + bpp, lp = row;

4034

// i < row_info->rowbytes; i++, rp++, lp++)

4035

// {

4036

// *rp = (png_byte)(((int)(*rp) + (int)(*lp)) & 0xff);

4037

// }

4038

__asm__ (

4039

"movl _dif, %%ebx \n\t"

4040

"movl row, %%edi \n\t"

4041

"cmpl _FullLength, %%ebx \n\t"

4042

"jnb sub_1end \n\t"

4043

"movl %%edi, %%esi \n\t" // lp = row

4044

"xorl %%eax, %%eax \n\t"

4045

"addl bpp, %%edi \n\t" // rp = row + bpp

4046

"sub_1lp: \n\t"

4047

"movb (%%esi,%%ebx,), %%al \n\t"

4048

"addb %%al, (%%edi,%%ebx,) \n\t"

4049

"incl %%ebx \n\t"

4050

"cmpl _FullLength, %%ebx \n\t"

4051

"jb sub_1lp \n\t"

4052

"sub_1end: \n\t"

4053

4054

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4055

4056

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4057

4058

: "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list

4059

);

4060

}

4061

return;

4062

4063

case 6:

4064

case 7:

4065

case 4:

4066

case 5:

4067

{

4068

ShiftBpp.use = bpp << 3;

4069

ShiftRem.use = 64 - ShiftBpp.use;

4070

__asm__ (

4071

"movl row, %%edi \n\t"

4072

"movl _dif, %%ebx \n\t"

4073

"movl %%edi, %%esi \n\t" // lp = row

4074

"addl bpp, %%edi \n\t" // rp = row + bpp

4075

// PRIME the pump (load the first Raw(x-bpp) data set

4076

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

4077

"sub_4lp: \n\t"

4078

"psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes

4079

// no need for mask; shift clears inactive bytes

4080

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4081

"paddb %%mm1, %%mm0 \n\t"

4082

// Add 2nd active group

4083

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4084

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4085

// there is no need for any mask

4086

// since shift clears inactive bits/bytes

4087

"addl $8, %%ebx \n\t"

4088

"paddb %%mm1, %%mm0 \n\t"

4089

"cmpl _MMXLength, %%ebx \n\t"

4090

"movq %%mm0, -8(%%edi,%%ebx,) \n\t"

4091

"movq %%mm0, %%mm1 \n\t" // Prep for doing 1st add at top of loop

4092

"jb sub_4lp \n\t"

4093

4094

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4095

4096

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4097

4098

: "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list

4099

);

4100

}

4101

break;

4102

4103

case 2:

4104

{

4105

ActiveMask.use = 0x00000000ffff0000;

4106

ShiftBpp.use = 16; // == 2 * 8

4107

ShiftRem.use = 48; // == 64 - 16

4108

__asm__ (

4109

"movq $ActiveMask, %%mm7 \n\t" // Load ActiveMask for 2nd active byte group

4110

"movl _dif, %%ebx \n\t"

4111

"movq %%mm7, %%mm6 \n\t"

4112

"movl row, %%edi \n\t"

4113

"psllq $ShiftBpp, %%mm6 \n\t" // Move mask in mm6 to cover 3rd active

4114

// byte group

4115

"movl %%edi, %%esi \n\t" // lp = row

4116

"movq %%mm6, %%mm5 \n\t"

4117

"addl bpp, %%edi \n\t" // rp = row + bpp

4118

"psllq $ShiftBpp, %%mm5 \n\t" // Move mask in mm5 to cover 4th active

4119

// byte group

4120

// PRIME the pump (load the first Raw(x-bpp) data set

4121

"movq -8(%%edi,%%ebx,), %%mm1 \n\t"

4122

"sub_2lp: \n\t"

4123

// Add 1st active group

4124

"psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes

4125

// no need for mask; shift clears inactive

4126

// bytes

4127

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4128

"paddb %%mm1, %%mm0 \n\t"

4129

// Add 2nd active group

4130

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4131

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4132

"pand %%mm7, %%mm1 \n\t" // mask to use only 2nd active group

4133

"paddb %%mm1, %%mm0 \n\t"

4134

// Add 3rd active group

4135

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4136

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4137

"pand %%mm6, %%mm1 \n\t" // mask to use only 3rd active group

4138

"paddb %%mm1, %%mm0 \n\t"

4139

// Add 4th active group

4140

"movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1

4141

"psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly

4142

"pand %%mm5, %%mm1 \n\t" // mask to use only 4th active group

4143

"addl $8, %%ebx \n\t"

4144

"paddb %%mm1, %%mm0 \n\t"

4145

"cmpl _MMXLength, %%ebx \n\t"

4146

"movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array

4147

"movq %%mm0, %%mm1 \n\t" // Prep for doing 1st add at top of loop

4148

"jb sub_2lp \n\t"

4149

4150

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4151

4152

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4153

4154

: "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

4155

);

4156

}

4157

break;

4158

case 8:

4159

{

4160

__asm__ (

4161

"movl row, %%edi \n\t"

4162

"movl _dif, %%ebx \n\t"

4163

"movl %%edi, %%esi \n\t" // lp = row

4164

"addl bpp, %%edi \n\t" // rp = row + bpp

4165

"movl _MMXLength, %%ecx \n\t"

4166

"movq -8(%%edi,%%ebx,), %%mm7 \n\t" // PRIME the pump (load the first

4167

// Raw(x-bpp) data set

4168

"andl $0x0000003f, %%ecx \n\t" // calc bytes over mult of 64

4169

"sub_8lp: \n\t"

4170

"movq (%%edi,%%ebx,), %%mm0 \n\t" // Load Sub(x) for 1st 8 bytes

4171

"paddb %%mm7, %%mm0 \n\t"

4172

"movq 8(%%edi,%%ebx,), %%mm1 \n\t" // Load Sub(x) for 2nd 8 bytes

4173

"movq %%mm0, (%%edi,%%ebx,) \n\t" // Write Raw(x) for 1st 8 bytes

4174

// Now mm0 will be used as Raw(x-bpp) for

4175

// the 2nd group of 8 bytes. This will be

4176

// repeated for each group of 8 bytes with

4177

// the 8th group being used as the Raw(x-bpp)

4178

// for the 1st group of the next loop.

4179

"paddb %%mm0, %%mm1 \n\t"

4180

"movq 16(%%edi,%%ebx,), %%mm2 \n\t" // Load Sub(x) for 3rd 8 bytes

4181

"movq %%mm1, 8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 2nd 8 bytes

4182

"paddb %%mm1, %%mm2 \n\t"

4183

"movq 24(%%edi,%%ebx,), %%mm3 \n\t" // Load Sub(x) for 4th 8 bytes

4184

"movq %%mm2, 16(%%edi,%%ebx,) \n\t" // Write Raw(x) for 3rd 8 bytes

4185

"paddb %%mm2, %%mm3 \n\t"

4186

"movq 32(%%edi,%%ebx,), %%mm4 \n\t" // Load Sub(x) for 5th 8 bytes

4187

"movq %%mm3, 24(%%edi,%%ebx,) \n\t" // Write Raw(x) for 4th 8 bytes

4188

"paddb %%mm3, %%mm4 \n\t"

4189

"movq 40(%%edi,%%ebx,), %%mm5 \n\t" // Load Sub(x) for 6th 8 bytes

4190

"movq %%mm4, 32(%%edi,%%ebx,) \n\t" // Write Raw(x) for 5th 8 bytes

4191

"paddb %%mm4, %%mm5 \n\t"

4192

"movq 48(%%edi,%%ebx,), %%mm6 \n\t" // Load Sub(x) for 7th 8 bytes

4193

"movq %%mm5, 40(%%edi,%%ebx,) \n\t" // Write Raw(x) for 6th 8 bytes

4194

"paddb %%mm5, %%mm6 \n\t"

4195

"movq 56(%%edi,%%ebx,), %%mm7 \n\t" // Load Sub(x) for 8th 8 bytes

4196

"movq %%mm6, 48(%%edi,%%ebx,) \n\t" // Write Raw(x) for 7th 8 bytes

4197

"addl $64, %%ebx \n\t"

4198

"paddb %%mm6, %%mm7 \n\t"

4199

"cmpl %%ecx, %%ebx \n\t"

4200

"movq %%mm7, -8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 8th 8 bytes

4201

"jb sub_8lp \n\t"

4202

"cmpl _MMXLength, %%ebx \n\t"

4203

"jnb sub_8lt8 \n\t"

4204

"sub_8lpA: \n\t"

4205

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4206

"addl $8, %%ebx \n\t"

4207

"paddb %%mm7, %%mm0 \n\t"

4208

"cmpl _MMXLength, %%ebx \n\t"

4209

"movq %%mm0, -8(%%edi,%%ebx,) \n\t" // use -8 to offset early add to ebx

4210

"movq %%mm0, %%mm7 \n\t" // Move calculated Raw(x) data to mm1 to

4211

// be the new Raw(x-bpp) for the next loop

4212

"jb sub_8lpA \n\t"

4213

"sub_8lt8: \n\t"

4214

4215

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4216

4217

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4218

4219

: "%ebx", "%ecx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

4220

);

4221

}

4222

break;

4223

4224

default: // bpp greater than 8 bytes

4225

{

4226

__asm__ (

4227

"movl _dif, %%ebx \n\t"

4228

"movl row, %%edi \n\t"

4229

"movl %%edi, %%esi \n\t" // lp = row

4230

"addl bpp, %%edi \n\t" // rp = row + bpp

4231

"sub_Alp: \n\t"

4232

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4233

"movq (%%esi,%%ebx,), %%mm1 \n\t"

4234

"addl $8, %%ebx \n\t"

4235

"paddb %%mm1, %%mm0 \n\t"

4236

"cmpl _MMXLength, %%ebx \n\t"

4237

"movq %%mm0, -8(%%edi,%%ebx,) \n\t" // mov does not affect flags; -8 to offset

4238

// add ebx

4239

"jb sub_Alp \n\t"

4240

4241

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4242

4243

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4244

4245

: "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list

4246

);

4247

}

4248

break;

4249

4250

} // end switch ( bpp )

4251

4252

__asm__ (

4253

"movl _MMXLength, %%ebx \n\t"

4254

"movl row, %%edi \n\t"

4255

"cmpl _FullLength, %%ebx \n\t"

4256

"jnb sub_end \n\t"

4257

"movl %%edi, %%esi \n\t" // lp = row

4258

"xorl %%eax, %%eax \n\t"

4259

"addl bpp, %%edi \n\t" // rp = row + bpp

4260

"sub_lp2: \n\t"

4261

"movb (%%esi,%%ebx,), %%al \n\t"

4262

"addb %%al, (%%edi,%%ebx,) \n\t"

4263

"incl %%ebx \n\t"

4264

"cmpl _FullLength, %%ebx \n\t"

4265

"jb sub_lp2 \n\t"

4266

"sub_end: \n\t"

4267

"emms \n\t" // end MMX instructions

4268

4269

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4270

4271

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4272

4273

: "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list

4274

);

4275

#endif /* GRR_GCC_MMX_CONVERTED */

4276

}

4277

4278

// Optimized code for PNG Up filter decoder

4279

void /* PRIVATE */

4280

png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,

4281

png_bytep prev_row)

4282

{

4283

#ifdef GRR_GCC_MMX_CONVERTED

4284

png_uint_32 len;

4285

4286

len = row_info->rowbytes; // # of bytes to filter

4287

__asm__ (

4288

"movl row, %%edi \n\t"

4289

// get # of bytes to alignment

4290

"movl %%edi, %%ecx \n\t"

4291

"xorl %%ebx, %%ebx \n\t"

4292

"addl $0x7, %%ecx \n\t"

4293

"xorl %%eax, %%eax \n\t"

4294

"andl $0xfffffff8, %%ecx \n\t"

4295

"movl prev_row, %%esi \n\t"

4296

"subl %%edi, %%ecx \n\t"

4297

"jz up_go \n\t"

4298

// fix alignment

4299

"up_lp1: \n\t"

4300

"movb (%%edi,%%ebx,), %%al \n\t"

4301

"addb (%%esi,%%ebx,), %%al \n\t"

4302

"incl %%ebx \n\t"

4303

"cmpl %%ecx, %%ebx \n\t"

4304

"movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx

4305

"jb up_lp1 \n\t"

4306

"up_go: \n\t"

4307

"movl len, %%ecx \n\t"

4308

"movl %%ecx, %%edx \n\t"

4309

"subl %%ebx, %%edx \n\t" // subtract alignment fix

4310

"andl $0x0000003f, %%edx \n\t" // calc bytes over mult of 64

4311

"subl %%edx, %%ecx \n\t" // drop over bytes from length

4312

// Unrolled loop - use all MMX registers and interleave to reduce

4313

// number of branch instructions (loops) and reduce partial stalls

4314

"up_loop: \n\t"

4315

"movq (%%esi,%%ebx,), %%mm1 \n\t"

4316

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4317

"movq 8(%%esi,%%ebx,), %%mm3 \n\t"

4318

"paddb %%mm1, %%mm0 \n\t"

4319

"movq 8(%%edi,%%ebx,), %%mm2 \n\t"

4320

"movq %%mm0, (%%edi,%%ebx,) \n\t"

4321

"paddb %%mm3, %%mm2 \n\t"

4322

"movq 16(%%esi,%%ebx,), %%mm5 \n\t"

4323

"movq %%mm2, 8(%%edi,%%ebx,) \n\t"

4324

"movq 16(%%edi,%%ebx,), %%mm4 \n\t"

4325

"movq 24(%%esi,%%ebx,), %%mm7 \n\t"

4326

"paddb %%mm5, %%mm4 \n\t"

4327

"movq 24(%%edi,%%ebx,), %%mm6 \n\t"

4328

"movq %%mm4, 16(%%edi,%%ebx,) \n\t"

4329

"paddb %%mm7, %%mm6 \n\t"

4330

"movq 32(%%esi,%%ebx,), %%mm1 \n\t"

4331

"movq %%mm6, 24(%%edi,%%ebx,) \n\t"

4332

"movq 32(%%edi,%%ebx,), %%mm0 \n\t"

4333

"movq 40(%%esi,%%ebx,), %%mm3 \n\t"

4334

"paddb %%mm1, %%mm0 \n\t"

4335

"movq 40(%%edi,%%ebx,), %%mm2 \n\t"

4336

"movq %%mm0, 32(%%edi,%%ebx,) \n\t"

4337

"paddb %%mm3, %%mm2 \n\t"

4338

"movq 48(%%esi,%%ebx,), %%mm5 \n\t"

4339

"movq %%mm2, 40(%%edi,%%ebx,) \n\t"

4340

"movq 48(%%edi,%%ebx,), %%mm4 \n\t"

4341

"movq 56(%%esi,%%ebx,), %%mm7 \n\t"

4342

"paddb %%mm5, %%mm4 \n\t"

4343

"movq 56(%%edi,%%ebx,), %%mm6 \n\t"

4344

"movq %%mm4, 48(%%edi,%%ebx,) \n\t"

4345

"addl $64, %%ebx \n\t"

4346

"paddb %%mm7, %%mm6 \n\t"

4347

"cmpl %%ecx, %%ebx \n\t"

4348

"movq %%mm6, -8(%%edi,%%ebx,) \n\t" // (+56)movq does not affect flags;

4349

// -8 to offset add ebx

4350

"jb up_loop \n\t"

4351

4352

"cmpl $0, %%edx \n\t" // Test for bytes over mult of 64

4353

"jz up_end \n\t"

4354

4355

4356

// 2 lines added by lcreeve@netins.net

4357

// (mail 11 Jul 98 in png-implement list)

4358

"cmpl $8, %%edx \n\t" //test for less than 8 bytes

4359

"jb up_lt8 \n\t"

4360

4361

4362

"addl %%edx, %%ecx \n\t"

4363

"andl $0x00000007, %%edx \n\t" // calc bytes over mult of 8

4364

"subl %%edx, %%ecx \n\t" // drop over bytes from length

4365

"jz up_lt8 \n\t"

4366

// Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously

4367

"up_lpA: \n\t"

4368

"movq (%%esi,%%ebx,), %%mm1 \n\t"

4369

"movq (%%edi,%%ebx,), %%mm0 \n\t"

4370

"addl $8, %%ebx \n\t"

4371

"paddb %%mm1, %%mm0 \n\t"

4372

"cmpl %%ecx, %%ebx \n\t"

4373

"movq %%mm0, -8(%%edi,%%ebx,) \n\t" // movq does not affect flags; -8 to offset add ebx

4374

"jb up_lpA \n\t"

4375

"cmpl $0, %%edx \n\t" // Test for bytes over mult of 8

4376

"jz up_end \n\t"

4377

"up_lt8: \n\t"

4378

"xorl %%eax, %%eax \n\t"

4379

"addl %%edx, %%ecx \n\t" // move over byte count into counter

4380

// Loop using x86 registers to update remaining bytes

4381

"up_lp2: \n\t"

4382

"movb (%%edi,%%ebx,), %%al \n\t"

4383

"addb (%%esi,%%ebx,), %%al \n\t"

4384

"incl %%ebx \n\t"

4385

"cmpl %%ecx, %%ebx \n\t"

4386

"movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx

4387

"jb up_lp2 \n\t"

4388

"up_end: \n\t"

4389

// Conversion of filtered row completed

4390

"emms \n\t" // End MMX instructions; prep for possible FP instrs.

4391

4392

: // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var)

4393

4394

: // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest)

4395

4396

: "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list

4397

);

4398

#endif /* GRR_GCC_MMX_CONVERTED */

4399

}

4400

4401

4402

#if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW)

4403

4404

// Optimized png_read_filter_row routines

4405

4406

void /* PRIVATE */

4407

png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep

4408

row, png_bytep prev_row, int filter)

4409

{

4410

#ifdef PNG_DEBUG

4411

char filnm[6];

4412

#endif

4413

#define UseMMX 1

4414

4415

if (mmx_supported == 2)

4416

mmx_supported = mmxsupport();

4417

4418

#ifdef GRR_GCC_MMX_CONVERTED

4419

if (!mmx_supported)

4420

#endif

4421

{

4422

png_read_filter_row_c(png_ptr, row_info, row, prev_row, filter);

4423

return ;

4424

}

4425

4426

#ifdef PNG_DEBUG

4427

png_debug(1, "in png_read_filter_row\n");

4428

#if (UseMMX == 1)

4429

png_debug1(0,"%s, ", "MMX");

4430

#else

4431

png_debug1(0,"%s, ", "x86");

4432

#endif

4433

switch (filter)

4434

{

4435

case 0: sprintf(filnm, "None ");

4436

break;

4437

case 1: sprintf(filnm, "Sub ");

4438

break;

4439

case 2: sprintf(filnm, "Up ");

4440

break;

4441

case 3: sprintf(filnm, "Avg ");

4442

break;

4443

case 4: sprintf(filnm, "Paeth");

4444

break;

4445

default: sprintf(filnm, "Unknw");

4446

break;

4447

}

4448

png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm);

4449

png_debug2(0, "pd=%2d, b=%d, ", (int)row_info->pixel_depth,

4450

(int)((row_info->pixel_depth + 7) >> 3));

4451

png_debug1(0,"len=%8d, ", row_info->rowbytes);

4452

#endif

4453

4454

switch (filter)

4455

{

4456

case PNG_FILTER_VALUE_NONE:

4457

break;

4458

4459

case PNG_FILTER_VALUE_SUB:

4460

#if (UseMMX == 1)

4461

if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))

4462

{

4463

png_read_filter_row_mmx_sub(row_info, row);

4464

}

4465

else

4466

#endif

4467

{

4468

png_uint_32 i;

4469

png_uint_32 istop = row_info->rowbytes;

4470

png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;

4471

png_bytep rp = row + bpp;

4472

png_bytep lp = row;

4473

4474

for (i = bpp; i < istop; i++)

4475

{

4476

*rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);

4477

rp++;

4478

}

4479

} //end !UseMMX

4480

break;

4481

4482

case PNG_FILTER_VALUE_UP:

4483

#if (UseMMX == 1)

4484

if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))

4485

{

4486

png_read_filter_row_mmx_up(row_info, row, prev_row);

4487

}

4488

else

4489

#endif

4490

{

4491

png_bytep rp;

4492

png_bytep pp;

4493

png_uint_32 i;

4494

for (i = 0, rp = row, pp = prev_row;

4495

i < row_info->rowbytes; i++, rp++, pp++)

4496

{

4497

*rp = (png_byte)(((int)(*rp) + (int)(*pp)) & 0xff);

4498

}

4499

} //end !UseMMX

4500

break;

4501

4502

case PNG_FILTER_VALUE_AVG:

4503

#if (UseMMX == 1)

4504

if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))

4505

{

4506

png_read_filter_row_mmx_avg(row_info, row, prev_row);

4507

}

4508

else

4509

#endif

4510

{

4511

png_uint_32 i;

4512

png_bytep rp = row;

4513

png_bytep pp = prev_row;

4514

png_bytep lp = row;

4515

png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;

4516

png_uint_32 istop = row_info->rowbytes - bpp;

4517

4518

for (i = 0; i < bpp; i++)

4519

{

4520

*rp = (png_byte)(((int)(*rp) +

4521

((int)(*pp++) >> 1)) & 0xff);

4522

rp++;

4523

}

4524

4525

for (i = 0; i < istop; i++)

4526

{

4527

*rp = (png_byte)(((int)(*rp) +

4528

((int)(*pp++ + *lp++) >> 1)) & 0xff);

4529

rp++;

4530

}

4531

} //end !UseMMX

4532

break;

4533

4534

case PNG_FILTER_VALUE_PAETH:

4535

#if (UseMMX == 1)

4536

if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))

4537

{

4538

png_read_filter_row_mmx_paeth(row_info, row, prev_row);

4539

}

4540

else

4541

#endif

4542

{

4543

png_uint_32 i;

4544

png_bytep rp = row;

4545

png_bytep pp = prev_row;

4546

png_bytep lp = row;

4547

png_bytep cp = prev_row;

4548

png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;

4549

png_uint_32 istop=row_info->rowbytes - bpp;

4550

4551

for (i = 0; i < bpp; i++)

4552

{

4553

*rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);

4554

rp++;

4555

}

4556

4557

for (i = 0; i < istop; i++) // use leftover rp,pp

4558

{

4559

int a, b, c, pa, pb, pc, p;

4560

4561

a = *lp++;

4562

b = *pp++;

4563

c = *cp++;

4564

4565

p = b - c;

4566

pc = a - c;

4567

4568

#ifdef PNG_USE_ABS

4569

pa = abs(p);

4570

pb = abs(pc);

4571

pc = abs(p + pc);

4572

#else

4573

pa = p < 0 ? -p : p;

4574

pb = pc < 0 ? -pc : pc;

4575

pc = (p + pc) < 0 ? -(p + pc) : p + pc;

4576

#endif

4577

4578

4579

if (pa <= pb && pa <= pc)

4580

p = a;

4581

else if (pb <= pc)

4582

p = b;

4583

else

4584

p = c;

4585

4586

4587

p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;

4588

4589

*rp = (png_byte)(((int)(*rp) + p) & 0xff);

4590

rp++;

4591

}

4592

} //end !UseMMX

4593

break;

4594

4595

default:

4596

png_warning(png_ptr, "Ignoring bad adaptive filter type");

4597

*row=0;

4598

break;

4599

}

4600

}

4601

4602

#endif /* PNG_HAVE_ASSEMBLER_READ_FILTER_ROW */

4603

4604

4605

// GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl)

4606

// (2) all instructions compile with gcc 2.7.2.3 and later

4607

// (3) the function is moved down here to prevent gcc from

4608

// inlining it in multiple places and then barfing be-

4609

// cause the ".NOT_SUPPORTED" label is multiply defined

4610

// [is there a way to signal that a *single* function should

4611

// not be inlined? is there a way to modify the label for

4612

// each inlined instance, e.g., by appending _1, _2, etc.?

4613

// maybe if don't use leading "." in label name? (not tested)]

4614

4615

#ifdef ORIG_THAT_USED_TO_CLOBBER_EBX

4616

4617

/*
 * mmxsupport() -- variant selected by ORIG_THAT_USED_TO_CLOBBER_EBX.
 *
 * Detects MMX at run time:
 *   1. toggle the ID bit (bit 21) of EFLAGS; if the change does not
 *      stick, the CPUID instruction is not available;
 *   2. CPUID function 0 must report a non-zero maximum function number;
 *   3. CPUID function 1 must report bit 23 (MMX) set in EDX.
 *
 * Returns 1 when MMX is available, 0 otherwise.
 *
 * This variant leaves register saving to the compiler by naming
 * ebx/ecx/edx in the clobber list instead of pushing them by hand.
 * NOTE(review): judging by the macro name, the ebx clobber is why this
 * variant was superseded -- confirm before enabling it.
 *
 * Changes from the earlier form of this variant:
 *   - the result is an "=a" register output written on *both* paths;
 *     the old output-only "=m" operand was stored to only on the
 *     "supported" path, so the compiler was entitled to discard the
 *     "= 0" initialiser and the "not supported" result was indeterminate;
 *   - numeric local labels (0:/1:) replace the global ".NOT_SUPPORTED"
 *     label, so the asm may be duplicated without a multiply-defined
 *     symbol;
 *   - the caller's original EFLAGS value is restored after the probe.
 */
int mmxsupport(void)
{
#if defined(__i386__)
    int mmx_supported_local;

    __asm__ __volatile__ (
        "pushfl                 \n\t" // save Eflag to stack
        "popl %%eax             \n\t" // get Eflag from stack into eax
        "movl %%eax, %%ecx      \n\t" // keep the original Eflag in ecx
        "xorl $0x200000, %%eax  \n\t" // toggle ID bit in Eflag (bit 21)
        "pushl %%eax            \n\t" // save modified Eflag back to stack
        "popfl                  \n\t" // load modified value into Eflag reg
        "pushfl                 \n\t" // read Eflag back...
        "popl %%eax             \n\t" // ...into eax
        "pushl %%ecx            \n\t" // put the original Eflag on the stack
        "popfl                  \n\t" // and restore it for the caller
        "xorl %%ecx, %%eax      \n\t" // compare new Eflag with original
        "jz 0f                  \n\t" // if same, CPUID is not supported

        "xorl %%eax, %%eax      \n\t" // CPUID function 0
        "cpuid                  \n\t" // get the CPU identification info
        "cmpl $1, %%eax         \n\t" // need max function number >= 1
        "jl 0f                  \n\t" // otherwise no feature flags: no MMX

        "xorl %%eax, %%eax      \n\t" // set eax to zero and...
        "incl %%eax             \n\t" // ...increment it: CPUID function 1
        "cpuid                  \n\t" // get the feature flags in edx
        "andl $0x800000, %%edx  \n\t" // mask out all bits but MMX bit (23)
        "cmpl $0, %%edx         \n\t" // 0 = MMX not supported
        "jz 0f                  \n\t" // non-zero = MMX IS supported

        "movl $1, %%eax         \n\t" // result = 1
        "jmp 1f                 \n\t"

    "0:                         \n\t" // not supported
        "xorl %%eax, %%eax      \n\t" // result = 0
    "1:                         \n\t" // common exit

        : "=a" (mmx_supported_local)  // %0: result is returned in eax

        :                             // no inputs

        : "%ebx", "%ecx", "%edx",     // overwritten by CPUID / used as scratch
          "cc"                        // condition codes (flag bits)
    );

    return mmx_supported_local;
#else
    /* The 32-bit instructions above (pushfl/popfl/popl) cannot be assembled
     * for other targets, so report "no MMX" there. */
    return 0;
#endif
}

4672

4673

#else /* !ORIG_THAT_USED_TO_CLOBBER_EBX */

4674

4675

/*
 * mmxsupport() -- run-time MMX detection.
 *
 * Steps:
 *   1. toggle the ID bit (bit 21) of EFLAGS; if the change does not
 *      stick, the CPUID instruction is not available;
 *   2. CPUID function 0 must report a non-zero maximum function number;
 *   3. CPUID function 1 must report bit 23 (MMX) set in EDX.
 *
 * Returns 1 when MMX is available, 0 otherwise.
 *
 * ebx, ecx and edx are saved and restored by hand rather than listed as
 * clobbers, so the compiler never has to give up ebx around this asm.
 *
 * Changes from the earlier form of this function:
 *   - no "ret" inside the asm.  A raw "ret" skips the compiler-generated
 *     epilogue; with a frame pointer or any stack adjustment in the
 *     prologue it pops the wrong word as the return address.  Both paths
 *     now fall out of the asm and return through ordinary C;
 *   - the result is an "=a" output and is returned with a real "return"
 *     statement instead of relying on eax surviving to the end of a
 *     non-void function that has no return;
 *   - numeric local labels (0:/1:) replace the global ".NOT_SUPPORTED"
 *     label, so the asm may be duplicated/inlined without a
 *     multiply-defined symbol;
 *   - the caller's original EFLAGS value is restored after the probe.
 */
int mmxsupport(void)
{
#if defined(__i386__)
    int result;

    __asm__ __volatile__ (
        "pushl %%ebx            \n\t" // ebx gets clobbered by CPUID
        "pushl %%ecx            \n\t" // so does ecx...
        "pushl %%edx            \n\t" // ...and edx

        "pushfl                 \n\t" // save Eflag to stack
        "popl %%eax             \n\t" // get Eflag from stack into eax
        "movl %%eax, %%ecx      \n\t" // keep the original Eflag in ecx
        "xorl $0x200000, %%eax  \n\t" // toggle ID bit in Eflag (bit 21)
        "pushl %%eax            \n\t" // save modified Eflag back to stack
        "popfl                  \n\t" // load modified value into Eflag reg
        "pushfl                 \n\t" // read Eflag back...
        "popl %%eax             \n\t" // ...into eax
        "pushl %%ecx            \n\t" // put the original Eflag on the stack
        "popfl                  \n\t" // and restore it for the caller
        "xorl %%ecx, %%eax      \n\t" // compare new Eflag with original
        "jz 0f                  \n\t" // if same, CPUID is not supported

        "xorl %%eax, %%eax      \n\t" // CPUID function 0
        "cpuid                  \n\t" // get the CPU identification info
        "cmpl $1, %%eax         \n\t" // need max function number >= 1
        "jl 0f                  \n\t" // otherwise no feature flags: no MMX

        "xorl %%eax, %%eax      \n\t" // set eax to zero and...
        "incl %%eax             \n\t" // ...increment it: CPUID function 1
        "cpuid                  \n\t" // get the feature flags in edx
        "andl $0x800000, %%edx  \n\t" // mask out all bits but MMX bit (23)
        "cmpl $0, %%edx         \n\t" // 0 = MMX not supported
        "jz 0f                  \n\t" // non-zero = MMX IS supported

        "movl $1, %%eax         \n\t" // result = 1
        "jmp 1f                 \n\t"

    "0:                         \n\t" // not supported
        "movl $0, %%eax         \n\t" // result = 0

    "1:                         \n\t" // common exit: undo the pushes
        "popl %%edx             \n\t" // restore edx
        "popl %%ecx             \n\t" // restore ecx
        "popl %%ebx             \n\t" // restore ebx

        : "=a" (result)               // %0: result is returned in eax

        :                             // no inputs

        : "cc"                        // condition codes (flag bits);
                                      // ebx/ecx/edx are handled manually
    );

    return result;
#else
    /* The 32-bit instructions above (pushl/popl/pushfl/popfl) cannot be
     * assembled for other targets, so report "no MMX" there. */
    return 0;
#endif
}

4739

4740

#endif /* ?ORIG_THAT_USED_TO_CLOBBER_EBX */

4741

4742

#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */

4743

4744

Older »