~snowball-yiddish-dev/snowball-yiddish/trunk

« back to all changes in this revision

Viewing changes to pystemmer/benchmark.py

Committer: Jason Spashett
Date: 2012-04-14 13:12:57 UTC
Revision ID: jason@spashett.com-20120414131257-rv3ugy4u2iyoczdk

Add ISO 639-2, and 639-1 language codes

files added:
data

data/danish

data/danish/diffs.txt

data/danish/output.txt

data/danish/voc.txt

data/dutch

data/dutch/diffs.txt

data/dutch/output.txt

data/dutch/voc.txt

data/english

data/english/diffs.txt

data/english/output.txt

data/english/voc.txt

data/finnish

data/finnish/diffs.txt

data/finnish/output.txt

data/finnish/voc.txt

data/french

data/french/diffs.txt

data/french/output.txt

data/french/voc.txt

data/german

data/german/diffs.txt

data/german/output.txt

data/german/voc.txt

data/german2

data/german2/output.txt

data/german2/voc.txt

data/hungarian

data/hungarian/diffs.txt

data/hungarian/output.txt

data/hungarian/voc.txt

data/italian

data/italian/diffs.txt

data/italian/output.txt

data/italian/voc.txt

data/kraaij_pohlmann

data/kraaij_pohlmann/diffs.txt

data/kraaij_pohlmann/output.txt

data/kraaij_pohlmann/voc.txt

data/lovins

data/lovins/output.txt

data/lovins/voc.txt

data/norwegian

data/norwegian/diffs.txt

data/norwegian/output.txt

data/norwegian/voc.txt

data/porter

data/porter/diffs.txt

data/porter/output.txt

data/porter/voc.txt

data/portuguese

data/portuguese/diffs.txt

data/portuguese/output.txt

data/portuguese/voc.txt

data/romanian

data/romanian/diffs.txt

data/romanian/output.txt

data/romanian/voc.txt

data/russian

data/russian/diffs-t.txt

data/russian/diffs.txt

data/russian/output.txt

data/russian/voc.txt

data/spanish

data/spanish/diffs.txt

data/spanish/output.txt

data/spanish/voc.txt

data/swedish

data/swedish/diffs.txt

data/swedish/output.txt

data/swedish/voc.txt

data/turkish

data/turkish/output.txt

data/turkish/voc.txt

pystemmer

pystemmer/ChangeLog

pystemmer/HACKING

pystemmer/LICENSE

pystemmer/MANIFEST.in

pystemmer/README

pystemmer/benchmark.py

pystemmer/docs

pystemmer/docs/quickstart.txt

pystemmer/docs/quickstart_python3.txt

pystemmer/makedist.sh

pystemmer/runtests.py

pystemmer/sampledata

pystemmer/sampledata/englishvoc.txt

pystemmer/sampledata/puttydoc.txt

pystemmer/setup.py

pystemmer/src

pystemmer/src/Stemmer.pyx

scripts

scripts/bootstrap.sh

scripts/checkdata.sh

scripts/make_website.sh

snowball/AUTHORS

snowball/algorithms/danish/stem_ISO_8859_1.sbl

snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/dutch/stem_ISO_8859_1.sbl

snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/english/stem_ISO_8859_1.sbl

snowball/algorithms/finnish/stem_ISO_8859_1.sbl

snowball/algorithms/french/stem_ISO_8859_1.sbl

snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/german/stem_ISO_8859_1.sbl

snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/german2/stem_ISO_8859_1.sbl

snowball/algorithms/hungarian

snowball/algorithms/hungarian/stem_ISO_8859_1.sbl

snowball/algorithms/italian/stem_ISO_8859_1.sbl

snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/kraaij_pohlmann

snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl

snowball/algorithms/lovins/stem_ISO_8859_1.sbl

snowball/algorithms/norwegian/stem_ISO_8859_1.sbl

snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/porter/stem_ISO_8859_1.sbl

snowball/algorithms/portuguese/stem_ISO_8859_1.sbl

snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/romanian

snowball/algorithms/romanian/stem_ISO_8859_2.sbl

snowball/algorithms/romanian/stem_Unicode.sbl

snowball/algorithms/russian/stem_KOI8_R.sbl

snowball/algorithms/russian/stem_Unicode.sbl

snowball/algorithms/spanish/stem_ISO_8859_1.sbl

snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/swedish/stem_ISO_8859_1.sbl

snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl

snowball/algorithms/turkish

snowball/algorithms/turkish/stem_Unicode.sbl

snowball/algorithms/yiddish

snowball/algorithms/yiddish/stem_Unicode.sbl

snowball/compiler/syswords.h

snowball/compiler/syswords2.h

snowball/doc/libstemmer_c_README

snowball/doc/libstemmer_java_README

snowball/java

snowball/java/org

snowball/java/org/tartarus

snowball/java/org/tartarus/snowball

snowball/java/org/tartarus/snowball/Among.java

snowball/java/org/tartarus/snowball/SnowballProgram.java

snowball/java/org/tartarus/snowball/SnowballStemmer.java

snowball/java/org/tartarus/snowball/TestApp.java

snowball/libstemmer/libstemmer_c.in

snowball/libstemmer/modules.txt

snowball/libstemmer/modules_utf8.txt

website/S

website/S/index.php

website/algorithms

website/algorithms/armenian

website/algorithms/armenian/stemmer.html

website/algorithms/armenian/stemmer.java

website/algorithms/armenian/stemmer.sbl

website/algorithms/armenian/tarball.tgz

website/algorithms/basque

website/algorithms/basque/stemmer.html

website/algorithms/basque/tarball.tgz

website/algorithms/catalan

website/algorithms/catalan/stemmer.html

website/algorithms/catalan/tarball.tgz

website/algorithms/danish

website/algorithms/danish/stemmer.html

website/algorithms/danish/stop.txt

website/algorithms/dutch

website/algorithms/dutch/stemmer.html

website/algorithms/dutch/stop.txt

website/algorithms/english

website/algorithms/english/stemmer.html

website/algorithms/english/stop.txt

website/algorithms/finnish

website/algorithms/finnish/stemmer.html

website/algorithms/finnish/stop.txt

website/algorithms/french

website/algorithms/french/stemmer.html

website/algorithms/french/stop.txt

website/algorithms/german

website/algorithms/german/stemmer.html

website/algorithms/german/stop.txt

website/algorithms/german2

website/algorithms/german2/stemmer.html

website/algorithms/hungarian

website/algorithms/hungarian/stemmer.html

website/algorithms/hungarian/stop.txt

website/algorithms/italian

website/algorithms/italian/stemmer.html

website/algorithms/italian/stop.txt

website/algorithms/kraaij_pohlmann

website/algorithms/kraaij_pohlmann/stemmer.html

website/algorithms/lovins

website/algorithms/lovins/festschrift.html

website/algorithms/lovins/porter-1.jpg

website/algorithms/lovins/stemmer.html

website/algorithms/norwegian

website/algorithms/norwegian/stemmer.html

website/algorithms/norwegian/stop.txt

website/algorithms/porter

website/algorithms/porter/stemmer.html

website/algorithms/portuguese

website/algorithms/portuguese/stemmer.html

website/algorithms/portuguese/stop.txt

website/algorithms/romanian

website/algorithms/romanian/stemmer.html

website/algorithms/russian

website/algorithms/russian/stemmer.html

website/algorithms/russian/stop.txt

website/algorithms/spanish

website/algorithms/spanish/stemmer.html

website/algorithms/spanish/stop.txt

website/algorithms/swedish

website/algorithms/swedish/stemmer.html

website/algorithms/swedish/stop.txt

website/algorithms/turkish

website/algorithms/turkish/accompanying_paper.doc

website/algorithms/turkish/stemmer.html

website/compiler

website/compiler/snowman.html

website/contrib

website/contrib/PySnowballStemmer-0.0.1.tar.gz

website/otherapps

website/otherapps/pascal

website/otherapps/pascal/intro.html

website/otherapps/pascal/stemming.zip

website/otherapps/romanian

website/otherapps/romanian/intro.html

website/otherapps/romanian/romanian1.tgz

website/otherapps/romanian/romanian2.tgz

website/otherapps/schinke

website/otherapps/schinke/intro.html

website/otherapps/schinke/schinke.tgz

website/otherlangs

website/otherlangs/english_c.txt

website/otherlangs/english_cpp.txt

website/otherlangs/english_erl.txt

website/otherlangs/french_javascript.txt

website/otherlangs/german_javascript.txt

website/otherlangs/german_py.txt

website/otherlangs/index.html

website/otherlangs/italian_csharp.txt

website/otherlangs/portuguese_java.txt

website/otherlangs/russian_php5.txt

website/otherlangs/urim_c.txt

website/otherlangs/urim_javascript.txt

website/robots.txt

website/runtime

website/runtime/use.html

website/snub-dodecahedron.gif

website/texts/apostrophe.html

website/texts/earlyenglish.html

website/wrappers/PyStemmer-1.0.1.tar.gz

website/wrappers/PyStemmer-1.0.tar.gz

website/wrappers/PyStemmer-1.1.0.tar.gz

website/wrappers/PyStemmer-1.2.0.tar.gz

website/wrappers/perl.tgz

files removed:
snowball/.cvsignore

snowball/algorithms/danish/stem.sbl

snowball/algorithms/dutch/stem.sbl

snowball/algorithms/english/stem.sbl

snowball/algorithms/finnish/stem.sbl

snowball/algorithms/french/stem.sbl

snowball/algorithms/german/stem.sbl

snowball/algorithms/german2/stem.sbl

snowball/algorithms/italian/stem.sbl

snowball/algorithms/lovins/stem.sbl

snowball/algorithms/norwegian/stem.sbl

snowball/algorithms/porter/stem.sbl

snowball/algorithms/portuguese/stem.sbl

snowball/algorithms/russian/stem.sbl

snowball/algorithms/spanish/stem.sbl

snowball/algorithms/swedish/stem.sbl

snowball/compiler/sort.c

snowball/compiler/syswords

snowball/compiler/syswords2

snowball/libstemmer/.cvsignore

snowball/libstemmer/libstemmer.c

website/.cvsignore

website/Makefile

website/danish

website/danish/.cvsignore

website/danish/diffs.txt

website/danish/output.txt

website/danish/stem-MS-DOS-Latin-I.sbl

website/danish/stem.sbl

website/danish/stemmer.html

website/danish/stop.txt

website/danish/voc.txt

website/dutch

website/dutch/.cvsignore

website/dutch/diffs.txt

website/dutch/output.txt

website/dutch/stem-MS-DOS-Latin-I.sbl

website/dutch/stem.sbl

website/dutch/stemmer.html

website/dutch/stop.txt

website/dutch/voc.txt

website/english

website/english/.cvsignore

website/english/diffs.txt

website/english/output.txt

website/english/stem.sbl

website/english/stemmer.html

website/english/stop.txt

website/english/voc.txt

website/finnish

website/finnish/diffs.txt

website/finnish/output.txt

website/finnish/stem.sbl

website/finnish/stemmer.html

website/finnish/voc.txt

website/french

website/french/.cvsignore

website/french/diffs.txt

website/french/output.txt

website/french/stem-MS-DOS-Latin-I.sbl

website/french/stem.sbl

website/french/stemmer.html

website/french/stop.txt

website/french/voc.txt

website/german

website/german/.cvsignore

website/german/diffs.txt

website/german/output.txt

website/german/stem-MS-DOS-Latin-I.sbl

website/german/stem.sbl

website/german/stemmer.html

website/german/stop.txt

website/german/voc.txt

website/german2

website/german2/stem.c

website/german2/stem.h

website/german2/stem.sbl

website/german2/stemmer.html

website/italian

website/italian/.cvsignore

website/italian/diffs.txt

website/italian/output.txt

website/italian/stem-MS-DOS-Latin-I.sbl

website/italian/stem.sbl

website/italian/stemmer.html

website/italian/stop.txt

website/italian/voc.txt

website/kp

website/kp/D.txt

website/kp/stem.c

website/kp/stem.h

website/kp/stem.sbl

website/kp/stemmer.html

website/libstemmer

website/libstemmer/.cvsignore

website/libstemmer/libstemmer.h

website/libstemmer/wrapper.c

website/lovins

website/lovins/stem.c

website/lovins/stem.h

website/lovins/stem.sbl

website/lovins/stemmer.html

website/net

website/net/sf

website/net/sf/snowball

website/net/sf/snowball/Among.java

website/net/sf/snowball/SnowballProgram.java

website/net/sf/snowball/TestApp.java

website/norwegian

website/norwegian/.cvsignore

website/norwegian/diffs.txt

website/norwegian/output.txt

website/norwegian/stem-MS-DOS-Latin-I.sbl

website/norwegian/stem.sbl

website/norwegian/stemmer.html

website/norwegian/stop.txt

website/norwegian/voc.txt

website/p

website/p/analyser.c

website/p/driver.c

website/p/generator.c

website/p/generator_java.c

website/p/header.h

website/p/make

website/p/snowman.html

website/p/sort.c

website/p/space.c

website/p/syswords

website/p/syswords2

website/p/tokeniser.c

website/porter

website/porter/.cvsignore

website/porter/diffs.txt

website/porter/output.txt

website/porter/stem.sbl

website/porter/stemmer.html

website/porter/voc.txt

website/portuguese

website/portuguese/.cvsignore

website/portuguese/diffs.txt

website/portuguese/output.txt

website/portuguese/stem-MS-DOS-Latin-I.sbl

website/portuguese/stem.sbl

website/portuguese/stemmer.html

website/portuguese/stop.txt

website/portuguese/voc.txt

website/q

website/q/api.c

website/q/api.h

website/q/driver-porter.c

website/q/driver.c

website/q/driver.template

website/q/header.h

website/q/make

website/q/use.html

website/q/utilities.c

website/russian

website/russian/.cvsignore

website/russian/diffs.txt

website/russian/output.txt

website/russian/stem.sbl

website/russian/stemmer.html

website/russian/stop.txt

website/russian/voc.txt

website/snub-dodecahedron.gif

website/spanish

website/spanish/.cvsignore

website/spanish/diffs.txt

website/spanish/output.txt

website/spanish/stem-MS-DOS-Latin-I.sbl

website/spanish/stem.sbl

website/spanish/stemmer.html

website/spanish/stop.txt

website/spanish/voc.txt

website/swedish

website/swedish/.cvsignore

website/swedish/diffs.txt

website/swedish/output.txt

website/swedish/stem-MS-DOS-Latin-I.sbl

website/swedish/stem.sbl

website/swedish/stemmer.html

website/swedish/stop.txt

website/swedish/voc.txt

website/texts/snowball.tgz

website/wrappers/perl.tgz

files modified:
MODULE

snowball/GNUmakefile

snowball/README

snowball/compiler/analyser.c

snowball/compiler/driver.c

snowball/compiler/generator.c

snowball/compiler/generator_java.c

snowball/compiler/header.h

snowball/compiler/space.c

snowball/compiler/tokeniser.c

snowball/doc/TODO

snowball/examples/stemwords.c

snowball/include/libstemmer.h

snowball/libstemmer/mkmodules.pl

snowball/runtime/api.c

snowball/runtime/api.h

snowball/runtime/header.h

snowball/runtime/utilities.c

website/buglist.txt

website/codesets/guide.html

website/credits.php

website/demo.php

website/download.php

website/index.php

website/index_body.html

website/lists.php

website/menu.inc

website/projects.php

website/texts/germanic.html

website/texts/howtohelp.html

website/texts/introduction.html

website/texts/quickintro.html

website/texts/r1r2.html

website/texts/romance.html

website/texts/scandinavian.html

website/texts/stemmersoverview.html

website/wrappers/guide.html

Show diffs side-by-side

added added

removed removed

pystemmer/benchmark.py

#!/usr/bin/env python

# This script runs a simple benchmark of the python stemmer interface.

import timeit

# Sample texts to benchmark against; paths are relative to the current
# working directory.
datafiles = ('sampledata/englishvoc.txt', 'sampledata/puttydoc.txt',)

# Not referenced anywhere in this script; retained so the module-level name
# still exists for anything that might look it up.
words_lst = [None]

for datafile in datafiles:
    # Load the words here only so the word count can be reported; the timed
    # code re-reads the file itself inside the timeit setup snippet below.
    words = []
    with open(datafile) as infile:  # 'with' closes the handle deterministically
        for line in infile:
            words.extend(line.split())

    for cache_size in (0, 1, 10000, 30000):
        # Setup code run by timeit before each timing repetition.  The path
        # is interpolated with %r so that quotes or backslashes in a file
        # name cannot break the generated source.  The second Stemmer()
        # argument is presumably the cache size, matching the "cacheSize"
        # label printed below -- confirm against the Stemmer module.
        setup = r"""
import Stemmer
stemmer = Stemmer.Stemmer('en', %d)
words = []
with open(%r) as infile:
    for line in infile:
        words.extend(line.split())
""" % (cache_size, datafile)

        t = timeit.Timer(setup=setup,
                         stmt='stemmer.stemWords(words)')

        for iters in (1, 2, 3, 10):
            # repeat(5, iters) returns five totals, each covering `iters`
            # executions of the statement; divide to get per-execution time
            # and report the fastest of the five.
            times = [elapsed / iters for elapsed in t.repeat(5, iters)]
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print("'%s':words=%d,cacheSize=%d,iters=%d,mintime=%f" % (
                datafile, len(words), cache_size, iters, min(times)))

Older »