1
# This file contains a list of stemmers to include in the distribution.
2
# The format is one stemmer per line, with space-separated items - on each line:
3
# First item is name of stemmer.
4
# Second item is a comma-separated list of character sets.
5
# Third item is a comma-separated list of names to refer to the stemmer by.
7
# Lines starting with a #, or blank lines, are ignored.
9
# List all the main algorithms for each language, in UTF-8.
11
danish UTF_8 danish,da,dan
12
dutch UTF_8 dutch,nl,dut,nld
13
english UTF_8 english,en,eng
14
finnish UTF_8 finnish,fi,fin
15
french UTF_8 french,fr,fre,fra
16
german UTF_8 german,de,ger,deu
17
hungarian UTF_8 hungarian,hu,hun
18
italian UTF_8 italian,it,ita
19
norwegian UTF_8 norwegian,no,nor
20
portuguese UTF_8 portuguese,pt,por
21
romanian UTF_8 romanian,ro,rum,ron
22
russian UTF_8 russian,ru,rus
23
spanish UTF_8 spanish,es,esl,spa
24
swedish UTF_8 swedish,sv,swe
25
turkish UTF_8 turkish,tr,tur
27
# Also include the traditional porter algorithm for english.
28
# The porter algorithm is included in the libstemmer distribution to assist
29
# with backwards compatibility, but for new systems the english algorithm
30
# should be used in preference.
33
# Some other stemmers in the snowball project are not included in the standard
34
# distribution. To compile a libstemmer with them in, add them to this list,
35
# and regenerate the distribution. (You will need a full source checkout for
36
# this.) They are included in the snowball website as curiosities, but are not
37
# intended for general use, and use of them is not fully supported. These
40
# german2 - This is a slight modification of the german stemmer.
41
#german2 UTF_8 german2
43
# kraaij_pohlmann - This is a different dutch stemmer.
44
#kraaij_pohlmann UTF_8 kraaij_pohlmann
46
# lovins - This is an english stemmer, but fairly outdated, and
47
# only really applicable to a restricted type of input text
48
# (keywords in academic publications).