2
* Copyright 2004 The Apache Software Foundation
4
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
* use this file except in compliance with the License. You may obtain a copy of
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
* License for the specific language governing permissions and limitations under
17
package org.apache.lucene.analysis.stempel;
19
import java.io.BufferedInputStream;
20
import java.io.DataInputStream;
21
import java.io.IOException;
22
import java.io.InputStream;
23
import java.util.Locale;
25
import org.egothor.stemmer.Diff;
26
import org.egothor.stemmer.Trie;
/**
 * The Stemmer class is a convenient facade for other stemmer-related classes. The
 * core stemming algorithm and its implementation are taken verbatim from the
 * Egothor project (<a href="http://www.egothor.org">www.egothor.org</a>).
 * <p>
 * Even though the stemmer tables supplied in the distribution package are built
 * for the Polish language, there is nothing language-specific here.
 */
39
public class StempelStemmer {
40
private Trie stemmer = null;
41
private StringBuilder buffer = new StringBuilder();
/**
 * Create a Stemmer using the selected stemmer table.
 * <p>
 * The table is read from the stream with {@link #load(InputStream)} and the
 * result is handed to {@link #StempelStemmer(Trie)}.
 *
 * @param stemmerTable stream containing the stemmer table.
 * @throws IOException if {@link #load(InputStream)} fails to read the table.
 */
public StempelStemmer(InputStream stemmerTable) throws IOException {
  this(load(stemmerTable));
}
/**
 * Create a Stemmer using a pre-loaded stemmer table.
 * <p>
 * The table is stored as-is; no copy is made and no validation is performed.
 *
 * @param stemmer pre-loaded stemmer table.
 */
public StempelStemmer(Trie stemmer) {
  this.stemmer = stemmer;
}
62
* Load a stemmer table from an inputstream.
64
public static Trie load(InputStream stemmerTable) throws IOException {
65
DataInputStream in = null;
67
in = new DataInputStream(new BufferedInputStream(stemmerTable));
68
String method = in.readUTF().toUpperCase(Locale.ENGLISH);
69
if (method.indexOf('M') < 0) {
70
return new org.egothor.stemmer.Trie(in);
72
return new org.egothor.stemmer.MultiTrie2(in);
82
* @param word input word to be stemmed.
83
* @return stemmed word, or null if the stem could not be generated.
85
public StringBuilder stem(CharSequence word) {
86
CharSequence cmd = stemmer.getLastOnPath(word);
94
Diff.apply(buffer, cmd);
96
if (buffer.length() > 0)