package org.apache.solr.analysis;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.util.plugin.ResourceLoaderAware;
33
* TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
34
* Example config for British English including a custom dictionary, case insensitive matching:
35
* <pre class="prettyprint" >
36
* <filter class="solr.HunspellStemFilterFactory"
37
* dictionary="en_GB.dic,my_custom.dic"
38
* affix="en_GB.aff"
39
* ignoreCase="true" /></pre>
40
* Both parameters dictionary and affix are mandatory.
42
* The parameter ignoreCase (true/false) controls whether matching is case sensitive or not. Default false.
44
* Dictionaries for many languages are available through the OpenOffice project.
46
* See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
48
public class HunspellStemFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
50
private static final String PARAM_DICTIONARY = "dictionary";
51
private static final String PARAM_AFFIX = "affix";
52
private static final String PARAM_IGNORE_CASE = "ignoreCase";
53
private static final String TRUE = "true";
54
private static final String FALSE = "false";
56
private HunspellDictionary dictionary;
57
private boolean ignoreCase = false;
60
* Loads the hunspell dictionary and affix files defined in the configuration
62
* @param loader ResourceLoader used to load the files
64
public void inform(ResourceLoader loader) {
66
String dictionaryFiles[] = args.get(PARAM_DICTIONARY).split(",");
67
String affixFile = args.get(PARAM_AFFIX);
68
String pic = args.get(PARAM_IGNORE_CASE);
70
if(pic.equalsIgnoreCase(TRUE)) ignoreCase = true;
71
else if(pic.equalsIgnoreCase(FALSE)) ignoreCase = false;
72
else throw new SolrException(ErrorCode.UNKNOWN, "Unknown value for "+PARAM_IGNORE_CASE+": "+pic+". Must be true or false");
76
List<InputStream> dictionaries = new ArrayList<InputStream>();
77
for (String file : dictionaryFiles) {
78
dictionaries.add(loader.openResource(file));
80
this.dictionary = new HunspellDictionary(loader.openResource(affixFile), dictionaries, luceneMatchVersion, ignoreCase);
81
} catch (Exception e) {
82
throw new RuntimeException("Unable to load hunspell data! [dictionary=" + args.get("dictionary") + ",affix=" + affixFile + "]", e);
87
* Creates an instance of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter} that will filter the given
90
* @param tokenStream TokenStream that will be filtered
91
* @return HunspellStemFilter that filters the TokenStream
93
public TokenStream create(TokenStream tokenStream) {
94
return new HunspellStemFilter(tokenStream, dictionary);