2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.lucene.analysis;
21
import java.io.IOException;
22
import java.io.Reader;
25
import org.apache.lucene.analysis.CharArraySet;
26
import org.apache.lucene.analysis.ReusableAnalyzerBase;
27
import org.apache.lucene.analysis.WordlistLoader;
28
import org.apache.lucene.util.IOUtils;
29
import org.apache.lucene.util.Version;
32
* Base class for Analyzers that need to make use of stopword sets.
35
public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
38
* An immutable stopword set
40
protected final CharArraySet stopwords;
42
protected final Version matchVersion;
45
* Returns the analyzer's stopword set or an empty set if the analyzer has no
* stopwords.
*
* @return the analyzer's stopword set or an empty set if the analyzer has no
*         stopwords
51
public Set<?> getStopwordSet() {
/**
 * Creates a new instance initialized with the given stopword set.
 *
 * @param version
 *          the Lucene version for cross version compatibility
 * @param stopwords
 *          the analyzer's stopword set; <code>null</code> selects
 *          {@link CharArraySet#EMPTY_SET}
 */
protected StopwordAnalyzerBase(final Version version, final Set<?> stopwords) {
  matchVersion = version;
  // analyzers should use char array set for stopwords!
  // A non-null argument is passed through CharArraySet.copy and then
  // CharArraySet.unmodifiableSet before it is stored in the field.
  this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
      .unmodifiableSet(CharArraySet.copy(version, stopwords));
}
71
* Creates a new Analyzer with an empty stopword set.
*
* @param version
*          the Lucene version for cross version compatibility
76
protected StopwordAnalyzerBase(final Version version) {
81
* Creates a CharArraySet from a file resource associated with a class. (See
82
* {@link Class#getResourceAsStream(String)}).
85
* <code>true</code> if the set should ignore the case of the
86
* stopwords, otherwise <code>false</code>
88
* a class that is associated with the given stopwordResource
90
* name of the resource file associated with the given class
92
* comment string to ignore in the stopword file
93
* @return a CharArraySet containing the distinct stopwords from the given
96
* if loading the stopwords throws an {@link IOException}
98
protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
99
final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
100
final String comment) throws IOException {
101
Reader reader = null;
103
reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
104
return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_31, 16, ignoreCase));
106
IOUtils.close(reader);
112
* Creates a CharArraySet from a file.
115
* the stopwords file to load
117
* @param matchVersion
118
* the Lucene version for cross version compatibility
119
* @return a CharArraySet containing the distinct stopwords from the given
121
* @throws IOException
122
* if loading the stopwords throws an {@link IOException}
124
protected static CharArraySet loadStopwordSet(File stopwords,
125
Version matchVersion) throws IOException {
126
Reader reader = null;
128
reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
129
return WordlistLoader.getWordSet(reader, matchVersion);
131
IOUtils.close(reader);
136
* Creates a CharArraySet from a file.
139
* the stopwords reader to load
141
* @param matchVersion
142
* the Lucene version for cross version compatibility
143
* @return a CharArraySet containing the distinct stopwords from the given
145
* @throws IOException
146
* if loading the stopwords throws an {@link IOException}
148
protected static CharArraySet loadStopwordSet(Reader stopwords,
149
Version matchVersion) throws IOException {
151
return WordlistLoader.getWordSet(stopwords, matchVersion);
153
IOUtils.close(stopwords);