2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.lucene.analysis;
20
import java.io.IOException;
21
import java.io.Reader;
24
* A convenience subclass of Analyzer that makes it easy to implement
25
* {@link TokenStream} reuse.
27
* ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
28
* for the most common use-cases. Analyzers such as
29
* {@link PerFieldAnalyzerWrapper} that behave differently depending upon the
30
* field name need to subclass Analyzer directly instead.
33
* To prevent consistency problems, this class does not allow subclasses to
34
* extend {@link #reusableTokenStream(String, Reader)} or
35
* {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
36
* implement {@link #createComponents(String, Reader)}.
39
public abstract class ReusableAnalyzerBase extends Analyzer {
42
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
45
* the name of the field whose content is passed to the
46
* {@link TokenStreamComponents} sink as a reader
48
* the reader passed to the {@link Tokenizer} constructor
49
* @return the {@link TokenStreamComponents} for this analyzer.
51
protected abstract TokenStreamComponents createComponents(String fieldName,
55
* This method uses {@link #createComponents(String, Reader)} to obtain an
56
* instance of {@link TokenStreamComponents}. It returns the sink of the
57
* components and stores the components internally. Subsequent calls to this
58
* method will reuse the previously stored components if and only if the
59
* {@link TokenStreamComponents#reset(Reader)} method returned
60
* <code>true</code>. Otherwise a new instance of
61
* {@link TokenStreamComponents} is created.
63
* @param fieldName the name of the field the created TokenStream is used for
64
* @param reader the reader the stream's source reads from
67
public final TokenStream reusableTokenStream(final String fieldName,
68
final Reader reader) throws IOException {
69
TokenStreamComponents streamChain = (TokenStreamComponents)
70
getPreviousTokenStream();
71
final Reader r = initReader(reader);
72
if (streamChain == null || !streamChain.reset(r)) {
73
streamChain = createComponents(fieldName, r);
74
setPreviousTokenStream(streamChain);
76
return streamChain.getTokenStream();
80
* This method uses {@link #createComponents(String, Reader)} to obtain an
81
* instance of {@link TokenStreamComponents} and returns the sink of the
82
* components. Each call to this method will create a new instance of
83
* {@link TokenStreamComponents}. Created {@link TokenStream} instances are
* never reused.
86
* @param fieldName the name of the field the created TokenStream is used for
87
* @param reader the reader the stream's source reads from
90
public final TokenStream tokenStream(final String fieldName,
91
final Reader reader) {
92
return createComponents(fieldName, initReader(reader)).getTokenStream();
96
* Override this if you want to add a CharFilter chain.
98
protected Reader initReader(Reader reader) {
103
* This class encapsulates the outer components of a token stream. It provides
104
* access to the source ({@link Tokenizer}) and the outer end (sink), an
105
* instance of {@link TokenFilter} which also serves as the
106
* {@link TokenStream} returned by
107
* {@link Analyzer#tokenStream(String, Reader)} and
108
* {@link Analyzer#reusableTokenStream(String, Reader)}.
110
public static class TokenStreamComponents {
111
protected final Tokenizer source;
112
protected final TokenStream sink;
115
* Creates a new {@link TokenStreamComponents} instance.
118
* @param source
* the analyzer's tokenizer
* @param result
* the analyzer's resulting token stream
122
public TokenStreamComponents(final Tokenizer source,
123
final TokenStream result) {
124
this.source = source;
129
* Creates a new {@link TokenStreamComponents} instance.
132
* @param source
* the analyzer's tokenizer
134
public TokenStreamComponents(final Tokenizer source) {
135
this.source = source;
140
* Resets the encapsulated components with the given reader. This method by
141
* default returns <code>true</code> indicating that the components have
142
* been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
143
* their own {@link TokenStreamComponents} returning <code>false</code> if
144
* the components cannot be reset.
147
* @param reader
* a reader to reset the source component
148
* @return <code>true</code> if the components were reset, otherwise
* <code>false</code>
150
* @throws IOException
151
* if the component's reset method throws an {@link IOException}
153
protected boolean reset(final Reader reader) throws IOException {
154
source.reset(reader);
159
* Returns the sink {@link TokenStream}
161
* @return the sink {@link TokenStream}
163
protected TokenStream getTokenStream() {