/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.analysis;
19
import java.io.IOException;
20
import java.util.LinkedList;
22
import org.apache.commons.codec.language.DoubleMetaphone;
23
import org.apache.lucene.analysis.TokenFilter;
24
import org.apache.lucene.analysis.TokenStream;
25
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
26
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
28
/**
 * Token filter that computes Double Metaphone phonetic codes for the terms of
 * its input stream, using the commons-codec {@link DoubleMetaphone} encoder.
 *
 * NOTE(review): this copy of the file carries stray listing numbers between
 * lines and the numbers skip values, so some original lines appear to be
 * missing; compare against the authoritative source before editing.
 */
public final class DoubleMetaphoneFilter extends TokenFilter {
30
// Label constant; no use of it is visible in this part of the file.
private static final String TOKEN_TYPE = "DoubleMetaphone";
32
// Captured attribute states waiting to be emitted; incrementToken() appends
// at the tail and removes from the head (first-in, first-out).
private final LinkedList<State> remainingTokens = new LinkedList<State>();
33
// Encoder that produces the primary and alternate phonetic codes.
private final DoubleMetaphone encoder = new DoubleMetaphone();
34
// When true, incrementToken() gives the first phonetic token a position
// increment of 0 and starts with state saving enabled.
// NOTE(review): presumably this keeps the original token alongside the
// codes; the assignment of this field is not visible here, confirm.
private final boolean inject;
35
// Term text of the current token; read for encoding and overwritten with codes.
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
36
// Position increment of the current token; rewritten for emitted codes.
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
38
/**
 * Creates the filter and limits the length of the generated phonetic codes.
 *
 * @param input         the upstream token stream
 *                      (NOTE(review): its hand-off to the superclass is not
 *                      visible in this copy, confirm)
 * @param maxCodeLength maximum code length, forwarded to
 *                      {@link DoubleMetaphone#setMaxCodeLen(int)}
 * @param inject        expected to initialise the {@code inject} field
 *                      (NOTE(review): that assignment is not visible in this
 *                      copy, confirm)
 */
protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
40
// Cap the length of the codes the encoder will produce.
// NOTE(review): the listing numbers skip around this statement, so lines
// (including the closing brace) appear to be missing here.
this.encoder.setMaxCodeLen(maxCodeLength);
45
/**
 * Advances to the next token, replacing or supplementing each input term with
 * its Double Metaphone codes.
 *
 * Visible behaviour: queued states are restored before new input is read;
 * {@code false} is returned once the input is exhausted; zero-length terms are
 * passed through unchanged.
 *
 * NOTE(review): the listing numbers skip in several places inside this method
 * (for example 50 to 54, 75 to 79, 93 to 98), so return statements, closing
 * braces and any use of {@code saveState} are not visible in this copy.
 *
 * @return {@code false} when the input stream has no more tokens,
 *         {@code true} when a token is available
 * @throws IOException if advancing the underlying stream fails
 */
public boolean incrementToken() throws IOException {
48
// Emit previously captured states first, oldest first.
if (!remainingTokens.isEmpty()) {
49
// clearAttributes(); // not currently necessary
50
restoreState(remainingTokens.removeFirst());
54
// Pull the next token from the wrapped stream; stop when it is exhausted.
if (!input.incrementToken()) return false;
56
int len = termAtt.length();
57
if (len==0) return true; // pass through zero length terms
59
// With inject set, the first code gets increment 0; otherwise it takes over
// the current token's position increment.
int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
61
// Encode the term text twice: primary code and alternate code.
String v = termAtt.toString();
62
String primaryPhoneticValue = encoder.doubleMetaphone(v);
63
String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
65
// a flag to lazily save state if needed... this avoids a save/restore when only
66
// one token will be generated.
// NOTE(review): saveState is never read in the visible lines; the guards that
// consult it are presumably among the missing lines.
67
boolean saveState=inject;
69
// Use the primary code only when it is non-empty and differs from the term text.
if (primaryPhoneticValue!=null && primaryPhoneticValue.length() > 0 && !primaryPhoneticValue.equals(v)) {
71
// Queue the attribute state as it stands before the term is overwritten.
remainingTokens.addLast(captureState());
73
posAtt.setPositionIncrement( firstAlternativeIncrement );
74
// Any further code for this term gets position increment 0.
firstAlternativeIncrement = 0;
75
termAtt.setEmpty().append(primaryPhoneticValue);
79
// Use the alternate code only when it is non-empty, differs from the primary
// code, and the primary code itself differed from the term text.
if (alternatePhoneticValue!=null && alternatePhoneticValue.length() > 0
80
&& !alternatePhoneticValue.equals(primaryPhoneticValue)
81
&& !primaryPhoneticValue.equals(v)) {
83
// Queue the current state before the term is overwritten with the alternate code.
remainingTokens.addLast(captureState());
86
posAtt.setPositionIncrement( firstAlternativeIncrement );
87
termAtt.setEmpty().append(alternatePhoneticValue);
91
// Just one token to return, so no need to capture/restore
92
// any state, simply return it.
93
if (remainingTokens.isEmpty()) {
98
// Queue the final state so it is emitted after the states captured above.
remainingTokens.addLast(captureState());
104
/**
 * Discards any captured states that are still queued for emission.
 *
 * NOTE(review): the listing numbers skip from 104 to 106, so a statement
 * between the signature and the clear() call appears to be missing from this
 * copy; confirm against the authoritative source.
 *
 * @throws IOException declared by the signature; the visible statement does
 *         not throw it
 */
public void reset() throws IOException {
106
// Drop leftover states so they are not emitted after the reset.
remainingTokens.clear();