1
package org.apache.lucene.search;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.index.IndexReader;
21
import org.apache.lucene.index.Term;
22
import org.apache.lucene.util.ToStringUtils;
24
import java.io.IOException;
26
/** Implements the fuzzy search query. The similarity measurement
27
* is based on the Levenshtein (edit distance) algorithm.
29
* <p><em>Warning:</em> this query is not very scalable with its default prefix
30
* length of 0 - in this case, *every* term will be enumerated and
31
* cause an edit score calculation.
33
* <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
34
* as default. So terms will be collected and scored according to their
35
* edit distance. Only the top terms are used for building the {@link BooleanQuery}.
36
* It is not recommended to change the rewrite mode for fuzzy queries.
38
public class FuzzyQuery extends MultiTermQuery {
40
public final static float defaultMinSimilarity = 0.5f;
41
public final static int defaultPrefixLength = 0;
42
public final static int defaultMaxExpansions = Integer.MAX_VALUE;
44
private float minimumSimilarity;
45
private int prefixLength;
46
private boolean termLongEnough = false;
51
* Create a new FuzzyQuery that will match terms with a similarity
52
* of at least <code>minimumSimilarity</code> to <code>term</code>.
53
* If a <code>prefixLength</code> > 0 is specified, a common prefix
54
* of that length is also required.
56
* @param term the term to search for
57
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
58
* between the query term and the matching terms. For example, for a
59
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
60
* as the query term is considered similar to the query term if the edit distance
61
* between both terms is less than <code>length(term)*0.5</code>
62
* @param prefixLength length of common (non-fuzzy) prefix
63
* @param maxExpansions the maximum number of terms to match. If this number is
64
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
65
* then the maxClauseCount will be used instead.
66
* @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0
67
* or if prefixLength < 0
69
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
73
if (minimumSimilarity >= 1.0f)
74
throw new IllegalArgumentException("minimumSimilarity >= 1");
75
else if (minimumSimilarity < 0.0f)
76
throw new IllegalArgumentException("minimumSimilarity < 0");
78
throw new IllegalArgumentException("prefixLength < 0");
79
if (maxExpansions < 0)
80
throw new IllegalArgumentException("maxExpansions < 0");
82
setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
84
if (term.text().length() > 1.0f / (1.0f - minimumSimilarity)) {
85
this.termLongEnough = true;
88
this.minimumSimilarity = minimumSimilarity;
89
this.prefixLength = prefixLength;
93
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, prefixLength, Integer.MAX_VALUE)}.
95
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
96
this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
100
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0, Integer.MAX_VALUE)}.
102
public FuzzyQuery(Term term, float minimumSimilarity) {
103
this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
107
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0, Integer.MAX_VALUE)}.
109
public FuzzyQuery(Term term) {
110
this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);
114
* Returns the minimum similarity that is required for this query to match.
115
* @return float value between 0.0 and 1.0
117
public float getMinSimilarity() {
118
return minimumSimilarity;
122
* Returns the non-fuzzy prefix length. This is the number of characters at the start
123
* of a term that must be identical (not fuzzy) to the query term if the query
124
* is to match that term.
126
public int getPrefixLength() {
131
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
132
if (!termLongEnough) { // can only match if it's exact
133
return new SingleTermEnum(reader, term);
135
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
139
* Returns the pattern term.
141
public Term getTerm() {
146
public String toString(String field) {
147
final StringBuilder buffer = new StringBuilder();
148
if (!term.field().equals(field)) {
149
buffer.append(term.field());
152
buffer.append(term.text());
154
buffer.append(Float.toString(minimumSimilarity));
155
buffer.append(ToStringUtils.boost(getBoost()));
156
return buffer.toString();
160
public int hashCode() {
161
final int prime = 31;
162
int result = super.hashCode();
163
result = prime * result + Float.floatToIntBits(minimumSimilarity);
164
result = prime * result + prefixLength;
165
result = prime * result + ((term == null) ? 0 : term.hashCode());
170
public boolean equals(Object obj) {
173
if (!super.equals(obj))
175
if (getClass() != obj.getClass())
177
FuzzyQuery other = (FuzzyQuery) obj;
178
if (Float.floatToIntBits(minimumSimilarity) != Float
179
.floatToIntBits(other.minimumSimilarity))
181
if (prefixLength != other.prefixLength)
184
if (other.term != null)
186
} else if (!term.equals(other.term))