1
package org.apache.lucene.search;
4
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.IOException;
21
import java.util.ArrayList;
23
import org.apache.lucene.index.IndexReader;
24
import org.apache.lucene.index.Term;
26
class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
28
// Defaults derived from rough tests with a 20.0 million
29
// doc Wikipedia index. With more than 350 terms in the
30
// query, the filter method is fastest:
31
public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
33
// If the query will hit more than 1 in 1000 of the docs
34
// in the index (0.1%), the filter method is fastest:
35
public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
37
private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
38
private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
40
/** If the number of terms in this query is equal to or
41
* larger than this setting then {@link
42
* #CONSTANT_SCORE_FILTER_REWRITE} is used. */
43
public void setTermCountCutoff(int count) {
44
termCountCutoff = count;
47
/** @see #setTermCountCutoff */
48
public int getTermCountCutoff() {
49
return termCountCutoff;
52
/** If the number of documents to be visited in the
53
* postings exceeds this specified percentage of the
54
* maxDoc() for the index, then {@link
55
* #CONSTANT_SCORE_FILTER_REWRITE} is used.
56
* @param percent 0.0 to 100.0 */
57
public void setDocCountPercent(double percent) {
58
docCountPercent = percent;
61
/** @see #setDocCountPercent */
62
public double getDocCountPercent() {
63
return docCountPercent;
67
protected BooleanQuery getTopLevelQuery() {
68
return new BooleanQuery(true);
72
protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) {
73
topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
77
public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
79
// Get the enum and start visiting terms. If we
80
// exhaust the enum before hitting either of the
81
// cutoffs, we use ConstantBooleanQueryRewrite; else,
82
// ConstantFilterRewrite:
83
final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
84
final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
86
final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
87
collectTerms(reader, query, col);
90
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
93
if (col.pendingTerms.isEmpty()) {
94
result = getTopLevelQuery();
96
BooleanQuery bq = getTopLevelQuery();
97
for(Term term : col.pendingTerms) {
98
addClause(bq, term, 1.0f);
101
result = new ConstantScoreQuery(bq);
102
result.setBoost(query.getBoost());
104
query.incTotalNumberOfTerms(col.pendingTerms.size());
109
private static final class CutOffTermCollector implements TermCollector {
110
CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
111
this.reader = reader;
112
this.docCountCutoff = docCountCutoff;
113
this.termCountLimit = termCountLimit;
116
public boolean collect(Term t, float boost) throws IOException {
118
// Loading the TermInfo from the terms dict here
119
// should not be costly, because 1) the
120
// query/filter will load the TermInfo when it
121
// runs, and 2) the terms dict has a cache:
122
docVisitCount += reader.docFreq(t);
123
if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
130
int docVisitCount = 0;
131
boolean hasCutOff = false;
133
final IndexReader reader;
134
final int docCountCutoff, termCountLimit;
135
final ArrayList<Term> pendingTerms = new ArrayList<Term>();
139
public int hashCode() {
140
final int prime = 1279;
141
return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
145
public boolean equals(Object obj) {
150
if (getClass() != obj.getClass())
153
ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
154
if (other.termCountCutoff != termCountCutoff) {
158
if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {