1
package org.apache.lucene.search;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.IOException;
21
import java.util.List;
23
import org.apache.lucene.index.IndexReader;
24
import org.apache.lucene.search.BooleanClause.Occur;
26
/* Description from Doug Cutting (excerpted from
29
* BooleanScorer uses an array to score windows of
30
* 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
31
* etc. For each window it iterates through all query terms
32
* and accumulates a score in table[doc%2K]. It also stores
33
* in the table a bitmask representing which terms
34
* contributed to the score. Non-zero scores are chained in
35
* a linked list. At the end of scoring each window it then
36
* iterates through the linked list and, if the bitmask
37
* matches the boolean constraints, collects a hit. For
38
* boolean queries with lots of frequent terms this can be
39
* much faster, since it does not need to update a priority
40
* queue for each posting, instead performing constant-time
41
* operations per posting. The only downside is that it
42
* results in hits being delivered out-of-order within the
43
* window, which means it cannot be nested within other
44
* scorers. But it works well as a top-level scorer.
46
* The new BooleanScorer2 implementation instead works by
47
* merging priority queues of postings, albeit with some
48
* clever tricks. For example, a pure conjunction (all terms
49
* required) does not require a priority queue. Instead it
50
* sorts the posting streams at the start, then repeatedly
51
* skips the first to to the last. If the first ever equals
52
* the last, then there's a hit. When some terms are
53
* required and some terms are optional, the conjunction can
54
* be evaluated first, then the optional terms can all skip
55
* to the match and be added to the score. Thus the
56
* conjunction can reduce the number of priority queue
57
* updates for the optional terms. */
59
final class BooleanScorer extends Scorer {
61
private static final class BooleanScorerCollector extends Collector {
62
private BucketTable bucketTable;
64
private Scorer scorer;
66
public BooleanScorerCollector(int mask, BucketTable bucketTable) {
68
this.bucketTable = bucketTable;
72
public void collect(final int doc) throws IOException {
73
final BucketTable table = bucketTable;
74
final int i = doc & BucketTable.MASK;
75
final Bucket bucket = table.buckets[i];
77
if (bucket.doc != doc) { // invalid bucket
78
bucket.doc = doc; // set doc
79
bucket.score = scorer.score(); // initialize score
80
bucket.bits = mask; // initialize mask
81
bucket.coord = 1; // initialize coord
83
bucket.next = table.first; // push onto valid list
85
} else { // valid bucket
86
bucket.score += scorer.score(); // increment score
87
bucket.bits |= mask; // add bits in mask
88
bucket.coord++; // increment coord
93
public void setNextReader(IndexReader reader, int docBase) {
94
// not needed by this implementation
98
public void setScorer(Scorer scorer) throws IOException {
103
public boolean acceptsDocsOutOfOrder() {
109
// An internal class which is used in score(Collector, int) for setting the
110
// current score. This is required since Collector exposes a setScorer method
111
// and implementations that need the score will call scorer.score().
112
// Therefore the only methods that are implemented are score() and doc().
113
private static final class BucketScorer extends Scorer {
116
int doc = NO_MORE_DOCS;
119
public BucketScorer(Weight weight) { super(weight); }
122
public int advance(int target) throws IOException { return NO_MORE_DOCS; }
125
public int docID() { return doc; }
128
public float freq() { return freq; }
131
public int nextDoc() throws IOException { return NO_MORE_DOCS; }
134
public float score() throws IOException { return score; }
138
static final class Bucket {
139
int doc = -1; // tells if bucket is valid
140
float score; // incremental score
141
// TODO: break out bool anyProhibited, int
142
// numRequiredMatched; then we can remove 32 limit on
144
int bits; // used for bool constraints
145
int coord; // count of terms in score
146
Bucket next; // next valid bucket
149
/** A simple hash table of document scores within a range. */
150
static final class BucketTable {
151
public static final int SIZE = 1 << 11;
152
public static final int MASK = SIZE - 1;
154
final Bucket[] buckets = new Bucket[SIZE];
155
Bucket first = null; // head of valid list
157
public BucketTable() {
158
// Pre-fill to save the lazy init when collecting
160
for(int idx=0;idx<SIZE;idx++) {
161
buckets[idx] = new Bucket();
165
public Collector newCollector(int mask) {
166
return new BooleanScorerCollector(mask, this);
169
public int size() { return SIZE; }
172
static final class SubScorer {
173
public Scorer scorer;
174
// TODO: re-enable this if BQ ever sends us required clauses
175
//public boolean required = false;
176
public boolean prohibited;
177
public Collector collector;
178
public SubScorer next;
180
public SubScorer(Scorer scorer, boolean required, boolean prohibited,
181
Collector collector, SubScorer next)
184
throw new IllegalArgumentException("this scorer cannot handle required=true");
186
this.scorer = scorer;
187
// TODO: re-enable this if BQ ever sends us required clauses
188
//this.required = required;
189
this.prohibited = prohibited;
190
this.collector = collector;
195
private SubScorer scorers = null;
196
private BucketTable bucketTable = new BucketTable();
197
private final float[] coordFactors;
198
// TODO: re-enable this if BQ ever sends us required clauses
199
//private int requiredMask = 0;
200
private final int minNrShouldMatch;
202
private Bucket current;
203
private int doc = -1;
205
// Any time a prohibited clause matches we set bit 0:
206
private static final int PROHIBITED_MASK = 1;
208
BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
209
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
211
this.minNrShouldMatch = minNrShouldMatch;
213
if (optionalScorers != null && optionalScorers.size() > 0) {
214
for (Scorer scorer : optionalScorers) {
215
if (scorer.nextDoc() != NO_MORE_DOCS) {
216
scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
221
if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
222
for (Scorer scorer : prohibitedScorers) {
223
if (scorer.nextDoc() != NO_MORE_DOCS) {
224
scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
229
coordFactors = new float[optionalScorers.size() + 1];
230
for (int i = 0; i < coordFactors.length; i++) {
231
coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
235
// firstDocID is ignored since nextDoc() initializes 'current'
237
protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
238
// Make sure it's only BooleanScorer that calls us:
239
assert firstDocID == -1;
242
BucketScorer bs = new BucketScorer(weight);
244
// The internal loop will set the score and doc before calling collect.
245
collector.setScorer(bs);
247
bucketTable.first = null;
249
while (current != null) { // more queued
251
// check prohibited & required
252
if ((current.bits & PROHIBITED_MASK) == 0) {
254
// TODO: re-enable this if BQ ever sends us required
256
//&& (current.bits & requiredMask) == requiredMask) {
258
// TODO: can we remove this?
259
if (current.doc >= max){
261
current = current.next;
262
tmp.next = bucketTable.first;
263
bucketTable.first = tmp;
267
if (current.coord >= minNrShouldMatch) {
268
bs.score = current.score * coordFactors[current.coord];
269
bs.doc = current.doc;
270
bs.freq = current.coord;
271
collector.collect(current.doc);
275
current = current.next; // pop the queue
278
if (bucketTable.first != null){
279
current = bucketTable.first;
280
bucketTable.first = current.next;
286
end += BucketTable.SIZE;
287
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
288
int subScorerDocID = sub.scorer.docID();
289
if (subScorerDocID != NO_MORE_DOCS) {
290
more |= sub.scorer.score(sub.collector, end, subScorerDocID);
293
current = bucketTable.first;
295
} while (current != null || more);
301
public int advance(int target) throws IOException {
302
throw new UnsupportedOperationException();
307
throw new UnsupportedOperationException();
311
public int nextDoc() throws IOException {
312
throw new UnsupportedOperationException();
316
public float score() {
317
throw new UnsupportedOperationException();
321
public void score(Collector collector) throws IOException {
322
score(collector, Integer.MAX_VALUE, -1);
326
public String toString() {
327
StringBuilder buffer = new StringBuilder();
328
buffer.append("boolean(");
329
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
330
buffer.append(sub.scorer.toString());
334
return buffer.toString();
338
protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
339
super.visitSubScorers(parent, relationship, visitor);
340
final Query q = weight.getQuery();
341
SubScorer sub = scorers;
343
// TODO: re-enable this if BQ ever sends us required
345
//if (sub.required) {
346
//relationship = Occur.MUST;
347
if (!sub.prohibited) {
348
relationship = Occur.SHOULD;
350
// TODO: maybe it's pointless to do this, but, it is
351
// possible the doc may still be collected, eg foo
353
relationship = Occur.MUST_NOT;
355
sub.scorer.visitSubScorers(q, relationship, visitor);