1
package org.apache.lucene.search;
3
import org.apache.lucene.util.DocIdBitSet;
4
import org.apache.lucene.util.LuceneTestCase;
6
import java.util.BitSet;
7
import java.io.IOException;
9
import org.apache.lucene.index.IndexReader;
10
import org.apache.lucene.index.IndexWriter;
11
import org.apache.lucene.index.Term;
12
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13
import org.apache.lucene.store.Directory;
14
import org.apache.lucene.analysis.MockAnalyzer;
15
import org.apache.lucene.analysis.WhitespaceAnalyzer;
16
import org.apache.lucene.document.Document;
17
import org.apache.lucene.document.Field;
20
* Licensed to the Apache Software Foundation (ASF) under one or more
21
* contributor license agreements. See the NOTICE file distributed with
22
* this work for additional information regarding copyright ownership.
23
* The ASF licenses this file to You under the Apache License, Version 2.0
24
* (the "License"); you may not use this file except in compliance with
25
* the License. You may obtain a copy of the License at
27
* http://www.apache.org/licenses/LICENSE-2.0
29
* Unless required by applicable law or agreed to in writing, software
30
* distributed under the License is distributed on an "AS IS" BASIS,
31
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
32
* See the License for the specific language governing permissions and
33
* limitations under the License.
36
public class TestScorerPerf extends LuceneTestCase {
37
boolean validate = true; // set to false when doing performance testing
44
public void createDummySearcher() throws Exception {
45
// Create a dummy index with nothing in it.
46
// This could possibly fail if Lucene starts checking for docid ranges...
48
IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
49
iw.addDocument(new Document());
51
s = new IndexSearcher(d, true);
54
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
55
int[] freq = new int[nTerms];
56
terms = new Term[nTerms];
57
for (int i=0; i<nTerms; i++) {
58
int f = (nTerms+1)-i; // make first terms less frequent
59
freq[i] = (int)Math.ceil(Math.pow(f,power));
60
terms[i] = new Term("f",Character.toString((char)('A'+i)));
63
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
64
for (int i=0; i<nDocs; i++) {
65
Document d = new Document();
66
for (int j=0; j<nTerms; j++) {
67
if (random.nextInt(freq[j]) == 0) {
68
d.add(newField("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
69
//System.out.println(d);
79
public BitSet randBitSet(int sz, int numBitsToSet) {
80
BitSet set = new BitSet(sz);
81
for (int i=0; i<numBitsToSet; i++) {
82
set.set(random.nextInt(sz));
87
public BitSet[] randBitSets(int numSets, int setSize) {
88
BitSet[] sets = new BitSet[numSets];
89
for (int i=0; i<sets.length; i++) {
90
sets[i] = randBitSet(setSize, random.nextInt(setSize));
95
public static class CountingHitCollector extends Collector {
98
protected int docBase = 0;
101
public void setScorer(Scorer scorer) throws IOException {}
104
public void collect(int doc) {
106
sum += docBase + doc; // use it to avoid any possibility of being optimized away
109
public int getCount() { return count; }
110
public int getSum() { return sum; }
113
public void setNextReader(IndexReader reader, int base) {
117
public boolean acceptsDocsOutOfOrder() {
123
public static class MatchingHitCollector extends CountingHitCollector {
126
public MatchingHitCollector(BitSet answer) {
127
this.answer = answer;
130
public void collect(int doc, float score) {
132
pos = answer.nextSetBit(pos+1);
133
if (pos != doc + docBase) {
134
throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase);
141
BitSet addClause(BooleanQuery bq, BitSet result) {
142
final BitSet rnd = sets[random.nextInt(sets.length)];
143
Query q = new ConstantScoreQuery(new Filter() {
145
public DocIdSet getDocIdSet(IndexReader reader) {
146
return new DocIdBitSet(rnd);
149
bq.add(q, BooleanClause.Occur.MUST);
151
if (result==null) result = (BitSet)rnd.clone();
152
else result.and(rnd);
158
public int doConjunctions(int iter, int maxClauses) throws IOException {
161
for (int i=0; i<iter; i++) {
162
int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
163
BooleanQuery bq = new BooleanQuery();
165
for (int j=0; j<nClauses; j++) {
166
result = addClause(bq,result);
169
CountingHitCollector hc = validate ? new MatchingHitCollector(result)
170
: new CountingHitCollector();
174
if (validate) assertEquals(result.cardinality(), hc.getCount());
175
// System.out.println(hc.getCount());
181
public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
185
for (int i=0; i<iter; i++) {
186
int oClauses = random.nextInt(maxOuterClauses-1)+2;
187
BooleanQuery oq = new BooleanQuery();
190
for (int o=0; o<oClauses; o++) {
192
int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
193
BooleanQuery bq = new BooleanQuery();
194
for (int j=0; j<nClauses; j++) {
195
result = addClause(bq,result);
198
oq.add(bq, BooleanClause.Occur.MUST);
201
CountingHitCollector hc = validate ? new MatchingHitCollector(result)
202
: new CountingHitCollector();
204
nMatches += hc.getCount();
206
if (validate) assertEquals(result.cardinality(), hc.getCount());
207
// System.out.println(hc.getCount());
209
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
214
public int doTermConjunctions(IndexSearcher s,
218
) throws IOException {
222
for (int i=0; i<iter; i++) {
223
int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
224
BooleanQuery bq = new BooleanQuery();
225
BitSet termflag = new BitSet(termsInIndex);
226
for (int j=0; j<nClauses; j++) {
228
// don't pick same clause twice
229
tnum = random.nextInt(termsInIndex);
230
if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
231
if (tnum<0 || tnum>=termsInIndex) tnum=termflag.nextClearBit(0);
233
Query tq = new TermQuery(terms[tnum]);
234
bq.add(tq, BooleanClause.Occur.MUST);
237
CountingHitCollector hc = new CountingHitCollector();
239
nMatches += hc.getCount();
242
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
248
public int doNestedTermConjunctions(IndexSearcher s,
253
) throws IOException {
256
for (int i=0; i<iter; i++) {
257
int oClauses = random.nextInt(maxOuterClauses-1)+2;
258
BooleanQuery oq = new BooleanQuery();
259
for (int o=0; o<oClauses; o++) {
261
int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
262
BooleanQuery bq = new BooleanQuery();
263
BitSet termflag = new BitSet(termsInIndex);
264
for (int j=0; j<nClauses; j++) {
266
// don't pick same clause twice
267
tnum = random.nextInt(termsInIndex);
268
if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
269
if (tnum<0 || tnum>=25) tnum=termflag.nextClearBit(0);
271
Query tq = new TermQuery(terms[tnum]);
272
bq.add(tq, BooleanClause.Occur.MUST);
275
oq.add(bq, BooleanClause.Occur.MUST);
279
CountingHitCollector hc = new CountingHitCollector();
281
nMatches += hc.getCount();
284
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
289
public int doSloppyPhrase(IndexSearcher s,
293
) throws IOException {
296
for (int i=0; i<iter; i++) {
297
int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
298
PhraseQuery q = new PhraseQuery();
299
for (int j=0; j<nClauses; j++) {
300
int tnum = random.nextInt(termsInIndex);
301
q.add(new Term("f",Character.toString((char)(tnum+'A'))), j);
303
q.setSlop(termsInIndex); // this could be random too
305
CountingHitCollector hc = new CountingHitCollector();
314
public void testConjunctions() throws Exception {
315
// test many small sets... the bugs will be found on boundary conditions
316
createDummySearcher();
318
sets=randBitSets(atLeast(1000), atLeast(10));
319
doConjunctions(atLeast(10000), atLeast(5));
320
doNestedConjunctions(atLeast(10000), atLeast(3), atLeast(3));
328
public void testConjunctionPerf() throws Exception {
330
createDummySearcher();
332
sets=randBitSets(32,1000000);
333
for (int i=0; i<bigIter; i++) {
334
long start = System.currentTimeMillis();
335
doConjunctions(500,6);
336
long end = System.currentTimeMillis();
337
if (VERBOSE) System.out.println("milliseconds="+(end-start));
342
public void testNestedConjunctionPerf() throws Exception {
344
createDummySearcher();
346
sets=randBitSets(32,1000000);
347
for (int i=0; i<bigIter; i++) {
348
long start = System.currentTimeMillis();
349
doNestedConjunctions(500,3,3);
350
long end = System.currentTimeMillis();
351
if (VERBOSE) System.out.println("milliseconds="+(end-start));
357
public void testConjunctionTerms() throws Exception {
360
RAMDirectory dir = new RAMDirectory();
361
if (VERBOSE) System.out.println("Creating index");
362
createRandomTerms(100000,25,.5, dir);
363
s = new IndexSearcher(dir, true);
364
if (VERBOSE) System.out.println("Starting performance test");
365
for (int i=0; i<bigIter; i++) {
366
long start = System.currentTimeMillis();
367
doTermConjunctions(s,25,5,1000);
368
long end = System.currentTimeMillis();
369
if (VERBOSE) System.out.println("milliseconds="+(end-start));
374
public void testNestedConjunctionTerms() throws Exception {
377
RAMDirectory dir = new RAMDirectory();
378
if (VERBOSE) System.out.println("Creating index");
379
createRandomTerms(100000,25,.2, dir);
380
s = new IndexSearcher(dir, true);
381
if (VERBOSE) System.out.println("Starting performance test");
382
for (int i=0; i<bigIter; i++) {
383
long start = System.currentTimeMillis();
384
doNestedTermConjunctions(s,25,3,3,200);
385
long end = System.currentTimeMillis();
386
if (VERBOSE) System.out.println("milliseconds="+(end-start));
392
public void testSloppyPhrasePerf() throws Exception {
395
RAMDirectory dir = new RAMDirectory();
396
if (VERBOSE) System.out.println("Creating index");
397
createRandomTerms(100000,25,2,dir);
398
s = new IndexSearcher(dir, true);
399
if (VERBOSE) System.out.println("Starting performance test");
400
for (int i=0; i<bigIter; i++) {
401
long start = System.currentTimeMillis();
402
doSloppyPhrase(s,25,2,1000);
403
long end = System.currentTimeMillis();
404
if (VERBOSE) System.out.println("milliseconds="+(end-start));