2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.solr.search;
20
import java.util.Random;
21
import java.util.Arrays;
22
import java.io.IOException;
24
import org.apache.lucene.util.LuceneTestCase;
25
import org.apache.lucene.util.OpenBitSet;
26
import org.apache.lucene.util.OpenBitSetIterator;
27
import org.apache.lucene.index.IndexReader;
28
import org.apache.lucene.index.FilterIndexReader;
29
import org.apache.lucene.index.MultiReader;
30
import org.apache.lucene.search.Filter;
31
import org.apache.lucene.search.DocIdSet;
32
import org.apache.lucene.search.DocIdSetIterator;
37
public class TestDocSet extends LuceneTestCase {
41
public OpenBitSet getRandomSet(int sz, int bitsToSet) {
42
OpenBitSet bs = new OpenBitSet(sz);
44
for (int i=0; i<bitsToSet; i++) {
45
bs.fastSet(rand.nextInt(sz));
50
public DocSet getHashDocSet(OpenBitSet bs) {
51
int[] docs = new int[(int)bs.cardinality()];
52
OpenBitSetIterator iter = new OpenBitSetIterator(bs);
53
for (int i=0; i<docs.length; i++) {
54
docs[i] = iter.nextDoc();
56
return new HashDocSet(docs,0,docs.length);
59
public DocSet getIntDocSet(OpenBitSet bs) {
60
int[] docs = new int[(int)bs.cardinality()];
61
OpenBitSetIterator iter = new OpenBitSetIterator(bs);
62
for (int i=0; i<docs.length; i++) {
63
docs[i] = iter.nextDoc();
65
return new SortedIntDocSet(docs);
69
public DocSet getBitDocSet(OpenBitSet bs) {
70
return new BitDocSet(bs);
73
public DocSet getDocSlice(OpenBitSet bs) {
74
int len = (int)bs.cardinality();
75
int[] arr = new int[len+5];
76
arr[0]=10; arr[1]=20; arr[2]=30; arr[arr.length-1]=1; arr[arr.length-2]=2;
78
int end = offset + len;
80
OpenBitSetIterator iter = new OpenBitSetIterator(bs);
81
// put in opposite order... DocLists are not ordered.
82
for (int i=end-1; i>=offset; i--) {
83
arr[i] = iter.nextDoc();
86
return new DocSlice(offset, len, arr, null, len*2, 100.0f);
90
public DocSet getDocSet(OpenBitSet bs) {
91
switch(rand.nextInt(10)) {
92
case 0: return getHashDocSet(bs);
94
case 1: return getBitDocSet(bs);
95
case 2: return getBitDocSet(bs);
96
case 3: return getBitDocSet(bs);
98
case 4: return getIntDocSet(bs);
99
case 5: return getIntDocSet(bs);
100
case 6: return getIntDocSet(bs);
101
case 7: return getIntDocSet(bs);
102
case 8: return getIntDocSet(bs);
104
case 9: return getDocSlice(bs);
109
public void checkEqual(OpenBitSet bs, DocSet set) {
110
for (int i=0; i<bs.capacity(); i++) {
111
assertEquals(bs.get(i), set.exists(i));
113
assertEquals(bs.cardinality(), set.size());
116
public void iter(DocSet d1, DocSet d2) {
117
// HashDocSet and DocList doesn't iterate in order.
118
if (d1 instanceof HashDocSet || d2 instanceof HashDocSet || d1 instanceof DocList || d2 instanceof DocList) return;
120
DocIterator i1 = d1.iterator();
121
DocIterator i2 = d2.iterator();
123
assert(i1.hasNext() == i2.hasNext());
126
boolean b1 = i1.hasNext();
127
boolean b2 = i2.hasNext();
130
assertEquals(i1.nextDoc(), i2.nextDoc());
134
protected void doSingle(int maxSize) {
135
int sz = rand.nextInt(maxSize+1);
136
int sz2 = rand.nextInt(maxSize);
137
OpenBitSet bs1 = getRandomSet(sz, rand.nextInt(sz+1));
138
OpenBitSet bs2 = getRandomSet(sz, rand.nextInt(sz2+1));
140
DocSet a1 = new BitDocSet(bs1);
141
DocSet a2 = new BitDocSet(bs2);
142
DocSet b1 = getDocSet(bs1);
143
DocSet b2 = getDocSet(bs2);
151
OpenBitSet a_and = (OpenBitSet) bs1.clone(); a_and.and(bs2);
152
OpenBitSet a_or = (OpenBitSet) bs1.clone(); a_or.or(bs2);
153
// OpenBitSet a_xor = (OpenBitSet)bs1.clone(); a_xor.xor(bs2);
154
OpenBitSet a_andn = (OpenBitSet) bs1.clone(); a_andn.andNot(bs2);
156
checkEqual(a_and, b1.intersection(b2));
157
checkEqual(a_or, b1.union(b2));
158
checkEqual(a_andn, b1.andNot(b2));
160
assertEquals(a_and.cardinality(), b1.intersectionSize(b2));
161
assertEquals(a_or.cardinality(), b1.unionSize(b2));
162
assertEquals(a_andn.cardinality(), b1.andNotSize(b2));
166
public void doMany(int maxSz, int iter) {
167
for (int i=0; i<iter; i++) {
172
public void testRandomDocSets() {
173
// Make the size big enough to go over certain limits (such as one set
174
// being 8 times the size of another in the int set, or going over 2 times
175
// 64 bits for the bit doc set. Smaller sets can hit more boundary conditions though.
178
// doMany(130, 1000000);
181
public DocSet getRandomDocSet(int n, int maxDoc) {
182
OpenBitSet obs = new OpenBitSet(maxDoc);
183
int[] a = new int[n];
184
for (int i=0; i<n; i++) {
186
int idx = rand.nextInt(maxDoc);
187
if (obs.getAndSet(idx)) continue;
193
if (n <= smallSetCuttoff) {
194
if (smallSetType ==0) {
196
return new SortedIntDocSet(a);
197
} else if (smallSetType ==1) {
199
return loadfactor!=0 ? new HashDocSet(a,0,n,1/loadfactor) : new HashDocSet(a,0,n);
203
return new BitDocSet(obs, n);
206
public DocSet[] getRandomSets(int nSets, int minSetSize, int maxSetSize, int maxDoc) {
207
DocSet[] sets = new DocSet[nSets];
209
for (int i=0; i<nSets; i++) {
211
sz = rand.nextInt(maxSetSize-minSetSize+1)+minSetSize;
212
// different distribution
213
// sz = (maxSetSize+1)/(rand.nextInt(maxSetSize)+1) + minSetSize;
214
sets[i] = getRandomDocSet(sz,maxDoc);
220
/**** needs code insertion into HashDocSet
221
public void testCollisions() {
223
rand=new Random(12345); // make deterministic
227
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
229
long start=System.currentTimeMillis();
230
for (int maxDoc : maxDocs) {
231
int cstart = HashDocSet.collisions;
232
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
233
for (DocSet s1 : sets) {
234
for (DocSet s2 : sets) {
235
if (s1!=s2) ret += s1.intersectionSize(s2);
238
int cend = HashDocSet.collisions;
239
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
241
long end=System.currentTimeMillis();
242
System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
243
if (ret==-1)System.out.println("wow!");
244
System.out.println("collisions="+HashDocSet.collisions);
249
public static int smallSetType = 0; // 0==sortedint, 1==hash, 2==openbitset
250
public static int smallSetCuttoff=3000;
253
public void testIntersectionSizePerformance() {
254
loadfactor=.75f; // for HashDocSet
255
rand=new Random(1); // make deterministic
257
int minBigSetSize=1,maxBigSetSize=30000;
258
int minSmallSetSize=1,maxSmallSetSize=30000;
264
smallSetCuttoff = maxDoc>>6; // break even for SortedIntSet is /32... but /64 is better for performance
265
// smallSetCuttoff = maxDoc;
268
DocSet[] bigsets = getRandomSets(nSets, minBigSetSize, maxBigSetSize, maxDoc);
269
DocSet[] smallsets = getRandomSets(nSets, minSmallSetSize, maxSmallSetSize, maxDoc);
271
long start=System.currentTimeMillis();
272
for (int i=0; i<iter; i++) {
273
for (DocSet s1 : bigsets) {
274
for (DocSet s2 : smallsets) {
275
ret += s1.intersectionSize(s2);
279
long end=System.currentTimeMillis();
280
System.out.println("intersectionSizePerformance="+(end-start)+" ms");
281
System.out.println("ret="+ret);
286
public void testExistsPerformance() {
288
rand=new Random(12345); // make deterministic
293
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
295
long start=System.currentTimeMillis();
296
for (int i=0; i<iter; i++) {
297
for (DocSet s1 : sets) {
298
for (int j=0; j<maxDoc; j++) {
299
ret += s1.exists(j) ? 1 :0;
303
long end=System.currentTimeMillis();
304
System.out.println("testExistsSizePerformance="+(end-start)+" ms");
305
if (ret==-1)System.out.println("wow!");
309
/**** needs code insertion into HashDocSet
310
public void testExistsCollisions() {
312
rand=new Random(12345); // make deterministic
315
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
318
for (int maxDoc : maxDocs) {
319
int mask = (BitUtil.nextHighestPowerOfTwo(maxDoc)>>1)-1;
320
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
321
int cstart = HashDocSet.collisions;
322
for (DocSet s1 : sets) {
323
for (int j=0; j<maxDocs[0]; j++) {
324
int idx = rand.nextInt()&mask;
325
ret += s1.exists(idx) ? 1 :0;
328
int cend = HashDocSet.collisions;
329
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
331
if (ret==-1)System.out.println("wow!");
332
System.out.println("collisions="+HashDocSet.collisions);
336
public IndexReader dummyIndexReader(final int maxDoc) {
338
IndexReader r = new FilterIndexReader(null) {
340
public int maxDoc() {
345
public boolean hasDeletions() {
350
public IndexReader[] getSequentialSubReaders() {
357
public IndexReader dummyMultiReader(int nSeg, int maxDoc) {
358
if (nSeg==1 && rand.nextBoolean()) return dummyIndexReader(rand.nextInt(maxDoc));
360
IndexReader[] subs = new IndexReader[rand.nextInt(nSeg)+1];
361
for (int i=0; i<subs.length; i++) {
362
subs[i] = dummyIndexReader(rand.nextInt(maxDoc));
365
MultiReader mr = new MultiReader(subs);
369
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
370
DocIdSetIterator ia = a.iterator();
371
DocIdSetIterator ib = b.iterator();
373
// test for next() equivalence
375
int da = ia.nextDoc();
376
int db = ib.nextDoc();
377
assertEquals(da, db);
378
assertEquals(ia.docID(), ib.docID());
379
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
382
for (int i=0; i<10; i++) {
383
// test random skipTo() and next()
389
if (rand.nextBoolean()) {
393
int target = doc + rand.nextInt(10) + 1; // keep in mind future edge cases like probing (increase if necessary)
394
da = ia.advance(target);
395
db = ib.advance(target);
398
assertEquals(da, db);
399
assertEquals(ia.docID(), ib.docID());
400
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
406
public void doFilterTest(SolrIndexReader reader) throws IOException {
407
OpenBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc()+1));
408
DocSet a = new BitDocSet(bs);
409
DocSet b = getIntDocSet(bs);
411
Filter fa = a.getTopFilter();
412
Filter fb = b.getTopFilter();
415
DocIdSet da = fa.getDocIdSet(reader);
416
DocIdSet db = fb.getDocIdSet(reader);
417
doTestIteratorEqual(da, db);
419
// first test in-sequence sub readers
420
for (SolrIndexReader sir : reader.getLeafReaders()) {
421
da = fa.getDocIdSet(sir);
422
db = fb.getDocIdSet(sir);
423
doTestIteratorEqual(da, db);
426
int nReaders = reader.getLeafReaders().length;
427
// now test out-of-sequence sub readers
428
for (int i=0; i<nReaders; i++) {
429
SolrIndexReader sir = reader.getLeafReaders()[rand.nextInt(nReaders)];
430
da = fa.getDocIdSet(sir);
431
db = fb.getDocIdSet(sir);
432
doTestIteratorEqual(da, db);
436
public void testFilter() throws IOException {
437
// keeping these numbers smaller help hit more edge cases
439
int maxDoc=5; // increase if future changes add more edge cases (like probing a certain distance in the bin search)
441
for (int i=0; i<5000; i++) {
442
IndexReader r = dummyMultiReader(maxSeg, maxDoc);
443
SolrIndexReader sir = new SolrIndexReader(r, null, 0);