1
package org.apache.lucene.search;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.util.LuceneTestCase;
21
import org.apache.lucene.analysis.MockAnalyzer;
22
import org.apache.lucene.analysis.WhitespaceAnalyzer;
23
import org.apache.lucene.document.Document;
24
import org.apache.lucene.document.Field;
25
import org.apache.lucene.index.IndexReader;
26
import org.apache.lucene.index.FieldInvertState;
27
import org.apache.lucene.index.RandomIndexWriter;
28
import org.apache.lucene.index.Term;
29
import org.apache.lucene.store.Directory;
31
import java.text.DecimalFormat;
32
import java.io.IOException;
35
* Test of the DisjunctionMaxQuery.
38
public class TestDisjunctionMaxQuery extends LuceneTestCase {
40
/** threshold for comparing floats */
41
public static final float SCORE_COMP_THRESH = 0.0000f;
44
* Similarity to eliminate tf, idf and lengthNorm effects to isolate test
48
* same as TestRankingSimilarity in TestRanking.zip from
49
* http://issues.apache.org/jira/browse/LUCENE-323
52
private static class TestSimilarity extends DefaultSimilarity {
54
public TestSimilarity() {}
57
public float tf(float freq) {
58
if (freq > 0.0f) return 1.0f;
63
public float computeNorm(String fieldName, FieldInvertState state) {
64
// Disable length norm
65
return state.getBoost();
69
public float idf(int docFreq, int numDocs) {
74
public Similarity sim = new TestSimilarity();
75
public Directory index;
77
public IndexSearcher s;
80
public void setUp() throws Exception {
83
index = newDirectory();
84
RandomIndexWriter writer = new RandomIndexWriter(random, index,
85
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))
86
.setSimilarity(sim).setMergePolicy(newLogMergePolicy()));
88
// hed is the most important field, dek is secondary
90
// d1 is an "ok" match for: albino elephant
92
Document d1 = new Document();
93
d1.add(newField("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
96
.add(newField("hed", "elephant", Field.Store.YES,
97
Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
99
.add(newField("dek", "elephant", Field.Store.YES,
100
Field.Index.ANALYZED));// Field.Text("dek", "elephant"));
101
writer.addDocument(d1);
104
// d2 is a "good" match for: albino elephant
106
Document d2 = new Document();
107
d2.add(newField("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
110
.add(newField("hed", "elephant", Field.Store.YES,
111
Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
112
d2.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek",
115
.add(newField("dek", "elephant", Field.Store.YES,
116
Field.Index.ANALYZED));// Field.Text("dek", "elephant"));
117
writer.addDocument(d2);
120
// d3 is a "better" match for: albino elephant
122
Document d3 = new Document();
123
d3.add(newField("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
125
d3.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed",
128
.add(newField("hed", "elephant", Field.Store.YES,
129
Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
130
writer.addDocument(d3);
133
// d4 is the "best" match for: albino elephant
135
Document d4 = new Document();
136
d4.add(newField("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
138
d4.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed",
141
.add(newField("hed", "elephant", Field.Store.YES,
142
Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
143
d4.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek",
145
writer.addDocument(d4);
149
r = writer.getReader();
152
s.setSimilarity(sim);
156
public void tearDown() throws Exception {
163
public void testSkipToFirsttimeMiss() throws IOException {
164
final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
165
dq.add(tq("id", "d1"));
166
dq.add(tq("dek", "DOES_NOT_EXIST"));
168
QueryUtils.check(random, dq, s);
170
final Weight dw = s.createNormalizedWeight(dq);
171
IndexReader sub = s.getIndexReader().getSequentialSubReaders() == null ?
172
s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0];
173
final Scorer ds = dw.scorer(sub, true, false);
174
final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS;
176
fail("firsttime skipTo found a match? ... "
177
+ r.document(ds.docID()).get("id"));
181
public void testSkipToFirsttimeHit() throws IOException {
182
final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
183
dq.add(tq("dek", "albino"));
184
dq.add(tq("dek", "DOES_NOT_EXIST"));
186
QueryUtils.check(random, dq, s);
188
final Weight dw = s.createNormalizedWeight(dq);
189
IndexReader sub = s.getIndexReader().getSequentialSubReaders() == null ?
190
s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0];
191
final Scorer ds = dw.scorer(sub, true, false);
192
assertTrue("firsttime skipTo found no match",
193
ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS);
194
assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id"));
197
public void testSimpleEqualScores1() throws Exception {
199
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
200
q.add(tq("hed", "albino"));
201
q.add(tq("hed", "elephant"));
202
QueryUtils.check(random, q, s);
204
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
207
assertEquals("all docs should match " + q.toString(), 4, h.length);
209
float score = h[0].score;
210
for (int i = 1; i < h.length; i++) {
211
assertEquals("score #" + i + " is not the same", score, h[i].score,
215
printHits("testSimpleEqualScores1", h, s);
221
public void testSimpleEqualScores2() throws Exception {
223
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
224
q.add(tq("dek", "albino"));
225
q.add(tq("dek", "elephant"));
226
QueryUtils.check(random, q, s);
228
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
231
assertEquals("3 docs should match " + q.toString(), 3, h.length);
232
float score = h[0].score;
233
for (int i = 1; i < h.length; i++) {
234
assertEquals("score #" + i + " is not the same", score, h[i].score,
238
printHits("testSimpleEqualScores2", h, s);
244
public void testSimpleEqualScores3() throws Exception {
246
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
247
q.add(tq("hed", "albino"));
248
q.add(tq("hed", "elephant"));
249
q.add(tq("dek", "albino"));
250
q.add(tq("dek", "elephant"));
251
QueryUtils.check(random, q, s);
253
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
256
assertEquals("all docs should match " + q.toString(), 4, h.length);
257
float score = h[0].score;
258
for (int i = 1; i < h.length; i++) {
259
assertEquals("score #" + i + " is not the same", score, h[i].score,
263
printHits("testSimpleEqualScores3", h, s);
269
public void testSimpleTiebreaker() throws Exception {
271
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f);
272
q.add(tq("dek", "albino"));
273
q.add(tq("dek", "elephant"));
274
QueryUtils.check(random, q, s);
276
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
279
assertEquals("3 docs should match " + q.toString(), 3, h.length);
280
assertEquals("wrong first", "d2", s.doc(h[0].doc).get("id"));
281
float score0 = h[0].score;
282
float score1 = h[1].score;
283
float score2 = h[2].score;
284
assertTrue("d2 does not have better score then others: " + score0
285
+ " >? " + score1, score0 > score1);
286
assertEquals("d4 and d1 don't have equal scores", score1, score2,
289
printHits("testSimpleTiebreaker", h, s);
294
public void testBooleanRequiredEqualScores() throws Exception {
296
BooleanQuery q = new BooleanQuery();
298
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
299
q1.add(tq("hed", "albino"));
300
q1.add(tq("dek", "albino"));
301
q.add(q1, BooleanClause.Occur.MUST);// true,false);
302
QueryUtils.check(random, q1, s);
306
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
307
q2.add(tq("hed", "elephant"));
308
q2.add(tq("dek", "elephant"));
309
q.add(q2, BooleanClause.Occur.MUST);// true,false);
310
QueryUtils.check(random, q2, s);
313
QueryUtils.check(random, q, s);
315
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
318
assertEquals("3 docs should match " + q.toString(), 3, h.length);
319
float score = h[0].score;
320
for (int i = 1; i < h.length; i++) {
321
assertEquals("score #" + i + " is not the same", score, h[i].score,
325
printHits("testBooleanRequiredEqualScores1", h, s);
330
public void testBooleanOptionalNoTiebreaker() throws Exception {
332
BooleanQuery q = new BooleanQuery();
334
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
335
q1.add(tq("hed", "albino"));
336
q1.add(tq("dek", "albino"));
337
q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
340
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
341
q2.add(tq("hed", "elephant"));
342
q2.add(tq("dek", "elephant"));
343
q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
345
QueryUtils.check(random, q, s);
347
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
350
assertEquals("4 docs should match " + q.toString(), 4, h.length);
351
float score = h[0].score;
352
for (int i = 1; i < h.length - 1; i++) { /* note: -1 */
353
assertEquals("score #" + i + " is not the same", score, h[i].score,
356
assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id"));
357
float score1 = h[h.length - 1].score;
358
assertTrue("d1 does not have worse score then others: " + score + " >? "
359
+ score1, score > score1);
361
printHits("testBooleanOptionalNoTiebreaker", h, s);
366
public void testBooleanOptionalWithTiebreaker() throws Exception {
368
BooleanQuery q = new BooleanQuery();
370
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
371
q1.add(tq("hed", "albino"));
372
q1.add(tq("dek", "albino"));
373
q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
376
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
377
q2.add(tq("hed", "elephant"));
378
q2.add(tq("dek", "elephant"));
379
q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
381
QueryUtils.check(random, q, s);
383
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
387
assertEquals("4 docs should match " + q.toString(), 4, h.length);
389
float score0 = h[0].score;
390
float score1 = h[1].score;
391
float score2 = h[2].score;
392
float score3 = h[3].score;
394
String doc0 = s.doc(h[0].doc).get("id");
395
String doc1 = s.doc(h[1].doc).get("id");
396
String doc2 = s.doc(h[2].doc).get("id");
397
String doc3 = s.doc(h[3].doc).get("id");
399
assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2")
400
|| doc0.equals("d4"));
401
assertTrue("doc1 should be d2 or d4: " + doc0, doc1.equals("d2")
402
|| doc1.equals("d4"));
403
assertEquals("score0 and score1 should match", score0, score1,
405
assertEquals("wrong third", "d3", doc2);
406
assertTrue("d3 does not have worse score then d2 and d4: " + score1
407
+ " >? " + score2, score1 > score2);
409
assertEquals("wrong fourth", "d1", doc3);
410
assertTrue("d1 does not have worse score then d3: " + score2 + " >? "
411
+ score3, score2 > score3);
414
printHits("testBooleanOptionalWithTiebreaker", h, s);
420
public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception {
422
BooleanQuery q = new BooleanQuery();
424
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
425
q1.add(tq("hed", "albino", 1.5f));
426
q1.add(tq("dek", "albino"));
427
q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
430
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
431
q2.add(tq("hed", "elephant", 1.5f));
432
q2.add(tq("dek", "elephant"));
433
q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
435
QueryUtils.check(random, q, s);
437
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
441
assertEquals("4 docs should match " + q.toString(), 4, h.length);
443
float score0 = h[0].score;
444
float score1 = h[1].score;
445
float score2 = h[2].score;
446
float score3 = h[3].score;
448
String doc0 = s.doc(h[0].doc).get("id");
449
String doc1 = s.doc(h[1].doc).get("id");
450
String doc2 = s.doc(h[2].doc).get("id");
451
String doc3 = s.doc(h[3].doc).get("id");
453
assertEquals("doc0 should be d4: ", "d4", doc0);
454
assertEquals("doc1 should be d3: ", "d3", doc1);
455
assertEquals("doc2 should be d2: ", "d2", doc2);
456
assertEquals("doc3 should be d1: ", "d1", doc3);
458
assertTrue("d4 does not have a better score then d3: " + score0 + " >? "
459
+ score1, score0 > score1);
460
assertTrue("d3 does not have a better score then d2: " + score1 + " >? "
461
+ score2, score1 > score2);
462
assertTrue("d3 does not have a better score then d1: " + score2 + " >? "
463
+ score3, score2 > score3);
466
printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s);
472
protected Query tq(String f, String t) {
473
return new TermQuery(new Term(f, t));
477
protected Query tq(String f, String t, float b) {
483
protected void printHits(String test, ScoreDoc[] h, Searcher searcher)
486
System.err.println("------- " + test + " -------");
488
DecimalFormat f = new DecimalFormat("0.000000000");
490
for (int i = 0; i < h.length; i++) {
491
Document d = searcher.doc(h[i].doc);
492
float score = h[i].score;
494
.println("#" + i + ": " + f.format(score) + " - " + d.get("id"));