package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
/**
 * Tests {@link MultiSearcher} class.
 */
public class TestMultiSearcher extends LuceneTestCase
{
* ReturnS a new instance of the concrete MultiSearcher class
52
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
53
return new MultiSearcher(searchers);
56
public void testEmptyIndex() throws Exception {
57
// creating two directories for indices
58
Directory indexStoreA = newDirectory();
59
Directory indexStoreB = newDirectory();
61
// creating a document to store
62
Document lDoc = new Document();
63
lDoc.add(newField("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
64
lDoc.add(newField("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
65
lDoc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
67
// creating a document to store
68
Document lDoc2 = new Document();
69
lDoc2.add(newField("fulltext", "in a galaxy far far away.....",
70
Field.Store.YES, Field.Index.ANALYZED));
71
lDoc2.add(newField("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
72
lDoc2.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
74
// creating a document to store
75
Document lDoc3 = new Document();
76
lDoc3.add(newField("fulltext", "a bizarre bug manifested itself....",
77
Field.Store.YES, Field.Index.ANALYZED));
78
lDoc3.add(newField("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
79
lDoc3.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
81
// creating an index writer for the first index
82
IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
83
// creating an index writer for the second index, but writing nothing
84
IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
86
//--------------------------------------------------------------------
88
//--------------------------------------------------------------------
90
// writing the documents to the first index
91
writerA.addDocument(lDoc);
92
writerA.addDocument(lDoc2);
93
writerA.addDocument(lDoc3);
97
// closing the second index
100
// creating the query
101
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT));
102
Query query = parser.parse("handle:1");
104
// building the searchables
105
Searcher[] searchers = new Searcher[2];
106
// VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
107
searchers[0] = new IndexSearcher(indexStoreB, true);
108
searchers[1] = new IndexSearcher(indexStoreA, true);
109
// creating the multiSearcher
110
Searcher mSearcher = getMultiSearcherInstance(searchers);
111
// performing the search
112
ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;
114
assertEquals(3, hits.length);
116
// iterating over the hit documents
117
for (int i = 0; i < hits.length; i++) {
118
mSearcher.doc(hits[i].doc);
123
//--------------------------------------------------------------------
125
//--------------------------------------------------------------------
127
// adding one document to the empty index
128
writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
129
TEST_VERSION_CURRENT,
130
new StandardAnalyzer(TEST_VERSION_CURRENT))
131
.setOpenMode(OpenMode.APPEND));
132
writerB.addDocument(lDoc);
136
// building the searchables
137
Searcher[] searchers2 = new Searcher[2];
138
// VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
139
searchers2[0] = new IndexSearcher(indexStoreB, true);
140
searchers2[1] = new IndexSearcher(indexStoreA, true);
141
// creating the mulitSearcher
142
MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
143
// performing the same search
144
ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;
146
assertEquals(4, hits2.length);
148
// iterating over the hit documents
149
for (int i = 0; i < hits2.length; i++) {
150
// no exception should happen at this point
151
mSearcher2.doc(hits2[i].doc);
154
// test the subSearcher() method:
155
Query subSearcherQuery = parser.parse("id:doc1");
156
hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
157
assertEquals(2, hits2.length);
158
assertEquals(0, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[0]
159
assertEquals(1, mSearcher2.subSearcher(hits2[1].doc)); // hit from searchers2[1]
160
subSearcherQuery = parser.parse("id:doc2");
161
hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
162
assertEquals(1, hits2.length);
163
assertEquals(1, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[1]
166
//--------------------------------------------------------------------
168
//--------------------------------------------------------------------
170
// deleting the document just added, this will cause a different exception to take place
171
Term term = new Term("id", "doc1");
172
IndexReader readerB = IndexReader.open(indexStoreB, false);
173
readerB.deleteDocuments(term);
176
// optimizing the index with the writer
177
writerB = new IndexWriter(indexStoreB, new IndexWriterConfig(
178
TEST_VERSION_CURRENT,
179
new StandardAnalyzer(TEST_VERSION_CURRENT))
180
.setOpenMode(OpenMode.APPEND));
184
// building the searchables
185
Searcher[] searchers3 = new Searcher[2];
187
searchers3[0] = new IndexSearcher(indexStoreB, true);
188
searchers3[1] = new IndexSearcher(indexStoreA, true);
189
// creating the mulitSearcher
190
Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
191
// performing the same search
192
ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;
194
assertEquals(3, hits3.length);
196
// iterating over the hit documents
197
for (int i = 0; i < hits3.length; i++) {
198
mSearcher3.doc(hits3[i].doc);
205
private Document createDocument(String contents1, String contents2) {
206
Document document=new Document();
208
document.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
209
document.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));
210
if (contents2!=null) {
211
document.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
217
private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException {
218
IndexWriter indexWriter=null;
221
indexWriter = new IndexWriter(directory, LuceneTestCase.newIndexWriterConfig(random,
222
TEST_VERSION_CURRENT, new KeywordAnalyzer()).setOpenMode(
223
create ? OpenMode.CREATE : OpenMode.APPEND));
225
for (int i=0; i<nDocs; i++) {
226
indexWriter.addDocument(createDocument("doc" + i, contents2));
229
if (indexWriter!=null) {
235
public void testFieldSelector() throws Exception {
236
Directory ramDirectory1, ramDirectory2;
237
IndexSearcher indexSearcher1, indexSearcher2;
239
ramDirectory1 = newDirectory();
240
ramDirectory2 = newDirectory();
241
Query query = new TermQuery(new Term("contents", "doc0"));
243
// Now put the documents in a different index
244
initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
245
initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
247
indexSearcher1 = new IndexSearcher(ramDirectory1, true);
248
indexSearcher2 = new IndexSearcher(ramDirectory2, true);
250
MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
251
assertTrue("searcher is null and it shouldn't be", searcher != null);
252
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
253
assertTrue("hits is null and it shouldn't be", hits != null);
254
assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
255
Document document = searcher.doc(hits[0].doc);
256
assertTrue("document is null and it shouldn't be", document != null);
257
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
258
//Should be one document from each directory
259
//they both have two fields, contents and other
260
Set<String> ftl = new HashSet<String>();
262
SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
263
document = searcher.doc(hits[0].doc, fs);
264
assertTrue("document is null and it shouldn't be", document != null);
265
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
266
String value = document.get("contents");
267
assertTrue("value is not null and it should be", value == null);
268
value = document.get("other");
269
assertTrue("value is null and it shouldn't be", value != null);
272
fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
273
document = searcher.doc(hits[1].doc, fs);
274
value = document.get("contents");
275
assertTrue("value is null and it shouldn't be", value != null);
276
value = document.get("other");
277
assertTrue("value is not null and it should be", value == null);
278
indexSearcher1.close();
279
indexSearcher2.close();
280
ramDirectory1.close();
281
ramDirectory2.close();
285
  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
  public void testNormalization1() throws IOException {
    testNormalization(1, "Using 1 document per index:");
  }

  public void testNormalization10() throws IOException {
    testNormalization(10, "Using 10 documents per index:");
  }

  private void testNormalization(int nDocs, String message) throws IOException {
    Query query=new TermQuery(new Term("contents", "doc0"));

    ScoreDoc[] hits;
    Directory ramDirectory1;
    IndexSearcher indexSearcher1;

    ramDirectory1=newDirectory();

    // First put the documents in the same index
    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1=new IndexSearcher(ramDirectory1, true);
    indexSearcher1.setDefaultFieldSortScoring(true, true);

    hits=indexSearcher1.search(query, null, 1000).scoreDocs;

    assertEquals(message, 2, hits.length);

    // Store the scores for use later
    float[] scores={ hits[0].score, hits[1].score };

    assertTrue(message, scores[0] > scores[1]);

    indexSearcher1.close();
    ramDirectory1.close();

    Directory ramDirectory2;
    IndexSearcher indexSearcher2;

    ramDirectory1=newDirectory();
    ramDirectory2=newDirectory();

    // Now put the documents in a different index
    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1=new IndexSearcher(ramDirectory1, true);
    indexSearcher1.setDefaultFieldSortScoring(true, true);
    indexSearcher2=new IndexSearcher(ramDirectory2, true);
    indexSearcher2.setDefaultFieldSortScoring(true, true);

    Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });

    hits=searcher.search(query, null, 1000).scoreDocs;

    assertEquals(message, 2, hits.length);

    // The scores should be the same (within reason)
    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2

    // Adding a Sort.RELEVANCE object should not change anything
    hits=searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs;

    assertEquals(message, 2, hits.length);

    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2

    searcher.close();

    ramDirectory1.close();
    ramDirectory2.close();
  }
  */
* test that custom similarity is in effect when using MultiSearcher (LUCENE-789).
369
* @throws IOException
371
public void testCustomSimilarity () throws IOException {
372
Directory dir = newDirectory();
373
initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
374
IndexSearcher srchr = new IndexSearcher(dir, true);
375
MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr});
377
Similarity customSimilarity = new DefaultSimilarity() {
380
public float idf(int docFreq, int numDocs) { return 100.0f; }
382
public float coord(int overlap, int maxOverlap) { return 1.0f; }
384
public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); }
386
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
388
public float sloppyFreq(int distance) { return 1.0f; }
390
public float tf(float freq) { return 1.0f; }
393
srchr.setSimilarity(customSimilarity);
394
msrchr.setSimilarity(customSimilarity);
396
Query query=new TermQuery(new Term("contents", "doc0"));
398
// Get a score from IndexSearcher
399
TopDocs topDocs = srchr.search(query, null, 1);
400
float score1 = topDocs.getMaxScore();
402
// Get the score from MultiSearcher
403
topDocs = msrchr.search(query, null, 1);
404
float scoreN = topDocs.getMaxScore();
406
// The scores from the IndexSearcher and Multisearcher should be the same
407
// if the same similarity is used.
408
assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
414
public void testDocFreq() throws IOException{
415
Directory dir1 = newDirectory();
416
Directory dir2 = newDirectory();
418
initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
419
initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
420
IndexSearcher searcher1 = new IndexSearcher(dir1, true);
421
IndexSearcher searcher2 = new IndexSearcher(dir2, true);
423
MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
424
assertEquals(15, multiSearcher.docFreq(new Term("contents","x")));
425
multiSearcher.close();
432
public void testCreateDocFrequencyMap() throws IOException{
433
Directory dir1 = newDirectory();
434
Directory dir2 = newDirectory();
435
Term template = new Term("contents") ;
436
String[] contents = {"a", "b", "c"};
437
HashSet<Term> termsSet = new HashSet<Term>();
438
for (int i = 0; i < contents.length; i++) {
439
initIndex(random, dir1, i+10, i==0, contents[i]);
440
initIndex(random, dir2, i+5, i==0, contents[i]);
441
termsSet.add(template.createTerm(contents[i]));
443
IndexSearcher searcher1 = new IndexSearcher(dir1, true);
444
IndexSearcher searcher2 = new IndexSearcher(dir2, true);
445
MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
446
Map<Term,Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
447
assertEquals(3, docFrequencyMap.size());
448
for (int i = 0; i < contents.length; i++) {
449
assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i])));
451
multiSearcher.close();