1
package org.apache.lucene.search;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.index.IndexWriterConfig;
21
import org.apache.lucene.index.RandomIndexWriter;
22
import org.apache.lucene.index.Term;
23
import org.apache.lucene.index.TermEnum;
24
import org.apache.lucene.index.IndexReader;
25
import org.apache.lucene.queryParser.ParseException;
26
import org.apache.lucene.queryParser.QueryParser;
27
import org.apache.lucene.search.Explanation.IDFExplanation;
28
import org.apache.lucene.store.Directory;
29
import org.apache.lucene.analysis.Analyzer;
30
import org.apache.lucene.analysis.SimpleAnalyzer;
31
import org.apache.lucene.analysis.TokenStream;
32
import org.apache.lucene.analysis.Tokenizer;
33
import org.apache.lucene.analysis.standard.StandardAnalyzer;
34
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
35
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
36
import org.apache.lucene.document.Document;
37
import org.apache.lucene.document.Field;
38
import org.apache.lucene.index.IndexWriter;
39
import org.apache.lucene.search.IndexSearcher;
40
import org.apache.lucene.store.RAMDirectory;
41
import org.apache.lucene.util.LuceneTestCase;
43
import java.io.IOException;
44
import java.util.Collection;
45
import java.util.LinkedList;
46
import java.io.Reader;
49
* This class tests the MultiPhraseQuery class.
53
public class TestMultiPhraseQuery extends LuceneTestCase {
55
// Exercises MultiPhraseQuery used as a "phrase prefix" query: seven short
// documents are indexed through add(String, RandomIndexWriter), term arrays
// are collected from a TermEnum, and both the toString() form and the hit
// counts of the resulting queries are asserted.
// NOTE(review): this listing appears to have lost source lines (the bare
// line numbers skip values). The declarations of `prefix` and `result`, the
// loop around each TermEnum walk, and the `try` matching the final `catch`
// are not visible here -- confirm against the real file.
public void testPhrasePrefix() throws IOException {
56
Directory indexStore = newDirectory();
57
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
58
add("blueberry pie", writer);
59
add("blueberry strudel", writer);
60
add("blueberry pizza", writer);
61
add("blueberry chewing gum", writer);
62
add("bluebird pizza", writer);
63
add("bluebird foobar pizza", writer);
64
add("piccadilly circus", writer);
66
IndexReader reader = writer.getReader();
67
IndexSearcher searcher = newSearcher(reader);
69
// search for "blueberry pi*":
70
MultiPhraseQuery query1 = new MultiPhraseQuery();
71
// search for "strawberry pi*":
72
MultiPhraseQuery query2 = new MultiPhraseQuery();
73
query1.add(new Term("body", "blueberry"));
74
query2.add(new Term("body", "strawberry"));
76
// Collects the terms that will form the second (multi-term) phrase position.
LinkedList<Term> termsWithPrefix = new LinkedList<Term>();
77
IndexReader ir = reader;
79
// this TermEnum gives "piccadilly", "pie" and "pizza".
81
TermEnum te = ir.terms(new Term("body", prefix));
83
// Only terms whose text starts with the prefix are kept.
if (te.term().text().startsWith(prefix))
85
termsWithPrefix.add(te.term());
89
// Both queries receive the same collected term array as their next position.
query1.add(termsWithPrefix.toArray(new Term[0]));
90
assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
91
query2.add(termsWithPrefix.toArray(new Term[0]));
92
assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2.toString());
95
// query1 is expected to match 2 documents; query2 starts with "strawberry",
// which none of the added texts contain, and is expected to match none.
result = searcher.search(query1, null, 1000).scoreDocs;
96
assertEquals(2, result.length);
97
result = searcher.search(query2, null, 1000).scoreDocs;
98
assertEquals(0, result.length);
100
// search for "blue* pizza":
101
MultiPhraseQuery query3 = new MultiPhraseQuery();
102
termsWithPrefix.clear();
104
// Second TermEnum walk, reusing the (cleared) list for the new prefix.
te = ir.terms(new Term("body", prefix));
106
if (te.term().text().startsWith(prefix))
108
termsWithPrefix.add(te.term());
111
query3.add(termsWithPrefix.toArray(new Term[0]));
112
query3.add(new Term("body", "pizza"));
114
result = searcher.search(query3, null, 1000).scoreDocs;
115
assertEquals(2, result.length); // blueberry pizza, bluebird pizza
116
assertEquals("body:\"(blueberry bluebird) pizza\"", query3.toString());
120
// NOTE(review): query3 is searched a second time and now 3 hits are expected.
// The statement that alters the query in between is not visible in this
// listing (presumably a slop change, given the extra hit) -- confirm.
result = searcher.search(query3, null, 1000).scoreDocs;
122
// just make sure no exc:
123
searcher.explain(query3, 0);
125
assertEquals(3, result.length); // blueberry pizza, bluebird pizza, bluebird foobar pizza
127
// Terms from two different fields are added to a single query; the catch
// below treats IllegalArgumentException as the accepted outcome.
MultiPhraseQuery query4 = new MultiPhraseQuery();
129
query4.add(new Term("field1", "foo"));
130
query4.add(new Term("field2", "foobar"));
132
} catch(IllegalArgumentException e) {
133
// okay, all terms must belong to the same field
143
// Indexes two documents and runs a three-position MultiPhraseQuery:
// "blueberry", "chocolate", then either "pie" or "tart".
// NOTE(review): some lines of this method are not visible in this listing
// (the bare line numbers skip values).
public void testTall() throws IOException {
144
Directory indexStore = newDirectory();
145
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
146
add("blueberry chocolate pie", writer);
147
add("blueberry chocolate tart", writer);
148
IndexReader r = writer.getReader();
151
IndexSearcher searcher = newSearcher(r);
152
MultiPhraseQuery q = new MultiPhraseQuery();
153
q.add(new Term("body", "blueberry"));
154
q.add(new Term("body", "chocolate"));
155
// The last position accepts either of two terms.
q.add(new Term[] {new Term("body", "pie"), new Term("body", "tart")});
156
// Only 1 top hit is requested, but totalHits is asserted to be 2.
assertEquals(2, searcher.search(q, 1).totalHits);
162
// Helper: adds one document to the writer whose "body" field holds s
// (created via newField with Field.Store.YES and Field.Index.ANALYZED).
private void add(String s, RandomIndexWriter writer) throws IOException {
163
Document doc = new Document();
164
doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED));
165
writer.addDocument(doc);
168
// Runs a BooleanQuery with two MUST clauses: a TermQuery on "pie" and a
// MultiPhraseQuery holding exactly one term array ("blueberry" or "blue").
// NOTE(review): the remainder of the method signature (the line after the
// parameter list) and some later lines are not visible in this listing.
public void testBooleanQueryContainingSingleTermPrefixQuery()
170
// this tests against bug 33161 (now fixed)
171
// In order to cause the bug, the outer query must have more than one term
172
// and all terms required.
173
// The contained MultiPhraseQuery must contain exactly one term array.
174
Directory indexStore = newDirectory();
175
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
176
add("blueberry pie", writer);
177
add("blueberry chewing gum", writer);
178
add("blue raspberry pie", writer);
180
IndexReader reader = writer.getReader();
181
IndexSearcher searcher = newSearcher(reader);
182
// This query will be equivalent to +body:pie +body:"blue*"
183
BooleanQuery q = new BooleanQuery();
184
q.add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);
186
// The single-position MultiPhraseQuery that the bug scenario requires.
MultiPhraseQuery trouble = new MultiPhraseQuery();
187
trouble.add(new Term[] {new Term("body", "blueberry"),
188
new Term("body", "blue")});
189
q.add(trouble, BooleanClause.Occur.MUST);
191
// exception will be thrown here without fix
192
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
194
// 2 hits are expected (presumably "blueberry pie" and "blue raspberry pie").
assertEquals("Wrong number of hits", 2, hits.length);
196
// just make sure no exc:
197
searcher.explain(q, 0);
205
// Combines a TermQuery on the "type" field with a two-position
// MultiPhraseQuery on "body" as MUST clauses and asserts that nothing matches.
// NOTE(review): some lines are not visible in this listing; in particular the
// receiver of the `.add(...)` call on the term array is on a missing line.
public void testPhrasePrefixWithBooleanQuery() throws IOException {
206
Directory indexStore = newDirectory();
207
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
208
add("This is a test", "object", writer);
209
add("a note", "note", writer);
211
IndexReader reader = writer.getReader();
212
IndexSearcher searcher = newSearcher(reader);
214
// This query will be equivalent to +type:note +body:"a t*"
215
BooleanQuery q = new BooleanQuery();
216
q.add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
218
MultiPhraseQuery trouble = new MultiPhraseQuery();
219
trouble.add(new Term("body", "a"));
221
// Second position: either "test" or "this".
.add(new Term[] {new Term("body", "test"), new Term("body", "this")});
222
q.add(trouble, BooleanClause.Occur.MUST);
224
// exception will be thrown here without fix for #35626:
225
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
226
assertEquals("Wrong number of hits", 0, hits.length);
233
// Searches with a MultiPhraseQuery whose second position contains only the
// term "nope" (listed twice) and asserts zero hits; explain() is then called
// purely to check that it does not throw.
// NOTE(review): some lines of this method are not visible in this listing.
public void testNoDocs() throws Exception {
234
Directory indexStore = newDirectory();
235
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
236
add("a note", "note", writer);
238
IndexReader reader = writer.getReader();
239
IndexSearcher searcher = newSearcher(reader);
241
MultiPhraseQuery q = new MultiPhraseQuery();
242
q.add(new Term("body", "a"));
243
// The same term appears twice in the array.
q.add(new Term[] {new Term("body", "nope"), new Term("body", "nope")});
244
assertEquals("Wrong number of hits", 0,
245
searcher.search(q, null, 1).totalHits);
247
// just make sure no exc:
248
searcher.explain(q, 0);
256
// Checks that hashCode() and equals() of two MultiPhraseQuery instances agree
// with each other at several stages.
// NOTE(review): the statements that modify query1/query2 between the
// assertion groups (presumably add(term1)/add(term2) calls) are not visible
// in this listing -- confirm against the real file.
public void testHashCodeAndEquals() {
257
MultiPhraseQuery query1 = new MultiPhraseQuery();
258
MultiPhraseQuery query2 = new MultiPhraseQuery();
260
// Two freshly constructed queries must be equal with equal hash codes.
assertEquals(query1.hashCode(), query2.hashCode());
261
assertEquals(query1, query2);
263
Term term1 = new Term("someField", "someText");
268
// Stage 2: the queries are expected to be equal again.
assertEquals(query1.hashCode(), query2.hashCode());
269
assertEquals(query1, query2);
271
Term term2 = new Term("someField", "someMoreText");
275
// Stage 3: the queries are expected to differ in both hash code and equals().
assertFalse(query1.hashCode() == query2.hashCode());
276
assertFalse(query1.equals(query2));
280
// Stage 4: the queries are expected to be equal once more.
assertEquals(query1.hashCode(), query2.hashCode());
281
assertEquals(query1, query2);
284
// Helper: adds one document with an analyzed "body" field holding s and a
// not-analyzed "type" field holding type; both fields are stored.
// NOTE(review): the rest of the signature (the line after the parameter
// list) is not visible in this listing.
private void add(String s, String type, RandomIndexWriter writer)
286
Document doc = new Document();
287
doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED));
288
doc.add(newField("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED));
289
writer.addDocument(doc);
293
// Calls toString() on a MultiPhraseQuery that has no terms. The result is
// discarded; the visible code makes no assertion, so the check is only that
// the call completes without throwing.
public void testEmptyToString() {
294
new MultiPhraseQuery().toString();
297
// Installs a DefaultSimilarity subclass with a custom idfExplain() on the
// searcher and asserts the sumOfSquaredWeights() of the Weight created for a
// MultiPhraseQuery.
// NOTE(review): several lines are not visible in this listing, including the
// body of getIdf() (presumably it returns 10, given the 10f * 10f expectation).
public void testCustomIDF() throws Exception {
298
Directory indexStore = newDirectory();
299
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
300
add("This is a test", "object", writer);
301
add("a note", "note", writer);
303
IndexReader reader = writer.getReader();
304
IndexSearcher searcher = newSearcher(reader);
305
searcher.setSimilarity(new DefaultSimilarity() {
308
// Returns a fixed IDFExplanation built from the anonymous class below.
public IDFExplanation idfExplain(Collection<Term> terms,
309
Searcher searcher) throws IOException {
310
return new IDFExplanation() {
313
public float getIdf() {
318
public String explain() {
319
return "just a test";
326
// First position: "this" or "that"; second position: "is".
MultiPhraseQuery query = new MultiPhraseQuery();
327
query.add(new Term[] { new Term("body", "this"), new Term("body", "that") });
328
query.add(new Term("body", "is"));
329
Weight weight = query.createWeight(searcher);
330
// Expected value is 10f * 10f, compared with a tolerance of 0.001f.
assertEquals(10f * 10f, weight.sumOfSquaredWeights(), 0.001f);
338
// Immutable pair of a token string and an int position, used to describe the
// canned token streams in the tests of this class.
// NOTE(review): the constructor body and closing braces are not visible in
// this listing.
private static class TokenAndPos {
339
public final String token;
340
public final int pos;
341
public TokenAndPos(String token, int pos) {
347
// Analyzer that always produces the token array it was constructed with.
private static class CannedAnalyzer extends Analyzer {
348
private final TokenAndPos[] tokens;
350
public CannedAnalyzer(TokenAndPos[] tokens) {
351
this.tokens = tokens;
355
// fieldName and reader are not used: the stream is built from the canned
// tokens only, so the analyzed text has no influence on the output.
public TokenStream tokenStream(String fieldName, Reader reader) {
356
return new CannedTokenizer(tokens);
360
// Tokenizer that replays a fixed TokenAndPos array, one entry per
// incrementToken() call.
// NOTE(review): parts of this class are not visible in this listing: any
// update of lastPos, the return statements of incrementToken(), and the body
// of reset().
private static class CannedTokenizer extends Tokenizer {
361
private final TokenAndPos[] tokens;
362
// Index of the next entry of `tokens` to emit.
private int upto = 0;
363
// Subtracted from each token's pos to compute its position increment.
private int lastPos = 0;
364
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
365
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
367
public CannedTokenizer(TokenAndPos[] tokens) {
368
this.tokens = tokens;
372
public final boolean incrementToken() throws IOException {
374
if (upto < tokens.length) {
375
final TokenAndPos token = tokens[upto++];
376
termAtt.setTermBuffer(token.token);
377
// TokenAndPos.pos is converted into an increment relative to lastPos.
posIncrAtt.setPositionIncrement(token.pos - lastPos);
386
public void reset() throws IOException {
393
// Indexes the same document twice with a CannedAnalyzer whose three tokens
// ("a", "b", "c") all carry position 0, then checks that a MultiPhraseQuery
// with every term array at position 0 finds both documents with equal scores.
// NOTE(review): some lines are not visible in this listing, including the
// conditional that selects between the two orderings of mpq.add(...) below
// and the end of the existing NOTE comment.
public void testZeroPosIncr() throws IOException {
394
Directory dir = new RAMDirectory();
395
final TokenAndPos[] tokens = new TokenAndPos[3];
396
tokens[0] = new TokenAndPos("a", 0);
397
tokens[1] = new TokenAndPos("b", 0);
398
tokens[2] = new TokenAndPos("c", 0);
400
IndexWriter writer = new IndexWriter(dir, new CannedAnalyzer(tokens), true, IndexWriter.MaxFieldLength.LIMITED);
401
Document doc = new Document();
402
// The field text is empty; CannedAnalyzer supplies the tokens regardless.
doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
403
writer.addDocument(doc);
404
writer.addDocument(doc);
405
IndexReader r = writer.getReader();
407
IndexSearcher s = new IndexSearcher(r);
408
MultiPhraseQuery mpq = new MultiPhraseQuery();
411
// NOTE: not great that if we do the else clause here we
412
// get different scores! MultiPhraseQuery counts that
413
// phrase as occurring twice per doc (it should be 1, I
414
// think?). This is because MultipleTermPositions is able to
415
// return the same position more than once (0, in this
418
// Ordering 1: the two-term array first, then the single term, both at position 0.
mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
419
mpq.add(new Term[] {new Term("field", "a")}, 0);
421
// Ordering 2: the single term first, then the two-term array, both at position 0.
mpq.add(new Term[] {new Term("field", "a")}, 0);
422
mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
424
TopDocs hits = s.search(mpq, 2);
425
assertEquals(2, hits.totalHits);
426
// Both hits must have the same score within 1e-5.
assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
428
// Prints each hit's doc id and score to standard output.
for(int hit=0;hit<hits.totalHits;hit++) {
429
ScoreDoc sd = hits.scoreDocs[hit];
430
System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
437
// Token tables shared by the testZeroPosIncrSloppy* tests. Each entry is a
// token text plus a position; tokens listed with the same position overlap.
// NOTE(review): the closing `};` line of each array is not visible in this
// listing.

// Tokens of the indexed document (passed to CannedAnalyzer in
// doTestZeroPosIncrSloppy). "1" shares position 1 with "a" and position 3
// with "b".
private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
438
new TokenAndPos("x", 0),
439
new TokenAndPos("a", 1),
440
new TokenAndPos("1", 1),
441
new TokenAndPos("m", 2), // not existing, relying on slop=2
442
new TokenAndPos("b", 3),
443
new TokenAndPos("1", 3),
444
new TokenAndPos("n", 4), // not existing, relying on slop=2
445
new TokenAndPos("c", 5),
446
new TokenAndPos("y", 6)
449
// Query tokens for the "AND" tests: "a" and "1" at position 0, "b" and "1"
// at position 1, "c" at position 2.
private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
450
new TokenAndPos("a", 0),
451
new TokenAndPos("1", 0),
452
new TokenAndPos("b", 1),
453
new TokenAndPos("1", 1),
454
new TokenAndPos("c", 2)
457
// Query rows for testZeroPosIncrSloppyMpqAndOrMatch: each inner array is
// turned into one Term[] and added at the position of its first entry.
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
458
{ new TokenAndPos("a", 0) },
459
{ new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
460
{ new TokenAndPos("b", 1) },
461
{ new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
462
{ new TokenAndPos("c", 2) }
465
// Query rows for testZeroPosIncrSloppyMpqAndOrNoMatch; same layout as the
// table above but with "x" as the single-term rows at positions 0 and 1.
// NOTE(review): the trailing "N" in the name looks like a typo, but the test
// refers to the constant by this exact name, so it is left unchanged.
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
466
{ new TokenAndPos("x", 0) },
467
{ new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
468
{ new TokenAndPos("x", 1) },
469
{ new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
470
{ new TokenAndPos("c", 2) }
474
* using query parser, MPQ will be created, and will not be strict about having all query terms
475
* in each position - one of each position is sufficient (OR logic)
477
// Builds the query through QueryParser with a CannedAnalyzer over
// INCR_0_QUERY_TOKENS_AND, checks that the parser produced a
// MultiPhraseQuery, and runs doTestZeroPosIncrSloppy three times: before any
// setSlop call (0 hits expected), with slop 1 (0 hits) and with slop 2 (1 hit).
public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
478
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
479
// The phrase text itself does not matter: CannedAnalyzer.tokenStream does
// not read its input and always emits the canned tokens.
final Query q = qp.parse("\"this text is acually ignored\"");
480
assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
481
doTestZeroPosIncrSloppy(q, 0);
482
((MultiPhraseQuery) q).setSlop(1);
483
doTestZeroPosIncrSloppy(q, 0);
484
((MultiPhraseQuery) q).setSlop(2);
485
doTestZeroPosIncrSloppy(q, 1);
488
// Shared driver for the testZeroPosIncrSloppy* tests: indexes a single
// document whose tokens come from INCR_0_DOC_TOKENS, runs q against it and
// asserts that totalHits equals nExpected.
// NOTE(review): some lines are not visible in this listing; the println
// calls may be guarded by a condition on a missing line -- confirm.
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
489
Directory dir = newDirectory(); // random dir
490
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
491
IndexWriter writer = new IndexWriter(dir, cfg);
492
Document doc = new Document();
493
// Empty field text: the CannedAnalyzer provides the tokens.
doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
494
writer.addDocument(doc);
495
// The reader is opened directly from the still-open writer.
IndexReader r = IndexReader.open(writer,false);
497
IndexSearcher s = new IndexSearcher(r);
500
System.out.println("QUERY=" + q);
503
TopDocs hits = s.search(q, 1);
504
assertEquals("wrong number of results", nExpected, hits.totalHits);
507
// Prints each hit's doc id and score to standard output.
for(int hit=0;hit<hits.totalHits;hit++) {
508
ScoreDoc sd = hits.scoreDocs[hit];
509
System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
518
* PQ AND Mode - Manually creating a phrase query
520
// Builds a plain PhraseQuery by adding every INCR_0_QUERY_TOKENS_AND entry at
// its own position, then runs doTestZeroPosIncrSloppy three times expecting
// 0, 0 and 1 hits.
// NOTE(review): the lines between the three calls are not visible in this
// listing; presumably the slop is raised in between, as in
// testZeroPosIncrSloppyParsedAnd -- confirm.
public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
521
final PhraseQuery pq = new PhraseQuery();
522
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
523
pq.add(new Term("field",tap.token), tap.pos);
525
doTestZeroPosIncrSloppy(pq, 0);
527
doTestZeroPosIncrSloppy(pq, 0);
529
doTestZeroPosIncrSloppy(pq, 1);
533
* MPQ AND Mode - Manually creating a multiple phrase query
535
// Builds a MultiPhraseQuery in which every INCR_0_QUERY_TOKENS_AND entry is
// added as its own single-element term array at its position, then runs
// doTestZeroPosIncrSloppy three times expecting 0, 0 and 1 hits.
// NOTE(review): the lines between the three calls are not visible in this
// listing; presumably the slop is raised in between, as in
// testZeroPosIncrSloppyParsedAnd -- confirm.
public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
536
final MultiPhraseQuery mpq = new MultiPhraseQuery();
537
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
538
mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
540
doTestZeroPosIncrSloppy(mpq, 0);
542
doTestZeroPosIncrSloppy(mpq, 0);
544
doTestZeroPosIncrSloppy(mpq, 1);
548
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query
550
// Builds a MultiPhraseQuery from INCR_0_QUERY_TOKENS_AND_OR_MATCH: each row is
// converted to a Term[] by tapTerms and added at the position of the row's
// first entry. doTestZeroPosIncrSloppy is then run three times expecting
// 0, 0 and 1 hits.
// NOTE(review): the lines between the three calls are not visible in this
// listing; presumably the slop is raised in between, as in
// testZeroPosIncrSloppyParsedAnd -- confirm.
public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
551
final MultiPhraseQuery mpq = new MultiPhraseQuery();
552
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
553
Term[] terms = tapTerms(tap);
554
// All entries of a row share one position, so the first entry's pos is used.
final int pos = tap[0].pos;
555
mpq.add(terms, pos); //AND logic in pos, OR across lines
557
doTestZeroPosIncrSloppy(mpq, 0);
559
doTestZeroPosIncrSloppy(mpq, 0);
561
doTestZeroPosIncrSloppy(mpq, 1);
565
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
567
// Builds a MultiPhraseQuery from INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN in the
// same way as testZeroPosIncrSloppyMpqAndOrMatch (one Term[] per row, added at
// the row's first position) and expects 0 hits from both visible calls to
// doTestZeroPosIncrSloppy.
// NOTE(review): the lines between and after the two calls are not visible in
// this listing; presumably the slop is changed in between -- confirm.
public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
568
final MultiPhraseQuery mpq = new MultiPhraseQuery();
569
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
570
Term[] terms = tapTerms(tap);
571
final int pos = tap[0].pos;
572
mpq.add(terms, pos); //AND logic in pos, OR across lines
574
doTestZeroPosIncrSloppy(mpq, 0);
576
doTestZeroPosIncrSloppy(mpq, 0);
579
private Term[] tapTerms(TokenAndPos[] tap) {
580
Term[] terms = new Term[tap.length];
581
for (int i=0; i<terms.length; i++) {
582
terms[i] = new Term("field",tap[i].token);