1
package org.apache.lucene.search;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.util.LuceneTestCase;
21
import org.apache.lucene.analysis.*;
22
import org.apache.lucene.analysis.tokenattributes.*;
23
import org.apache.lucene.document.*;
24
import org.apache.lucene.index.*;
25
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
26
import org.apache.lucene.queryParser.QueryParser;
27
import org.apache.lucene.store.*;
28
import org.apache.lucene.util.Version;
29
import org.apache.lucene.util._TestUtil;
30
import org.junit.AfterClass;
31
import org.junit.BeforeClass;
33
import java.io.IOException;
34
import java.io.Reader;
35
import java.io.StringReader;
36
import java.util.List;
37
import java.util.ArrayList;
38
import java.util.Random;
41
* Tests {@link PhraseQuery}.
43
* @see TestPositionIncrement
45
public class TestPhraseQuery extends LuceneTestCase {
47
/** Tolerance used when two float scores are compared for equality. */
48
public static final float SCORE_COMP_THRESH = 1e-6f;
50
/** Searcher over the shared index; assigned in beforeClass() from newSearcher(reader). */
private static IndexSearcher searcher;
51
/** Reader over the shared index; assigned in beforeClass() from the index writer. */
private static IndexReader reader;
52
/** Phrase query under test; setUp() assigns a new, empty PhraseQuery to it. */
private PhraseQuery query;
53
/** Directory holding the shared index; assigned in beforeClass() via newDirectory(). */
private static Directory directory;
56
public static void beforeClass() throws Exception {
57
directory = newDirectory();
58
Analyzer analyzer = new Analyzer() {
60
public TokenStream tokenStream(String fieldName, Reader reader) {
61
return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
65
public int getPositionIncrementGap(String fieldName) {
69
RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer);
71
Document doc = new Document();
72
doc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
73
doc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
74
Fieldable repeatedField = newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
75
doc.add(repeatedField);
76
doc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
77
writer.addDocument(doc);
80
doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
81
writer.addDocument(doc);
84
doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
85
writer.addDocument(doc);
87
reader = writer.getReader();
90
searcher = newSearcher(reader);
94
public void setUp() throws Exception {
96
query = new PhraseQuery();
100
public static void afterClass() throws Exception {
109
public void testNotCloseEnough() throws Exception {
111
query.add(new Term("field", "one"));
112
query.add(new Term("field", "five"));
113
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
114
assertEquals(0, hits.length);
115
QueryUtils.check(random, query,searcher);
118
public void testBarelyCloseEnough() throws Exception {
120
query.add(new Term("field", "one"));
121
query.add(new Term("field", "five"));
122
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
123
assertEquals(1, hits.length);
124
QueryUtils.check(random, query,searcher);
128
* Ensures slop of 0 works for exact matches, but not reversed
130
public void testExact() throws Exception {
131
// slop is zero by default
132
query.add(new Term("field", "four"));
133
query.add(new Term("field", "five"));
134
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
135
assertEquals("exact match", 1, hits.length);
136
QueryUtils.check(random, query,searcher);
139
query = new PhraseQuery();
140
query.add(new Term("field", "two"));
141
query.add(new Term("field", "one"));
142
hits = searcher.search(query, null, 1000).scoreDocs;
143
assertEquals("reverse not exact", 0, hits.length);
144
QueryUtils.check(random, query,searcher);
147
public void testSlop1() throws Exception {
148
// Ensures slop of 1 works with terms in order.
150
query.add(new Term("field", "one"));
151
query.add(new Term("field", "two"));
152
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
153
assertEquals("in order", 1, hits.length);
154
QueryUtils.check(random, query,searcher);
157
// Ensures slop of 1 does not work for phrases out of order;
158
// must be at least 2.
159
query = new PhraseQuery();
161
query.add(new Term("field", "two"));
162
query.add(new Term("field", "one"));
163
hits = searcher.search(query, null, 1000).scoreDocs;
164
assertEquals("reversed, slop not 2 or more", 0, hits.length);
165
QueryUtils.check(random, query,searcher);
169
* As long as slop is at least 2, terms can be reversed
171
public void testOrderDoesntMatter() throws Exception {
172
query.setSlop(2); // must be at least two for reverse order match
173
query.add(new Term("field", "two"));
174
query.add(new Term("field", "one"));
175
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
176
assertEquals("just sloppy enough", 1, hits.length);
177
QueryUtils.check(random, query,searcher);
180
query = new PhraseQuery();
182
query.add(new Term("field", "three"));
183
query.add(new Term("field", "one"));
184
hits = searcher.search(query, null, 1000).scoreDocs;
185
assertEquals("not sloppy enough", 0, hits.length);
186
QueryUtils.check(random, query,searcher);
191
* slop is the total number of positional moves allowed
192
* to line up a phrase
194
public void testMulipleTerms() throws Exception {
196
query.add(new Term("field", "one"));
197
query.add(new Term("field", "three"));
198
query.add(new Term("field", "five"));
199
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
200
assertEquals("two total moves", 1, hits.length);
201
QueryUtils.check(random, query,searcher);
204
query = new PhraseQuery();
205
query.setSlop(5); // it takes six moves to match this phrase
206
query.add(new Term("field", "five"));
207
query.add(new Term("field", "three"));
208
query.add(new Term("field", "one"));
209
hits = searcher.search(query, null, 1000).scoreDocs;
210
assertEquals("slop of 5 not close enough", 0, hits.length);
211
QueryUtils.check(random, query,searcher);
215
hits = searcher.search(query, null, 1000).scoreDocs;
216
assertEquals("slop of 6 just right", 1, hits.length);
217
QueryUtils.check(random, query,searcher);
221
public void testPhraseQueryWithStopAnalyzer() throws Exception {
222
Directory directory = newDirectory();
223
StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
224
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
225
newIndexWriterConfig( Version.LUCENE_24, stopAnalyzer));
226
Document doc = new Document();
227
doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
228
writer.addDocument(doc);
229
IndexReader reader = writer.getReader();
232
IndexSearcher searcher = newSearcher(reader);
234
// valid exact phrase query
235
PhraseQuery query = new PhraseQuery();
236
query.add(new Term("field","stop"));
237
query.add(new Term("field","words"));
238
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
239
assertEquals(1, hits.length);
240
QueryUtils.check(random, query,searcher);
243
// StopAnalyzer as of 2.4 does not leave "holes", so this matches.
244
query = new PhraseQuery();
245
query.add(new Term("field", "words"));
246
query.add(new Term("field", "here"));
247
hits = searcher.search(query, null, 1000).scoreDocs;
248
assertEquals(1, hits.length);
249
QueryUtils.check(random, query,searcher);
257
public void testPhraseQueryInConjunctionScorer() throws Exception {
258
Directory directory = newDirectory();
259
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
261
Document doc = new Document();
262
doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
263
writer.addDocument(doc);
265
doc = new Document();
266
doc.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
267
doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
268
writer.addDocument(doc);
270
IndexReader reader = writer.getReader();
273
IndexSearcher searcher = newSearcher(reader);
275
PhraseQuery phraseQuery = new PhraseQuery();
276
phraseQuery.add(new Term("source", "marketing"));
277
phraseQuery.add(new Term("source", "info"));
278
ScoreDoc[] hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
279
assertEquals(2, hits.length);
280
QueryUtils.check(random, phraseQuery,searcher);
283
TermQuery termQuery = new TermQuery(new Term("contents","foobar"));
284
BooleanQuery booleanQuery = new BooleanQuery();
285
booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
286
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
287
hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
288
assertEquals(1, hits.length);
289
QueryUtils.check(random, termQuery,searcher);
295
writer = new RandomIndexWriter(random, directory,
296
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
297
doc = new Document();
298
doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
299
writer.addDocument(doc);
301
doc = new Document();
302
doc.add(newField("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
303
writer.addDocument(doc);
305
doc = new Document();
306
doc.add(newField("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
307
writer.addDocument(doc);
309
reader = writer.getReader();
312
searcher = newSearcher(reader);
314
termQuery = new TermQuery(new Term("contents","woo"));
315
phraseQuery = new PhraseQuery();
316
phraseQuery.add(new Term("contents","map"));
317
phraseQuery.add(new Term("contents","entry"));
319
hits = searcher.search(termQuery, null, 1000).scoreDocs;
320
assertEquals(3, hits.length);
321
hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
322
assertEquals(2, hits.length);
325
booleanQuery = new BooleanQuery();
326
booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
327
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
328
hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
329
assertEquals(2, hits.length);
331
booleanQuery = new BooleanQuery();
332
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
333
booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
334
hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
335
assertEquals(2, hits.length);
336
QueryUtils.check(random, booleanQuery,searcher);
344
public void testSlopScoring() throws IOException {
345
Directory directory = newDirectory();
346
RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
348
Document doc = new Document();
349
doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
350
writer.addDocument(doc);
352
Document doc2 = new Document();
353
doc2.add(newField("field", "foo firstname zzz lastname foo", Field.Store.YES, Field.Index.ANALYZED));
354
writer.addDocument(doc2);
356
Document doc3 = new Document();
357
doc3.add(newField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
358
writer.addDocument(doc3);
360
IndexReader reader = writer.getReader();
363
IndexSearcher searcher = newSearcher(reader);
364
PhraseQuery query = new PhraseQuery();
365
query.add(new Term("field", "firstname"));
366
query.add(new Term("field", "lastname"));
367
query.setSlop(Integer.MAX_VALUE);
368
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
369
assertEquals(3, hits.length);
370
// Make sure that those matches where the terms appear closer to
371
// each other get a higher score:
372
assertEquals(0.71, hits[0].score, 0.01);
373
assertEquals(0, hits[0].doc);
374
assertEquals(0.44, hits[1].score, 0.01);
375
assertEquals(1, hits[1].doc);
376
assertEquals(0.31, hits[2].score, 0.01);
377
assertEquals(2, hits[2].doc);
378
QueryUtils.check(random, query,searcher);
384
public void testToString() throws Exception {
385
StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
386
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
387
qp.setEnablePositionIncrements(true);
388
PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
389
assertEquals("field:\"? hi ? ? ? test\"", q.toString());
390
q.add(new Term("field", "hello"), 1);
391
assertEquals("field:\"? hi|hello ? ? ? test\"", q.toString());
394
public void testWrappedPhrase() throws IOException {
395
query.add(new Term("repeated", "first"));
396
query.add(new Term("repeated", "part"));
397
query.add(new Term("repeated", "second"));
398
query.add(new Term("repeated", "part"));
401
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
402
assertEquals("slop of 100 just right", 1, hits.length);
403
QueryUtils.check(random, query,searcher);
407
hits = searcher.search(query, null, 1000).scoreDocs;
408
assertEquals("slop of 99 not enough", 0, hits.length);
409
QueryUtils.check(random, query,searcher);
412
// work on two docs like this: "phrase exist notexist exist found"
413
public void testNonExistingPhrase() throws IOException {
414
// phrase without repetitions that exists in 2 docs
415
query.add(new Term("nonexist", "phrase"));
416
query.add(new Term("nonexist", "notexist"));
417
query.add(new Term("nonexist", "found"));
418
query.setSlop(2); // would be found this way
420
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
421
assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length);
422
QueryUtils.check(random, query,searcher);
424
// phrase with repetitions that exists in 2 docs
425
query = new PhraseQuery();
426
query.add(new Term("nonexist", "phrase"));
427
query.add(new Term("nonexist", "exist"));
428
query.add(new Term("nonexist", "exist"));
429
query.setSlop(1); // would be found
431
hits = searcher.search(query, null, 1000).scoreDocs;
432
assertEquals("phrase with repetitions exists in two docs", 2, hits.length);
433
QueryUtils.check(random, query,searcher);
435
// phrase I with repetitions that does not exist in any doc
436
query = new PhraseQuery();
437
query.add(new Term("nonexist", "phrase"));
438
query.add(new Term("nonexist", "notexist"));
439
query.add(new Term("nonexist", "phrase"));
440
query.setSlop(1000); // would not be found no matter how high the slop is
442
hits = searcher.search(query, null, 1000).scoreDocs;
443
assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
444
QueryUtils.check(random, query,searcher);
446
// phrase II with repetitions that does not exist in any doc
447
query = new PhraseQuery();
448
query.add(new Term("nonexist", "phrase"));
449
query.add(new Term("nonexist", "exist"));
450
query.add(new Term("nonexist", "exist"));
451
query.add(new Term("nonexist", "exist"));
452
query.setSlop(1000); // would not be found no matter how high the slop is
454
hits = searcher.search(query, null, 1000).scoreDocs;
455
assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
456
QueryUtils.check(random, query,searcher);
461
* Working on a 2 fields like this:
462
* Field("field", "one two three four five")
463
* Field("palindrome", "one two three two one")
464
* Phrase of size 2 occuriong twice, once in order and once in reverse,
465
* because doc is a palyndrome, is counted twice.
466
* Also, in this case order in query does not matter.
467
* Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
469
public void testPalyndrome2() throws Exception {
471
// search on non palyndrome, find phrase with no slop, using exact phrase scorer
472
query.setSlop(0); // to use exact phrase scorer
473
query.add(new Term("field", "two"));
474
query.add(new Term("field", "three"));
475
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
476
assertEquals("phrase found with exact phrase scorer", 1, hits.length);
477
float score0 = hits[0].score;
478
//System.out.println("(exact) field: two three: "+score0);
479
QueryUtils.check(random, query,searcher);
481
// search on non palyndrome, find phrase with slop 2, though no slop required here.
482
query.setSlop(2); // to use sloppy scorer
483
hits = searcher.search(query, null, 1000).scoreDocs;
484
assertEquals("just sloppy enough", 1, hits.length);
485
float score1 = hits[0].score;
486
//System.out.println("(sloppy) field: two three: "+score1);
487
assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
488
QueryUtils.check(random, query,searcher);
490
// search ordered in palyndrome, find it twice
491
query = new PhraseQuery();
492
query.setSlop(2); // must be at least two for both ordered and reversed to match
493
query.add(new Term("palindrome", "two"));
494
query.add(new Term("palindrome", "three"));
495
hits = searcher.search(query, null, 1000).scoreDocs;
496
assertEquals("just sloppy enough", 1, hits.length);
497
//float score2 = hits[0].score;
498
//System.out.println("palindrome: two three: "+score2);
499
QueryUtils.check(random, query,searcher);
501
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
502
//assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
504
// search reveresed in palyndrome, find it twice
505
query = new PhraseQuery();
506
query.setSlop(2); // must be at least two for both ordered and reversed to match
507
query.add(new Term("palindrome", "three"));
508
query.add(new Term("palindrome", "two"));
509
hits = searcher.search(query, null, 1000).scoreDocs;
510
assertEquals("just sloppy enough", 1, hits.length);
511
//float score3 = hits[0].score;
512
//System.out.println("palindrome: three two: "+score3);
513
QueryUtils.check(random, query,searcher);
515
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
516
//assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
517
//assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
521
* Working on a 2 fields like this:
522
* Field("field", "one two three four five")
523
* Field("palindrome", "one two three two one")
524
* Phrase of size 3 occuriong twice, once in order and once in reverse,
525
* because doc is a palyndrome, is counted twice.
526
* Also, in this case order in query does not matter.
527
* Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
529
public void testPalyndrome3() throws Exception {
531
// search on non palyndrome, find phrase with no slop, using exact phrase scorer
532
query.setSlop(0); // to use exact phrase scorer
533
query.add(new Term("field", "one"));
534
query.add(new Term("field", "two"));
535
query.add(new Term("field", "three"));
536
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
537
assertEquals("phrase found with exact phrase scorer", 1, hits.length);
538
float score0 = hits[0].score;
539
//System.out.println("(exact) field: one two three: "+score0);
540
QueryUtils.check(random, query,searcher);
542
// just make sure no exc:
543
searcher.explain(query, 0);
545
// search on non palyndrome, find phrase with slop 3, though no slop required here.
546
query.setSlop(4); // to use sloppy scorer
547
hits = searcher.search(query, null, 1000).scoreDocs;
548
assertEquals("just sloppy enough", 1, hits.length);
549
float score1 = hits[0].score;
550
//System.out.println("(sloppy) field: one two three: "+score1);
551
assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
552
QueryUtils.check(random, query,searcher);
554
// search ordered in palyndrome, find it twice
555
query = new PhraseQuery();
556
query.setSlop(4); // must be at least four for both ordered and reversed to match
557
query.add(new Term("palindrome", "one"));
558
query.add(new Term("palindrome", "two"));
559
query.add(new Term("palindrome", "three"));
560
hits = searcher.search(query, null, 1000).scoreDocs;
562
// just make sure no exc:
563
searcher.explain(query, 0);
565
assertEquals("just sloppy enough", 1, hits.length);
566
//float score2 = hits[0].score;
567
//System.out.println("palindrome: one two three: "+score2);
568
QueryUtils.check(random, query,searcher);
570
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
571
//assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
573
// search reveresed in palyndrome, find it twice
574
query = new PhraseQuery();
575
query.setSlop(4); // must be at least four for both ordered and reversed to match
576
query.add(new Term("palindrome", "three"));
577
query.add(new Term("palindrome", "two"));
578
query.add(new Term("palindrome", "one"));
579
hits = searcher.search(query, null, 1000).scoreDocs;
580
assertEquals("just sloppy enough", 1, hits.length);
581
//float score3 = hits[0].score;
582
//System.out.println("palindrome: three two one: "+score3);
583
QueryUtils.check(random, query,searcher);
585
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
586
//assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
587
//assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
591
public void testEmptyPhraseQuery() throws Throwable {
592
final BooleanQuery q2 = new BooleanQuery();
593
q2.add(new PhraseQuery(), BooleanClause.Occur.MUST);
597
/* test that a single term is rewritten to a term query */
598
public void testRewrite() throws IOException {
599
PhraseQuery pq = new PhraseQuery();
600
pq.add(new Term("foo", "bar"));
601
Query rewritten = pq.rewrite(searcher.getIndexReader());
602
assertTrue(rewritten instanceof TermQuery);
605
public void testRandomPhrases() throws Exception {
606
Directory dir = newDirectory();
607
Analyzer analyzer = new MockAnalyzer(random);
609
RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy()));
610
List<List<String>> docs = new ArrayList<List<String>>();
611
Document d = new Document();
612
Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
617
int NUM_DOCS = atLeast(10);
618
for (int i = 0; i < NUM_DOCS; i++) {
619
// must be > 4096 so it spans multiple chunks
620
int termCount = _TestUtil.nextInt(random, 4097, 8200);
622
List<String> doc = new ArrayList<String>();
624
StringBuilder sb = new StringBuilder();
625
while(doc.size() < termCount) {
626
if (r.nextInt(5) == 1 || docs.size() == 0) {
627
// make new non-empty-string term
630
term = _TestUtil.randomUnicodeString(r);
631
if (term.length() > 0) {
635
TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
636
CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
638
while(ts.incrementToken()) {
639
String text = termAttr.toString();
641
sb.append(text).append(' ');
646
// pick existing sub-phrase
647
List<String> lastDoc = docs.get(r.nextInt(docs.size()));
648
int len = _TestUtil.nextInt(r, 1, 10);
649
int start = r.nextInt(lastDoc.size()-len);
650
for(int k=start;k<start+len;k++) {
651
String t = lastDoc.get(k);
653
sb.append(t).append(' ');
658
f.setValue(sb.toString());
662
IndexReader reader = w.getReader();
663
IndexSearcher s = newSearcher(reader);
667
int num = atLeast(10);
668
for(int i=0;i<num;i++) {
669
int docID = r.nextInt(docs.size());
670
List<String> doc = docs.get(docID);
672
final int numTerm = _TestUtil.nextInt(r, 2, 20);
673
final int start = r.nextInt(doc.size()-numTerm);
674
PhraseQuery pq = new PhraseQuery();
675
StringBuilder sb = new StringBuilder();
676
for(int t=start;t<start+numTerm;t++) {
677
pq.add(new Term("f", doc.get(t)));
678
sb.append(doc.get(t)).append(' ');
681
TopDocs hits = s.search(pq, NUM_DOCS);
682
boolean found = false;
683
for(int j=0;j<hits.scoreDocs.length;j++) {
684
if (hits.scoreDocs[j].doc == docID) {
690
assertTrue("phrase '" + sb + "' not found; start=" + start, found);