1
package org.apache.lucene.search;
4
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.IOException;
21
import java.text.Collator;
22
import java.util.Locale;
24
import org.apache.lucene.index.IndexReader;
25
import org.apache.lucene.index.RandomIndexWriter;
26
import org.apache.lucene.index.Term;
27
import org.apache.lucene.document.Document;
28
import org.apache.lucene.document.Field;
29
import org.apache.lucene.store.Directory;
30
import org.junit.Test;
33
/**
 * A basic 'positive' unit test class for the TermRangeFilter class.
 *
 * <p>
 * NOTE: at the moment, this class only tests for 'positive' results. It does
 * not verify the results to ensure there are no 'false positives', nor does it
 * adequately test 'negative' results. It also does not test that garbage in
 * results in an Exception.
 */
41
public class TestTermRangeFilter extends BaseTestRangeFilter {
44
public void testRangeFilterId() throws IOException {
46
IndexReader reader = signedIndexReader;
47
IndexSearcher search = newSearcher(reader);
49
int medId = ((maxId - minId) / 2);
51
String minIP = pad(minId);
52
String maxIP = pad(maxId);
53
String medIP = pad(medId);
55
int numDocs = reader.numDocs();
57
assertEquals("num of docs", numDocs, 1 + maxId - minId);
60
Query q = new TermQuery(new Term("body", "body"));
62
// test id, bounded on both ends
64
result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
66
assertEquals("find all", numDocs, result.length);
68
result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
70
assertEquals("all but last", numDocs - 1, result.length);
72
result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
74
assertEquals("all but first", numDocs - 1, result.length);
76
result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
78
assertEquals("all but ends", numDocs - 2, result.length);
80
result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
82
assertEquals("med and up", 1 + maxId - medId, result.length);
84
result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
86
assertEquals("up to med", 1 + medId - minId, result.length);
90
result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
92
assertEquals("min and up", numDocs, result.length);
94
result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
96
assertEquals("max and down", numDocs, result.length);
98
result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
100
assertEquals("not min, but up", numDocs - 1, result.length);
102
result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
104
assertEquals("not max, but down", numDocs - 1, result.length);
106
result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
108
assertEquals("med and up, not max", maxId - medId, result.length);
110
result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
112
assertEquals("not min, up to med", medId - minId, result.length);
116
result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
118
assertEquals("min,min,F,F", 0, result.length);
119
result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
121
assertEquals("med,med,F,F", 0, result.length);
122
result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
124
assertEquals("max,max,F,F", 0, result.length);
126
result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
128
assertEquals("min,min,T,T", 1, result.length);
129
result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
131
assertEquals("nul,min,F,T", 1, result.length);
133
result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
135
assertEquals("max,max,T,T", 1, result.length);
136
result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
138
assertEquals("max,nul,T,T", 1, result.length);
140
result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
142
assertEquals("med,med,T,T", 1, result.length);
148
public void testRangeFilterIdCollating() throws IOException {
150
IndexReader reader = signedIndexReader;
151
IndexSearcher search = newSearcher(reader);
153
Collator c = Collator.getInstance(Locale.ENGLISH);
155
int medId = ((maxId - minId) / 2);
157
String minIP = pad(minId);
158
String maxIP = pad(maxId);
159
String medIP = pad(medId);
161
int numDocs = reader.numDocs();
163
assertEquals("num of docs", numDocs, 1 + maxId - minId);
165
Query q = new TermQuery(new Term("body", "body"));
167
// test id, bounded on both ends
168
int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T,
169
T, c), 1000).totalHits;
170
assertEquals("find all", numDocs, numHits);
172
numHits = search.search(q,
173
new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
174
assertEquals("all but last", numDocs - 1, numHits);
176
numHits = search.search(q,
177
new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
178
assertEquals("all but first", numDocs - 1, numHits);
180
numHits = search.search(q,
181
new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
182
assertEquals("all but ends", numDocs - 2, numHits);
184
numHits = search.search(q,
185
new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
186
assertEquals("med and up", 1 + maxId - medId, numHits);
188
numHits = search.search(q,
189
new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
190
assertEquals("up to med", 1 + medId - minId, numHits);
194
numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c),
196
assertEquals("min and up", numDocs, numHits);
198
numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c),
200
assertEquals("max and down", numDocs, numHits);
202
numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c),
204
assertEquals("not min, but up", numDocs - 1, numHits);
206
numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c),
208
assertEquals("not max, but down", numDocs - 1, numHits);
210
numHits = search.search(q,
211
new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
212
assertEquals("med and up, not max", maxId - medId, numHits);
214
numHits = search.search(q,
215
new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
216
assertEquals("not min, up to med", medId - minId, numHits);
220
numHits = search.search(q,
221
new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
222
assertEquals("min,min,F,F", 0, numHits);
223
numHits = search.search(q,
224
new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
225
assertEquals("med,med,F,F", 0, numHits);
226
numHits = search.search(q,
227
new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
228
assertEquals("max,max,F,F", 0, numHits);
230
numHits = search.search(q,
231
new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
232
assertEquals("min,min,T,T", 1, numHits);
233
numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c),
235
assertEquals("nul,min,F,T", 1, numHits);
237
numHits = search.search(q,
238
new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
239
assertEquals("max,max,T,T", 1, numHits);
240
numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c),
242
assertEquals("max,nul,T,T", 1, numHits);
244
numHits = search.search(q,
245
new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
246
assertEquals("med,med,T,T", 1, numHits);
252
public void testRangeFilterRand() throws IOException {
254
IndexReader reader = signedIndexReader;
255
IndexSearcher search = newSearcher(reader);
257
String minRP = pad(signedIndexDir.minR);
258
String maxRP = pad(signedIndexDir.maxR);
260
int numDocs = reader.numDocs();
262
assertEquals("num of docs", numDocs, 1 + maxId - minId);
265
Query q = new TermQuery(new Term("body", "body"));
267
// test extremes, bounded on both ends
269
result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T),
271
assertEquals("find all", numDocs, result.length);
273
result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F),
275
assertEquals("all but biggest", numDocs - 1, result.length);
277
result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T),
279
assertEquals("all but smallest", numDocs - 1, result.length);
281
result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F),
283
assertEquals("all but extremes", numDocs - 2, result.length);
287
result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F),
289
assertEquals("smallest and up", numDocs, result.length);
291
result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T),
293
assertEquals("biggest and down", numDocs, result.length);
295
result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F),
297
assertEquals("not smallest, but up", numDocs - 1, result.length);
299
result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F),
301
assertEquals("not biggest, but down", numDocs - 1, result.length);
305
result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F),
307
assertEquals("min,min,F,F", 0, result.length);
308
result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F),
310
assertEquals("max,max,F,F", 0, result.length);
312
result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T),
314
assertEquals("min,min,T,T", 1, result.length);
315
result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T),
317
assertEquals("nul,min,F,T", 1, result.length);
319
result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T),
321
assertEquals("max,max,T,T", 1, result.length);
322
result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F),
324
assertEquals("max,nul,T,T", 1, result.length);
330
public void testRangeFilterRandCollating() throws IOException {
332
// using the unsigned index because collation seems to ignore hyphens
333
IndexReader reader = unsignedIndexReader;
334
IndexSearcher search = newSearcher(reader);
336
Collator c = Collator.getInstance(Locale.ENGLISH);
338
String minRP = pad(unsignedIndexDir.minR);
339
String maxRP = pad(unsignedIndexDir.maxR);
341
int numDocs = reader.numDocs();
343
assertEquals("num of docs", numDocs, 1 + maxId - minId);
345
Query q = new TermQuery(new Term("body", "body"));
347
// test extremes, bounded on both ends
349
int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T,
350
T, c), 1000).totalHits;
351
assertEquals("find all", numDocs, numHits);
353
numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F,
355
assertEquals("all but biggest", numDocs - 1, numHits);
357
numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T,
359
assertEquals("all but smallest", numDocs - 1, numHits);
361
numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F,
363
assertEquals("all but extremes", numDocs - 2, numHits);
367
numHits = search.search(q,
368
new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
369
assertEquals("smallest and up", numDocs, numHits);
371
numHits = search.search(q,
372
new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
373
assertEquals("biggest and down", numDocs, numHits);
375
numHits = search.search(q,
376
new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
377
assertEquals("not smallest, but up", numDocs - 1, numHits);
379
numHits = search.search(q,
380
new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
381
assertEquals("not biggest, but down", numDocs - 1, numHits);
385
numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F,
387
assertEquals("min,min,F,F", 0, numHits);
388
numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F,
390
assertEquals("max,max,F,F", 0, numHits);
392
numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T,
394
assertEquals("min,min,T,T", 1, numHits);
395
numHits = search.search(q,
396
new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
397
assertEquals("nul,min,F,T", 1, numHits);
399
numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T,
401
assertEquals("max,max,T,T", 1, numHits);
402
numHits = search.search(q,
403
new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
404
assertEquals("max,nul,T,T", 1, numHits);
410
public void testFarsi() throws Exception {
413
Directory farsiIndex = newDirectory();
414
RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
415
Document doc = new Document();
416
doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
417
Field.Index.NOT_ANALYZED));
419
.add(newField("body", "body", Field.Store.YES,
420
Field.Index.NOT_ANALYZED));
421
writer.addDocument(doc);
423
IndexReader reader = writer.getReader();
426
IndexSearcher search = newSearcher(reader);
427
Query q = new TermQuery(new Term("body", "body"));
429
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
430
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
431
// characters properly.
432
Collator collator = Collator.getInstance(new Locale("ar"));
434
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
435
// orders the U+0698 character before the U+0633 character, so the single
436
// index Term below should NOT be returned by a TermRangeFilter with a Farsi
437
// Collator (or an Arabic one for the case when Farsi is not supported).
438
int numHits = search.search(q, new TermRangeFilter("content", "\u062F",
439
"\u0698", T, T, collator), 1000).totalHits;
440
assertEquals("The index Term should not be included.", 0, numHits);
442
numHits = search.search(q, new TermRangeFilter("content", "\u0633",
443
"\u0638", T, T, collator), 1000).totalHits;
444
assertEquals("The index Term should be included.", 1, numHits);
451
public void testDanish() throws Exception {
454
Directory danishIndex = newDirectory();
455
RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
456
// Danish collation orders the words below in the given order
457
// (example taken from TestSort.testInternationalSort() ).
458
String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
459
for (int docnum = 0; docnum < words.length; ++docnum) {
460
Document doc = new Document();
461
doc.add(newField("content", words[docnum], Field.Store.YES,
462
Field.Index.NOT_ANALYZED));
463
doc.add(newField("body", "body", Field.Store.YES,
464
Field.Index.NOT_ANALYZED));
465
writer.addDocument(doc);
467
IndexReader reader = writer.getReader();
470
IndexSearcher search = newSearcher(reader);
471
Query q = new TermQuery(new Term("body", "body"));
473
Collator collator = Collator.getInstance(new Locale("da", "dk"));
475
// Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
476
// but Danish collation does.
477
int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T",
478
"MAND", F, F, collator), 1000).totalHits;
479
assertEquals("The index Term should be included.", 1, numHits);
481
numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T",
482
"MAND", F, F, collator), 1000).totalHits;
483
assertEquals("The index Term should not be included.", 0, numHits);