package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/**
 * Term position unit test.
 *
 * @version $Revision: 1161586 $
 */
public class TestPositionIncrement extends LuceneTestCase {
62
public void testSetPosition() throws Exception {
63
Analyzer analyzer = new Analyzer() {
65
public TokenStream tokenStream(String fieldName, Reader reader) {
66
return new TokenStream() {
67
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
68
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
71
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
72
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
73
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
76
public boolean incrementToken() {
77
if (i == TOKENS.length)
80
termAtt.append(TOKENS[i]);
81
offsetAtt.setOffset(i,i);
82
posIncrAtt.setPositionIncrement(INCREMENTS[i]);
88
public void reset() throws IOException {
95
Directory store = newDirectory();
96
RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
97
Document d = new Document();
98
d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
99
writer.addDocument(d);
100
IndexReader reader = writer.getReader();
104
IndexSearcher searcher = newSearcher(reader);
106
TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
108
// first token should be at position 0
109
assertEquals(0, pos.nextPosition());
111
pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
113
// second token should be at position 2
114
assertEquals(2, pos.nextPosition());
119
q = new PhraseQuery();
120
q.add(new Term("field", "1"));
121
q.add(new Term("field", "2"));
122
hits = searcher.search(q, null, 1000).scoreDocs;
123
assertEquals(0, hits.length);
125
// same as previous, just specify positions explicitely.
126
q = new PhraseQuery();
127
q.add(new Term("field", "1"),0);
128
q.add(new Term("field", "2"),1);
129
hits = searcher.search(q, null, 1000).scoreDocs;
130
assertEquals(0, hits.length);
132
// specifying correct positions should find the phrase.
133
q = new PhraseQuery();
134
q.add(new Term("field", "1"),0);
135
q.add(new Term("field", "2"),2);
136
hits = searcher.search(q, null, 1000).scoreDocs;
137
assertEquals(1, hits.length);
139
q = new PhraseQuery();
140
q.add(new Term("field", "2"));
141
q.add(new Term("field", "3"));
142
hits = searcher.search(q, null, 1000).scoreDocs;
143
assertEquals(1, hits.length);
145
q = new PhraseQuery();
146
q.add(new Term("field", "3"));
147
q.add(new Term("field", "4"));
148
hits = searcher.search(q, null, 1000).scoreDocs;
149
assertEquals(0, hits.length);
151
// phrase query would find it when correct positions are specified.
152
q = new PhraseQuery();
153
q.add(new Term("field", "3"),0);
154
q.add(new Term("field", "4"),0);
155
hits = searcher.search(q, null, 1000).scoreDocs;
156
assertEquals(1, hits.length);
158
// phrase query should fail for non existing searched term
159
// even if there exist another searched terms in the same searched position.
160
q = new PhraseQuery();
161
q.add(new Term("field", "3"),0);
162
q.add(new Term("field", "9"),0);
163
hits = searcher.search(q, null, 1000).scoreDocs;
164
assertEquals(0, hits.length);
166
// multi-phrase query should succed for non existing searched term
167
// because there exist another searched terms in the same searched position.
168
MultiPhraseQuery mq = new MultiPhraseQuery();
169
mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
170
hits = searcher.search(mq, null, 1000).scoreDocs;
171
assertEquals(1, hits.length);
173
q = new PhraseQuery();
174
q.add(new Term("field", "2"));
175
q.add(new Term("field", "4"));
176
hits = searcher.search(q, null, 1000).scoreDocs;
177
assertEquals(1, hits.length);
179
q = new PhraseQuery();
180
q.add(new Term("field", "3"));
181
q.add(new Term("field", "5"));
182
hits = searcher.search(q, null, 1000).scoreDocs;
183
assertEquals(1, hits.length);
185
q = new PhraseQuery();
186
q.add(new Term("field", "4"));
187
q.add(new Term("field", "5"));
188
hits = searcher.search(q, null, 1000).scoreDocs;
189
assertEquals(1, hits.length);
191
q = new PhraseQuery();
192
q.add(new Term("field", "2"));
193
q.add(new Term("field", "5"));
194
hits = searcher.search(q, null, 1000).scoreDocs;
195
assertEquals(0, hits.length);
197
// should not find "1 2" because there is a gap of 1 in the index
198
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
199
new StopWhitespaceAnalyzer(false));
200
q = (PhraseQuery) qp.parse("\"1 2\"");
201
hits = searcher.search(q, null, 1000).scoreDocs;
202
assertEquals(0, hits.length);
204
// omitted stop word cannot help because stop filter swallows the increments.
205
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
206
hits = searcher.search(q, null, 1000).scoreDocs;
207
assertEquals(0, hits.length);
209
// query parser alone won't help, because stop filter swallows the increments.
210
qp.setEnablePositionIncrements(true);
211
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
212
hits = searcher.search(q, null, 1000).scoreDocs;
213
assertEquals(0, hits.length);
215
// stop filter alone won't help, because query parser swallows the increments.
216
qp.setEnablePositionIncrements(false);
217
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
218
hits = searcher.search(q, null, 1000).scoreDocs;
219
assertEquals(0, hits.length);
221
// when both qp qnd stopFilter propagate increments, we should find the doc.
222
qp = new QueryParser(TEST_VERSION_CURRENT, "field",
223
new StopWhitespaceAnalyzer(true));
224
qp.setEnablePositionIncrements(true);
225
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
226
hits = searcher.search(q, null, 1000).scoreDocs;
227
assertEquals(1, hits.length);
234
private static class StopWhitespaceAnalyzer extends Analyzer {
235
boolean enablePositionIncrements;
236
final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
237
public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
238
this.enablePositionIncrements = enablePositionIncrements;
241
public TokenStream tokenStream(String fieldName, Reader reader) {
242
TokenStream ts = a.tokenStream(fieldName,reader);
243
return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
244
new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
248
public void testPayloadsPos0() throws Exception {
249
Directory dir = newDirectory();
250
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
251
Document doc = new Document();
252
doc.add(new Field("content",
253
new StringReader("a a b c d e a f g h i j a b k k")));
254
writer.addDocument(doc);
256
IndexReader r = writer.getReader();
258
TermPositions tp = r.termPositions(new Term("content", "a"));
260
assertTrue(tp.next());
261
// "a" occurs 4 times
262
assertEquals(4, tp.freq());
264
assertEquals(expected, tp.nextPosition());
265
assertEquals(1, tp.nextPosition());
266
assertEquals(3, tp.nextPosition());
267
assertEquals(6, tp.nextPosition());
269
// only one doc has "a"
270
assertFalse(tp.next());
272
IndexSearcher is = newSearcher(r);
274
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
275
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
276
SpanQuery[] sqs = { stq1, stq2 };
277
SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
280
boolean sawZero = false;
281
//System.out.println("\ngetPayloadSpans test");
282
Spans pspans = snq.getSpans(is.getIndexReader());
283
while (pspans.next()) {
284
//System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
285
Collection<byte[]> payloads = pspans.getPayload();
286
sawZero |= pspans.start() == 0;
287
count += payloads.size();
289
assertEquals(5, count);
292
//System.out.println("\ngetSpans test");
293
Spans spans = snq.getSpans(is.getIndexReader());
296
while (spans.next()) {
298
sawZero |= spans.start() == 0;
299
//System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
301
assertEquals(4, count);
304
//System.out.println("\nPayloadSpanUtil test");
307
PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
308
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
310
for (byte[] bytes : pls) {
311
String s = new String(bytes);
312
//System.out.println(s);
313
sawZero |= s.equals("pos: 0");
315
assertEquals(5, count);
318
is.getIndexReader().close();
323
final class TestPayloadAnalyzer extends Analyzer {
326
public TokenStream tokenStream(String fieldName, Reader reader) {
327
TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
328
return new PayloadFilter(result, fieldName);
332
final class PayloadFilter extends TokenFilter {
339
final PositionIncrementAttribute posIncrAttr;
340
final PayloadAttribute payloadAttr;
341
final CharTermAttribute termAttr;
343
public PayloadFilter(TokenStream input, String fieldName) {
345
this.fieldName = fieldName;
348
posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
349
payloadAttr = input.addAttribute(PayloadAttribute.class);
350
termAttr = input.addAttribute(CharTermAttribute.class);
354
public boolean incrementToken() throws IOException {
355
if (input.incrementToken()) {
356
payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
363
posIncrAttr.setPositionIncrement(posIncr);
365
if (TestPositionIncrement.VERBOSE) {
366
System.out.println("term=" + termAttr + " pos=" + pos);