1
package org.apache.lucene.search.payloads;
3
* Licensed to the Apache Software Foundation (ASF) under one or more
4
* contributor license agreements. See the NOTICE file distributed with
5
* this work for additional information regarding copyright ownership.
6
* The ASF licenses this file to You under the Apache License, Version 2.0
7
* (the "License"); you may not use this file except in compliance with
8
* the License. You may obtain a copy of the License at
10
* http://www.apache.org/licenses/LICENSE-2.0
12
* Unless required by applicable law or agreed to in writing, software
13
* distributed under the License is distributed on an "AS IS" BASIS,
14
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
* See the License for the specific language governing permissions and
16
* limitations under the License.
18
import java.io.IOException;
19
import java.io.Reader;
20
import java.util.Collection;
22
import org.apache.lucene.analysis.Analyzer;
23
import org.apache.lucene.analysis.LowerCaseTokenizer;
24
import org.apache.lucene.analysis.TokenFilter;
25
import org.apache.lucene.analysis.TokenStream;
26
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
27
import org.apache.lucene.document.Document;
28
import org.apache.lucene.document.Field;
29
import org.apache.lucene.index.FieldInvertState;
30
import org.apache.lucene.index.IndexReader;
31
import org.apache.lucene.index.Payload;
32
import org.apache.lucene.index.RandomIndexWriter;
33
import org.apache.lucene.index.Term;
34
import org.apache.lucene.search.DefaultSimilarity;
35
import org.apache.lucene.search.Explanation;
36
import org.apache.lucene.search.IndexSearcher;
37
import org.apache.lucene.search.QueryUtils;
38
import org.apache.lucene.search.ScoreDoc;
39
import org.apache.lucene.search.Searcher;
40
import org.apache.lucene.search.TopDocs;
41
import org.apache.lucene.search.spans.SpanQuery;
42
import org.apache.lucene.search.spans.SpanNearQuery;
43
import org.apache.lucene.search.spans.SpanTermQuery;
44
import org.apache.lucene.store.Directory;
45
import org.apache.lucene.util.English;
46
import org.apache.lucene.util.LuceneTestCase;
47
import org.apache.lucene.search.Explanation.IDFExplanation;
48
import org.junit.AfterClass;
49
import org.junit.BeforeClass;
52
/**
 * Tests for {@code PayloadNearQuery}, run against a shared index of 1000 documents
 * holding the English spellings of the numbers 0..999 (built in {@code beforeClass()}).
 */
public class TestPayloadNearQuery extends LuceneTestCase {
53
// Searcher over the shared index; assigned in beforeClass().
private static IndexSearcher searcher;
54
// Reader obtained from the index writer in beforeClass().
private static IndexReader reader;
55
// Directory that holds the shared index; created in beforeClass().
private static Directory directory;
56
// Similarity installed on both the index writer config and the searcher in beforeClass().
private static BoostingSimilarity similarity = new BoostingSimilarity();
57
// Single-byte payload with value 2 (attached to tokens by PayloadFilter).
private static byte[] payload2 = new byte[]{2};
58
// Single-byte payload with value 4 (attached to tokens by PayloadFilter).
private static byte[] payload4 = new byte[]{4};
60
// Analyzer used when building the test index: lower-case tokenization wrapped in a PayloadFilter.
private static class PayloadAnalyzer extends Analyzer {
62
// Builds the token stream for a field: a LowerCaseTokenizer over the reader, then a
// PayloadFilter that is handed the field name.
// NOTE(review): the return statement is not visible in this excerpt; presumably returns result.
public TokenStream tokenStream(String fieldName, Reader reader) {
63
TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
64
result = new PayloadFilter(result, fieldName);
69
// Token filter that sets a payload (payload2 or payload4) on tokens from the wrapped stream.
private static class PayloadFilter extends TokenFilter {
70
// Name of the field being analyzed, as passed to the constructor.
private final String fieldName;
71
// Counter whose parity selects the payload in incrementToken().
// NOTE(review): the statement that advances it is not visible in this excerpt.
private int numSeen = 0;
72
// Payload attribute registered on this stream in the constructor.
private final PayloadAttribute payAtt;
74
// Stores the field name and registers the PayloadAttribute.
// NOTE(review): the super(input) call is presumably on a line not visible here.
public PayloadFilter(TokenStream input, String fieldName) {
76
this.fieldName = fieldName;
77
payAtt = addAttribute(PayloadAttribute.class);
81
// Advances the wrapped stream and, when it yields a token, sets that token's payload.
public boolean incrementToken() throws IOException {
82
boolean result = false;
83
if (input.incrementToken()) {
84
// Even numSeen selects payload2.
if (numSeen % 2 == 0) {
85
payAtt.setPayload(new Payload(payload2));
87
// NOTE(review): presumably the else branch (odd numSeen); the else line is not visible here.
payAtt.setPayload(new Payload(payload4));
96
// NOTE(review): body not visible in this excerpt; presumably resets numSeen - confirm.
public void reset() throws IOException {
102
/**
 * Builds a PayloadNearQuery from the whitespace-separated words of a phrase.
 *
 * @param fieldName field the terms are created for
 * @param phrase    words separated by whitespace; each word becomes one SpanTermQuery clause
 * @param inOrder   passed through to the PayloadNearQuery constructor
 * @param function  payload function passed through to the PayloadNearQuery constructor
 * @return a PayloadNearQuery over the clauses, constructed with 0 as its second argument (the slop)
 */
private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
103
// Split on runs of whitespace.
String[] words = phrase.split("[\\s]+");
104
SpanQuery clauses[] = new SpanQuery[words.length];
105
for (int i=0;i<clauses.length;i++) {
106
clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
108
return new PayloadNearQuery(clauses, 0, inOrder, function);
112
// Builds the shared index and searcher used by every test in this class.
// NOTE(review): presumably annotated with JUnit's @BeforeClass; the annotation line is not visible here.
public static void beforeClass() throws Exception {
113
directory = newDirectory();
114
// The writer analyzes with PayloadAnalyzer and uses the BoostingSimilarity instance.
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
115
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
116
.setSimilarity(similarity));
117
//writer.infoStream = System.out;
118
// One document per number 0..999.
for (int i = 0; i < 1000; i++) {
119
Document doc = new Document();
120
// "field" holds the English spelling of i.
doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
121
// "field2" holds the spellings of i and i+1 joined by a single space.
String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1);
122
doc.add(newField("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
123
writer.addDocument(doc);
125
reader = writer.getReader();
128
// Search with the same similarity that was used at index time.
searcher = newSearcher(reader);
129
searcher.setSimilarity(similarity);
133
// Class-level teardown.
// NOTE(review): body not visible in this excerpt; presumably releases searcher, reader and directory - confirm.
public static void afterClass() throws Exception {
142
// Checks hit counts and scores of ordered two-word phrase queries on "field" using
// AveragePayloadFunction: first "twenty two", then "<n> hundred" for n = one..nine.
public void test() throws IOException {
143
PayloadNearQuery query;
146
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
147
QueryUtils.check(query);
149
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
150
// and all the similarity factors are set to 1
151
hits = searcher.search(query, null, 100);
152
assertTrue("hits is null and it shouldn't be", hits != null);
153
assertTrue("should be 10 hits", hits.totalHits == 10);
154
// Every returned hit must score exactly 3.
for (int j = 0; j < hits.scoreDocs.length; j++) {
155
ScoreDoc doc = hits.scoreDocs[j];
156
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
158
// Repeat for "one hundred" .. "nine hundred".
for (int i=1;i<10;i++) {
159
query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
160
// all should have score = 3 because adjacent terms have payloads of 2,4
161
// and all the similarity factors are set to 1
162
hits = searcher.search(query, null, 100);
163
assertTrue("hits is null and it shouldn't be", hits != null);
164
assertTrue("should be 100 hits", hits.totalHits == 100);
165
for (int j = 0; j < hits.scoreDocs.length; j++) {
166
ScoreDoc doc = hits.scoreDocs[j];
167
// System.out.println("Doc: " + doc.toString());
168
// System.out.println("Explain: " + searcher.explain(query, doc.doc));
169
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
175
// Searches "field2" with a PayloadNearQuery built over a two-element clause array and
// expects 12 total hits.
public void testPayloadNear() throws IOException {
176
SpanNearQuery q1, q2;
177
PayloadNearQuery query;
178
//SpanNearQuery(clauses, 10000, false)
179
q1 = spanNearQuery("field2", "twenty two");
180
q2 = spanNearQuery("field2", "twenty three");
181
// NOTE(review): the lines that store q1/q2 into clauses are not visible in this excerpt.
SpanQuery[] clauses = new SpanQuery[2];
184
// Constructed with 10 as the second argument (the slop) and inOrder=false.
query = new PayloadNearQuery(clauses, 10, false);
185
//System.out.println(query.toString());
186
assertEquals(12, searcher.search(query, null, 100).totalHits);
188
// NOTE(review): hits is not declared or assigned in the visible lines of this method; this
// print loop looks like debug output that may have been commented out in the file - confirm.
System.out.println(hits.totalHits);
189
for (int j = 0; j < hits.scoreDocs.length; j++) {
190
ScoreDoc doc = hits.scoreDocs[j];
191
System.out.println("doc: "+doc.doc+", score: "+doc.score);
196
// Runs the ordered phrase "twenty two" on "field" with AveragePayloadFunction and checks,
// for every hit, that the score is 3 and that the explanation names the function and
// reports the same value.
public void testAverageFunction() throws IOException {
197
PayloadNearQuery query;
200
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
201
QueryUtils.check(query);
202
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
203
// and all the similarity factors are set to 1
204
hits = searcher.search(query, null, 100);
205
assertTrue("hits is null and it shouldn't be", hits != null);
206
assertTrue("should be 10 hits", hits.totalHits == 10);
207
for (int j = 0; j < hits.scoreDocs.length; j++) {
208
ScoreDoc doc = hits.scoreDocs[j];
209
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
210
// The explanation text must mention the payload function and carry the value 3.
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
211
String exp = explain.toString();
212
assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
213
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
216
// Runs the ordered phrase "twenty two" on "field" with MaxPayloadFunction and checks,
// for every hit, that the score is 4 and that the explanation names the function and
// reports the same value.
public void testMaxFunction() throws IOException {
217
PayloadNearQuery query;
220
query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
221
QueryUtils.check(query);
222
// all 10 hits should have score = 4 (max payload value)
223
hits = searcher.search(query, null, 100);
224
assertTrue("hits is null and it shouldn't be", hits != null);
225
assertTrue("should be 10 hits", hits.totalHits == 10);
226
for (int j = 0; j < hits.scoreDocs.length; j++) {
227
ScoreDoc doc = hits.scoreDocs[j];
228
assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
229
// The explanation text must mention the payload function and carry the value 4.
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
230
String exp = explain.toString();
231
assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
232
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
235
// Runs the ordered phrase "twenty two" on "field" with MinPayloadFunction and checks,
// for every hit, that the score is 2 and that the explanation names the function and
// reports the same value.
public void testMinFunction() throws IOException {
236
PayloadNearQuery query;
239
query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
240
QueryUtils.check(query);
241
// all 10 hits should have score = 2 (min payload value)
242
hits = searcher.search(query, null, 100);
243
assertTrue("hits is null and it shouldn't be", hits != null);
244
assertTrue("should be 10 hits", hits.totalHits == 10);
245
for (int j = 0; j < hits.scoreDocs.length; j++) {
246
ScoreDoc doc = hits.scoreDocs[j];
247
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
248
// The explanation text must mention the payload function and carry the value 2.
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
249
String exp = explain.toString();
250
assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
251
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
254
// Builds two span-near queries on "field2" ("twenty two" and "twenty three") and a
// two-element clause array.
// NOTE(review): the assignments into clauses and the return statement are not visible in
// this excerpt; presumably returns {q1, q2} - confirm.
private SpanQuery[] getClauses() {
255
SpanNearQuery q1, q2;
256
q1 = spanNearQuery("field2", "twenty two");
257
q2 = spanNearQuery("field2", "twenty three");
258
SpanQuery[] clauses = new SpanQuery[2];
263
/**
 * Builds a SpanNearQuery whose clauses are PayloadTermQuery instances, one per word.
 *
 * @param fieldName field the terms are created for
 * @param words     words separated by whitespace
 * @return a SpanNearQuery over the clauses, constructed with 10000 and false (slop, inOrder)
 */
private SpanNearQuery spanNearQuery(String fieldName, String words) {
264
// Split on runs of whitespace.
String[] wordList = words.split("[\\s]+");
265
SpanQuery clauses[] = new SpanQuery[wordList.length];
266
for (int i=0;i<clauses.length;i++) {
267
// Each clause uses AveragePayloadFunction.
clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction());
269
return new SpanNearQuery(clauses, 10000, false);
272
// Runs the ordered four-word phrase "nine hundred ninety nine" on "field" with
// AveragePayloadFunction; expects exactly one hit with score 3.
public void testLongerSpan() throws IOException {
273
PayloadNearQuery query;
275
query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
276
hits = searcher.search(query, null, 100);
277
assertTrue("hits is null and it shouldn't be", hits != null);
278
// The first hit is read before the hit count is asserted below.
ScoreDoc doc = hits.scoreDocs[0];
279
// System.out.println("Doc: " + doc.toString());
280
// System.out.println("Explain: " + searcher.explain(query, doc.doc));
281
assertTrue("there should only be one hit", hits.totalHits == 1);
282
// should have score = 3 because adjacent terms have payloads of 2,4
283
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
286
// Nests ordered and unordered PayloadNearQuery instances over "field" and expects a
// single hit with score 3.
public void testComplexNested() throws IOException {
287
PayloadNearQuery query;
290
// combine ordered and unordered spans with some nesting to make sure all payloads are counted
292
// q1 and q2 are ordered phrases; q3 and q4 are unordered.
SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
293
SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
294
SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
295
SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
296
// First clause: q1 then q2, ordered. Second clause: q3 and q4, unordered.
SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
297
query = new PayloadNearQuery(clauses, 0, false);
298
hits = searcher.search(query, null, 100);
299
assertTrue("hits is null and it shouldn't be", hits != null);
300
// should be only 1 hit - doc 999
301
assertTrue("should only be one hit", hits.scoreDocs.length == 1);
302
// the score should be 3 - the average of all the underlying payloads
303
ScoreDoc doc = hits.scoreDocs[0];
304
// System.out.println("Doc: " + doc.toString());
305
// System.out.println("Explain: " + searcher.explain(query, doc.doc));
306
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
309
// must be static for weight serialization tests
310
// Similarity used for both indexing and searching in this test. Per the comments below,
// the intent is that every factor other than the payload score is 1.
// NOTE(review): most method bodies are not visible in this excerpt, so the returned values
// cannot be confirmed from here.
static class BoostingSimilarity extends DefaultSimilarity {
312
// Scores a single payload.
@Override public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
313
//we know it is size 4 here, so ignore the offset/length
// NOTE(review): payload2 and payload4 in the enclosing test are single-byte arrays, so
// "size 4" looks inaccurate (size 1?) - confirm against the return statement, which is
// not visible here.
316
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
317
//Make everything else 1 so we see the effect of the payload
318
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
319
// The norm is just the boost carried by the invert state.
@Override public float computeNorm(String fieldName, FieldInvertState state) {
320
return state.getBoost();
323
// NOTE(review): body not visible; presumably returns 1 - confirm.
@Override public float queryNorm(float sumOfSquaredWeights) {
327
// NOTE(review): body not visible; presumably returns 1 - confirm.
@Override public float sloppyFreq(int distance) {
331
// NOTE(review): body not visible; presumably returns 1 - confirm.
@Override public float coord(int overlap, int maxOverlap) {
334
// NOTE(review): body not visible; presumably returns 1 - confirm.
@Override public float tf(float freq) {
337
// idf used for phrase queries
338
// Returns an anonymous IDFExplanation whose explain() text is "Inexplicable".
@Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
339
return new IDFExplanation() {
341
// NOTE(review): body not visible; presumably returns 1 - confirm.
public float getIdf() {
345
public String explain() {
346
return "Inexplicable";