package org.apache.lucene.search.payloads;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.English;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.io.Reader;
import java.io.IOException;

public class TestPayloadTermQuery extends LuceneTestCase {
55
private IndexSearcher searcher;
56
private IndexReader reader;
57
private BoostingSimilarity similarity = new BoostingSimilarity();
58
private byte[] payloadField = new byte[]{1};
59
private byte[] payloadMultiField1 = new byte[]{2};
60
private byte[] payloadMultiField2 = new byte[]{4};
61
protected Directory directory;
63
private class PayloadAnalyzer extends Analyzer {
67
public TokenStream tokenStream(String fieldName, Reader reader) {
68
TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
69
result = new PayloadFilter(result, fieldName);
74
private class PayloadFilter extends TokenFilter {
75
private final String fieldName;
76
private int numSeen = 0;
78
private final PayloadAttribute payloadAtt;
80
public PayloadFilter(TokenStream input, String fieldName) {
82
this.fieldName = fieldName;
83
payloadAtt = addAttribute(PayloadAttribute.class);
87
public boolean incrementToken() throws IOException {
88
boolean hasNext = input.incrementToken();
90
if (fieldName.equals("field")) {
91
payloadAtt.setPayload(new Payload(payloadField));
92
} else if (fieldName.equals("multiField")) {
93
if (numSeen % 2 == 0) {
94
payloadAtt.setPayload(new Payload(payloadMultiField1));
96
payloadAtt.setPayload(new Payload(payloadMultiField2));
107
public void reset() throws IOException {
114
public void setUp() throws Exception {
116
directory = newDirectory();
117
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
118
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
119
.setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
120
//writer.infoStream = System.out;
121
for (int i = 0; i < 1000; i++) {
122
Document doc = new Document();
123
Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
124
//noPayloadField.setBoost(0);
125
doc.add(noPayloadField);
126
doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
127
doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
128
writer.addDocument(doc);
130
reader = writer.getReader();
133
searcher = newSearcher(reader);
134
searcher.setSimilarity(similarity);
138
public void tearDown() throws Exception {
145
public void test() throws IOException {
146
PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
147
new MaxPayloadFunction());
148
TopDocs hits = searcher.search(query, null, 100);
149
assertTrue("hits is null and it shouldn't be", hits != null);
150
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
152
//they should all have the exact same score, because they all contain seventy once, and we set
153
//all the other similarity factors to be 1
155
assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
156
for (int i = 0; i < hits.scoreDocs.length; i++) {
157
ScoreDoc doc = hits.scoreDocs[i];
158
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
160
CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
161
Spans spans = query.getSpans(searcher.getIndexReader());
162
assertTrue("spans is null and it shouldn't be", spans != null);
163
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
164
/*float score = hits.score(0);
165
for (int i =1; i < hits.length(); i++)
167
assertTrue("scores are not equal and they should be", score == hits.score(i));
172
public void testQuery() {
173
PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
174
new MaxPayloadFunction());
175
QueryUtils.check(boostingFuncTermQuery);
177
SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
179
assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
181
PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
182
new AveragePayloadFunction());
184
QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
187
public void testMultipleMatchesPerDoc() throws Exception {
188
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
189
new MaxPayloadFunction());
190
TopDocs hits = searcher.search(query, null, 100);
191
assertTrue("hits is null and it shouldn't be", hits != null);
192
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
194
//they should all have the exact same score, because they all contain seventy once, and we set
195
//all the other similarity factors to be 1
197
//System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
198
assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
199
//there should be exactly 10 items that score a 4, all the rest should score a 2
200
//The 10 items are: 70 + i*100 where i in [0-9]
202
for (int i = 0; i < hits.scoreDocs.length; i++) {
203
ScoreDoc doc = hits.scoreDocs[i];
204
if (doc.doc % 10 == 0) {
206
assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
208
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
211
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
212
CheckHits.checkExplanations(query, "field", searcher, true);
213
Spans spans = query.getSpans(searcher.getIndexReader());
214
assertTrue("spans is null and it shouldn't be", spans != null);
215
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
216
//should be two matches per document
218
//100 hits times 2 matches per hit, we should have 200 in count
219
while (spans.next()) {
222
assertTrue(count + " does not equal: " + 200, count == 200);
225
//Set includeSpanScore to false, in which case just the payload score comes through.
226
public void testIgnoreSpanScorer() throws Exception {
227
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
228
new MaxPayloadFunction(), false);
230
IndexSearcher theSearcher = new IndexSearcher(directory, true);
231
theSearcher.setSimilarity(new FullSimilarity());
232
TopDocs hits = searcher.search(query, null, 100);
233
assertTrue("hits is null and it shouldn't be", hits != null);
234
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
236
//they should all have the exact same score, because they all contain seventy once, and we set
237
//all the other similarity factors to be 1
239
//System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
240
assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
241
//there should be exactly 10 items that score a 4, all the rest should score a 2
242
//The 10 items are: 70 + i*100 where i in [0-9]
244
for (int i = 0; i < hits.scoreDocs.length; i++) {
245
ScoreDoc doc = hits.scoreDocs[i];
246
if (doc.doc % 10 == 0) {
248
assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
250
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
253
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
254
CheckHits.checkExplanations(query, "field", searcher, true);
255
Spans spans = query.getSpans(searcher.getIndexReader());
256
assertTrue("spans is null and it shouldn't be", spans != null);
257
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
258
//should be two matches per document
260
//100 hits times 2 matches per hit, we should have 200 in count
261
while (spans.next()) {
267
public void testNoMatch() throws Exception {
268
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
269
new MaxPayloadFunction());
270
TopDocs hits = searcher.search(query, null, 100);
271
assertTrue("hits is null and it shouldn't be", hits != null);
272
assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
276
public void testNoPayload() throws Exception {
277
PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
278
new MaxPayloadFunction());
279
PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
280
new MaxPayloadFunction());
281
BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
282
BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
283
BooleanQuery query = new BooleanQuery();
286
TopDocs hits = searcher.search(query, null, 100);
287
assertTrue("hits is null and it shouldn't be", hits != null);
288
assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
289
int[] results = new int[1];
290
results[0] = 0;//hits.scoreDocs[0].doc;
291
CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
294
// must be static for weight serialization tests
295
static class BoostingSimilarity extends DefaultSimilarity {
297
// TODO: Remove warning after API has been finalized
299
public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
300
//we know it is size 4 here, so ignore the offset/length
304
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
305
//Make everything else 1 so we see the effect of the payload
306
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
308
public float computeNorm(String fieldName, FieldInvertState state) {
309
return state.getBoost();
313
public float queryNorm(float sumOfSquaredWeights) {
318
public float sloppyFreq(int distance) {
323
public float coord(int overlap, int maxOverlap) {
328
public float idf(int docFreq, int numDocs) {
333
public float tf(float freq) {
334
return freq == 0 ? 0 : 1;
338
static class FullSimilarity extends DefaultSimilarity{
339
public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
340
//we know it is size 4 here, so ignore the offset/length