package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
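
/**
 * Verifies that a {@link PayloadProcessorProvider} registered on an {@link IndexWriter}
 * is consulted during {@code addIndexes(IndexReader...)} and during segment merges, and
 * that payloads of the targeted term ("p:p1") are processed while payloads of all other
 * terms ("p:p2") are left intact.
 */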
public class TestPayloadProcessorProvider extends LuceneTestCase {

  private static final class PerDirPayloadProcessor extends PayloadProcessorProvider {

    private Map<Directory, DirPayloadProcessor> processors;

    public PerDirPayloadProcessor(Map<Directory, DirPayloadProcessor> processors) {
      this.processors = processors;
    }

    @Override
    public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
      return processors.get(dir);
    }
  }
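
  /** Deletes payloads of the term "p:p1"; payloads of all other terms are left untouched. */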
  private static final class PerTermPayloadProcessor extends DirPayloadProcessor {

    @Override
    public PayloadProcessor getProcessor(Term term) throws IOException {
      // don't process payloads of terms other than "p:p1"
      if (!term.field().equals("p") || !term.text().equals("p1")) {
        return null;
      }
      // payloads of "p:p1" are all processed the same way - they get deleted
      return new DeletePayloadProcessor();
    }
  }

  /** deletes the incoming payload by reporting a processed length of zero */
  private static final class DeletePayloadProcessor extends PayloadProcessor {

    @Override
    public int payloadLength() throws IOException {
      // after processing, nothing of the payload remains
      return 0;
    }

    @Override
    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
      // the returned bytes are irrelevant since payloadLength() reports 0
      return payload;
    }
  }

  /** Emits a single token with the given text and a one-byte payload. */
  private static final class PayloadTokenStream extends TokenStream {

    private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
    private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

    private boolean called = false;
    private final String t;

    public PayloadTokenStream(String t) {
      this.t = t;
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (called) {
        return false;
      }
      called = true;
      byte[] p = new byte[] { 1 };
      payload.setPayload(new Payload(p));
      term.append(t);
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      called = false;
      term.setEmpty();
    }
  }

  private static final int NUM_DOCS = 10;

  private IndexWriterConfig getConfig(Random random) {
    return newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(
        TEST_VERSION_CURRENT));
  }

  private void populateDirs(Random random, Directory[] dirs, boolean multipleCommits)
      throws IOException {
    for (int i = 0; i < dirs.length; i++) {
      dirs[i] = newDirectory();
      populateDocs(random, dirs[i], multipleCommits);
      verifyPayloadExists(dirs[i], new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dirs[i], new Term("p", "p2"), NUM_DOCS);
    }
  }

  private void populateDocs(Random random, Directory dir, boolean multipleCommits)
      throws IOException {
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).
            setMergePolicy(newLogMergePolicy(10))
    );
    TokenStream payloadTS1 = new PayloadTokenStream("p1");
    TokenStream payloadTS2 = new PayloadTokenStream("p2");
    for (int i = 0; i < NUM_DOCS; i++) {
      Document doc = new Document();
      doc.add(newField("id", "doc" + i, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
      doc.add(newField("content", "doc content " + i, Store.NO, Index.ANALYZED));
      doc.add(new Field("p", payloadTS1));
      doc.add(new Field("p", payloadTS2));
      writer.addDocument(doc);
      if (multipleCommits && (i % 4 == 0)) {
        writer.commit();
      }
    }
    writer.close();
  }

  private void verifyPayloadExists(Directory dir, Term term, int numExpected)
      throws IOException {
    IndexReader reader = IndexReader.open(dir);
    try {
      int numPayloads = 0;
      TermPositions tp = reader.termPositions(term);
      while (tp.next()) {
        tp.nextPosition();
        if (tp.isPayloadAvailable()) {
          assertEquals(1, tp.getPayloadLength());
          byte[] p = new byte[tp.getPayloadLength()];
          tp.getPayload(p, 0);
          assertEquals(1, p[0]);
          ++numPayloads;
        }
      }
      assertEquals(numExpected, numPayloads);
    } finally {
      reader.close();
    }
  }
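
  /**
   * Populates two source directories, optionally pre-populates the destination index,
   * then runs addIndexes with a PayloadProcessorProvider registered for the source
   * dirs only. Afterwards "p:p1" should carry numExpectedPayloads payloads in the
   * destination (0 if it started empty), while "p:p2" keeps all of its payloads.
   */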
  private void doTest(Random random, boolean addToEmptyIndex,
      int numExpectedPayloads, boolean multipleCommits) throws IOException {
    Directory[] dirs = new Directory[2];
    populateDirs(random, dirs, multipleCommits);

    Directory dir = newDirectory();
    if (!addToEmptyIndex) {
      populateDocs(random, dir, multipleCommits);
      verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);
    }

    // Add two source dirs. By not adding the dest dir, we ensure its payloads
    // won't get processed.
    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
    for (Directory d : dirs) {
      processors.put(d, new PerTermPayloadProcessor());
    }
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));

    IndexReader[] readers = new IndexReader[dirs.length];
    for (int i = 0; i < readers.length; i++) {
      readers[i] = IndexReader.open(dirs[i]);
    }
    try {
      writer.addIndexes(readers);
    } finally {
      for (IndexReader r : readers) {
        r.close();
      }
    }
    writer.close();

    verifyPayloadExists(dir, new Term("p", "p1"), numExpectedPayloads);
    // the second term should always have all payloads
    numExpectedPayloads = NUM_DOCS * dirs.length
        + (addToEmptyIndex ? 0 : NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), numExpectedPayloads);
    for (Directory d : dirs) {
      d.close();
    }
    dir.close();
  }

  @Test
  public void testAddIndexes() throws Exception {
    // addIndexes - single commit in each
    doTest(random, true, 0, false);

    // addIndexes - multiple commits in each
    doTest(random, true, 0, true);
  }

  @Test
  public void testAddIndexesIntoExisting() throws Exception {
    // addIndexes - single commit in each
    doTest(random, false, NUM_DOCS, false);

    // addIndexes - multiple commits in each
    doTest(random, false, NUM_DOCS, true);
  }

  @Test
  public void testRegularMerges() throws Exception {
    Directory dir = newDirectory();
    populateDocs(random, dir, true);
    verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);

    // Register a processor for the index's own directory, so that its payloads
    // are processed when its segments are merged.
    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
    processors.put(dir, new PerTermPayloadProcessor());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));

    // force the segments to merge so the payload processor runs
    writer.optimize();
    writer.close();

    verifyPayloadExists(dir, new Term("p", "p1"), 0);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);
    dir.close();
  }
}