1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.IOException;
21
import java.util.ArrayList;
22
import java.util.Random;
24
import java.util.concurrent.atomic.AtomicInteger;
25
import org.apache.lucene.analysis.Analyzer;
26
import org.apache.lucene.analysis.MockAnalyzer;
27
import org.apache.lucene.document.Document;
28
import org.apache.lucene.document.Field;
29
import org.apache.lucene.document.Field.Index;
30
import org.apache.lucene.document.Field.Store;
31
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
32
import org.apache.lucene.search.DefaultSimilarity;
33
import org.apache.lucene.search.Similarity;
34
import org.apache.lucene.store.Directory;
35
import org.apache.lucene.util.LuceneTestCase;
38
* Tests cloning IndexReader norms
40
public class TestIndexReaderCloneNorms extends LuceneTestCase {
42
// Similarity whose norm for a field is simply the boost carried by the
// FieldInvertState. It is the similarity handed to the writers in this test
// via setSimilarity(similarityOne).
private class SimilarityOne extends DefaultSimilarity {
44
public float computeNorm(String fieldName, FieldInvertState state) {
46
// fieldName is not consulted: the norm is taken straight from the state's boost
return state.getBoost();
50
// number of fields checked by verifyIndex (field names "f0" .. "f9")
private static final int NUM_FIELDS = 10;
52
// similarity passed to setSimilarity(...) when writers are configured; assigned in setUp
private Similarity similarityOne;
54
// analyzer shared by every IndexWriterConfig built in this test; assigned in setUp
private Analyzer anlzr;
56
// expected number of norms per field; verifyIndex compares it with the norms array length
private int numDocNorms;
58
// expected per-document norm values for every field except f1
private ArrayList<Float> norms;
60
// expected per-document norm values for f1, kept in sync by modifyNormsForF1
private ArrayList<Float> modifiedNorms;
62
// last value recorded by nextNorm; wrapped back to 0 once a value above 10 is produced
private float lastNorm = 0;
64
// increment added to lastNorm to form nextNorm's first candidate value
private float normDelta = (float) 0.001;
67
// Per-test fixture: creates the similarity and the analyzer used by the
// IndexWriterConfig instances built elsewhere in this class.
public void setUp() throws Exception {
69
similarityOne = new SimilarityOne();
70
// `random` is not declared in the visible code — presumably inherited from LuceneTestCase; confirm
anlzr = new MockAnalyzer(random);
74
* Test that norms values are preserved as the index is maintained. Including
75
* separate norms. Including merging indexes with separate norms. Including
78
// Builds two independent indexes (dir1, dir2), runs doTestNorms on each, then
// adds both into a third index (dir3) and runs doTestNorms on the combined index.
public void testNorms() throws IOException {
79
// test with a single index: index1
80
Directory dir1 = newDirectory();
81
IndexWriter.unlock(dir1);
83
// start index1 with empty expected-norm lists
norms = new ArrayList<Float>();
84
modifiedNorms = new ArrayList<Float>();
86
createIndex(random, dir1);
87
doTestNorms(random, dir1);
89
// test with a single index: index2
90
// keep index1's expected state aside so it can be combined with index2's below
ArrayList<Float> norms1 = norms;
91
ArrayList<Float> modifiedNorms1 = modifiedNorms;
92
int numDocNorms1 = numDocNorms;
94
// fresh expected-norm lists for index2
norms = new ArrayList<Float>();
95
modifiedNorms = new ArrayList<Float>();
98
Directory dir2 = newDirectory();
100
createIndex(random, dir2);
101
doTestNorms(random, dir2);
103
// add index1 and index2 to a third index: index3
104
Directory dir3 = newDirectory();
106
createIndex(random, dir3);
108
System.out.println("TEST: now addIndexes/full merge");
110
// NOTE(review): the directory argument of this constructor call is not visible
// in this view; presumably dir3 — confirm
IndexWriter iw = new IndexWriter(
112
newIndexWriterConfig(TEST_VERSION_CURRENT, anlzr).
113
setOpenMode(OpenMode.APPEND).
114
setMaxBufferedDocs(5).
115
setMergePolicy(newLogMergePolicy(3)));
116
iw.addIndexes(dir1, dir2);
120
// expected values for the combined index: index1's entries followed by index2's,
// matching the (dir1, dir2) order passed to addIndexes
norms1.addAll(norms);
// NOTE(review): only modifiedNorms is visibly re-pointed at the combined list
// below; confirm that norms is re-pointed at norms1 as well
122
modifiedNorms1.addAll(modifiedNorms);
123
modifiedNorms = modifiedNorms1;
124
// fold the document count saved from index1 back into the running count
numDocNorms += numDocNorms1;
128
doTestNorms(random, dir3);
130
// now with full merge
131
iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
132
anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
142
// try cloning and reopening the norms:
// appends documents to dir, then changes f1 norms through a reader, through a
// clone of that reader, and through a clone of the clone
143
private void doTestNorms(Random random, Directory dir) throws IOException {
144
// 12 more documents, written with compound files enabled (last argument)
addDocs(random, dir, 12, true);
145
// second argument is false — presumably readOnly=false so setNorm is allowed; confirm
IndexReader ir = IndexReader.open(dir, false);
147
modifyNormsForF1(ir);
148
IndexReader irc = (IndexReader) ir.clone();// IndexReader.open(dir, false);//ir.clone();
151
// modify again through the first clone
modifyNormsForF1(irc);
153
IndexReader irc3 = (IndexReader) irc.clone();
155
// and once more through a clone of the clone
modifyNormsForF1(irc3);
164
// Checks the reference count on the "field1" norm bytes of a SegmentReader
// before and after the reader is cloned.
public void testNormsClose() throws IOException {
165
Directory dir1 = newDirectory();
166
TestIndexReaderReopen.createIndex(random, dir1, false);
167
SegmentReader reader1 = SegmentReader.getOnlySegmentReader(dir1);
168
// return value is discarded; called so that reader1.norms has an entry for field1
reader1.norms("field1");
169
SegmentNorms r1norm = reader1.norms.get("field1");
170
// keep the counter object itself so it can be read again later in the test
AtomicInteger r1BytesRef = r1norm.bytesRef();
171
SegmentReader reader2 = (SegmentReader)reader1.clone();
172
// after cloning, the count on reader1's norm entry is expected to be 2
assertEquals(2, r1norm.bytesRef().get());
174
// NOTE(review): whatever drops the count from 2 to 1 between these two
// assertions is not visible in this view
assertEquals(1, r1BytesRef.get());
175
reader2.norms("field1");
180
// Follows the reference count on the "field1" norm bytes across a chain of
// cloned readers (reader1 -> reader2C -> reader3C -> reader4C -> reader5C),
// including what happens to the count when a clone changes a norm.
public void testNormsRefCounting() throws IOException {
181
Directory dir1 = newDirectory();
182
TestIndexReaderReopen.createIndex(random, dir1, false);
183
IndexReader reader1 = IndexReader.open(dir1, false);
185
IndexReader reader2C = (IndexReader) reader1.clone();
186
SegmentReader segmentReader2C = SegmentReader.getOnlySegmentReader(reader2C);
187
segmentReader2C.norms("field1"); // load the norms for the field
188
SegmentNorms reader2CNorm = segmentReader2C.norms.get("field1");
189
// the count is expected to be 2 at this point; the message prints the actual counter
assertTrue("reader2CNorm.bytesRef()=" + reader2CNorm.bytesRef(), reader2CNorm.bytesRef().get() == 2);
193
IndexReader reader3C = (IndexReader) reader2C.clone();
194
SegmentReader segmentReader3C = SegmentReader.getOnlySegmentReader(reader3C);
195
SegmentNorms reader3CCNorm = segmentReader3C.norms.get("field1");
196
// one more clone, one more reference
assertEquals(3, reader3CCNorm.bytesRef().get());
198
// edit a norm and the refcount should be 1
199
IndexReader reader4C = (IndexReader) reader3C.clone();
200
SegmentReader segmentReader4C = SegmentReader.getOnlySegmentReader(reader4C);
201
// before the edit, the fourth clone still counts against the same entry
assertEquals(4, reader3CCNorm.bytesRef().get());
202
reader4C.setNorm(5, "field1", 0.33f);
204
// generate a cannot update exception in reader1
// NOTE(review): the comment above names reader1, but the call below is made on reader3C
// NOTE(review): the opening `try {` for the catch below is not visible in this view
206
reader3C.setNorm(1, "field1", 0.99f);
207
fail("did not hit expected exception");
208
} catch (Exception ex) {
212
// norm values should be different
// (compares doc 5 of field1 as seen by reader3C and by reader4C, which set it to 0.33f)
213
assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
214
!= Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
215
SegmentNorms reader4CCNorm = segmentReader4C.norms.get("field1");
216
// expected after reader4C's edit: reader3C's entry is back to 3 and
// reader4C's own entry has a count of 1
assertEquals(3, reader3CCNorm.bytesRef().get());
217
assertEquals(1, reader4CCNorm.bytesRef().get());
219
IndexReader reader5C = (IndexReader) reader4C.clone();
220
SegmentReader segmentReader5C = SegmentReader.getOnlySegmentReader(reader5C);
221
SegmentNorms reader5CCNorm = segmentReader5C.norms.get("field1");
222
reader5C.setNorm(5, "field1", 0.7f);
223
// after reader5C edits the norm, its entry is expected to have a count of 1
assertEquals(1, reader5CCNorm.bytesRef().get());
233
// Opens an IndexWriter on dir in CREATE mode, configured with the shared
// analyzer, the test similarity, maxBufferedDocs=5 and a log merge policy.
// The `random` parameter is not used in the visible lines.
private void createIndex(Random random, Directory dir) throws IOException {
234
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
235
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
236
.setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
237
// adjust the writer's merge policy: compound files on, merge factor 3
setUseCompoundFile(iw.getConfig().getMergePolicy(), true);
238
setMergeFactor(iw.getConfig().getMergePolicy(), 3);
242
// Swaps f1 norm values between pairs of documents, applying each swap both to
// the expected list (modifiedNorms) and to the reader via setNorm.
// NOTE(review): the declaration of `n` is not visible in this view; the
// commented-out println below suggests it is the reader's maxDoc — confirm
private void modifyNormsForF1(IndexReader ir) throws IOException {
244
// System.out.println("modifyNormsForF1 maxDoc: "+n);
245
for (int i = 0; i < n; i += 3) { // modify for every third doc
246
// partner document whose value is exchanged with doc i
int k = (i * 3) % modifiedNorms.size();
247
float origNorm = modifiedNorms.get(i).floatValue();
248
float newNorm = modifiedNorms.get(k).floatValue();
249
// System.out.println("Modifying: for "+i+" from "+origNorm+" to
251
// System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
252
// record the swap in the expected values...
modifiedNorms.set(i, Float.valueOf(newNorm));
253
modifiedNorms.set(k, Float.valueOf(origNorm));
254
// ...and apply the same swap to field "f1" through the reader
ir.setNorm(i, "f" + 1, newNorm);
255
ir.setNorm(k, "f" + 1, origNorm);
256
// System.out.println("setNorm i: "+i);
262
// Directory overload of verifyIndex: opens a reader on dir.
// NOTE(review): the rest of the body is not visible in this view; presumably it
// passes the reader to verifyIndex(IndexReader) and closes it — confirm
private void verifyIndex(Directory dir) throws IOException {
263
IndexReader ir = IndexReader.open(dir, false);
268
// Checks, for each of the NUM_FIELDS fields "f0".."f9", that the reader holds
// numDocNorms norms and that each decoded norm equals the expected value.
private void verifyIndex(IndexReader ir) throws IOException {
269
for (int i = 0; i < NUM_FIELDS; i++) {
270
String field = "f" + i;
271
byte b[] = ir.norms(field);
272
assertEquals("number of norms mismatches", numDocNorms, b.length);
273
// f1 is the only field whose norms get modified, so it has its own expected list
ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
274
for (int j = 0; j < b.length; j++) {
275
// norm: value decoded from the index; norm1: value this test recorded
float norm = Similarity.getDefault().decodeNormValue(b[j]);
276
float norm1 = storedNorms.get(j).floatValue();
277
assertEquals("stored norm value of " + field + " for doc " + j + " is "
278
+ norm + " - a mismatch!", norm, norm1, 0.000001);
283
// Appends ndocs documents (each built by newDoc()) to the existing index in dir.
// `compound` controls whether the merge policy uses compound files.
// The `random` parameter is not used in the visible lines.
private void addDocs(Random random, Directory dir, int ndocs, boolean compound)
285
IndexWriterConfig conf = newIndexWriterConfig(
286
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
287
.setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy());
288
// the cast relies on the policy installed just above via newLogMergePolicy()
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
289
lmp.setMergeFactor(3);
290
lmp.setUseCompoundFile(compound);
291
IndexWriter iw = new IndexWriter(dir, conf);
292
for (int i = 0; i < ndocs; i++) {
293
iw.addDocument(newDoc());
298
// create the next document: one field per name "f0".."f9", each with value "v<i>"
299
private Document newDoc() {
300
Document d = new Document();
301
// one fresh norm value per document
// NOTE(review): where `boost` is applied is not visible in this view;
// presumably it is set on each field — confirm
float boost = nextNorm();
302
// the literal 10 equals NUM_FIELDS
for (int i = 0; i < 10; i++) {
303
Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
310
// return unique norm values that are unchanged by encoding/decoding
// Also records the chosen value at index numDocNorms in both norms and modifiedNorms.
// NOTE(review): the loop header, the candidate increment, the numDocNorms
// update and the return statement are not visible in this view
311
private float nextNorm() {
312
// first candidate: one normDelta above the last value
float norm = lastNorm + normDelta;
314
// round-trip the candidate through the norm encoding
float norm1 = Similarity.getDefault().decodeNormValue(
315
Similarity.getDefault().encodeNormValue(norm));
316
// the candidate qualifies once its round-tripped value exceeds the last value
if (norm1 > lastNorm) {
317
// System.out.println(norm1+" > "+lastNorm);
323
norms.add(numDocNorms, Float.valueOf(norm));
324
modifiedNorms.add(numDocNorms, Float.valueOf(norm));
325
// System.out.println("creating norm("+numDocNorms+"): "+norm);
327
lastNorm = (norm > 10 ? 0 : norm); // there's a limit to how many distinct
328
// values can be stored in a single byte