1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.IOException;
21
import java.util.ArrayList;
22
import java.util.Random;
24
import org.apache.lucene.analysis.Analyzer;
25
import org.apache.lucene.analysis.MockAnalyzer;
26
import org.apache.lucene.document.Document;
27
import org.apache.lucene.document.Field;
28
import org.apache.lucene.document.Field.Index;
29
import org.apache.lucene.document.Field.Store;
30
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
31
import org.apache.lucene.search.DefaultSimilarity;
32
import org.apache.lucene.search.Similarity;
33
import org.apache.lucene.store.Directory;
34
import org.apache.lucene.util.LuceneTestCase;
37
* Test that norms info is preserved during index life - including
38
* separate norms, addDocument, addIndexes, forceMerge.
40
public class TestNorms extends LuceneTestCase {
42
private class SimilarityOne extends DefaultSimilarity {
44
public float computeNorm(String fieldName, FieldInvertState state) {
45
// Disable length norm
46
return state.getBoost();
50
// Number of fields ("f0" .. "f9") whose norms verifyIndex checks for every document.
private static final int NUM_FIELDS = 10;
52
// Similarity given to the index writers and used to encode/decode norm values; assigned in setUp.
private Similarity similarityOne;
53
// Analyzer handed to every IndexWriterConfig built by this test; assigned in setUp.
private Analyzer anlzr;
54
// Position at which nextNorm records the next norm; verifyIndex also expects each norms array to have this length.
private int numDocNorms;
55
// Expected norm per document for every field other than "f1" (see verifyIndex).
private ArrayList<Float> norms;
56
// Expected norm per document for field "f1", kept in step with the swaps done by modifyNormsForF1.
private ArrayList<Float> modifiedNorms;
57
// Starting point for the next norm value; nextNorm resets it to 0 once a norm exceeds 10.
private float lastNorm = 0;
58
// Step added to lastNorm when nextNorm picks a candidate for the next norm.
private float normDelta = (float) 0.001;
61
public void setUp() throws Exception {
63
similarityOne = new SimilarityOne();
64
anlzr = new MockAnalyzer(random);
68
* Test that norms values are preserved as the index is maintained.
69
* Including separate norms.
70
* Including merging indexes with seprate norms.
71
* Including forceMerge.
73
public void testNorms() throws IOException {
74
Directory dir1 = newDirectory();
76
norms = new ArrayList<Float>();
77
modifiedNorms = new ArrayList<Float>();
79
createIndex(random, dir1);
80
doTestNorms(random, dir1);
82
// test with a single index: index2
83
ArrayList<Float> norms1 = norms;
84
ArrayList<Float> modifiedNorms1 = modifiedNorms;
85
int numDocNorms1 = numDocNorms;
87
norms = new ArrayList<Float>();
88
modifiedNorms = new ArrayList<Float>();
91
Directory dir2 = newDirectory();
93
createIndex(random, dir2);
94
doTestNorms(random, dir2);
96
// add index1 and index2 to a third index: index3
97
Directory dir3 = newDirectory();
99
createIndex(random, dir3);
100
IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
101
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
102
.setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
103
iw.addIndexes(new Directory[]{dir1,dir2});
107
norms1.addAll(norms);
109
modifiedNorms1.addAll(modifiedNorms);
110
modifiedNorms = modifiedNorms1;
111
numDocNorms += numDocNorms1;
115
doTestNorms(random, dir3);
117
// now with single segment
118
iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
119
anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
129
private void doTestNorms(Random random, Directory dir) throws IOException {
130
int num = atLeast(1);
131
for (int i=0; i<num; i++) {
132
addDocs(random, dir,12,true);
134
modifyNormsForF1(dir);
136
addDocs(random, dir,12,false);
138
modifyNormsForF1(dir);
143
private void createIndex(Random random, Directory dir) throws IOException {
144
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
145
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
146
.setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
147
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
148
lmp.setMergeFactor(3);
149
lmp.setUseCompoundFile(true);
153
private void modifyNormsForF1(Directory dir) throws IOException {
154
IndexReader ir = IndexReader.open(dir, false);
156
for (int i = 0; i < n; i+=3) { // modify for every third doc
157
int k = (i*3) % modifiedNorms.size();
158
float origNorm = modifiedNorms.get(i).floatValue();
159
float newNorm = modifiedNorms.get(k).floatValue();
160
//System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
161
//System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
162
modifiedNorms.set(i, Float.valueOf(newNorm));
163
modifiedNorms.set(k, Float.valueOf(origNorm));
164
ir.setNorm(i, "f"+1, newNorm);
165
ir.setNorm(k, "f"+1, origNorm);
171
private void verifyIndex(Directory dir) throws IOException {
172
IndexReader ir = IndexReader.open(dir, false);
173
for (int i = 0; i < NUM_FIELDS; i++) {
174
String field = "f"+i;
175
byte b[] = ir.norms(field);
176
assertEquals("number of norms mismatches",numDocNorms,b.length);
177
ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
178
for (int j = 0; j < b.length; j++) {
179
float norm = similarityOne.decodeNormValue(b[j]);
180
float norm1 = storedNorms.get(j).floatValue();
181
assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
187
private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
188
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
189
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
190
.setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
191
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
192
lmp.setMergeFactor(3);
193
lmp.setUseCompoundFile(compound);
194
for (int i = 0; i < ndocs; i++) {
195
iw.addDocument(newDoc());
200
// create the next document
201
private Document newDoc() {
202
Document d = new Document();
203
float boost = nextNorm();
204
for (int i = 0; i < 10; i++) {
205
Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
212
// Produces the norm that newDoc uses as the boost of the next document, and records it
// in both expectation lists at position numDocNorms.
// Stated intent: return unique norm values that are unchanged by encoding/decoding.
// NOTE(review): this view of the method looks truncated - no statement is visible inside
// the if below, numDocNorms is not advanced here, and no return statement is visible.
// Confirm against the complete file before relying on this text.
213
private float nextNorm() {
214
// first candidate: one step above the previously issued norm
float norm = lastNorm + normDelta;
216
// round-trip the candidate through the single-byte norm encoding
float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
217
// true when the round-tripped value is still above the previous norm
if (norm1 > lastNorm) {
218
//System.out.println(norm1+" > "+lastNorm);
224
// record the expectation for all fields other than f1 ...
norms.add(numDocNorms, Float.valueOf(norm));
225
// ... and, initially identical, the expectation for f1
modifiedNorms.add(numDocNorms, Float.valueOf(norm));
226
//System.out.println("creating norm("+numDocNorms+"): "+norm);
228
// start over from 0 once the norm exceeds 10
lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a single byte
232
// Similarity installed by testCustomEncoder: it declares its own norm encoding and decoding
// and uses the field length as the norm.
// NOTE(review): in this view the bodies of encodeNormValue and decodeNormValue are not
// visible. testCustomEncoder expects stored norm bytes of 0 and 1 - confirm the
// actual conversion against the complete file.
class CustomNormEncodingSimilarity extends DefaultSimilarity {
234
// Converts a norm value to the single byte stored in the index (body not visible here).
public byte encodeNormValue(float f) {
239
// Converts a stored norm byte back to a float (body not visible here).
public float decodeNormValue(byte b) {
244
// The norm is the field length taken from the invert state, cast to float.
public float computeNorm(String field, FieldInvertState state) {
245
return (float) state.getLength();
250
public void testCustomEncoder() throws Exception {
251
Directory dir = newDirectory();
252
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
253
config.setSimilarity(new CustomNormEncodingSimilarity());
254
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
255
Document doc = new Document();
256
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
257
Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
261
for (int i = 0; i < 100; i++) {
262
bar.setValue("singleton");
263
writer.addDocument(doc);
266
IndexReader reader = writer.getReader();
269
byte fooNorms[] = reader.norms("foo");
270
for (int i = 0; i < reader.maxDoc(); i++)
271
assertEquals(0, fooNorms[i]);
273
byte barNorms[] = reader.norms("bar");
274
for (int i = 0; i < reader.maxDoc(); i++)
275
assertEquals(1, barNorms[i]);