package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/*
  Verify that opening and closing an IndexWriter on an existing
  index deletes leftover files that are not referenced by the index.
*/

public class TestIndexFileDeleter extends LuceneTestCase {

  public void testDeleteLeftoverFiles() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    dir.setPreventDoubleWrite(false);
    IndexWriterConfig conf = newIndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
        .setMaxBufferedDocs(10);
    LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
    mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
    conf.setMergePolicy(mergePolicy);

    IndexWriter writer = new IndexWriter(dir, conf);
    writer.setInfoStream(VERBOSE ? System.out : null);

    // Add enough docs (flushing every 10) to produce several
    // compound-file segments:
    int i;
    for(i=0;i<35;i++) {
      addDoc(writer, i);
    }

    // Then add more docs with compound files turned off, so the
    // last segments are written in non-CFS format:
    mergePolicy.setUseCompoundFile(false);
    for(;i<45;i++) {
      addDoc(writer, i);
    }
    writer.close();

    // Delete one doc so we get a .del file:
    IndexReader reader = IndexReader.open(dir, false);
    Term searchTerm = new Term("id", "7");
    int delCount = reader.deleteDocuments(searchTerm);
    assertEquals("didn't delete the right number of documents", 1, delCount);

    // Set one norm so we get a .s0 file:
    reader.setNorm(21, "content", (float) 1.5);
    reader.close();

// Now, artificially create an extra .del file & extra
82
String[] files = dir.listAll();
85
for(int j=0;j<files.length;j++) {
86
System.out.println(j + ": " + files[j]);
    // The numbering of fields can vary depending on which
    // JRE is in use.  On some JREs we see content bound to
    // field 0; on others, field 1.  So, here we have to
    // figure out which field number corresponds to
    // "content", and then set our expected file names below
    // accordingly:
    CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
    int contentFieldIndex = -1;
    for(i=0;i<fieldInfos.size();i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.name.equals("content")) {
        contentFieldIndex = i;
        break;
      }
    }
    cfsReader.close();
    assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);

    String normSuffix = "s" + contentFieldIndex;

    // Create a bogus separate norms file for a
    // segment/field that actually has a separate norms file
    // already:
    copyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

    // Create a bogus separate norms file for a
    // segment/field that actually has a separate norms file
    // already, using the "not compound file" extension:
    copyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

    // Create a bogus separate norms file for a
    // segment/field that does not have a separate norms
    // file already:
    copyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

    // Create a bogus separate norms file for a
    // segment/field that does not have a separate norms
    // file already, using the "not compound file" extension:
    copyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

    // Create a bogus separate del file for a
    // segment that already has a separate del file:
    copyFile(dir, "_0_1.del", "_0_2.del");

    // Create a bogus separate del file for a
    // segment that does not yet have a separate del file:
    copyFile(dir, "_0_1.del", "_1_1.del");

    // Create a bogus separate del file for a
    // non-existent segment:
    copyFile(dir, "_0_1.del", "_188_1.del");

    // Create a bogus segment file:
    copyFile(dir, "_0.cfs", "_188.cfs");

    // Create a bogus fnm file when the CFS already exists:
    copyFile(dir, "_0.cfs", "_0.fnm");

    // Create a deletable file:
    copyFile(dir, "_0.cfs", "deletable");

    // Create some old segments files:
    copyFile(dir, "segments_2", "segments");
    copyFile(dir, "segments_2", "segments_1");

    // Create a bogus cfs file shadowing a non-cfs segment:
    assertTrue(dir.fileExists("_3.fdt"));
    assertTrue(!dir.fileExists("_3.cfs"));
    copyFile(dir, "_1.cfs", "_3.cfs");

    String[] filesPre = dir.listAll();

    // Open & close a writer: it should delete the above
    // extra files and nothing more:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.close();

    String[] files2 = dir.listAll();
    dir.close();

    Arrays.sort(files);
    Arrays.sort(files2);

    Set<String> dif = difFiles(files, files2);

    if (!Arrays.equals(files, files2)) {
      fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length-files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n    " + asString(files) + "\n  actual files:\n    " + asString(files2) + "\ndif: " + dif);
    }
  }

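  // Returns the symmetric difference of the two file listings:
  // every name present in one array but not the other.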
private static Set<String> difFiles(String[] files1, String[] files2) {
182
Set<String> set1 = new HashSet<String>();
183
Set<String> set2 = new HashSet<String>();
184
Set<String> extra = new HashSet<String>();
186
for (int x=0; x < files1.length; x++) {
189
for (int x=0; x < files2.length; x++) {
192
Iterator<String> i1 = set1.iterator();
193
while (i1.hasNext()) {
194
String o = i1.next();
195
if (!set2.contains(o)) {
199
Iterator<String> i2 = set2.iterator();
200
while (i2.hasNext()) {
201
String o = i2.next();
202
if (!set1.contains(o)) {
209
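  // Renders a file listing one name per line, indented to line up
  // with the failure message above.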
  private String asString(String[] l) {
    String s = "";
    for(int i=0;i<l.length;i++) {
      if (i > 0) {
        s += "\n    ";
      }
      s += l[i];
    }
    return s;
  }

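  // Copies src to dest within the Directory, 1KB at a time.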
  public void copyFile(Directory dir, String src, String dest) throws IOException {
    IndexInput in = dir.openInput(src);
    IndexOutput out = dir.createOutput(dest);
    byte[] b = new byte[1024];
    long remainder = in.length();
    while(remainder > 0) {
      int len = (int) Math.min(b.length, remainder);
      in.readBytes(b, 0, len);
      out.writeBytes(b, len);
      remainder -= len; // advance past the bytes just copied
    }
    in.close();
    out.close();
  }

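  // Adds a trivial document: an analyzed "content" field plus a
  // stored, not-analyzed "id" field used for delete-by-term.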
  private void addDoc(IndexWriter writer, int id) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(newField("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
  }
}