1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.store.Directory;
21
import org.apache.lucene.store.IndexOutput;
22
import org.apache.lucene.store.IndexInput;
23
import org.apache.lucene.util.BitVector;
24
import org.apache.lucene.util.Constants;
26
import java.io.IOException;
27
import java.util.HashSet;
28
import java.util.List;
30
import java.util.HashMap;
31
import java.util.ArrayList;
32
import java.util.Collections;
36
* Information about a segment such as its name, directory, and files related
39
* @lucene.experimental
41
public final class SegmentInfo implements Cloneable {
43
static final int NO = -1; // e.g. no norms; no deletes;
44
static final int YES = 1; // e.g. have norms; have deletes;
45
static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
46
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
48
public String name; // unique name in dir
49
public int docCount; // number of docs in seg
50
public Directory dir; // where segment resides
52
private boolean preLockless; // true if this is a segments file written before
53
// lock-less commits (2.1)
55
private long delGen; // current generation of del file; NO if there
56
// are no deletes; CHECK_DIR if it's a pre-2.1 segment
57
// (and we must check filesystem); YES or higher if
58
// there are deletes at generation N
60
private long[] normGen; // current generation of each field's norm file.
61
// If this array is null, for lockLess this means no
62
// separate norms. For preLockLess this means we must
63
// check filesystem. If this array is not null, its
64
// values mean: NO says this field has no separate
65
// norms; CHECK_DIR says it is a preLockLess segment and
66
// filesystem must be checked; >= YES says this field
67
// has separate norms with the specified generation
69
private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
70
// pre-2.1 (ie, must check file system to see
71
// if <name>.cfs and <name>.nrm exist)
73
private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
75
// this is currently false for segments populated by DocumentWriter
76
// and true for newly created merged segments (both
77
// compound and non compound).
79
private volatile List<String> files; // cached list of files that this segment uses
82
private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
83
private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
85
private int docStoreOffset; // if this segment shares stored fields & vectors, this
86
// offset is where in that file this segment's docs begin
87
private String docStoreSegment; // name used to derive fields/vectors file we share with
89
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
91
private int delCount; // How many deleted docs in this segment, or -1 if not yet known
92
// (if it's an older index)
94
private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
96
private boolean hasVectors; // True if this segment wrote term vectors
98
private Map<String,String> diagnostics;
100
// Tracks the Lucene version this segment was created with, since 3.1. The
101
// format expected is "x.y" - "2.x" for pre-3.0 indexes, and specific versions
102
// afterwards ("3.0", "3.1" etc.).
103
// see Constants.LUCENE_MAIN_VERSION.
104
private String version;
106
// NOTE: only used in-RAM by IW to track buffered deletes;
107
// this is never written to/read from the Directory
108
private long bufferedDeletesGen;
110
/**
 * Creates a SegmentInfo from explicitly supplied values.
 *
 * @param name unique name of the segment in the directory
 * @param docCount number of docs in the segment
 * @param dir directory where the segment resides
 * @param isCompoundFile whether the segment is a compound file; recorded as YES or NO
 * @param hasSingleNormFile whether the segment maintains norms in a single file
 * @param hasProx whether the segment has any fields with omitTermFreqAndPositions==false
 * @param hasVectors whether the segment wrote term vectors
 */
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
111
boolean hasProx, boolean hasVectors) {
113
this.docCount = docCount;
116
// The boolean is stored as a YES/NO byte code.
this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
118
this.hasSingleNormFile = hasSingleNormFile;
119
// -1 is the value tested elsewhere in this class to mean "no shared doc store".
this.docStoreOffset = -1;
121
this.hasProx = hasProx;
122
this.hasVectors = hasVectors;
123
// Stamp the segment with the version constant of the running code.
this.version = Constants.LUCENE_MAIN_VERSION;
127
* Copy everything from src SegmentInfo into our instance.
129
void reset(SegmentInfo src) {
131
// Field-by-field copy of src's state into this instance.
version = src.version;
133
docCount = src.docCount;
135
preLockless = src.preLockless;
137
docStoreOffset = src.docStoreOffset;
138
docStoreIsCompoundFile = src.docStoreIsCompoundFile;
139
hasVectors = src.hasVectors;
140
hasProx = src.hasProx;
141
if (src.normGen == null) {
144
// Copy src's generations into a freshly allocated array rather than
// sharing src's array.
normGen = new long[src.normGen.length];
145
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
147
isCompoundFile = src.isCompoundFile;
148
hasSingleNormFile = src.hasSingleNormFile;
149
delCount = src.delCount;
152
/**
 * Replaces this segment's diagnostics map. The given map is stored by
 * reference; it is not copied.
 *
 * @param diagnostics the new diagnostics map
 */
void setDiagnostics(Map<String, String> diagnostics) {
153
this.diagnostics = diagnostics;
156
public Map<String, String> getDiagnostics() {
161
* Construct a new SegmentInfo instance by reading a
162
* previously saved SegmentInfo from input.
164
* @param dir directory to load from
165
* @param format format of the segments info file
166
* @param input input handle to read segment info from
168
SegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
170
// Every optional field below is guarded by a "format <= FORMAT_X" test:
// the field is read from input only when that test passes.
if (format <= SegmentInfos.FORMAT_3_1) {
171
version = input.readString();
173
name = input.readString();
174
docCount = input.readInt();
175
if (format <= SegmentInfos.FORMAT_LOCKLESS) {
176
delGen = input.readLong();
177
if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
178
docStoreOffset = input.readInt();
179
// The doc store segment name and compound flag follow in the stream
// only when a shared doc store is in use (offset != -1).
if (docStoreOffset != -1) {
180
docStoreSegment = input.readString();
181
// Booleans are stored as a byte; 1 means true.
docStoreIsCompoundFile = (1 == input.readByte());
183
docStoreSegment = name;
184
docStoreIsCompoundFile = false;
188
docStoreSegment = name;
189
docStoreIsCompoundFile = false;
191
if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
192
hasSingleNormFile = (1 == input.readByte());
194
hasSingleNormFile = false;
196
int numNormGen = input.readInt();
197
if (numNormGen == NO) {
200
// One generation value is stored per norm entry.
normGen = new long[numNormGen];
201
for(int j=0;j<numNormGen;j++) {
202
normGen[j] = input.readLong();
205
isCompoundFile = input.readByte();
206
// A stored compound-file code of CHECK_DIR marks the segment as pre-lockless.
preLockless = (isCompoundFile == CHECK_DIR);
207
if (format <= SegmentInfos.FORMAT_DEL_COUNT) {
208
delCount = input.readInt();
209
assert delCount <= docCount;
212
if (format <= SegmentInfos.FORMAT_HAS_PROX)
213
hasProx = input.readByte() == 1;
217
if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) {
218
diagnostics = input.readStringStringMap();
220
diagnostics = Collections.<String,String>emptyMap();
223
if (format <= SegmentInfos.FORMAT_HAS_VECTORS) {
224
hasVectors = input.readByte() == 1;
226
// The statements from here through the fileExists() call work out
// hasVectors by probing for the vectors index file of the segment
// that holds the stored data.
final String storesSegment;
228
final boolean isCompoundFile;
229
if (docStoreOffset != -1) {
230
storesSegment = docStoreSegment;
231
isCompoundFile = docStoreIsCompoundFile;
232
ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
234
storesSegment = name;
235
isCompoundFile = getUseCompoundFile();
236
ext = IndexFileNames.COMPOUND_FILE_EXTENSION;
238
final Directory dirToTest;
239
if (isCompoundFile) {
240
// When a compound file is used, the probe is made through a reader
// opened on that compound file.
dirToTest = new CompoundFileReader(dir, IndexFileNames.segmentFileName(storesSegment, ext));
245
hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, IndexFileNames.VECTORS_INDEX_EXTENSION));
247
if (isCompoundFile) {
255
isCompoundFile = CHECK_DIR;
257
hasSingleNormFile = false;
259
docStoreIsCompoundFile = false;
260
docStoreSegment = null;
263
diagnostics = Collections.<String,String>emptyMap();
267
/**
 * Allocates the per-field norm generation array, one entry per field, when
 * it has not been allocated yet.
 *
 * @param numFields number of fields; used as the length of the new array
 */
void setNumFields(int numFields) {
268
if (normGen == null) {
269
// normGen is null if we loaded a pre-2.1 segment
270
// file, or, if this segments file hasn't had any
271
// norms set against it yet:
272
normGen = new long[numFields];
275
// Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
276
// we have to check filesystem for norm files, because this is prelockless.
279
// This is a FORMAT_LOCKLESS segment, which means
280
// there are no separate norms:
281
for(int i=0;i<numFields;i++) {
289
* Returns total size in bytes of all files used by this segment (if
290
* {@code includeDocStores} is true), or the size of all files except the store
293
public long sizeInBytes(boolean includeDocStores) throws IOException {
294
if (includeDocStores) {
295
// A cached value other than -1 has already been computed; reuse it.
if (sizeInBytesWithStore != -1) {
296
return sizeInBytesWithStore;
299
for (final String fileName : files()) {
300
// We don't count bytes used by a shared doc store
301
// against this segment
302
if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName)) {
303
sum += dir.fileLength(fileName);
306
// Remember the total so later calls can skip the per-file length lookups.
sizeInBytesWithStore = sum;
307
return sizeInBytesWithStore;
309
// Same caching scheme for the total that excludes the store files.
if (sizeInBytesNoStore != -1) {
310
return sizeInBytesNoStore;
313
for (final String fileName : files()) {
314
if (IndexFileNames.isDocStoreFile(fileName)) {
317
sum += dir.fileLength(fileName);
319
sizeInBytesNoStore = sum;
320
return sizeInBytesNoStore;
324
public boolean getHasVectors() throws IOException {
328
public void setHasVectors(boolean v) {
333
/**
 * Reports whether this segment has deletions, decided from {@code delGen}.
 * The cases are described in the comments inside the method.
 */
public boolean hasDeletions()
337
// delGen == NO: this means this segment was written
338
// by the LOCKLESS code and for certain does not have
341
// delGen == CHECK_DIR: this means this segment was written by
342
// pre-LOCKLESS code which means we must check
343
// directory to see if .del file exists
345
// delGen >= YES: this means this segment was written by
346
// the LOCKLESS code and for certain has
351
} else if (delGen >= YES) {
354
// Remaining case: answer by checking whether the deletes file exists.
return dir.fileExists(getDelFileName());
358
void advanceDelGen() {
359
// delGen 0 is reserved for pre-LOCKLESS format
374
public Object clone() {
375
// The compound-file argument (false) is only a placeholder: the real
// byte code is copied onto the new instance further down.
SegmentInfo si = new SegmentInfo(name, docCount, dir, false, hasSingleNormFile,
376
hasProx, hasVectors);
377
si.docStoreOffset = docStoreOffset;
378
si.docStoreSegment = docStoreSegment;
379
si.docStoreIsCompoundFile = docStoreIsCompoundFile;
381
si.delCount = delCount;
382
si.preLockless = preLockless;
383
si.isCompoundFile = isCompoundFile;
384
// The copy gets its own diagnostics map.
// NOTE(review): new HashMap(diagnostics) throws NullPointerException when
// diagnostics is null -- confirm it is always set before clone() is called.
si.diagnostics = new HashMap<String, String>(diagnostics);
385
if (normGen != null) {
386
// The copy also gets its own normGen array.
si.normGen = normGen.clone();
388
si.version = version;
392
/**
 * Returns the name of this segment's deletes file, built from the segment
 * name, the deletes extension and the current {@code delGen}.
 */
public String getDelFileName() {
394
// In this case we know there is no deletion filename
395
// against this segment
398
// If delGen is CHECK_DIR, it's the pre-lockless-commit file format
399
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
404
* Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
406
* @param fieldNumber the field index to check
408
public boolean hasSeparateNorms(int fieldNumber)
410
// First case: no usable generation is recorded for this field, either
// because this is a pre-lockless segment without normGen or because the
// field's entry is CHECK_DIR.
if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
411
// Must fallback to directory file exists check:
412
String fileName = name + ".s" + fieldNumber;
413
return dir.fileExists(fileName);
414
} else if (normGen == null || normGen[fieldNumber] == NO) {
422
* Returns true if any fields in this segment have separate norms.
424
public boolean hasSeparateNorms()
426
if (normGen == null) {
428
// This means we were created w/ LOCKLESS code and no
429
// norms are written yet:
432
// This means this segment was saved with pre-LOCKLESS
433
// code. So we must fallback to the original
434
// directory list check:
435
String[] result = dir.listAll();
437
throw new IOException("cannot read directory " + dir + ": listAll() returned null");
439
final IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
441
pattern = name + ".s";
442
int patternLength = pattern.length();
443
for(int i = 0; i < result.length; i++){
444
String fileName = result[i];
445
// A match is a file accepted by the filter whose name starts with
// "<name>.s" and has a digit right after that prefix.
// NOTE(review): charAt(patternLength) has no length guard here, unlike the
// equivalent test in files(); a name equal to the pattern would throw
// unless filter.accept() already rejects it -- confirm.
if (filter.accept(null, fileName) && fileName.startsWith(pattern) && Character.isDigit(fileName.charAt(patternLength)))
451
// This means this segment was saved with LOCKLESS
452
// code so we first check whether any normGen's are >= 1
453
// (meaning they definitely have separate norms):
454
for(int i=0;i<normGen.length;i++) {
455
if (normGen[i] >= YES) {
459
// Next we look for any == 0. These cases were
460
// pre-LOCKLESS and must be checked in directory:
461
for(int i=0;i<normGen.length;i++) {
462
if (normGen[i] == CHECK_DIR) {
463
if (hasSeparateNorms(i)) {
474
* Increment the generation count for the norms file for
477
* @param fieldIndex field whose norm file will be rewritten
479
void advanceNormGen(int fieldIndex) {
480
// A field whose entry is NO is moved to generation YES.
if (normGen[fieldIndex] == NO) {
481
normGen[fieldIndex] = YES;
483
normGen[fieldIndex]++;
489
* Get the file name for the norms file for this field.
491
* @param number field index
493
public String getNormFileName(int number) throws IOException {
495
// Three naming schemes are tried in order: a generation-stamped separate
// norms file, the single norms file shared by all fields, and finally a
// per-field norms file.
if (normGen == null) {
498
gen = normGen[number];
501
if (hasSeparateNorms(number)) {
502
// case 1: separate norm
503
return IndexFileNames.fileNameFromGeneration(name, "s" + number, gen);
506
if (hasSingleNormFile) {
507
// case 2: lockless (or nrm file exists) - single file for all norms
508
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
511
// case 3: norm file for each field
512
return IndexFileNames.fileNameFromGeneration(name, "f" + number, WITHOUT_GEN);
516
* Mark whether this segment is stored as a compound file.
518
* @param isCompoundFile true if this is a compound file;
521
void setUseCompoundFile(boolean isCompoundFile) {
522
// The flag is stored as a YES/NO code in the field, not as a boolean.
if (isCompoundFile) {
523
this.isCompoundFile = YES;
525
this.isCompoundFile = NO;
531
* Returns true if this segment is stored as a compound
534
public boolean getUseCompoundFile() throws IOException {
535
if (isCompoundFile == NO) {
537
} else if (isCompoundFile == YES) {
540
// Neither NO nor YES: decide by checking whether the segment's compound
// file exists in the directory.
return dir.fileExists(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION));
544
/**
 * Resolves the number of deleted docs in this segment. A cached value of -1
 * means the count is not yet known; in that case, when the segment has
 * deletions, the count is read from the deletes file.
 *
 * @throws IOException if checking for or reading the deletes file fails
 */
public int getDelCount() throws IOException {
545
if (delCount == -1) {
546
if (hasDeletions()) {
547
final String delFileName = getDelFileName();
548
// Load the deletes bit vector and count its set bits.
delCount = new BitVector(dir, delFileName).count();
552
assert delCount <= docCount;
556
/**
 * Sets the number of deleted docs in this segment.
 *
 * @param delCount new deleted-doc count; asserted not to exceed docCount
 */
void setDelCount(int delCount) {
557
this.delCount = delCount;
558
assert delCount <= docCount;
561
/**
 * Returns the doc store offset: where this segment's docs begin in the
 * shared stored fields and vectors files. Elsewhere in this class, -1 is
 * the value that means no shared doc store is in use.
 */
public int getDocStoreOffset() {
562
return docStoreOffset;
565
/** Returns whether the doc store files are stored in a compound file (*.cfx). */
public boolean getDocStoreIsCompoundFile() {
566
return docStoreIsCompoundFile;
569
/**
 * Sets whether the doc store files are stored in a compound file (*.cfx).
 *
 * @param v the new flag value
 */
void setDocStoreIsCompoundFile(boolean v) {
570
docStoreIsCompoundFile = v;
574
/** Returns the segment name used to derive the shared fields/vectors file names. */
public String getDocStoreSegment() {
575
return docStoreSegment;
578
/**
 * Sets the segment name used to derive the shared fields/vectors file names.
 *
 * @param segment the doc store segment name
 */
public void setDocStoreSegment(String segment) {
579
docStoreSegment = segment;
582
/**
 * Sets the doc store offset.
 *
 * @param offset where this segment's docs begin in the shared doc store;
 *        elsewhere in this class -1 is the value that means no shared doc store
 */
void setDocStoreOffset(int offset) {
583
docStoreOffset = offset;
587
/**
 * Sets all three doc store attributes at once.
 *
 * @param offset where this segment's docs begin in the shared doc store
 * @param segment segment name used to derive the doc store file names
 * @param isCompoundFile whether the doc store files are stored in a compound file
 */
void setDocStore(int offset, String segment, boolean isCompoundFile) {
588
docStoreOffset = offset;
589
docStoreSegment = segment;
590
docStoreIsCompoundFile = isCompoundFile;
595
* Save this segment's info.
597
void write(IndexOutput output)
599
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
600
// Write the Lucene version that created this segment, since 3.1
601
output.writeString(version);
602
output.writeString(name);
603
output.writeInt(docCount);
604
output.writeLong(delGen);
605
output.writeInt(docStoreOffset);
606
// The doc store segment name and compound flag are written only when a
// shared doc store is in use (offset != -1).
if (docStoreOffset != -1) {
607
output.writeString(docStoreSegment);
608
// Booleans are encoded as one byte: 1 for true, 0 for false.
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
611
output.writeByte((byte) (hasSingleNormFile ? 1:0));
612
if (normGen == null) {
615
// The array length is written first, then one generation per entry.
output.writeInt(normGen.length);
616
for(int j = 0; j < normGen.length; j++) {
617
output.writeLong(normGen[j]);
620
output.writeByte(isCompoundFile);
621
output.writeInt(delCount);
622
output.writeByte((byte) (hasProx ? 1:0));
623
output.writeStringStringMap(diagnostics);
624
output.writeByte((byte) (hasVectors ? 1 : 0));
627
/**
 * Records whether this segment has any fields with
 * omitTermFreqAndPositions==false.
 *
 * @param hasProx the new flag value
 */
void setHasProx(boolean hasProx) {
628
this.hasProx = hasProx;
632
public boolean getHasProx() {
636
/**
 * Tests whether {@code fileName} exists in this segment's directory. Going by
 * the method name, the file is added to {@code files} only when it does.
 *
 * @param files set collecting file names
 * @param fileName name of the file to test
 * @throws IOException if the existence check fails
 */
private void addIfExists(Set<String> files, String fileName) throws IOException {
637
if (dir.fileExists(fileName))
642
* Return all files referenced by this SegmentInfo. The
643
* returned List is a locally cached List so you should not
647
public List<String> files() throws IOException {
654
// Names are gathered in a set, so a name added more than once is kept once.
HashSet<String> filesSet = new HashSet<String>();
656
boolean useCompoundFile = getUseCompoundFile();
658
if (useCompoundFile) {
659
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION));
661
for (String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS)
662
addIfExists(filesSet, IndexFileNames.segmentFileName(name, ext));
665
if (docStoreOffset != -1) {
666
// We are sharing doc stores (stored fields, term
667
// vectors) with other segments
668
assert docStoreSegment != null;
669
if (docStoreIsCompoundFile) {
670
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
672
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.FIELDS_INDEX_EXTENSION));
673
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.FIELDS_EXTENSION));
675
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_INDEX_EXTENSION));
676
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
677
filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_FIELDS_EXTENSION));
680
} else if (!useCompoundFile) {
681
// No shared doc store and no compound file: the store files are named
// after this segment itself.
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.FIELDS_INDEX_EXTENSION));
682
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.FIELDS_EXTENSION));
684
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_INDEX_EXTENSION));
685
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
686
filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_FIELDS_EXTENSION));
690
String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
691
// With a recorded generation (delGen >= YES) the deletes file is listed
// without consulting the directory; otherwise only if it actually exists.
if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
692
filesSet.add(delFileName);
695
// Careful logic for norms files
696
if (normGen != null) {
697
for(int i=0;i<normGen.length;i++) {
698
long gen = normGen[i];
700
// Definitely a separate norm file, with generation:
701
filesSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
702
} else if (NO == gen) {
703
// No separate norms but maybe plain norms
704
// in the non compound file case:
705
if (!hasSingleNormFile && !useCompoundFile) {
706
String fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i);
707
if (dir.fileExists(fileName)) {
708
filesSet.add(fileName);
711
} else if (CHECK_DIR == gen) {
712
// Pre-2.1: we have to check file existence
713
String fileName = null;
714
if (useCompoundFile) {
715
fileName = IndexFileNames.segmentFileName(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i);
716
} else if (!hasSingleNormFile) {
717
fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i);
719
if (fileName != null && dir.fileExists(fileName)) {
720
filesSet.add(fileName);
724
} else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) {
725
// Pre-2.1: we have to scan the dir to find all
726
// matching _X.sN/_X.fN files for our segment:
729
prefix = IndexFileNames.segmentFileName(name, IndexFileNames.SEPARATE_NORMS_EXTENSION);
731
prefix = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION);
732
int prefixLength = prefix.length();
733
String[] allFiles = dir.listAll();
734
final IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
735
for(int i=0;i<allFiles.length;i++) {
736
String fileName = allFiles[i];
737
// The length test comes before charAt(prefixLength), so a name no longer
// than the prefix is skipped instead of being indexed out of range.
if (filter.accept(null, fileName) && fileName.length() > prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) {
738
filesSet.add(fileName);
742
// Store the list in the files field as well as returning it.
return files = new ArrayList<String>(filesSet);
745
/* Called whenever any change is made that affects which
746
* files this segment has. */
747
private void clearFiles() {
749
// Reset both cached sizes to -1, the value sizeInBytes() treats as
// "not computed yet".
sizeInBytesNoStore = -1;
750
sizeInBytesWithStore = -1;
755
public String toString() {
756
// Delegate using this segment's own directory and zero pending deletes.
return toString(dir, 0);
759
/** Used for debugging. Format may suddenly change.
761
* <p>Current format looks like
762
* <code>_a(3.1):c45/4->_1</code>, which means the segment's
763
* name is <code>_a</code>; it was created with Lucene 3.1 (or
764
* '?' if it's unknown); it's using compound file
765
* format (would be <code>C</code> if not compound); it
766
* has 45 documents; it has 4 deletions (this part is
767
* left off when there are no deletions); it's using the
768
* shared doc stores named <code>_1</code> (this part is
769
* left off if doc stores are private).</p>
771
public String toString(Directory dir, int pendingDelCount) {
773
StringBuilder s = new StringBuilder();
774
// Starts with "name(version):", using "?" when the version is null.
s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
778
if (getUseCompoundFile()) {
783
} catch (IOException ioe) {
788
// Identity comparison between this segment's directory and the one passed in.
if (this.dir != dir) {
798
delCount = getDelCount();
799
} catch (IOException ioe) {
802
// Pending deletes are added only when the delete count is known (not -1).
if (delCount != -1) {
803
delCount += pendingDelCount;
807
if (delCount == -1) {
814
// Shared doc store: append "->" followed by the doc store segment name.
if (docStoreOffset != -1) {
815
s.append("->").append(docStoreSegment);
816
if (docStoreIsCompoundFile) {
821
s.append('+').append(docStoreOffset);
827
/** We consider another SegmentInfo instance equal if it
828
* has the same dir and same name. */
830
public boolean equals(Object obj) {
831
if (this == obj) return true;
832
if (obj instanceof SegmentInfo) {
833
final SegmentInfo other = (SegmentInfo) obj;
834
// Directories are compared by identity, names by value.
return other.dir == dir && other.name.equals(name);
841
public int hashCode() {
842
// Built from the same two fields that equals() compares: dir and name.
return dir.hashCode() + name.hashCode();
846
* Used by SegmentInfos to upgrade segments that do not record their code
847
* version (either "2.x" or "3.0").
849
* <b>NOTE:</b> this method is used for internal purposes only - you should
850
* not modify the version of a SegmentInfo, or it may result in unexpected
851
* exceptions thrown when you attempt to open the index.
855
void setVersion(String version) {
856
// Stored as given; no validation of the version string happens here.
this.version = version;
859
/** Returns the version of the code which wrote the segment. */
860
public String getVersion() {
864
/**
 * Returns the buffered deletes generation. This value is only tracked in RAM
 * and is never written to or read from the Directory.
 */
long getBufferedDeletesGen() {
865
return bufferedDeletesGen;
868
/**
 * Sets the buffered deletes generation. This value is only tracked in RAM
 * and is never written to or read from the Directory.
 *
 * @param v the new generation value
 */
void setBufferedDeletesGen(long v) {
869
bufferedDeletesGen = v;