1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.document.Document;
21
import org.apache.lucene.document.Fieldable;
22
import org.apache.lucene.index.FieldInfo.IndexOptions;
23
import org.apache.lucene.store.Directory;
24
import org.apache.lucene.store.IndexInput;
25
import org.apache.lucene.store.IndexOutput;
26
import org.apache.lucene.util.StringHelper;
28
import java.io.IOException;
31
/** Access to the Fieldable Info file that describes document fields and whether or
 * not they are indexed. Each segment has a separate Fieldable Info file. Objects
 * of this class are thread-safe for multiple readers, but only one thread can
 * be adding documents at a time, with no other reader or writer threads
 * accessing this object.
 */
37
final class FieldInfos {
39
// Used internally (ie not written to *.fnm files) for pre-2.9 files
40
public static final int FORMAT_PRE = -1;
42
// First used in 2.9; prior to 2.9 there was no format header
43
public static final int FORMAT_START = -2;
45
// First used in 3.4: omit only positional information
46
public static final int FORMAT_OMIT_POSITIONS = -3;
48
// whenever you add a new format, make it 1 smaller (negative version logic)!
// This is the value write(IndexOutput) emits as the first VInt of the file.
49
static final int CURRENT_FORMAT = FORMAT_OMIT_POSITIONS;
51
// Per-field flag bits. write(IndexOutput) ORs the applicable ones into a single
// byte per field, and read(IndexInput, String) tests each one with '&'.
static final byte IS_INDEXED = 0x1;
52
static final byte STORE_TERMVECTOR = 0x2;
53
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
54
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
55
static final byte OMIT_NORMS = 0x10;
56
static final byte STORE_PAYLOADS = 0x20;
57
// Written when a field's indexOptions is DOCS_ONLY.
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
58
// Written when a field's indexOptions is DOCS_AND_FREQS. -128 is the signed-byte
// value whose only set bit is the top one (bit pattern 0x80).
static final byte OMIT_POSITIONS = -128;
60
// FieldInfo entries in insertion order; fieldInfo(int) indexes into this list.
private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
61
// Lookup from field name to its FieldInfo; backs fieldInfo(String).
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
67
* Construct a FieldInfos object using the directory and the name of the file
69
* @param d The directory to open the IndexInput from
70
* @param name The name of the file to open the IndexInput from in the Directory
73
// Opens the file 'name' in directory 'd' as an IndexInput. The visible handlers
// show an IOException being caught and, when 'format' is FORMAT_PRE, the input
// being switched to modified-UTF8 string mode for a retry (LUCENE-1623).
// NOTE(review): the try openers, the statements that populate this instance and
// whatever releases 'input' are not visible in this view — confirm against the
// complete file before changing the control flow.
FieldInfos(Directory d, String name) throws IOException {
74
IndexInput input = d.openInput(name);
78
} catch (IOException ioe) {
79
if (format == FORMAT_PRE) {
80
// LUCENE-1623: FORMAT_PRE (before there was a
81
// format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
82
// encoding; retry with input set to pre-utf8
84
input.setModifiedUTF8StringsMode();
89
} catch (Throwable t) {
90
// Ignore any new exception & throw original IOE
94
// The IOException cannot be caused by
95
// LUCENE-1623, so re-throw it
105
* Returns a deep clone of this FieldInfos instance.
108
// Builds a new FieldInfos and fills it with a clone of every FieldInfo held in
// byNumber, so the copy does not share FieldInfo objects with this instance.
// NOTE(review): the statements after the loop (presumably returning 'fis') are
// not visible in this view — confirm.
synchronized public Object clone() {
109
FieldInfos fis = new FieldInfos();
110
final int numField = byNumber.size();
111
for(int i=0;i<numField;i++) {
112
FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
113
// Register the clone in both indexes of the copy: by position and by name.
fis.byNumber.add(fi);
114
fis.byName.put(fi.name, fi);
119
/** Adds field info for a Document. */
120
synchronized public void add(Document doc) {
121
List<Fieldable> fields = doc.getFields();
122
for (Fieldable field : fields) {
123
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
124
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions());
128
/** Returns true if any fields do not omitTermFreqAndPositions */
// The visible test matches a field that is indexed AND whose indexOptions is
// DOCS_AND_FREQS_AND_POSITIONS.
// NOTE(review): the method header and the return statements are not visible in
// this view — confirm the exact signature and result before relying on this.
130
final int numFields = byNumber.size();
131
for(int i=0;i<numFields;i++) {
132
final FieldInfo fi = fieldInfo(i);
133
if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
141
* Add fields that are indexed. Whether they have termvectors has to be specified.
143
* @param names The names of the fields
144
* @param storeTermVectors Whether the fields store term vectors or not
145
* @param storePositionWithTermVector true if positions should be stored.
146
* @param storeOffsetWithTermVector true if offsets should be stored
148
synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
149
boolean storeOffsetWithTermVector) {
150
for (String name : names) {
151
add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
156
* Assumes the fields are not storing term vectors.
158
* @param names The names of the fields
159
* @param isIndexed Whether the fields are indexed or not
161
* @see #add(String, boolean)
163
synchronized public void add(Collection<String> names, boolean isIndexed) {
164
for (String name : names) {
165
add(name, isIndexed);
170
* Calls 5 parameter add with false for all TermVector parameters.
172
* @param name The name of the Fieldable
173
* @param isIndexed true if the field is indexed
174
* @see #add(String, boolean, boolean, boolean, boolean)
176
synchronized public void add(String name, boolean isIndexed) {
177
add(name, isIndexed, false, false, false, false);
181
* Calls 5 parameter add with false for term vector positions and offsets.
183
* @param name The name of the field
184
* @param isIndexed true if the field is indexed
185
* @param storeTermVector true if the term vector should be stored
187
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
188
add(name, isIndexed, storeTermVector, false, false, false);
191
/** If the field is not yet known, adds it. If it is known, checks to make
192
* sure that the isIndexed flag is the same as was given previously for this
193
* field. If not - marks it as being indexed. Same goes for the TermVector
196
* @param name The name of the field
197
* @param isIndexed true if the field is indexed
198
* @param storeTermVector true if the term vector should be stored
199
* @param storePositionWithTermVector true if the term vector with positions should be stored
200
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
202
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
203
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
205
add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
208
/** If the field is not yet known, adds it. If it is known, checks to make
209
* sure that the isIndexed flag is the same as was given previously for this
210
* field. If not - marks it as being indexed. Same goes for the TermVector
213
* @param name The name of the field
214
* @param isIndexed true if the field is indexed
215
* @param storeTermVector true if the term vector should be stored
216
* @param storePositionWithTermVector true if the term vector with positions should be stored
217
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
218
* @param omitNorms true if the norms for the indexed field should be omitted
220
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
221
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
222
add(name, isIndexed, storeTermVector, storePositionWithTermVector,
223
storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
226
/** If the field is not yet known, adds it. If it is known, checks to make
227
* sure that the isIndexed flag is the same as was given previously for this
228
* field. If not - marks it as being indexed. Same goes for the TermVector
231
* @param name The name of the field
232
* @param isIndexed true if the field is indexed
233
* @param storeTermVector true if the term vector should be stored
234
* @param storePositionWithTermVector true if the term vector with positions should be stored
235
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
236
* @param omitNorms true if the norms for the indexed field should be omitted
237
* @param storePayloads true if payloads should be stored for this field
238
* @param indexOptions if term freqs should be omitted for this field
240
// Looks the field up by name, then either creates it through addInternal or
// merges the given settings into the existing entry through FieldInfo.update.
// NOTE(review): the condition selecting between those two paths (presumably
// 'fi == null'), the closing braces and the final return are not visible in
// this view — confirm.
synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
241
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
242
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
243
FieldInfo fi = fieldInfo(name);
245
return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
247
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
249
// Invariant: payloads may only be stored when positions are indexed.
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
253
// Adds a field by copying the settings of an existing FieldInfo: its members
// are passed one by one to another add overload.
// NOTE(review): the end of the argument list is not visible in this view
// (presumably fi.indexOptions follows) — confirm.
synchronized public FieldInfo add(FieldInfo fi) {
254
return add(fi.name, fi.isIndexed, fi.storeTermVector,
255
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
256
fi.omitNorms, fi.storePayloads,
260
// Creates a new FieldInfo for 'name' and registers it in byName. The name is
// interned first, and the current size of byNumber is passed as the third
// constructor argument (presumably the new field's number — confirm against
// the FieldInfo constructor).
// NOTE(review): a statement between the constructor call and byName.put, and
// everything after byName.put, is not visible in this view — confirm.
private FieldInfo addInternal(String name, boolean isIndexed,
261
boolean storeTermVector, boolean storePositionWithTermVector,
262
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
263
name = StringHelper.intern(name);
264
FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
265
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
267
byName.put(name, fi);
271
public int fieldNumber(String fieldName) {
272
FieldInfo fi = fieldInfo(fieldName);
273
return (fi != null) ? fi.number : -1;
276
public FieldInfo fieldInfo(String fieldName) {
277
return byName.get(fieldName);
281
* Return the fieldName identified by its number.
284
* @return the fieldName or an empty string when the field
285
* with the given number doesn't exist.
287
public String fieldName(int fieldNumber) {
288
FieldInfo fi = fieldInfo(fieldNumber);
289
return (fi != null) ? fi.name : "";
293
* Return the fieldinfo object referenced by the fieldNumber.
295
* @return the FieldInfo object or null when the given fieldNumber
298
public FieldInfo fieldInfo(int fieldNumber) {
299
return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
303
// Yields the number of FieldInfo entries currently held in byNumber.
// NOTE(review): the enclosing method header is not visible in this view
// (presumably the size() used by hasVectors and write) — confirm.
return byNumber.size();
306
// Scans all fields by number, testing each one's storeTermVector flag.
// NOTE(review): the body of the 'if', the end of the loop and the return are
// not visible in this view (presumably the flag is set and returned) — confirm.
public boolean hasVectors() {
307
boolean hasVectors = false;
308
for (int i = 0; i < size(); i++) {
309
if (fieldInfo(i).storeTermVector) {
317
// Creates the output file 'name' in directory 'd'.
// NOTE(review): the rest of the body is not visible in this view (presumably it
// delegates to write(IndexOutput) and closes the output) — confirm.
public void write(Directory d, String name) throws IOException {
318
IndexOutput output = d.createOutput(name);
326
// Serializes all field infos to 'output': first CURRENT_FORMAT and the field
// count as VInts, then for every field its name followed by one flag byte.
// NOTE(review): the declaration/initialisation of 'bits' and the closing braces
// are not visible in this view — confirm.
public void write(IndexOutput output) throws IOException {
327
output.writeVInt(CURRENT_FORMAT);
328
output.writeVInt(size());
329
for (int i = 0; i < size(); i++) {
330
FieldInfo fi = fieldInfo(i);
331
// Payloads are only valid when positions are indexed.
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
333
// Fold each boolean setting into its flag bit.
if (fi.isIndexed) bits |= IS_INDEXED;
334
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
335
if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
336
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
337
if (fi.omitNorms) bits |= OMIT_NORMS;
338
if (fi.storePayloads) bits |= STORE_PAYLOADS;
339
// indexOptions is encoded with two mutually exclusive bits; neither bit is set
// by this chain for any value other than DOCS_ONLY and DOCS_AND_FREQS.
if (fi.indexOptions == IndexOptions.DOCS_ONLY)
340
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
341
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
342
bits |= OMIT_POSITIONS;
344
output.writeString(fi.name);
345
output.writeByte(bits);
349
// Parses field infos from 'input': validates the format, reads the field count,
// then for each field reads its name and flag byte, decodes the flags and
// registers the field through addInternal. Finally verifies that the whole
// file was consumed. 'fileName' is only used in error messages.
// NOTE(review): several statements (how 'format' and 'size' are derived from
// 'firstInt', some else branches and closing braces) are not visible in this
// view — confirm against the complete file.
private void read(IndexInput input, String fileName) throws IOException {
350
int firstInt = input.readVInt();
353
// This is a real format
359
// Reject any format value other than the three known ones.
if (format != FORMAT_PRE && format != FORMAT_START && format != FORMAT_OMIT_POSITIONS) {
360
throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
364
if (format == FORMAT_PRE) {
367
size = input.readVInt(); //read in the size
370
for (int i = 0; i < size; i++) {
371
String name = StringHelper.intern(input.readString());
372
byte bits = input.readByte();
373
// Decode the individual flag bits of this field.
boolean isIndexed = (bits & IS_INDEXED) != 0;
374
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
375
boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
376
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
377
boolean omitNorms = (bits & OMIT_NORMS) != 0;
378
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
379
final IndexOptions indexOptions;
380
// OMIT_TERM_FREQ_AND_POSITIONS takes precedence over OMIT_POSITIONS.
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
381
indexOptions = IndexOptions.DOCS_ONLY;
382
// OMIT_POSITIONS is -128, so after promotion to int this test is non-zero
// exactly when the top bit of the flag byte is set.
} else if ((bits & OMIT_POSITIONS) != 0) {
383
// Format numbers decrease with newer versions, so '<=' means
// "FORMAT_OMIT_POSITIONS or newer"; older formats must not carry this bit.
if (format <= FORMAT_OMIT_POSITIONS) {
384
indexOptions = IndexOptions.DOCS_AND_FREQS;
386
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
389
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
392
// LUCENE-3027: past indices were able to write
393
// storePayloads=true when omitTFAP is also true,
394
// which is invalid. We correct that, here:
395
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
396
storePayloads = false;
399
addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
402
// Any unread trailing bytes mean the file does not match the expected layout.
if (input.getFilePointer() != input.length()) {
403
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");