72
92
int fieldNumber = fieldsStream.ReadVInt();
73
93
FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
94
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
75
96
byte bits = fieldsStream.ReadByte();
77
97
bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
78
98
bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
80
if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
82
byte[] b = new byte[fieldsStream.ReadVInt()];
99
bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
100
//TODO: Find an alternative approach here if this list continues to grow beyond the
101
//list of 5 or 6 currently here. See Lucene 762 for discussion
102
if (acceptField.Equals(FieldSelectorResult.LOAD))
104
AddField(doc, fi, binary, compressed, tokenize);
106
else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
108
AddFieldForMerge(doc, fi, binary, compressed, tokenize);
110
else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
112
AddField(doc, fi, binary, compressed, tokenize);
113
break; //Get out of this loop
115
else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
117
AddFieldLazy(doc, fi, binary, compressed, tokenize);
119
else if (acceptField.Equals(FieldSelectorResult.SIZE))
121
SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
123
else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
125
AddFieldSize(doc, fi, binary, compressed);
130
SkipField(binary, compressed);
137
/// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
138
/// This will have the most payoff on large fields.
140
private void SkipField(bool binary, bool compressed)
142
SkipField(binary, compressed, fieldsStream.ReadVInt());
145
private void SkipField(bool binary, bool compressed, int toRead)
147
if (binary || compressed)
149
long pointer = fieldsStream.GetFilePointer();
150
fieldsStream.Seek(pointer + toRead);
154
//We need to skip chars. This will slow us down, but still better
155
fieldsStream.SkipChars(toRead);
159
private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
163
int toRead = fieldsStream.ReadVInt();
164
long pointer = fieldsStream.GetFilePointer();
167
//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
168
doc.Add(new LazyField(this, fi.name, Field.Store.COMPRESS, toRead, pointer));
172
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
173
doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer));
175
//Need to move the pointer ahead by toRead positions
176
fieldsStream.Seek(pointer + toRead);
180
Field.Store store = Field.Store.YES;
181
Field.Index index = GetIndexType(fi, tokenize);
182
Field.TermVector termVector = GetTermVectorType(fi);
187
store = Field.Store.COMPRESS;
188
int toRead = fieldsStream.ReadVInt();
189
long pointer = fieldsStream.GetFilePointer();
190
f = new LazyField(this, fi.name, store, toRead, pointer);
191
//skip over the part that we aren't loading
192
fieldsStream.Seek(pointer + toRead);
193
f.SetOmitNorms(fi.omitNorms);
197
int length = fieldsStream.ReadVInt();
198
long pointer = fieldsStream.GetFilePointer();
199
//Skip ahead of where we are by the length of what is stored
200
fieldsStream.SkipChars(length);
201
f = new LazyField(this, fi.name, store, index, termVector, length, pointer);
202
f.SetOmitNorms(fi.omitNorms);
208
// in merge mode we don't uncompress the data of a compressed field
209
private void AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
213
if (binary || compressed)
215
int toRead = fieldsStream.ReadVInt();
216
byte[] b = new byte[toRead];
217
fieldsStream.ReadBytes(b, 0, b.Length);
222
data = fieldsStream.ReadString();
225
doc.Add(new FieldForMerge(data, fi, binary, compressed, tokenize));
228
private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
231
//we have a binary stored field, and it may be compressed
234
int toRead = fieldsStream.ReadVInt();
235
byte[] b = new byte[toRead];
236
fieldsStream.ReadBytes(b, 0, b.Length);
238
doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
240
doc.Add(new Field(fi.name, b, Field.Store.YES));
244
Field.Store store = Field.Store.YES;
245
Field.Index index = GetIndexType(fi, tokenize);
246
Field.TermVector termVector = GetTermVectorType(fi);
251
store = Field.Store.COMPRESS;
252
int toRead = fieldsStream.ReadVInt();
254
byte[] b = new byte[toRead];
83
255
fieldsStream.ReadBytes(b, 0, b.Length);
85
doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
87
doc.Add(new Field(fi.name, b, Field.Store.YES));
92
Field.Store store = Field.Store.YES;
94
if (fi.isIndexed && tokenize)
95
index = Field.Index.TOKENIZED;
96
else if (fi.isIndexed && !tokenize)
97
index = Field.Index.UN_TOKENIZED;
99
index = Field.Index.NO;
101
Field.TermVector termVector = null;
102
if (fi.storeTermVector)
104
if (fi.storeOffsetWithTermVector)
106
if (fi.storePositionWithTermVector)
108
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
112
termVector = Field.TermVector.WITH_OFFSETS;
115
else if (fi.storePositionWithTermVector)
117
termVector = Field.TermVector.WITH_POSITIONS;
121
termVector = Field.TermVector.YES;
126
termVector = Field.TermVector.NO;
131
store = Field.Store.COMPRESS;
132
byte[] b = new byte[fieldsStream.ReadVInt()];
133
fieldsStream.ReadBytes(b, 0, b.Length);
134
Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
135
f.SetOmitNorms(fi.omitNorms);
140
Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
141
f.SetOmitNorms(fi.omitNorms);
150
public /*internal*/ Document Doc(int n, string[] fields)
152
if (fields == null || fields.Length == 0)
155
// FIXME: use Hashset
156
ArrayList field_list = new ArrayList (fields);
157
int num_required_fields = field_list.Count;
159
indexStream.Seek(n * 8L);
160
long position = indexStream.ReadLong();
161
fieldsStream.Seek(position);
163
Document doc = new Document();
164
int numFields = fieldsStream.ReadVInt();
165
for (int i = 0; i < numFields && num_required_fields > 0; i++)
167
int fieldNumber = fieldsStream.ReadVInt();
168
FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
169
if (field_list.Contains (fi.name)) {
170
num_required_fields --;
172
byte bits = fieldsStream.ReadByte();
174
bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
175
bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
177
if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
179
byte[] b = new byte[fieldsStream.ReadVInt()];
180
fieldsStream.ReadBytes(b, 0, b.Length);
182
doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
184
doc.Add(new Field(fi.name, b, Field.Store.YES));
189
Field.Store store = Field.Store.YES;
191
if (fi.isIndexed && tokenize)
192
index = Field.Index.TOKENIZED;
193
else if (fi.isIndexed && !tokenize)
194
index = Field.Index.UN_TOKENIZED;
196
index = Field.Index.NO;
198
Field.TermVector termVector = null;
199
if (fi.storeTermVector)
201
if (fi.storeOffsetWithTermVector)
203
if (fi.storePositionWithTermVector)
205
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
209
termVector = Field.TermVector.WITH_OFFSETS;
212
else if (fi.storePositionWithTermVector)
214
termVector = Field.TermVector.WITH_POSITIONS;
218
termVector = Field.TermVector.YES;
223
termVector = Field.TermVector.NO;
228
store = Field.Store.COMPRESS;
229
byte[] b = new byte[fieldsStream.ReadVInt()];
230
fieldsStream.ReadBytes(b, 0, b.Length);
231
Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
232
f.SetOmitNorms(fi.omitNorms);
237
Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
238
f.SetOmitNorms(fi.omitNorms);
243
byte bits = fieldsStream.ReadByte();
245
bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
246
bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
248
if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
250
//byte[] b = new byte[fieldsStream.ReadVInt()];
251
//fieldsStream.ReadBytes(b, 0, b.Length);
252
int length = fieldsStream.ReadVInt();
253
for (int j = 0; j < length; j++)
254
fieldsStream.ReadByte ();
260
//byte[] b = new byte[fieldsStream.ReadVInt()];
261
//fieldsStream.ReadBytes(b, 0, b.Length);
262
int length = fieldsStream.ReadVInt();
263
for (int j = 0; j < length; j++)
264
fieldsStream.ReadByte ();
268
//fieldsStream.ReadString ();
269
int length = fieldsStream.ReadVInt();
270
for (int j = 0; j < length; j++)
272
byte b = fieldsStream.ReadByte ();
275
else if ((b & 0xE0) != 0xE0) {
276
fieldsStream.ReadByte ();
278
fieldsStream.ReadByte ();
279
fieldsStream.ReadByte ();
256
f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
257
f.SetOmitNorms(fi.omitNorms);
261
f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
262
f.SetOmitNorms(fi.omitNorms);
268
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
269
// Read just the size -- caller must skip the field content to continue reading fields
270
// Return the size in bytes or chars, depending on field type
271
private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
273
int size = fieldsStream.ReadVInt(), bytesize = binary || compressed ? size : 2 * size;
274
byte[] sizebytes = new byte[4];
275
sizebytes[0] = (byte) (SupportClass.Number.URShift(bytesize, 24));
276
sizebytes[1] = (byte) (SupportClass.Number.URShift(bytesize, 16));
277
sizebytes[2] = (byte) (SupportClass.Number.URShift(bytesize, 8));
278
sizebytes[3] = (byte) bytesize;
279
doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
283
private Field.TermVector GetTermVectorType(FieldInfo fi)
285
Field.TermVector termVector = null;
286
if (fi.storeTermVector)
288
if (fi.storeOffsetWithTermVector)
290
if (fi.storePositionWithTermVector)
292
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
296
termVector = Field.TermVector.WITH_OFFSETS;
299
else if (fi.storePositionWithTermVector)
301
termVector = Field.TermVector.WITH_POSITIONS;
305
termVector = Field.TermVector.YES;
310
termVector = Field.TermVector.NO;
315
private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
318
if (fi.isIndexed && tokenize)
319
index = Field.Index.TOKENIZED;
320
else if (fi.isIndexed && !tokenize)
321
index = Field.Index.UN_TOKENIZED;
323
index = Field.Index.NO;
327
/// <summary> A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
331
private class LazyField:AbstractField, Fieldable
333
private void InitBlock(FieldsReader enclosingInstance)
335
this.enclosingInstance = enclosingInstance;
337
private FieldsReader enclosingInstance;
338
public FieldsReader Enclosing_Instance
342
return enclosingInstance;
347
private long pointer;
349
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer):base(name, store, Field.Index.NO, Field.TermVector.NO)
351
InitBlock(enclosingInstance);
352
this.toRead = toRead;
353
this.pointer = pointer;
357
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer):base(name, store, index, termVector)
359
InitBlock(enclosingInstance);
360
this.toRead = toRead;
361
this.pointer = pointer;
365
private IndexInput GetFieldStream()
367
IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(Enclosing_Instance.fieldsStreamTL);
368
if (localFieldsStream == null)
370
localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
371
System.Threading.Thread.SetData(Enclosing_Instance.fieldsStreamTL, localFieldsStream);
373
return localFieldsStream;
376
/// <summary> The value of the field in Binary, or null. If null, the Reader or
377
/// String value is used. Exactly one of stringValue(), readerValue() and
378
/// binaryValue() must be set.
380
public override byte[] BinaryValue()
382
if (fieldsData == null)
384
byte[] b = new byte[toRead];
385
IndexInput localFieldsStream = GetFieldStream();
386
//Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
387
//since they are already handling this exception when getting the document
390
localFieldsStream.Seek(pointer);
391
localFieldsStream.ReadBytes(b, 0, b.Length);
392
if (isCompressed == true)
394
fieldsData = Enclosing_Instance.Uncompress(b);
401
catch (System.IO.IOException e)
403
throw new FieldReaderException(e);
406
return fieldsData is byte[] ? (byte[]) fieldsData : null;
409
/// <summary> The value of the field as a Reader, or null. If null, the String value
410
/// or binary value is used. Exactly one of stringValue(), readerValue(),
411
/// and binaryValue() must be set.
413
public override System.IO.TextReader ReaderValue()
415
return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null;
418
/// <summary> The value of the field as a String, or null. If null, the Reader value
419
/// or binary value is used. Exactly one of stringValue(), readerValue(), and
420
/// binaryValue() must be set.
422
public override System.String StringValue()
424
if (fieldsData == null)
426
IndexInput localFieldsStream = GetFieldStream();
429
localFieldsStream.Seek(pointer);
432
byte[] b = new byte[toRead];
433
localFieldsStream.ReadBytes(b, 0, b.Length);
434
fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
438
//read in chars b/c we already know the length we need to read
439
char[] chars = new char[toRead];
440
localFieldsStream.ReadChars(chars, 0, toRead);
441
fieldsData = new System.String(chars);
444
catch (System.IO.IOException e)
446
throw new FieldReaderException(e);
449
return fieldsData is System.String ? (System.String) fieldsData : null;
452
public long GetPointer()
457
public void SetPointer(long pointer)
459
this.pointer = pointer;
462
public int GetToRead()
467
public void SetToRead(int toRead)
469
this.toRead = toRead;
290
473
private byte[] Uncompress(byte[] input)
292
return SupportClass.CompressionSupport.Uncompress(input);
475
return SupportClass.CompressionSupport.Uncompress(input);
478
// Instances of this class hold field properties and data
481
public sealed class FieldForMerge : AbstractField
483
public override System.String StringValue()
485
return (System.String) this.fieldsData;
488
public override System.IO.TextReader ReaderValue()
490
// not needed for merge
494
public override byte[] BinaryValue()
496
return (byte[]) this.fieldsData;
499
public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
501
this.isStored = true;
502
this.fieldsData = value_Renamed;
503
this.isCompressed = compressed;
504
this.isBinary = binary;
505
this.isTokenized = tokenize;
507
this.name = String.Intern(fi.name);
508
this.isIndexed = fi.isIndexed;
509
this.omitNorms = fi.omitNorms;
510
this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
511
this.storePositionWithTermVector = fi.storePositionWithTermVector;
512
this.storeTermVector = fi.storeTermVector;
b'\\ No newline at end of file'