package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;
import java.util.Collection;
import java.util.Map;

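/** Writes per-document term vectors to the segment's term vector files:
 *  tvx (index), tvd (documents) and tvf (fields).  Each document's vectors
 *  are first buffered in RAM (see PerDoc below) and appended to the real
 *  outputs in finishDocument. */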
final class TermVectorsTermsWriter extends TermsHashConsumer {

  final DocumentsWriter docWriter;

  // Recycled PerDoc instances (see getPerDoc/free)
  PerDoc[] docFreeList = new PerDoc[1];
  int freeCount;
  int allocCount;

  IndexOutput tvx;      // term vectors index file
  IndexOutput tvd;      // term vectors documents file
  IndexOutput tvf;      // term vectors fields file
  int lastDocID;        // next docID to be written to tvx
  boolean hasVectors;   // true if any doc in this segment had term vectors

  public TermVectorsTermsWriter(DocumentsWriter docWriter) {
    this.docWriter = docWriter;
  }

  public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
    return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
  }

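  /** Called when the segment is flushed: fills any trailing docs that had no
   *  term vectors, closes the three outputs, sanity-checks the tvx length
   *  (4-byte header plus 16 bytes per doc) and resets the per-thread and
   *  per-field term hashes for the next segment. */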
  synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {

    if (tvx != null) {
      // At least one doc in this run had term vectors enabled
      fill(state.numDocs);
      IOUtils.close(tvx, tvf, tvd);
      tvx = tvd = tvf = null;
      assert state.segmentName != null;
      String idxName = IndexFileNames.segmentFileName(state.segmentName, IndexFileNames.VECTORS_INDEX_EXTENSION);
      if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
        throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
      }

      lastDocID = 0;
      state.hasVectors = hasVectors;
      hasVectors = false;
    }

    for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
      for (final TermsHashConsumerPerField field : entry.getValue()) {
        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
        perField.termsHashPerField.reset();
        perField.shrinkHash();
      }

      TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
      perThread.termsHashPerThread.reset(true);
    }
  }

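  /** Returns a PerDoc for buffering one document's term vectors, reusing a
   *  recycled instance from docFreeList when one is available. */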
  synchronized PerDoc getPerDoc() {
    if (freeCount == 0) {
      allocCount++;
      if (allocCount > docFreeList.length) {
        // Grow our free list up front to make sure we have
        // enough space to recycle all outstanding PerDoc
        // instances
        assert allocCount == 1+docFreeList.length;
        docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      }
      return new PerDoc();
    } else {
      return docFreeList[--freeCount];
    }
  }

  /** Fills in no-term-vectors for all docs we haven't seen
   *  since the last doc that had term vectors. */
  void fill(int docID) throws IOException {
    if (lastDocID < docID) {
      final long tvfPosition = tvf.getFilePointer();
      while(lastDocID < docID) {
        tvx.writeLong(tvd.getFilePointer());
        tvd.writeVInt(0);            // zero vector fields for this doc
        tvx.writeLong(tvfPosition);
        lastDocID++;
      }
    }
  }

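  /** Lazily creates the tvx/tvd/tvf outputs for the current segment and
   *  writes the format header into each one. */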
  synchronized void initTermVectorsWriter() throws IOException {
    if (tvx == null) {
      boolean success = false;
      try {
        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
        tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
        tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));

        tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
        }
      }

      lastDocID = 0;
    }
  }

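  /** Appends one buffered document to the real outputs: tvx gets the current
   *  tvd and tvf file pointers, tvd gets the field numbers followed by
   *  delta-coded pointers into tvf, and the buffered tvf bytes are copied
   *  across; the PerDoc is then reset and recycled. */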
  synchronized void finishDocument(PerDoc perDoc) throws IOException {

    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");

    initTermVectorsWriter();

    fill(perDoc.docID);

    // Append term vectors to the real outputs:
    tvx.writeLong(tvd.getFilePointer());
    tvx.writeLong(tvf.getFilePointer());
    tvd.writeVInt(perDoc.numVectorFields);
    if (perDoc.numVectorFields > 0) {
      for(int i=0;i<perDoc.numVectorFields;i++) {
        tvd.writeVInt(perDoc.fieldNumbers[i]);
      }
      assert 0 == perDoc.fieldPointers[0];
      long lastPos = perDoc.fieldPointers[0];
      for(int i=1;i<perDoc.numVectorFields;i++) {
        long pos = perDoc.fieldPointers[i];
        tvd.writeVLong(pos-lastPos);
        lastPos = pos;
      }
      perDoc.perDocTvf.writeTo(tvf);
      perDoc.numVectorFields = 0;
    }

    assert lastDocID == perDoc.docID: "lastDocID=" + lastDocID + " perDoc.docID=" + perDoc.docID;

    lastDocID++;

    perDoc.reset();
    free(perDoc);
    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
  }

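  /** Aborts the current segment: closes the outputs, suppressing exceptions,
   *  and deletes any term vector files that were already created. */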
  public void abort() {
    hasVectors = false;

    try {
      IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
    } catch (IOException e) {
      // cannot happen since we suppress exceptions
      throw new RuntimeException(e);
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));
    } catch (IOException ignored) {
    }

    tvx = tvd = tvf = null;
    lastDocID = 0;
  }

  synchronized void free(PerDoc doc) {
    assert freeCount < docFreeList.length;
    docFreeList[freeCount++] = doc;
  }

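  /** Per-document buffer: tvf bytes are staged in a RAMOutputStream and the
   *  field numbers/pointers in growable arrays until finishDocument writes
   *  them to the segment's files. */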
  class PerDoc extends DocumentsWriter.DocWriter {

    final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
    RAMOutputStream perDocTvf = new RAMOutputStream(buffer);

    int numVectorFields;

    int[] fieldNumbers = new int[1];
    long[] fieldPointers = new long[1];

    void reset() {
      perDocTvf.reset();
      buffer.recycle();
      numVectorFields = 0;
    }

    void abort() {
      reset();
      free(this);
    }

    void addField(final int fieldNumber) {
      if (numVectorFields == fieldNumbers.length) {
        fieldNumbers = ArrayUtil.grow(fieldNumbers);
      }
      if (numVectorFields == fieldPointers.length) {
        fieldPointers = ArrayUtil.grow(fieldPointers);
      }
      fieldNumbers[numVectorFields] = fieldNumber;
      fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
      numVectorFields++;
    }

    public long sizeInBytes() {
      return buffer.getSizeInBytes();
    }

    public void finish() throws IOException {
      finishDocument(this);
    }
  }
}