package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.TwoPhaseCommit;

/**
  An <code>IndexWriter</code> creates and maintains an index.

  <p>The <code>create</code> argument to the {@link
  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
  whether a new index is created, or whether an existing index is
  opened. Note that you can open an index with <code>create=true</code>
  even while readers are using the index. The old readers will
  continue to search the "point in time" snapshot they had opened,
  and won't see the newly created index until they re-open. There are
  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
  with no <code>create</code> argument which will create a new index
  if there is not already an index at the provided path and otherwise
  open the existing index.</p>

  <p>In either case, documents are added with {@link #addDocument(Document)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}. A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  and then adds the entire document). When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>
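
  <p>For example, a minimal sketch (<code>dir</code> and
  <code>analyzer</code> stand for a {@link Directory} and an
  {@link Analyzer} created elsewhere):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);                           // buffer a new document
  writer.updateDocument(new Term("id", "1"), doc);   // delete + re-add by term
  writer.deleteDocuments(new Term("id", "2"));       // delete by term
  writer.close();                                    // commit changes and release the lock
  </pre>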

  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls). A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner. For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB. For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer. Note that flushing just moves the
  internal buffered state in IndexWriter into the index, but
  these changes are not visible to IndexReader until either
  {@link #commit()} or {@link #close} is called. A flush may
  also trigger one or more segment merges which by default
  run with a background thread so as not to block the
  addDocument calls (see <a href="#mergePolicy">below</a>
  for changing the {@link MergeScheduler}).</p>
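
  <p>A sketch of tuning the flush trigger through {@link
  IndexWriterConfig} (the 48&nbsp;MB figure is only an
  illustration, not a recommendation):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  conf.setRAMBufferSizeMB(48.0);                                  // flush by RAM usage
  conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // not by doc count
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>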

  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
  another <code>IndexWriter</code> on the same directory will lead to a
  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
  is also thrown if an IndexReader on the same directory is used to delete documents
  from the index.</p>

  <a name="deletionPolicy"></a>
  <p>Expert: <code>IndexWriter</code> allows an optional
  {@link IndexDeletionPolicy} implementation to be
  specified. You can use this to control when prior commits
  are deleted from the index. The default policy is {@link
  KeepOnlyLastCommitDeletionPolicy} which removes all prior
  commits as soon as a new commit is done (this matches
  behavior before 2.2). Creating your own policy can allow
  you to explicitly keep previous "point in time" commits
  alive in the index for some time, to allow readers to
  refresh to the new commit without having the old commit
  deleted out from under them. This is necessary on
  filesystems like NFS that do not support "delete on last
  close" semantics, which Lucene's "point in time" search
  normally relies on.</p>
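
  <p>A sketch of installing a non-default policy; here the
  library's own {@link SnapshotDeletionPolicy}, which wraps another
  policy so that a commit can be held open while a backup runs:</p>

  <pre>
  SnapshotDeletionPolicy policy =
      new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer)
      .setIndexDeletionPolicy(policy);
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>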

  <a name="mergePolicy"></a> <p>Expert:
  <code>IndexWriter</code> allows you to separately change
  the {@link MergePolicy} and the {@link MergeScheduler}.
  The {@link MergePolicy} is invoked whenever there are
  changes to the segments in the index. Its role is to
  select which merges to do, if any, and return a {@link
  MergePolicy.MergeSpecification} describing the merges.
  The default is {@link LogByteSizeMergePolicy}. Then, the {@link
  MergeScheduler} is invoked with the requested merges and
  it decides when and how to run the merges. The default is
  {@link ConcurrentMergeScheduler}.</p>
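
  <p>For example, a sketch of swapping in different merge components
  (the values are illustrative only):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
  mp.setMergeFactor(20);                               // merge more segments at once
  conf.setMergePolicy(mp);
  conf.setMergeScheduler(new SerialMergeScheduler());  // merge on the calling thread
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>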

  <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  OutOfMemoryError then IndexWriter will quietly record this
  fact and block all future segment commits. This is a
  defensive measure in case any internal state (buffered
  documents and deletions) were corrupted. Any subsequent
  calls to {@link #commit()} will throw an
  IllegalStateException. The only course of action is to
  call {@link #close()}, which internally will call {@link
  #rollback()}, to undo any changes to the index since the
  last commit. You can also just call {@link #rollback()}
  directly.</p>

  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  IndexWriter} instances are completely thread
  safe, meaning multiple threads can call any of its
  methods, concurrently. If your application requires
  external synchronization, you should <b>not</b>
  synchronize on the <code>IndexWriter</code> instance as
  this may cause deadlock; use your own (non-Lucene) objects
  instead.</p>

  <p><b>NOTE</b>: If you call
  <code>Thread.interrupt()</code> on a thread that's within
  IndexWriter, IndexWriter will try to catch this (eg, if
  it's in a wait() or Thread.sleep()), and will then throw
  the unchecked exception {@link ThreadInterruptedException}
  and <b>clear</b> the interrupt status on the thread.</p>
*/

/*
 * Clarification: Check Points (and commits)
 * IndexWriter writes new index files to the directory without writing a new segments_N
 * file which references these new files. It also means that the state of
 * the in-memory SegmentInfos object is different than the most recent
 * segments_N file written to the directory.
 *
 * Each time the SegmentInfos is changed, and matches the (possibly
 * modified) directory files, we have a new "check point".
 * If the modified/new SegmentInfos is written to disk - as a new
 * (generation of) segments_N file - this check point is also an
 * IndexCommit.
 *
 * A new checkpoint always replaces the previous checkpoint and
 * becomes the new "front" of the index. This allows the IndexFileDeleter
 * to delete files that are referenced only by stale checkpoints
 * (files that were created since the last commit, but are no longer
 * referenced by the "front" of the index). For this, IndexFileDeleter
 * keeps track of the last non-commit checkpoint.
 */

public class IndexWriter implements Closeable, TwoPhaseCommit {

  /**
   * Default value for the write lock timeout (1,000 milliseconds).
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
   */
  @Deprecated
  public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;

  private long writeLockTimeout;

  /**
   * Name of the write lock in the index.
   */
  public static final String WRITE_LOCK_NAME = "write.lock";

  /**
   * Value to denote a flush trigger is disabled.
   * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
   */
  @Deprecated
  public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDocs(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;

  /**
   * Default value is 16 MB (which means flush when buffered
   * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
   */
  @Deprecated
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  /**
   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
   * @deprecated see {@link IndexWriterConfig}
   */
  @Deprecated
  public final static int DEFAULT_MAX_FIELD_LENGTH = MaxFieldLength.UNLIMITED.getLimit();

  /**
   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
   */
  @Deprecated
  public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /**
   * Absolute hard maximum length for a term. If a term
   * arrives from the analyzer longer than this length, it
   * is skipped and a message is printed to infoStream, if
   * set (see {@link #setInfoStream}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;

  // The normal read buffer size defaults to 1024, but
  // increasing this during merging seems to yield
  // performance gains. However we don't want to increase
  // it too much because there are quite a few
  // BufferedIndexInputs created during merging. See
  // LUCENE-888 for details.
  private final static int MERGE_READ_BUFFER_SIZE = 4096;

  // Used for printing messages
  private static final AtomicInteger MESSAGE_ID = new AtomicInteger();
  private int messageID = MESSAGE_ID.getAndIncrement();
  volatile private boolean hitOOM;

  private final Directory directory;  // where this index resides
  private final Analyzer analyzer;    // how to analyze text

  // TODO 4.0: this should be made final once the setter is out
  private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize

  private volatile long changeCount; // increments every time a change is completed
  private long lastCommitChangeCount; // last changeCount that was committed

  private List<SegmentInfo> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails

  volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
  volatile long pendingCommitChangeCount;

  final SegmentInfos segmentInfos = new SegmentInfos();       // the segments

  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  // used by forceMerge to note those needing merging
  private Map<SegmentInfo,Boolean> segmentsToMerge = new HashMap<SegmentInfo,Boolean>();
  private int mergeMaxNumSegments;

  private Lock writeLock;

  private volatile boolean closed;
  private volatile boolean closing;

  // Holds all SegmentInfo instances currently involved in
  // merges
  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

  private MergePolicy mergePolicy;
  // TODO 4.0: this should be made final once the setter is removed
  private /*final*/MergeScheduler mergeScheduler;
  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  private long mergeGen;
  private boolean stopMerges;

  private final AtomicInteger flushCount = new AtomicInteger();
  private final AtomicInteger flushDeletesCount = new AtomicInteger();

  final ReaderPool readerPool = new ReaderPool();
  final BufferedDeletesStream bufferedDeletesStream;

  // This is a "write once" variable (like the organic dye
  // on a DVD-R that may or may not be heated by a laser and
  // then cooled to permanently record the event): it's
  // false, until getReader() is called for the first time,
  // at which point it's switched to true and never changes
  // back to false. Once this is true, we hold open and
  // reuse SegmentReader instances internally for applying
  // deletes, doing merges, and reopening near real-time
  // readers.
  private volatile boolean poolReaders;

  // The instance that was passed to the constructor. It is saved only in order
  // to allow users to query an IndexWriter settings.
  private final IndexWriterConfig config;

  // The PayloadProcessorProvider to use when segments are merged
  private PayloadProcessorProvider payloadProcessorProvider;

  boolean anyNonBulkMerges;

  /**
   * Expert: returns a readonly reader, covering all
   * committed as well as un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * <code>flush</code> and then using {@link IndexReader#open} to
   * open a new reader. But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
   *
   * <p>It's <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter. You'll have to
   * experiment in your situation to determine if it's
   * fast enough. As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * IndexReader#reopen}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc. This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link #setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it's committed
   * to the index. This is important for minimizing
   * index-to-search delay after a large merge.</p>
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used. However,
   * if you attempt to reopen any of those readers, you'll
   * hit an {@link AlreadyClosedException}.</p>
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @deprecated Please use {@link
   * IndexReader#open(IndexWriter,boolean)} instead.
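   *
   * <p>A sketch of the suggested replacement, which returns a
   * near real-time reader from an open <code>writer</code>
   * (<code>true</code> means apply buffered deletes):</p>
   *
   * <pre>
   * IndexReader reader = IndexReader.open(writer, true);
   * try {
   *   // run searches against reader
   * } finally {
   *   reader.close();
   * }
   * </pre>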
   *
   * @throws IOException
   */
  @Deprecated
  public IndexReader getReader() throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), true);
  }

  IndexReader getReader(boolean applyAllDeletes) throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
  }

  /** Expert: like {@link #getReader}, except you can
   *  specify which termInfosIndexDivisor should be used for
   *  any newly opened readers.
   *  @param termInfosIndexDivisor Subsamples which indexed
   *  terms are loaded into RAM. This has the same effect as {@link
   *  IndexWriter#setTermIndexInterval} except that setting
   *  must be done at indexing time while this setting can be
   *  set per reader. When set to N, then one in every
   *  N*termIndexInterval terms in the index is loaded into
   *  memory. By setting this to a value > 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo. The default value is 1. Set this
   *  to -1 to skip loading the terms index entirely.
   *
   *  @deprecated Please use {@link
   *  IndexReader#open(IndexWriter,boolean)} instead. Furthermore,
   *  this method cannot guarantee the reader (and its
   *  sub-readers) will be opened with the
   *  termInfosIndexDivisor setting because some of them may
   *  have already been opened according to {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor}. You
   *  should set the requested termInfosIndexDivisor through
   *  {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
   *  {@link #getReader()}. */
  @Deprecated
  public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
    return getReader(termInfosIndexDivisor, true);
  }

  IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
    ensureOpen();

    final long tStart = System.currentTimeMillis();

    if (infoStream != null) {
      message("flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    poolReaders = true;

    // Prevent segmentInfos from changing while opening the
    // reader; in theory we could do similar retry logic,
    // just like we do when loading segments_N
    IndexReader r;
    synchronized(this) {
      flush(false, applyAllDeletes);
      r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
      if (infoStream != null) {
        message("return reader version=" + r.getVersion() + " reader=" + r);
      }
    }

    if (infoStream != null) {
      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
    }
    return r;
  }

  // Used for all SegmentReaders we open
  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());

  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
    return readerFinishedListeners;
  }

  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader. This pool
   *  reuses instances of the SegmentReaders in all these
   *  places if it is in "near real-time mode" (getReader()
   *  has been called on this instance). */
  class ReaderPool {

    private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();

    /** Forcefully clear changes for the specified segments. This is called on successful merge. */
    synchronized void clear(List<SegmentInfo> infos) throws IOException {
      if (infos == null) {
        for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
          ent.getValue().hasChanges = false;
        }
      } else {
        for (final SegmentInfo info: infos) {
          final SegmentReader r = readerMap.get(info);
          if (r != null) {
            r.hasChanges = false;
          }
        }
      }
    }

    // used only by asserts
    public synchronized boolean infoIsLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      assert idx != -1: "info=" + info + " isn't in pool";
      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
      return true;
    }

    public synchronized SegmentInfo mapToLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      if (idx != -1) {
        info = segmentInfos.info(idx);
      }
      return info;
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed). Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr) throws IOException {
      return release(sr, false);
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed). Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr, boolean drop) throws IOException {

      final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());

      assert !pooled || readerMap.get(sr.getSegmentInfo()) == sr;

      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      sr.decRef();

      if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {

        // We invoke deleter.checkpoint below, so we must be
        // sync'd on IW if there are changes:
        assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge.
        sr.hasChanges &= !drop;

        final boolean hasChanges = sr.hasChanges;

        // Drop our ref -- this will commit any pending
        // changes to the dir
        sr.close();

        // We are the last ref to this reader; since we're
        // not pooling readers, we release it:
        readerMap.remove(sr.getSegmentInfo());

        return hasChanges;
      }

      return false;
    }

    public synchronized void drop(List<SegmentInfo> infos) throws IOException {
      for(SegmentInfo info : infos) {
        drop(info);
      }
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.hasChanges = false;
        readerMap.remove(info);
        sr.close();
      }
    }

    public synchronized void dropAll() throws IOException {
      for(SegmentReader reader : readerMap.values()) {
        reader.hasChanges = false;

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader using this SR is still open
        reader.decRef();
      }
      readerMap.clear();
    }

    /** Remove all our references to readers, and commits
     *  any pending changes. */
    synchronized void close() throws IOException {
      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for(Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {

        SegmentReader sr = ent.getValue();
        if (sr.hasChanges) {
          assert infoIsLive(sr.getSegmentInfo());
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader is kept open after the
        // IndexWriter instance is closed
        sr.decRef();
      }

      readerMap.clear();
    }

    /**
     * Commit all segment readers in the pool.
     * @throws IOException
     */
    synchronized void commit(SegmentInfos infos) throws IOException {

      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for (SegmentInfo info : infos) {

        final SegmentReader sr = readerMap.get(info);
        if (sr != null && sr.hasChanges) {
          assert infoIsLive(info);
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }
      }
    }

    /**
     * Returns a ref to a clone. NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
      SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
      try {
        return (SegmentReader) sr.clone(true);
      } finally {
        sr.decRef();
      }
    }

    /**
     * Obtain a SegmentReader from the readerPool. The reader
     * must be returned by calling {@link #release(SegmentReader)}.
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
      return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, config.getReaderTermsIndexDivisor());
    }

    /**
     * Obtain a SegmentReader from the readerPool. The reader
     * must be returned by calling {@link #release(SegmentReader)}.
     *
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @param readBufferSize
     * @param termsIndexDivisor
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {

      if (poolReaders) {
        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
      }

      SegmentReader sr = readerMap.get(info);
      if (sr == null) {
        // TODO: we may want to avoid doing this while
        // synchronized
        // Returns a ref, which we xfer to readerMap:
        sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
        sr.readerFinishedListeners = readerFinishedListeners;

        if (info.dir == directory) {
          // Only pool if reader is not external
          readerMap.put(info, sr);
        }
      } else {
        if (doOpenStores) {
          sr.openDocStores();
        }
        if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
          // If this reader was originally opened because we
          // needed to merge it, we didn't load the terms
          // index. But now, if the caller wants the terms
          // index (eg because it's doing deletes, or an NRT
          // reader is being opened) we ask the reader to
          // load its terms index.
          sr.loadTermsIndex(termsIndexDivisor);
        }
      }

      // Return a ref to our caller
      if (info.dir == directory) {
        // Only incRef if we pooled (reader is not external)
        sr.incRef();
      }
      return sr;
    }

    public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
      SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.incRef();
      }
      return sr;
    }
  }

  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's
   * delCount is returned.
   */
  public int numDeletedDocs(SegmentInfo info) throws IOException {
    ensureOpen(false);
    SegmentReader reader = readerPool.getIfExists(info);
    try {
      if (reader != null) {
        return reader.numDeletedDocs();
      } else {
        return info.getDelCount();
      }
    } finally {
      if (reader != null) {
        readerPool.release(reader);
      }
    }
  }

  /**
   * Used internally to throw an {@link
   * AlreadyClosedException} if this IndexWriter has been
   * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed
   */
  protected final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
    if (closed || (includePendingClose && closing)) {
      throw new AlreadyClosedException("this IndexWriter is closed");
    }
  }

  protected final void ensureOpen() throws AlreadyClosedException {
    ensureOpen(true);
  }

  /**
   * Prints a message to the infoStream (if non-null),
   * prefixed with the identifying information for this
   * writer and the thread that's calling it.
   */
  public void message(String message) {
    if (infoStream != null)
      infoStream.println("IW " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
  }

  /**
   * Casts current mergePolicy to LogMergePolicy, and throws
   * an exception if the mergePolicy is not a LogMergePolicy.
   */
  private LogMergePolicy getLogMergePolicy() {
    if (mergePolicy instanceof LogMergePolicy)
      return (LogMergePolicy) mergePolicy;
    else
      throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  }

  /** <p>Get the current setting of whether newly flushed
   *  segments will use the compound file format. Note that
   *  this just returns the value previously set with
   *  setUseCompoundFile(boolean), or the default value
   *  (true). You cannot use this to query the status of
   *  previously flushed segments.</p>
   *
   *  <p>Note that this method is a convenience method: it
   *  just calls mergePolicy.getUseCompoundFile as long as
   *  mergePolicy is an instance of {@link LogMergePolicy}.
   *  Otherwise an IllegalArgumentException is thrown.</p>
   *
   *  @see #setUseCompoundFile(boolean)
   *  @deprecated use {@link LogMergePolicy#getUseCompoundFile()}
   */
  @Deprecated
  public boolean getUseCompoundFile() {
    return getLogMergePolicy().getUseCompoundFile();
  }

  /**
   * Setting to turn on usage of a compound file. When on, multiple files for
   * each segment are merged into a single file when a new segment is flushed.
   *
   * <p>Note that this method is a convenience method: it just calls
   * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
   * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.</p>
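   *
   * <p>A sketch of the non-deprecated route through the merge policy
   * (<code>conf</code> is an {@link IndexWriterConfig}):</p>
   *
   * <pre>
   * LogMergePolicy mp = new LogByteSizeMergePolicy();
   * mp.setUseCompoundFile(false);  // keep per-segment files separate
   * conf.setMergePolicy(mp);
   * </pre>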
   *
   * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}.
   */
  @Deprecated
  public void setUseCompoundFile(boolean value) {
    getLogMergePolicy().setUseCompoundFile(value);
  }

  /** Expert: Set the Similarity implementation used by this IndexWriter.
   *
   * @see Similarity#setDefault(Similarity)
   * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
   */
  @Deprecated
  public void setSimilarity(Similarity similarity) {
    ensureOpen();
    this.similarity = similarity;
    docWriter.setSimilarity(similarity);
    // Required so config.getSimilarity returns the right value. But this will
    // go away together with the method in 4.0.
    config.setSimilarity(similarity);
  }

  /** Expert: Return the Similarity implementation used by this IndexWriter.
   *
   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
   * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
   */
  @Deprecated
  public Similarity getSimilarity() {
    ensureOpen();
    return similarity;
  }

  /** Expert: Set the interval between indexed terms. Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms. Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term. In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed. In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data. In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
   *
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
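   *
   * <p>For example (illustrative arithmetic only): with the default
   * interval of 128, an index with 12,800,000 unique terms keeps
   * roughly 12,800,000/128 = 100,000 terms in memory, and a random
   * term lookup scans about 128/2 = 64 terms on average.</p>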
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
   */
  @Deprecated
  public void setTermIndexInterval(int interval) {
    ensureOpen();
    config.setTermIndexInterval(interval);
  }

  /** Expert: Return the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
   */
  @Deprecated
  public int getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return config.getTermIndexInterval();
  }

  /**
   * Constructs an IndexWriter for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>. If <code>create</code>
   * is true, then a new, empty index will be created in
   * <code>d</code>, replacing the index already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Constructs an IndexWriter for the index in
   * <code>d</code>, first creating it if it does not
   * already exist. Text will be analyzed with
   * <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>,
   * first creating it if it does not already exist. Text
   * will be analyzed with <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>. If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter on specific commit
   * point, with a custom {@link IndexDeletionPolicy}, for
   * the index in <code>d</code>. Text will be analyzed
   * with <code>a</code>.
   *
   * <p> This is only meaningful if you've used a {@link
   * IndexDeletionPolicy} in the past that keeps more than
   * just the last commit.
   *
   * <p>This operation is similar to {@link #rollback()},
   * except that method can only rollback what's been done
   * with the current instance of IndexWriter since its last
   * commit, whereas this method can rollback to an
   * arbitrary commit point from the past, assuming the
   * {@link IndexDeletionPolicy} has preserved past
   * commits.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
   * @param commit which commit to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a)
        .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Constructs a new IndexWriter per the settings given in <code>conf</code>.
   * Note that the passed in {@link IndexWriterConfig} is
   * privately cloned; if you need to make subsequent "live"
   * changes to the configuration use {@link #getConfig}.
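   *
   * <p>A sketch of both routes (whether a particular setter takes
   * effect "live" is documented on each {@link IndexWriterConfig}
   * setter; the value below is illustrative):</p>
   *
   * <pre>
   * IndexWriter writer = new IndexWriter(d, conf);
   * // later changes must go through the writer's private clone:
   * writer.getConfig().setRAMBufferSizeMB(32.0);
   * </pre>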
   *
   * @param d
   *          the index directory. The index is either created or appended
   *          according to <code>conf.getOpenMode()</code>.
   * @param conf
   *          the configuration settings according to which IndexWriter should
   *          be initialized.
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws LockObtainFailedException
   *           if another writer has this index open (<code>write.lock</code>
   *           could not be obtained)
   * @throws IOException
   *           if the directory cannot be read/written to, or if it does not
   *           exist and <code>conf.getOpenMode()</code> is
   *           <code>OpenMode.APPEND</code> or if there is any other low-level
   *           IO error
   */
  public IndexWriter(Directory d, IndexWriterConfig conf)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    config = (IndexWriterConfig) conf.clone();
    directory = d;
    analyzer = conf.getAnalyzer();
    infoStream = defaultInfoStream;
    writeLockTimeout = conf.getWriteLockTimeout();
    similarity = conf.getSimilarity();
    mergePolicy = conf.getMergePolicy();
    mergePolicy.setIndexWriter(this);
    mergeScheduler = conf.getMergeScheduler();
    bufferedDeletesStream = new BufferedDeletesStream(messageID);
    bufferedDeletesStream.setInfoStream(infoStream);
    poolReaders = conf.getReaderPooling();

    writeLock = directory.makeLock(WRITE_LOCK_NAME);

    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);

    OpenMode mode = conf.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !IndexReader.indexExists(directory);
    }

    boolean success = false;

    // TODO: we should check whether this index is too old,
    // and throw an IndexFormatTooOldExc up front, here,
    // instead of later when merge, applyDeletes, getReader
    // is attempted. I think to do this we should store the
    // oldest segment's version in segments_N.
    try {
      if (create) {
        // Try to read first. This is to allow create
        // against an index that's currently open for
        // searching. In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }

        // Record that we have a change (zero out all
        // segments) pending:
        changeCount++;
        segmentInfos.changed();
      } else {
        segmentInfos.read(directory);

        IndexCommit commit = conf.getIndexCommit();
        if (commit != null) {
          // Swap out all segments, but, keep metadata in
          // SegmentInfos, like version & generation, to
          // preserve write-once. This is important if
          // readers are open against the future commit
          // points.
          if (commit.getDirectory() != directory)
            throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
          SegmentInfos oldInfos = new SegmentInfos();
          oldInfos.read(directory, commit.getSegmentsFileName());
          segmentInfos.replace(oldInfos);
          changeCount++;
          segmentInfos.changed();
          if (infoStream != null)
            message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

      docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      synchronized(this) {
        deleter = new IndexFileDeleter(directory,
                                       conf.getIndexDeletionPolicy(),
                                       segmentInfos, infoStream,
                                       this);
      }

      if (deleter.startingCommitDeleted) {
        // Deletion policy deleted the "head" commit point.
        // We have to mark ourself as changed so that if we
        // are closed w/o any further changes we write a new
        // segments_N file.
        changeCount++;
        segmentInfos.changed();
      }

      if (infoStream != null) {
        messageState();
      }

      success = true;
    } finally {
      if (!success) {
        if (infoStream != null) {
          message("init: hit exception on init; releasing write lock");
        }
        try {
          writeLock.release();
        } catch (Throwable t) {
          // don't mask the original exception
        } finally {
          writeLock = null;
        }
      }
    }
  }

  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
    Directory cfsDir = null;
    try {
      if (info.getUseCompoundFile()) {
        cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
      } else {
        cfsDir = directory;
      }
      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
    } finally {
      if (info.getUseCompoundFile() && cfsDir != null) {
        ((CompoundFileReader) cfsDir).close();
      }
    }
  }

  private FieldInfos getCurrentFieldInfos() throws IOException {
    final FieldInfos fieldInfos;
    if (segmentInfos.size() > 0) {
      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
        // Pre-3.1 index. In this case we sweep all
        // segments, merging their FieldInfos:
        fieldInfos = new FieldInfos();
        for(SegmentInfo info : segmentInfos) {
          final FieldInfos segFieldInfos = getFieldInfos(info);
          final int fieldCount = segFieldInfos.size();
          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
          }
        }
      } else {
        // Already a 3.1 index; just seed the FieldInfos
        // from the last segment
        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
      }
    } else {
      fieldInfos = new FieldInfos();
    }
    return fieldInfos;
  }

  /**
   * Returns the private {@link IndexWriterConfig}, cloned
   * from the {@link IndexWriterConfig} passed to
   * {@link #IndexWriter(Directory, IndexWriterConfig)}.
   * <p>
   * <b>NOTE:</b> some settings may be changed on the
   * returned {@link IndexWriterConfig}, and will take
   * effect in the current IndexWriter instance. See the
   * javadocs for the specific setters in {@link
   * IndexWriterConfig} for details.
   */
  public IndexWriterConfig getConfig() {
    ensureOpen(false);
    return config;
  }

  /**
   * Expert: set the merge policy used by this writer.
   *
   * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
   */
  @Deprecated
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    mergePolicy.setIndexWriter(this);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
    // Required so config.getMergePolicy returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergePolicy(mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   *
   * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
   */
  @Deprecated
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }

  /**
   * Expert: set the merge scheduler used by this writer.
   * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
   */
  @Deprecated
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
    // Required so config.getMergeScheduler returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergeScheduler(mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler(MergeScheduler)
   *
   * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
   */
  @Deprecated
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }

  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds. Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
   */
  @Deprecated
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
   */
  @Deprecated
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document. This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory. This setting refers to the number of running terms,
   * not to the number of different terms.
   *
   * <strong>Note:</strong> this silently truncates large documents, excluding
   * from the index all terms that occur further in the document. If you know
   * your source documents are large, be sure to set this value high enough to
   * accommodate the expected size. If you set it to Integer.MAX_VALUE, then the
   * only limit is your memory, but you should anticipate an OutOfMemoryError.
   *
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be
   * indexed for a field.
   *
   * @deprecated use {@link LimitTokenCountAnalyzer} instead. Note that the
   *             behavior slightly changed - the analyzer limits the number of
   *             tokens per token stream created, while this setting limits the
   *             total number of tokens to index. This only matters if you index
   *             many multi-valued fields though.
   */
  @Deprecated
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   * @deprecated use {@link LimitTokenCountAnalyzer} to limit number of tokens.
   */
  @Deprecated
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public void setReaderTermsIndexDivisor(int divisor) {
    ensureOpen();
    config.setReaderTermsIndexDivisor(divisor);
    if (infoStream != null) {
      message("setReaderTermsIndexDivisor " + divisor);
    }
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#getReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public int getReaderTermsIndexDivisor() {
    ensureOpen();
    return config.getReaderTermsIndexDivisor();
  }

  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment. Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents. Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents. Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
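   *
   * <p>A sketch of flushing strictly by document count through the
   * non-deprecated API (the count is illustrative):</p>
   *
   * <pre>
   * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
   * conf.setMaxBufferedDocs(1000);                                  // flush every 1000 docs
   * conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // not by RAM
   * </pre>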
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    pushMaxBufferedDocs();
    if (infoStream != null) {
      message("setMaxBufferedDocs " + maxBufferedDocs);
    }
    // Required so config.getMaxBufferedDocs returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMaxBufferedDocs(maxBufferedDocs);
  }

  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (config.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = config.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
   */
  @Deprecated
  public int getMaxBufferedDocs() {
    ensureOpen();
    return config.getMaxBufferedDocs();
  }

  /** Determines the amount of RAM that may be used for
   * buffering added documents and deletions before they are
   * flushed to the Directory. Generally for faster
   * indexing performance it's best to flush by RAM usage
   * instead of document count and use as large a RAM buffer
   * as you can.
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents and deletions use this much RAM.
   * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to RAM usage. Note that if
   * flushing by document count is also enabled, then the
   * flush will be triggered by whichever comes first.</p>
   *
   * <p> <b>NOTE</b>: the account of RAM usage for pending
   * deletions is only approximate. Specifically, if you
   * delete by Query, Lucene currently has no way to measure
   * the RAM usage of individual Queries so the accounting
   * will under-estimate and you should compensate by either
   * calling commit() periodically yourself, or by using
   * {@link #setMaxBufferedDeleteTerms} to flush by count
   * instead of RAM usage (each buffered delete Query counts
   * as one).
   *
   * <p> <b>NOTE</b>: because IndexWriter uses
   * <code>int</code>s when managing its internal storage,
   * the absolute maximum value for this setting is somewhat
   * less than 2048 MB. The precise limit depends on
   * various factors, such as how large your documents are,
   * how many fields have norms, etc., so it's best to set
   * this value comfortably under 2048.</p>
   *
   * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
   */
  @Deprecated
  public void setRAMBufferSizeMB(double mb) {
    if (infoStream != null) {
      message("setRAMBufferSizeMB " + mb);
    }
    // Required so config.getRAMBufferSizeMB returns the right value. But this
    // will go away together with the method in 4.0.
    config.setRAMBufferSizeMB(mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
   */
  @Deprecated
  public double getRAMBufferSizeMB() {
    return config.getRAMBufferSizeMB();
  }

  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDeleteTerms
   * is enabled but smaller than 1
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    ensureOpen();
    if (infoStream != null)
      message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
    // Required so config.getMaxBufferedDeleteTerms returns the right value. But
    // this will go away together with the method in 4.0.
    config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
  }
1595
* Returns the number of buffered deleted terms that will
1596
* trigger a flush if enabled.
1597
* @see #setMaxBufferedDeleteTerms
1598
* @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
1601
public int getMaxBufferedDeleteTerms() {
1603
return config.getMaxBufferedDeleteTerms();
1606
  /** Determines how often segment indices are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower.  With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMergeFactor as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>This must never be less than 2.  The default value is 10.
   * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
   */
  @Deprecated
  public void setMergeFactor(int mergeFactor) {
    getLogMergePolicy().setMergeFactor(mergeFactor);
  }

  /**
   * <p>Returns the number of segments that are merged at
   * once and also controls the total number of segments
   * allowed to accumulate in the index.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMergeFactor as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMergeFactor
   * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
   */
  @Deprecated
  public int getMergeFactor() {
    return getLogMergePolicy().getMergeFactor();
  }

  /** If non-null, this will be the default infoStream used
   * by a newly instantiated IndexWriter.
   * @see #setInfoStream
   */
  public static void setDefaultInfoStream(PrintStream infoStream) {
    IndexWriter.defaultInfoStream = infoStream;
  }

  /**
   * Returns the current default infoStream for newly
   * instantiated IndexWriters.
   * @see #setDefaultInfoStream
   */
  public static PrintStream getDefaultInfoStream() {
    return IndexWriter.defaultInfoStream;
  }

  /** If non-null, information about merges, deletes and a
   * message when maxFieldLength is reached will be printed
   * to this.
   */
  public void setInfoStream(PrintStream infoStream) throws IOException {
    ensureOpen();
    this.infoStream = infoStream;
    docWriter.setInfoStream(infoStream);
    deleter.setInfoStream(infoStream);
    bufferedDeletesStream.setInfoStream(infoStream);
    if (infoStream != null)
      messageState();
  }

  private void messageState() throws IOException {
    message("\ndir=" + directory + "\n" +
            "index=" + segString() + "\n" +
            "version=" + Constants.LUCENE_VERSION + "\n" +
            config.toString());
  }

  /**
   * Returns the current infoStream in use by this writer.
   * @see #setInfoStream
   */
  public PrintStream getInfoStream() {
    ensureOpen();
    return infoStream;
  }

  /** Returns true if verbose output is enabled (i.e., infoStream != null). */
  public boolean verbose() {
    return infoStream != null;
  }

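  // Illustrative sketch (assumes "writer" is an open IndexWriter): route the
  // writer's diagnostic messages about flushes, merges and deletes to stdout.
  //
  //   writer.setInfoStream(System.out);
  //   if (writer.verbose()) {
  //     // infoStream is non-null; internal activity will now be logged
  //   }
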
  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
   * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
   * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
   */
  @Deprecated
  public void setWriteLockTimeout(long writeLockTimeout) {
    ensureOpen();
    this.writeLockTimeout = writeLockTimeout;
    // Required so config.getWriteLockTimeout returns the right value. But this
    // will go away together with the method in 4.0.
    config.setWriteLockTimeout(writeLockTimeout);
  }

  /**
   * Returns allowed timeout when acquiring the write lock.
   * @see #setWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
   */
  @Deprecated
  public long getWriteLockTimeout() {
    ensureOpen();
    return writeLockTimeout;
  }

  /**
   * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
   * milliseconds).
   * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
   */
  @Deprecated
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
  }

  /**
   * Returns default write lock timeout for newly
   * instantiated IndexWriters.
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
   */
  @Deprecated
  public static long getDefaultWriteLockTimeout() {
    return IndexWriterConfig.getDefaultWriteLockTimeout();
  }

  /**
   * Commits all changes to an index and closes all
   * associated files.  Note that this may be a costly
   * operation, so, try to re-use a single writer instead of
   * closing and opening a new one.  See {@link #commit()} for
   * caveats about write caching done by some IO devices.
   *
   * <p> If an Exception is hit during close, eg due to disk
   * full or some other reason, then both the on-disk index
   * and the internal state of the IndexWriter instance will
   * be consistent.  However, the close will not be complete
   * even though part of it (flushing buffered documents)
   * may have succeeded, so the write lock will still be
   * held.</p>
   *
   * <p> If you can correct the underlying cause (eg free up
   * some disk space) then you can call close() again.
   * Failing that, if you want to force the write lock to be
   * released (dangerous, because you may then lose buffered
   * docs in the IndexWriter instance) then you can do
   * something like this:</p>
   *
   * <pre>
   * try {
   *   writer.close();
   * } finally {
   *   if (IndexWriter.isLocked(directory)) {
   *     IndexWriter.unlock(directory);
   *   }
   * }
   * </pre>
   *
   * after which, you must be certain not to use the writer
   * instance anymore.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer, again.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void close() throws CorruptIndexException, IOException {
    close(true);
  }

  /**
   * Closes the index with or without waiting for currently
   * running merges to finish.  This is only meaningful when
   * using a MergeScheduler that runs merges in background
   * threads.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer, again.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: it is dangerous to always call
   * close(false), especially when IndexWriter is not open
   * for very long, because this can result in "merge
   * starvation" whereby long merges will never have a
   * chance to finish.  This will cause too many segments in
   * your index over time.</p>
   *
   * @param waitForMerges if true, this call will block
   * until all merges complete; else, it will ask all
   * running merges to abort, wait until those merges have
   * finished (which should be at most a few seconds), and
   * then return.
   */
  public void close(boolean waitForMerges) throws CorruptIndexException, IOException {

    // Ensure that only one thread actually gets to do the closing:
    if (shouldClose()) {
      // If any methods have hit OutOfMemoryError, then abort
      // on close, in case the internal state of IndexWriter
      // or DocumentsWriter is corrupt
      if (hitOOM)
        rollbackInternal();
      else
        closeInternal(waitForMerges);
    }
  }

  // Returns true if this thread should attempt to close, or
  // false if IndexWriter is now closed; else, waits until
  // another thread finishes closing
  synchronized private boolean shouldClose() {
    while(true) {
      if (!closed) {
        if (!closing) {
          closing = true;
          return true;
        } else {
          // Another thread is presently trying to close;
          // wait until it finishes one way (closes
          // successfully) or another (fails to close)
          doWait();
        }
      } else {
        return false;
      }
    }
  }

  private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {

    try {

      if (infoStream != null) {
        message("now flush at close waitForMerges=" + waitForMerges);
      }

      docWriter.close();

      // Only allow a new merge to be triggered if we are
      // going to wait for merges:
      if (!hitOOM) {
        flush(waitForMerges, true);
      }

      if (waitForMerges)
        // Give merge scheduler last chance to run, in case
        // any pending merges are waiting:
        mergeScheduler.merge(this);

      mergePolicy.close();

      synchronized(this) {
        finishMerges(waitForMerges);
        stopMerges = true;
      }

      mergeScheduler.close();

      if (infoStream != null)
        message("now call final commit()");

      if (!hitOOM) {
        commitInternal(null);
      }

      if (infoStream != null)
        message("at close: " + segString());

      synchronized(this) {
        readerPool.close();
        docWriter = null;
        deleter.close();
      }

      if (writeLock != null) {
        writeLock.release();                          // release write lock
        writeLock = null;
      }
      synchronized(this) {
        closed = true;
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "closeInternal");
    } finally {
      synchronized(this) {
        closing = false;
        notifyAll();
        if (!closed) {
          if (infoStream != null)
            message("hit exception while closing");
        }
      }
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    // Pass false because the flush during closing calls getDirectory
    ensureOpen(false);
    return directory;
  }

  /** Returns the analyzer used by this index. */
  public Analyzer getAnalyzer() {
    ensureOpen();
    return analyzer;
  }

  /** Returns total number of docs in this index, including
   *  docs not yet flushed (still in the RAM buffer),
   *  not counting deletions.
   */
  public synchronized int maxDoc() {
    int count;
    if (docWriter != null)
      count = docWriter.getNumDocs();
    else
      count = 0;

    count += segmentInfos.totalDocCount();
    return count;
  }

  /** Returns total number of docs in this index, including
   *  docs not yet flushed (still in the RAM buffer), and
   *  including deletions.  <b>NOTE:</b> buffered deletions
   *  are not counted.  If you really need these to be
   *  counted you should call {@link #commit()} first.
   */
  public synchronized int numDocs() throws IOException {
    int count;
    if (docWriter != null)
      count = docWriter.getNumDocs();
    else
      count = 0;

    for (final SegmentInfo info : segmentInfos) {
      count += info.docCount - numDeletedDocs(info);
    }
    return count;
  }

  public synchronized boolean hasDeletions() throws IOException {
    ensureOpen();
    if (bufferedDeletesStream.any()) {
      return true;
    }
    if (docWriter.anyDeletions()) {
      return true;
    }
    for (final SegmentInfo info : segmentInfos) {
      if (info.hasDeletions()) {
        return true;
      }
    }
    return false;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   *
   * @see MaxFieldLength
   * @deprecated remove in 4.0
   */
  @Deprecated
  private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

  /**
   * Adds a document to this index.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p> Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p> This method periodically flushes pending documents
   * to the Directory (see <a href="#flush">above</a>), and
   * also periodically triggers segment merges in the index
   * according to the {@link MergePolicy} in use.</p>
   *
   * <p>Merges temporarily consume space in the
   * directory. The amount of space required is up to 1X the
   * size of all segments being merged, when no
   * readers/searchers are open against the index, and up to
   * 2X the size of all segments being merged when
   * readers/searchers are open against the index (see
   * {@link #forceMerge(int)} for details). The sequence of
   * primitive merge operations performed is governed by the
   * merge policy.
   *
   * <p>Note that each term in the document can be no longer
   * than 16383 characters, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * <p>Note that it's possible to create an invalid Unicode
   * string in java if a UTF16 surrogate pair is malformed.
   * In this case, the invalid characters are silently
   * replaced with the Unicode replacement character
   * U+FFFD.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }

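  // Illustrative sketch (not from the original source): building and adding a
  // document.  The field names ("id", "body") are made up for the example.
  //
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
  //   doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
  //   writer.addDocument(doc);
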
  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    boolean doFlush = false;
    boolean success = false;
    try {
      try {
        doFlush = docWriter.updateDocument(doc, analyzer, null);
        success = true;
      } finally {
        if (!success && infoStream != null)
          message("hit exception adding document");
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addDocument");
    }
  }

  /**
   * Atomically adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * <p><b>WARNING</b>: the index does not currently record
   * which documents were added as a block.  Today this is
   * fine, because merging will preserve the block (as long
   * as none of them were deleted).  But it's possible in the
   * future that Lucene may more aggressively re-order
   * documents (for example, perhaps to obtain better index
   * compression), in which case you may need to fully
   * re-index your documents at that time.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * <p><b>NOTE</b>: tools that do offline splitting of an index
   * (for example, IndexSplitter in contrib) or
   * re-sorting of documents (for example, IndexSorter in
   * contrib) are not aware of these atomically added documents
   * and will likely break them up.  Use such tools at your
   * own risk!
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Collection<Document> docs) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    addDocuments(docs, analyzer);
  }

  /**
   * Atomically adds a block of documents, analyzed using the
   * provided analyzer, with sequentially assigned document
   * IDs, such that an external reader will see all or none
   * of the documents.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    updateDocuments(null, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Collection)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Collection<Document> docs) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    updateDocuments(delTerm, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents, analyzed using
   * the provided analyzer, with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Collection)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    ensureOpen();
    try {
      boolean success = false;
      boolean doFlush = false;
      try {
        doFlush = docWriter.updateDocuments(docs, analyzer, delTerm);
        success = true;
      } finally {
        if (!success && infoStream != null) {
          message("hit exception updating document");
        }
      }
      if (doFlush) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "updateDocuments");
    }
  }

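  // Illustrative sketch: atomically replacing a block of documents keyed by a
  // shared term ("blockId" and the docs are hypothetical).  A reader sees
  // either the old block or the new one, never a mix.
  //
  //   List<Document> block = new ArrayList<Document>();
  //   block.add(parentDoc);
  //   block.add(childDoc);
  //   writer.updateDocuments(new Term("blockId", "7"), block);
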
  /**
   * Deletes the document(s) containing <code>term</code>.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteTerm(term, false)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Term)");
    }
  }

  /**
   * Deletes the document(s) containing any of the
   * terms. All deletes are flushed at the same time.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteTerms(terms)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Term..)");
    }
  }

  /**
   * Deletes the document(s) matching the provided query.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param query the query to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteQuery(query)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Query)");
    }
  }

  /**
   * Deletes the document(s) matching any of the provided queries.
   * All deletes are flushed at the same time.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param queries array of queries to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteQueries(queries)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Query..)");
    }
  }

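  // Illustrative sketch: deleting by exact term versus by query.  The field
  // name "id" and the range bounds are made-up example values.
  //
  //   writer.deleteDocuments(new Term("id", "42"));   // delete one doc by its key
  //   writer.deleteDocuments(new TermRangeQuery("id", "100", "200", true, true));
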
  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = false;
      boolean success = false;
      try {
        doFlush = docWriter.updateDocument(doc, analyzer, term);
        success = true;
      } finally {
        if (!success && infoStream != null)
          message("hit exception updating document");
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "updateDocument");
    }
  }

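  // Illustrative sketch: updateDocument is the delete-then-add idiom in one
  // atomic call; the term should uniquely identify the document ("id" is a
  // hypothetical key field).
  //
  //   writer.updateDocument(new Term("id", "42"), newVersionOfDoc);
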
  // for test purpose
  final synchronized int getSegmentCount(){
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getNumBufferedDocuments(){
    return docWriter.getNumDocs();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  // for test purpose
  final int getFlushCount() {
    return flushCount.get();
  }

  // for test purpose
  final int getFlushDeletesCount() {
    return flushDeletesCount.get();
  }

  final String newSegmentName() {
    // Cannot synchronize on IndexWriter because that causes
    // deadlock
    synchronized(segmentInfos) {
      // Important to increment changeCount so that the
      // segmentInfos is written on close.  Otherwise we
      // could close, re-open and re-return the same segment
      // name that was previously returned which can cause
      // problems at least with ConcurrentMergeScheduler.
      changeCount++;
      segmentInfos.changed();
      return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
    }
  }

  /** If non-null, information about merges will be printed to this.
   */
  private PrintStream infoStream;
  private static PrintStream defaultInfoStream;

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize() throws CorruptIndexException, IOException {
    forceMerge(1, true);
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
    forceMerge(maxNumSegments, true);
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize(boolean doWait) throws CorruptIndexException, IOException {
    forceMerge(1, doWait);
  }

  /**
   * Forces merge policy to merge segments until there's <=
   * maxNumSegments.  The actual merges to be
   * executed are determined by the {@link MergePolicy}.
   *
   * <p>This is a horribly costly operation, especially when
   * you pass a small {@code maxNumSegments}; usually you
   * should only call this if the index is static (will no
   * longer be changed).</p>
   *
   * <p>Note that this requires up to 2X the index size free
   * space in your Directory (3X if you're using compound
   * file format).  For example, if your index size is 10 MB
   * then you need up to 20 MB free for this to complete (30
   * MB if you're using compound file format).  Also,
   * it's best to call {@link #commit()} afterwards,
   * to allow IndexWriter to free up disk space.</p>
   *
   * <p>If some but not all readers re-open while merging
   * is underway, this will cause > 2X temporary
   * space to be consumed as those new readers will then
   * hold open the temporary segments at that time.  It is
   * best not to re-open readers while merging is running.</p>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>In general, once this completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit, for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost.  However, it may have
   * been partially merged (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format.  This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * <p>This call will merge those segments present in
   * the index when the call started.  If other threads are
   * still adding documents and flushing segments, those
   * newly created segments will not be merged unless you
   * call forceMerge again.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @see MergePolicy#findMerges
   *
   * @param maxNumSegments maximum number of segments left
   * in the index after merging finishes
   */
  public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException {
    forceMerge(maxNumSegments, true);
  }

  /** Just like {@link #forceMerge(int)}, except you can
   *  specify whether the call should block until
   *  all merging completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads.
   *
   *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   *  you should immediately close the writer.  See <a
   *  href="#OOME">above</a> for details.</p>
   */
  public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
    ensureOpen();

    if (maxNumSegments < 1)
      throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);

    if (infoStream != null) {
      message("forceMerge: index now " + segString());
      message("now flush at forceMerge");
    }

    flush(true, true);

    synchronized(this) {
      resetMergeExceptions();
      segmentsToMerge.clear();
      for(SegmentInfo info : segmentInfos) {
        segmentsToMerge.put(info, Boolean.TRUE);
      }
      mergeMaxNumSegments = maxNumSegments;

      // Now mark all pending & running merges as isMaxNumSegments:
      for(final MergePolicy.OneMerge merge : pendingMerges) {
        merge.maxNumSegments = maxNumSegments;
        segmentsToMerge.put(merge.info, Boolean.TRUE);
      }

      for (final MergePolicy.OneMerge merge : runningMerges) {
        merge.maxNumSegments = maxNumSegments;
        segmentsToMerge.put(merge.info, Boolean.TRUE);
      }
    }

    maybeMerge(maxNumSegments);

    if (doWait) {
      synchronized(this) {
        while(true) {

          if (hitOOM) {
            throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
          }

          if (mergeExceptions.size() > 0) {
            // Forward any exceptions in background merge
            // threads to the current thread:
            final int size = mergeExceptions.size();
            for(int i=0;i<size;i++) {
              final MergePolicy.OneMerge merge = mergeExceptions.get(i);
              if (merge.maxNumSegments != -1) {
                IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
                final Throwable t = merge.getException();
                if (t != null)
                  err.initCause(t);
                throw err;
              }
            }
          }

          if (maxNumSegmentsMergesPending())
            doWait();
          else
            break;
        }
      }

      // If close is called while we are still
      // running, throw an exception so the calling
      // thread will know merging did not
      // complete
      ensureOpen();
    }

    // NOTE: in the ConcurrentMergeScheduler case, when
    // doWait is false, we can return immediately while
    // background threads accomplish the merging
  }

  /** Returns true if any merges in pendingMerges or
   *  runningMerges are maxNumSegments merges. */
  private synchronized boolean maxNumSegmentsMergesPending() {
    for (final MergePolicy.OneMerge merge : pendingMerges) {
      if (merge.maxNumSegments != -1)
        return true;
    }

    for (final MergePolicy.OneMerge merge : runningMerges) {
      if (merge.maxNumSegments != -1)
        return true;
    }

    return false;
  }

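  // Illustrative sketch: force-merging a static index down to one segment,
  // then committing so the deleter can reclaim the merged-away files.
  //
  //   writer.forceMerge(1);   // blocks until the merge completes
  //   writer.commit();
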
  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void expungeDeletes(boolean doWait) throws CorruptIndexException, IOException {
    forceMergeDeletes(doWait);
  }

  /** Just like {@link #forceMergeDeletes()}, except you can
   *  specify whether the call should block until the
   *  operation completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   */
  public void forceMergeDeletes(boolean doWait)
    throws CorruptIndexException, IOException {
    ensureOpen();

    flush(true, true);

    if (infoStream != null)
      message("forceMergeDeletes: index now " + segString());

    MergePolicy.MergeSpecification spec;

    synchronized(this) {
      spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
      if (spec != null) {
        final int numMerges = spec.merges.size();
        for(int i=0;i<numMerges;i++)
          registerMerge(spec.merges.get(i));
      }
    }

    mergeScheduler.merge(this);

    if (spec != null && doWait) {
      final int numMerges = spec.merges.size();
      synchronized(this) {
        boolean running = true;
        while(running) {

          if (hitOOM) {
            throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
          }

          // Check each merge that MergePolicy asked us to
          // do, to see if any of them are still running and
          // if any of them have hit an exception.
          running = false;
          for(int i=0;i<numMerges;i++) {
            final MergePolicy.OneMerge merge = spec.merges.get(i);
            if (pendingMerges.contains(merge) || runningMerges.contains(merge))
              running = true;
            Throwable t = merge.getException();
            if (t != null) {
              IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
              ioe.initCause(t);
              throw ioe;
            }
          }

          // If any of our merges are still running, wait:
          if (running)
            doWait();
        }
      }
    }

    // NOTE: in the ConcurrentMergeScheduler case, when
    // doWait is false, we can return immediately while
    // background threads accomplish the merging
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void expungeDeletes() throws CorruptIndexException, IOException {
    forceMergeDeletes();
  }

  /**
   * Forces merging of all segments that have deleted
   * documents.  The actual merges to be executed are
   * determined by the {@link MergePolicy}.  For example,
   * the default {@link TieredMergePolicy} will only
   * pick a segment if the percentage of
   * deleted docs is over 10%.
   *
   * <p>This is often a horribly costly operation; rarely
   * is it warranted.</p>
   *
   * <p>To see how
   * many deletions you have pending in your index, call
   * {@link IndexReader#numDeletedDocs}.</p>
   *
   * <p><b>NOTE</b>: this method first flushes a new
   * segment (if there are indexed documents), and applies
   * all buffered deletes.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   */
  public void forceMergeDeletes() throws CorruptIndexException, IOException {
    forceMergeDeletes(true);
  }

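  // Illustrative sketch: reclaiming space held by deleted docs.  Rarely
  // warranted; the merge policy normally handles this over time.  The field
  // name "state" is a made-up example.
  //
  //   writer.deleteDocuments(new Term("state", "expired"));
  //   writer.forceMergeDeletes();   // blocks until the merges complete
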
  /**
   * Expert: asks the mergePolicy whether any merges are
   * necessary now and if so, runs the requested merges and
   * then iterates (testing again whether merges are needed) until no
   * more merges are returned by the mergePolicy.
   *
   * Explicit calls to maybeMerge() are usually not
   * necessary. The most common case is when merge policy
   * parameters have changed.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   */
  public final void maybeMerge() throws CorruptIndexException, IOException {
    maybeMerge(-1);
  }

  private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
    ensureOpen(false);
    updatePendingMerges(maxNumSegments);
    mergeScheduler.merge(this);
  }

  private synchronized void updatePendingMerges(int maxNumSegments)
    throws CorruptIndexException, IOException {
    assert maxNumSegments == -1 || maxNumSegments > 0;

    if (stopMerges) {
      return;
    }

    // Do not start new merges if we've hit OOME
    if (hitOOM) {
      return;
    }

    final MergePolicy.MergeSpecification spec;
    if (maxNumSegments != -1) {
      spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
      if (spec != null) {
        final int numMerges = spec.merges.size();
        for(int i=0;i<numMerges;i++) {
          final MergePolicy.OneMerge merge = spec.merges.get(i);
          merge.maxNumSegments = maxNumSegments;
        }
      }
    } else {
      spec = mergePolicy.findMerges(segmentInfos);
    }

    if (spec != null) {
      final int numMerges = spec.merges.size();
      for(int i=0;i<numMerges;i++) {
        registerMerge(spec.merges.get(i));
      }
    }
  }

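  // Illustrative sketch: after changing merge policy parameters, nudging the
  // writer to re-evaluate pending merges.  Assumes the configured policy is a
  // LogMergePolicy; the cast would otherwise fail.
  //
  //   ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4);
  //   writer.maybeMerge();
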
  /** Expert: to be used by a {@link MergePolicy} to avoid
   *  selecting merges for segments already being merged.
   *  The returned collection is not cloned, and thus is
   *  only safe to access if you hold IndexWriter's lock
   *  (which you do when IndexWriter invokes the
   *  MergePolicy).
   *
   *  <p>Do not alter the returned collection! */
  public synchronized Collection<SegmentInfo> getMergingSegments() {
    return mergingSegments;
  }

  /** Expert: the {@link MergeScheduler} calls this method
   *  to retrieve the next merge requested by the
   *  MergePolicy.
   *
   * @lucene.experimental
   */
  public synchronized MergePolicy.OneMerge getNextMerge() {
    if (pendingMerges.size() == 0)
      return null;
    else {
      // Advance the merge from pending to running
      MergePolicy.OneMerge merge = pendingMerges.removeFirst();
      runningMerges.add(merge);
      return merge;
    }
  }

  /**
   * Close the <code>IndexWriter</code> without committing
   * any changes that have occurred since the last commit
   * (or since it was opened, if commit hasn't been called).
   * This removes any temporary files that had been created,
   * after which the state of the index will be the same as
   * it was when commit() was last called or when this
   * writer was first opened.  This also clears a previous
   * call to {@link #prepareCommit}.
   * @throws IOException if there is a low-level IO error
   */
  public void rollback() throws IOException {
    ensureOpen();

    // Ensure that only one thread actually gets to do the closing:
    if (shouldClose())
      rollbackInternal();
  }

  private void rollbackInternal() throws IOException {

    boolean success = false;

    if (infoStream != null ) {
      message("rollback");
    }

    try {
      synchronized(this) {
        finishMerges(false);
        stopMerges = true;
      }

      if (infoStream != null ) {
        message("rollback: done finish merges");
      }

      // Must pre-close these two, in case they increment
      // changeCount so that we can then set it to false
      // before calling closeInternal
      mergePolicy.close();
      mergeScheduler.close();

      bufferedDeletesStream.clear();

      synchronized(this) {

        if (pendingCommit != null) {
          pendingCommit.rollbackCommit(directory);
          deleter.decRef(pendingCommit);
          pendingCommit = null;
          notifyAll();
        }

        // Keep the same segmentInfos instance but replace all
        // of its SegmentInfo instances.  This is so the next
        // attempt to commit using this instance of IndexWriter
        // will always write to a new generation ("write
        // once").
        segmentInfos.rollbackSegmentInfos(rollbackSegments);
        if (infoStream != null ) {
          message("rollback: infos=" + segString(segmentInfos));
        }

        docWriter.abort();

        assert testPoint("rollback before checkpoint");

        // Ask deleter to locate unreferenced files & remove
        // them:
        deleter.checkpoint(segmentInfos, false);
        deleter.refresh();
      }

      // Don't bother saving any changes in our segmentInfos
      readerPool.clear(null);

      lastCommitChangeCount = changeCount;

      success = true;
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "rollbackInternal");
    } finally {
      synchronized(this) {
        if (!success) {
          closing = false;
          notifyAll();
          if (infoStream != null)
            message("hit exception during rollback");
        }
      }
    }

    closeInternal(false);
  }

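  // Illustrative sketch: discarding everything since the last commit.
  //
  //   writer.addDocument(doc);   // a buffered, uncommitted change
  //   writer.rollback();         // index reverts to the last commit; the writer is now closed
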
  /**
   * Delete all documents in the index.
   *
   * <p>This method will drop all buffered documents and will
   *    remove all segments from the index. This change will not be
   *    visible until a {@link #commit()} has been called. This method
   *    can be rolled back using {@link #rollback()}.</p>
   *
   * <p>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).</p>
   *
   * <p>NOTE: this method will forcefully abort all merges
   *    in progress.  If other threads are running {@link
   *    #forceMerge}, {@link #addIndexes(IndexReader[])} or
   *    {@link #forceMergeDeletes} methods, they may receive
   *    {@link MergePolicy.MergeAbortedException}s.
   */
  public synchronized void deleteAll() throws IOException {
    ensureOpen();
    try {
      // Abort any running merges
      finishMerges(false);

      // Remove any buffered docs
      docWriter.abort();

      // Remove all segments
      segmentInfos.clear();

      // Ask deleter to locate unreferenced files & remove them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      // Don't bother saving any changes in our segmentInfos
      readerPool.dropAll();

      // Mark that the index has changed
      ++changeCount;
      segmentInfos.changed();
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteAll");
    } finally {
      if (infoStream != null) {
        message("hit exception during deleteAll");
      }
    }
  }

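  // Illustrative sketch: atomically emptying an index while keeping the writer
  // open; nothing becomes visible to readers until commit().
  //
  //   writer.deleteAll();
  //   writer.commit();   // or writer.rollback() to restore the previous commit
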
  private synchronized void finishMerges(boolean waitForMerges) throws IOException {
    if (!waitForMerges) {

      stopMerges = true;

      // Abort all pending & running merges:
      for (final MergePolicy.OneMerge merge : pendingMerges) {
        if (infoStream != null)
          message("now abort pending merge " + merge.segString(directory));
        merge.abort();
        mergeFinish(merge);
      }
      pendingMerges.clear();

      for (final MergePolicy.OneMerge merge : runningMerges) {
        if (infoStream != null)
          message("now abort running merge " + merge.segString(directory));
        merge.abort();
      }

      // These merges periodically check whether they have
      // been aborted, and stop if so.  We wait here to make
      // sure they all stop.  It should not take very long
      // because the merge threads periodically check if
      // they are aborted.
      while(runningMerges.size() > 0) {
        if (infoStream != null)
          message("now wait for " + runningMerges.size() + " running merge to abort");
        doWait();
      }

      stopMerges = false;
      notifyAll();

      assert 0 == mergingSegments.size();

      if (infoStream != null)
        message("all running merges have aborted");

    } else {
      // waitForMerges() will ensure any running addIndexes finishes.
      // It's fine if a new one attempts to start because from our
      // caller above the call will see that we are in the
      // process of closing, and will throw an
      // AlreadyClosedException.
      waitForMerges();
    }
  }

  /**
   * Wait for any currently outstanding merges to finish.
   *
   * <p>It is guaranteed that any merges started prior to calling this method
   *    will have completed once this method completes.</p>
   */
  public synchronized void waitForMerges() {
    ensureOpen(false);
    if (infoStream != null) {
      message("waitForMerges");
    }
    while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
      doWait();
    }

    // sanity check
    assert 0 == mergingSegments.size();

    if (infoStream != null) {
      message("waitForMerges done");
    }
  }

  /**
   * Called whenever the SegmentInfos has been updated and
   * the index files referenced exist (correctly) in the
   * index directory.
   */
  synchronized void checkpoint() throws IOException {
    changeCount++;
    segmentInfos.changed();
    deleter.checkpoint(segmentInfos, false);
  }

  private synchronized void resetMergeExceptions() {
    mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  }

  private void noDupDirs(Directory... dirs) {
    HashSet<Directory> dups = new HashSet<Directory>();
    for (Directory dir : dirs) {
      if (dups.contains(dir))
        throw new IllegalArgumentException("Directory " + dir + " appears more than once");
      if (dir == directory)
        throw new IllegalArgumentException("Cannot add directory to itself");
      dups.add(dir);
    }
  }

  /**
   * @deprecated use {@link #addIndexes(Directory...)} instead
   */
  @Deprecated
  public void addIndexesNoOptimize(Directory... dirs)
      throws CorruptIndexException, IOException {
    addIndexes(dirs);
  }

  /**
   * Adds all segments from an array of indexes into this index.
   *
   * <p>This may be used to parallelize batch indexing.  A large document
   * collection can be broken into sub-collections.  Each sub-collection can be
   * indexed in parallel, on a different thread, process or machine.  The
   * complete index can then be created by merging sub-collection indexes
   * with this method.
   *
   * <p>
   * <b>NOTE:</b> the index in each {@link Directory} must not be
   * changed (opened by a writer) while this method is
   * running.  This method does not acquire a write lock in
   * each input Directory, so it is up to the caller to
   * enforce this.
   *
   * <p>This method is transactional in how Exceptions are
   * handled: it does not commit a new segments_N file until
   * all indexes are added.  This means if an Exception
   * occurs (for example disk full), then either no indexes
   * will have been added or they all will have been.
   *
   * <p>Note that this requires temporary free space in the
   * {@link Directory} up to 2X the sum of all input indexes
   * (including the starting index).  If readers/searchers
   * are open against the starting index, then temporary
   * free space required will be higher by the size of the
   * starting index (see {@link #forceMerge(int)} for details).
   *
   * <p>
   * <b>NOTE:</b> this method only copies the segments of the incoming indexes
   * and does not merge them. Therefore deleted documents are not removed and
   * the new segments are not merged with the existing ones. Also, if the merge
   * policy allows compound files, then any segment that is not compound is
   * converted to such. However, if the segment is compound, it is copied as-is
   * even if the merge policy does not allow compound files.
   *
   * <p>This requires this index not be among those to be added.
   *
   * <p>
   * <b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
    ensureOpen();

    noDupDirs(dirs);

    try {
      if (infoStream != null)
        message("flush at addIndexes(Directory...)");
      flush(false, true);

      int docCount = 0;
      List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
      Comparator<String> versionComparator = StringHelper.getVersionComparator();
      for (Directory dir : dirs) {
        if (infoStream != null) {
          message("addIndexes: process directory " + dir);
        }
        SegmentInfos sis = new SegmentInfos(); // read infos from dir
        sis.read(dir);
        final Set<String> dsFilesCopied = new HashSet<String>();
        final Map<String, String> dsNames = new HashMap<String, String>();
        for (SegmentInfo info : sis) {
          assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;

          docCount += info.docCount;
          String newSegName = newSegmentName();
          String dsName = info.getDocStoreSegment();

          if (infoStream != null) {
            message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
          }

          // create CFS only if the source segment is not CFS, and MP agrees it
          // should be CFS.
          boolean createCFS;
          synchronized (this) { // Guard segmentInfos
            createCFS = !info.getUseCompoundFile()
                && mergePolicy.useCompoundFile(segmentInfos, info)
                // optimize case only for segments that don't share doc stores
                && versionComparator.compare(info.getVersion(), "3.1") >= 0;
          }

          if (createCFS) {
            copySegmentIntoCFS(info, newSegName);
          } else {
            copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
          }

          infos.add(info);
        }
      }

      synchronized (this) {
        ensureOpen();
        segmentInfos.addAll(infos);
        checkpoint();
      }

    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addIndexes(Directory...)");
    }
  }

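  // Illustrative sketch: merging per-shard indexes built in parallel into one
  // index.  "shard1Dir" and "shard2Dir" are assumed to hold finished,
  // writer-free indexes.
  //
  //   writer.addIndexes(shard1Dir, shard2Dir);
  //   writer.commit();
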
  /**
   * Merges the provided indexes into this index. This method is useful
   * if you use extensions of {@link IndexReader}. Otherwise, using
   * {@link #addIndexes(Directory...)} is highly recommended for performance
   * reasons. It uses the {@link MergeScheduler} and {@link MergePolicy} set
   * on this writer, which may perform merges in parallel.
   *
   * <p>The provided IndexReaders are not closed.
   *
   * <p><b>NOTE:</b> this method does not merge the current segments,
   * only the incoming ones.
   *
   * <p>See {@link #addIndexes(Directory...)} for details on transactional
   * semantics, temporary free space required in the Directory,
   * and non-CFS segments on an Exception.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
    ensureOpen();

    try {
      if (infoStream != null)
        message("flush at addIndexes(IndexReader...)");
      flush(false, true);

      String mergedName = newSegmentName();
      // TODO: somehow we should fix this merge so it's
      // abortable so that IW.close(false) is able to stop it
      SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
                                               mergedName, null, payloadProcessorProvider,
                                               ((FieldInfos) docWriter.getFieldInfos().clone()));

      for (IndexReader reader : readers)      // add new indexes
        merger.add(reader);

      int docCount = merger.merge();                // merge 'em

      SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
                                         false, true,
                                         merger.fieldInfos().hasProx(),
                                         merger.fieldInfos().hasVectors());
      setDiagnostics(info, "addIndexes(IndexReader...)");

      boolean useCompoundFile;
      synchronized(this) { // Guard segmentInfos
        if (stopMerges) {
          deleter.deleteNewFiles(info.files());
          return;
        }
        ensureOpen();
        useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
      }

      // Now create the compound file if needed
      if (useCompoundFile) {
        merger.createCompoundFile(mergedName + ".cfs", info);

        // delete new non cfs files directly: they were never
        // registered with IFD
        synchronized(this) {
          deleter.deleteNewFiles(info.files());
        }
        info.setUseCompoundFile(true);
      }

      // Register the new segment
      synchronized(this) {
        if (stopMerges) {
          deleter.deleteNewFiles(info.files());
          return;
        }
        ensureOpen();
        segmentInfos.add(info);
        checkpoint();
      }

    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addIndexes(IndexReader...)");
    }
  }

  /** Copies the segment into the IndexWriter's directory, as a compound segment. */
  private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
    String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
    Collection<String> files = info.files();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
    for (String file : files) {
      String newFileName = segName + IndexFileNames.stripSegmentName(file);
      if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
          && !IndexFileNames.isSeparateNormsFile(file)) {
        cfsWriter.addFile(file, info.dir);
      } else {
        assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
        info.dir.copy(directory, file, newFileName);
      }
    }

    // Create the .cfs
    cfsWriter.close();

    info.dir = directory;
    info.name = segName;
    info.setUseCompoundFile(true);
  }

  /** Copies the segment files as-is into the IndexWriter's directory. */
  private void copySegmentAsIs(SegmentInfo info, String segName,
      Map<String, String> dsNames, Set<String> dsFilesCopied)
      throws IOException {
    // Determine if the doc store of this segment needs to be copied. It's
    // only relevant for segments that share doc store with others,
    // because the DS might have been copied already, in which case we
    // just want to update the DS name of this SegmentInfo.
    // NOTE: pre-3x segments include a null DSName if they don't share doc
    // store. The following code ensures we don't accidentally insert
    // 'null' to the map.
    String dsName = info.getDocStoreSegment();
    final String newDsName;
    if (dsName != null) {
      if (dsNames.containsKey(dsName)) {
        newDsName = dsNames.get(dsName);
      } else {
        dsNames.put(dsName, segName);
        newDsName = segName;
      }
    } else {
      newDsName = segName;
    }

    // Copy the segment files
    for (String file: info.files()) {
      final String newFileName;
      if (IndexFileNames.isDocStoreFile(file)) {
        newFileName = newDsName + IndexFileNames.stripSegmentName(file);
        if (dsFilesCopied.contains(newFileName)) {
          continue;
        }
        dsFilesCopied.add(newFileName);
      } else {
        newFileName = segName + IndexFileNames.stripSegmentName(file);
      }

      assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
      info.dir.copy(directory, file, newFileName);
    }

    info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
    info.dir = directory;
    info.name = segName;
  }

  /**
   * A hook for extending classes to execute operations after pending added and
   * deleted documents have been flushed to the Directory but before the change
   * is committed (new segments_N file written).
   */
  protected void doAfterFlush() throws IOException {}

  /**
   * A hook for extending classes to execute operations before pending added and
   * deleted documents are flushed to the Directory.
   */
  protected void doBeforeFlush() throws IOException {}

  /** Expert: prepare for commit.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @see #prepareCommit(Map) */
  public final void prepareCommit() throws CorruptIndexException, IOException {
    ensureOpen();
    prepareCommit(null);
  }

  /** <p>Expert: prepare for commit, specifying
   *  commitUserData Map (String -> String).  This does the
   *  first phase of 2-phase commit. This method does all
   *  steps necessary to commit changes since this writer
   *  was opened: flushes pending added and deleted docs,
   *  syncs the index files, writes most of next segments_N
   *  file.  After calling this you must call either {@link
   *  #commit()} to finish the commit, or {@link
   *  #rollback()} to revert the commit and undo all changes
   *  done since the writer was opened.</p>
   *
   *  You can also just call {@link #commit(Map)} directly
   *  without prepareCommit first in which case that method
   *  will internally call prepareCommit.
   *
   *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   *  you should immediately close the writer.  See <a
   *  href="#OOME">above</a> for details.</p>
   *
   *  @param commitUserData Opaque Map (String->String)
   *  that's recorded into the segments file in the index,
   *  and retrievable by {@link
   *  IndexReader#getCommitUserData}.  Note that when
   *  IndexWriter commits itself during {@link #close}, the
   *  commitUserData is unchanged (just carried over from
   *  the prior commit).  If this is null then the previous
   *  commitUserData is kept.  Also, the commitUserData will
   *  only "stick" if there are actually changes in the
   *  index to commit.
   */
  public final void prepareCommit(Map<String, String> commitUserData)
      throws CorruptIndexException, IOException {
    ensureOpen(false);

    if (hitOOM) {
      throw new IllegalStateException(
          "this writer hit an OutOfMemoryError; cannot commit");
    }

    if (pendingCommit != null)
      throw new IllegalStateException(
          "prepareCommit was already called with no corresponding call to commit");

    if (infoStream != null)
      message("prepareCommit: flush");

    boolean anySegmentsFlushed = false;
    SegmentInfos toCommit = null;
    boolean success = false;
    try {
      try {
        synchronized (this) {
          anySegmentsFlushed = doFlush(true);
          readerPool.commit(segmentInfos);
          toCommit = (SegmentInfos) segmentInfos.clone();
          pendingCommitChangeCount = changeCount;
          // This protects the segmentInfos we are now going
          // to commit.  This is important in case, eg, while
          // we are trying to sync all referenced files, a
          // merge completes which would otherwise have
          // removed the files we are now syncing.
          deleter.incRef(toCommit, false);
        }
        success = true;
      } finally {
        if (!success && infoStream != null) {
          message("hit exception during prepareCommit");
        }
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "prepareCommit");
    }

    success = false;
    try {
      if (anySegmentsFlushed) {
        maybeMerge();
      }
      success = true;
    } finally {
      if (!success) {
        synchronized (this) {
          deleter.decRef(toCommit);
        }
      }
    }

    startCommit(toCommit, commitUserData);
  }

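  // Illustrative sketch of the two-phase commit protocol this method enables,
  // coordinating Lucene with another transactional resource ("db" is
  // hypothetical):
  //
  //   writer.prepareCommit();     // phase 1: index changes synced, not yet visible
  //   try {
  //     db.prepareCommit();       // phase 1 on the other resource
  //     writer.commit();          // phase 2: make the index changes visible
  //     db.commit();
  //   } catch (Exception e) {
  //     writer.rollback();        // abandon the prepared commit
  //   }
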
// Used only by commit, below; lock order is commitLock -> IW
3457
private final Object commitLock = new Object();
3460
* <p>Commits all pending changes (added & deleted
3461
* documents, segment merges, added
3462
* indexes, etc.) to the index, and syncs all referenced
3463
* index files, such that a reader will see the changes
3464
* and the index updates will survive an OS or machine
3465
* crash or power loss. Note that this does not wait for
3466
* any running background merges to finish. This may be a
3467
* costly operation, so you should test the cost in your
3468
* application and do it only when really necessary.</p>
3470
* <p> Note that this operation calls Directory.sync on
3471
* the index files. That call should not return until the
3472
* file contents & metadata are on stable storage. For
3473
* FSDirectory, this calls the OS's fsync. But, beware:
3474
* some hardware devices may in fact cache writes even
3475
* during fsync, and return before the bits are actually
3476
* on stable storage, to give the appearance of faster
3477
* performance. If you have such a device, and it does
3478
* not have a battery backup (for example) then on power
3479
* loss it may still lose data. Lucene cannot guarantee
3480
* consistency on such devices. </p>
3482
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3483
* you should immediately close the writer. See <a
3484
* href="#OOME">above</a> for details.</p>
* @see #prepareCommit
*/
public final void commit() throws CorruptIndexException, IOException {
commit(null);
}

/** Commits all changes to the index, specifying a
* commitUserData Map (String -> String). This just
* calls {@link #prepareCommit(Map)} (if you didn't
* already call it) and then {@link #finishCommit}.
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
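*
* <p>A hedged example (the key and value are purely
* illustrative): an application can record how far it has
* consumed an external event stream, so that after a crash
* it knows where to resume:</p>
*
* <pre>
* Map&lt;String,String&gt; userData = new HashMap&lt;String,String&gt;();
* userData.put("lastProcessedId", "12345");
* writer.commit(userData);
* </pre>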
*/
public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {

commitInternal(commitUserData);
}

private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {

if (infoStream != null) {
message("commit: start");
}

synchronized(commitLock) {
if (infoStream != null) {
message("commit: enter lock");
}

if (pendingCommit == null) {
if (infoStream != null) {
message("commit: now prepare");
}
prepareCommit(commitUserData);
} else if (infoStream != null) {
message("commit: already prepared");
}

private synchronized final void finishCommit() throws CorruptIndexException, IOException {

if (pendingCommit != null) {

if (infoStream != null)
message("commit: pendingCommit != null");
pendingCommit.finishCommit(directory);
if (infoStream != null)
message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
rollbackSegments = pendingCommit.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true);

// Matches the incRef done in startCommit:
deleter.decRef(pendingCommit);
pendingCommit = null;

} else if (infoStream != null) {
message("commit: pendingCommit == null; skip");
}

if (infoStream != null) {
message("commit: done");
}
/** NOTE: flushDocStores is ignored now (hardwired to
* true); this method is only here for backwards
* compatibility. */
protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
flush(triggerMerge, flushDeletes);
}

/**
* Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
* @param applyAllDeletes whether pending deletes should also
* be applied
*/
protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {

// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
// in turn can take a long time to run and we don't want
// to hold the lock for that. In the case of
// ConcurrentMergeScheduler this can lead to deadlock
// when it stalls due to too many running merges.

// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}

// TODO: this method should not have to be entirely
// synchronized, ie, merges should be allowed to commit
// even while a flush is happening
private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
assert testPoint("startDoFlush");

// We may be flushing because it was triggered by doc
// count, del count, ram usage (in which case flush
// pending is already set), or we may be flushing
// due to external event eg getReader or commit is
// called (in which case we now set it, and this will
// pause all threads):
flushControl.setFlushPendingNoWait("explicit flush");

boolean success = false;

if (infoStream != null) {
message(" start flush: applyAllDeletes=" + applyAllDeletes);
message(" index before flush " + segString());
}

final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
if (newSegment != null) {
setDiagnostics(newSegment, "flush");
segmentInfos.add(newSegment);
}

if (!applyAllDeletes) {
// If deletes alone are consuming > 1/2 our RAM
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
if (applyAllDeletes) {
if (infoStream != null) {
message("apply all deletes during flush");
}

flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream != null) {
message("drop 100% deleted segments: " + result.allDeleted);
}
for (SegmentInfo info : result.allDeleted) {
// If a merge has already registered for this
// segment, we leave it in the readerPool; the
// merge will skip merging it and will then drop
// it once it's done:
if (!mergingSegments.contains(info)) {
segmentInfos.remove(info);
if (readerPool != null) {
readerPool.drop(info);

bufferedDeletesStream.prune(segmentInfos);

assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
} else if (infoStream != null) {
message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}

flushCount.incrementAndGet();

return newSegment != null;

} catch (OutOfMemoryError oom) {
handleOOM(oom, "doFlush");
} finally {
flushControl.clearFlushPending();
if (!success && infoStream != null)
message("hit exception during flush");
}

/** Expert: Return the total size of all index files currently cached in memory.
* Useful when managing flushing by RAM usage. */
public final long ramSizeInBytes() {
return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
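// A hedged caller-side sketch (the writer variable is
// illustrative): applications that manage flushing manually
// can poll this value, e.g.:
//
//   if (writer.ramSizeInBytes() > 32 * 1024 * 1024) {
//     writer.commit(); // flush once ~32 MB is buffered
//   }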
/** Expert: Return the number of documents currently
* buffered in RAM. */
public final synchronized int numRamDocs() {
return docWriter.getNumDocs();

private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);

/** Carefully merges deletes for the segments we just
* merged. This is tricky because, although merging will
* clear all deletes (compacts the documents), new
* deletes may have been flushed to the segments since
* the merge was started. This method "carries over"
* such new deletes onto the newly merged segment, and
* saves the resulting deletes file (incrementing the
* delete generation for merge.info). If no deletes were
* flushed, no new deletes file is saved. */
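// Worked example of the carry-over (all numbers are
// illustrative): merging segment A (100 docs, 10 deleted at
// merge start) with segment B (50 docs, none deleted) yields
// a merged segment whose first 90 docs come from A. If a
// delete of B's doc 7 is flushed while the merge runs, it
// must be re-applied to the merged segment at doc 90 + 7 = 97.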
synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {

assert testPoint("startCommitMergeDeletes");

final List<SegmentInfo> sourceSegments = merge.segments;

if (infoStream != null)
message("commitMergeDeletes " + merge.segString(directory));

// Carefully merge deletes that occurred after we
// started merging:
int docUpto = 0;
int delCount = 0;
long minGen = Long.MAX_VALUE;

for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
final SegmentReader previousReader = merge.readerClones.get(i);
if (previousReader == null) {
// Reader was skipped because it was 100% deletions
continue;
}
final SegmentReader currentReader = merge.readers.get(i);
if (previousReader.hasDeletions()) {

// There were deletes on this segment when the merge
// started. The merge has collapsed away those
// deletes, but, if new deletes were flushed since
// the merge started, we must now carefully keep any
// newly flushed deletes, mapping them to the new
// docIDs:

if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
// This means this segment has had new deletes
// committed since we started the merge, so we
// must merge them:
for(int j=0;j<docCount;j++) {
if (previousReader.isDeleted(j))
assert currentReader.isDeleted(j);

if (currentReader.isDeleted(j)) {
mergedReader.doDelete(docUpto);

docUpto += docCount - previousReader.numDeletedDocs();

} else if (currentReader.hasDeletions()) {
// This segment had no deletes before but now it
// does:
for(int j=0; j<docCount; j++) {
if (currentReader.isDeleted(j)) {
mergedReader.doDelete(docUpto);

// No deletes before or after
docUpto += info.docCount;

assert mergedReader.numDeletedDocs() == delCount;

mergedReader.hasChanges = delCount > 0;

// If new deletes were applied while we were merging
// (which happens if eg commit() or getReader() is
// called during our merge), then it better be the case
// that the delGen has increased for all our merged
// segments:
assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();

mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {

assert testPoint("startCommitMerge");

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
}

if (infoStream != null)
message("commitMerge: " + merge.segString(directory) + " index=" + segString());

assert merge.registerDone;

// If merge was explicitly aborted, or, if rollback() or
// rollbackTransaction() had been called since our merge
// started (which results in an unqualified
// deleter.refresh() call that will remove any index
// file that current segments does not reference), we
// abort this merge:
if (merge.isAborted()) {
if (infoStream != null)
message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");

commitMergedDeletes(merge, mergedReader);

// If the doc store we are using has been closed and
// is now in compound format (but wasn't when we
// started), then we will switch to the compound
// format as well:

assert !segmentInfos.contains(merge.info);

final boolean allDeleted = mergedReader.numDocs() == 0;

if (infoStream != null && allDeleted) {
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}

final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
segmentInfos.applyMergeChanges(merge, dropSegment);

readerPool.drop(merge.info);

if (infoStream != null) {
message("after commit: " + segString());
}

closeMergeReaders(merge, false);

// Must note the change to segmentInfos so any commits
// in-flight don't lose it:
checkpoint();

// If the merged segments had pending changes, clear
// them so that they don't bother writing them to
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);

if (merge.maxNumSegments != -1) {
// cascade the forceMerge:
if (!segmentsToMerge.containsKey(merge.info)) {
segmentsToMerge.put(merge.info, Boolean.FALSE);
final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {

if (infoStream != null) {
message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
}

// Set the exception on the merge, so if
// forceMerge is waiting on us it sees the root
// cause exc:
merge.setException(t);
addMergeException(merge);

if (t instanceof MergePolicy.MergeAbortedException) {
// We can ignore this exception (it happens when
// close(false) or rollback is called), unless the
// merge involves segments from external directories,
// in which case we must throw it so, for example, the
// rollbackTransaction code in addIndexes* is
// executed.
if (merge.isExternal)
throw (MergePolicy.MergeAbortedException) t;
} else if (t instanceof IOException)
throw (IOException) t;
else if (t instanceof RuntimeException)
throw (RuntimeException) t;
else if (t instanceof Error)
throw (Error) t;
else
// Should not get here
throw new RuntimeException(t);
/**
* Merges the indicated segments, replacing them in the stack with a
* new segment.
*
* @lucene.experimental
*/
public void merge(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {

boolean success = false;

final long t0 = System.currentTimeMillis();
//System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());

if (infoStream != null)
message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());

mergeSuccess(merge);

} catch (Throwable t) {
handleMergeException(t, merge);

synchronized(this) {
if (infoStream != null)
message("hit exception during merge");
if (merge.info != null && !segmentInfos.contains(merge.info))
deleter.refresh(merge.info.name);

// This merge (and, generally, any change to the
// segments) may now enable new merges, so we call
// merge policy & update pending merges.
if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
updatePendingMerges(merge.maxNumSegments);
}

} catch (OutOfMemoryError oom) {
handleOOM(oom, "merge");
}
if (infoStream != null && merge.info != null) {
message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
}
//System.out.println(Thread.currentThread().getName() + ": merge end");
/** Hook that's called when the specified merge is complete. */
void mergeSuccess(MergePolicy.OneMerge merge) {
}

/** Checks whether this merge involves any segments
* already participating in a merge. If not, this merge
* is "registered", meaning we record that its segments
* are now participating in a merge, and true is
* returned. Else (the merge conflicts) false is
* returned. */
final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {

if (merge.registerDone)
return true;

if (stopMerges) {
merge.abort();
throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
}

boolean isExternal = false;
for(SegmentInfo info : merge.segments) {
if (mergingSegments.contains(info)) {
return false;
}
if (!segmentInfos.contains(info)) {
return false;
}
if (info.dir != directory) {
isExternal = true;
}
if (segmentsToMerge.containsKey(info)) {
merge.maxNumSegments = mergeMaxNumSegments;
}
}

ensureValidMerge(merge);

pendingMerges.add(merge);

if (infoStream != null)
message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");

merge.mergeGen = mergeGen;
merge.isExternal = isExternal;

// OK it does not conflict; now record that this merge
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
message("registerMerge merging=" + mergingSegments);
for(SegmentInfo info : merge.segments) {
message("registerMerge info=" + info);
mergingSegments.add(info);
}

// Merge is now registered
merge.registerDone = true;
/** Does initial setup for a merge, which is fast but holds
* the synchronized lock on the IndexWriter instance. */
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;

if (infoStream != null) {
message("hit exception in mergeInit");
}

synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {

assert testPoint("startMergeInit");

assert merge.registerDone;
assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}

// TODO: is there any perf benefit to sorting
// merged segments? eg biggest to smallest?

if (merge.info != null)
// mergeInit already done
return;

if (merge.isAborted())
return;

boolean hasVectors = false;
for (SegmentInfo sourceSegment : merge.segments) {
if (sourceSegment.getHasVectors()) {
hasVectors = true;
}
}

// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names:
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, true, false, hasVectors);

// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);

if (result.anyDeletes) {

if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream != null) {
message("drop 100% deleted segments: " + result.allDeleted);
}
for(SegmentInfo info : result.allDeleted) {
segmentInfos.remove(info);
if (merge.segments.contains(info)) {
mergingSegments.remove(info);
merge.segments.remove(info);
}
}
if (readerPool != null) {
readerPool.drop(result.allDeleted);
}

merge.info.setBufferedDeletesGen(result.gen);

// Lock order: IW -> BD
bufferedDeletesStream.prune(segmentInfos);

Map<String,String> details = new HashMap<String,String>();
details.put("mergeMaxNumSegments", ""+merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
setDiagnostics(merge.info, "merge", details);

if (infoStream != null) {
message("merge seg=" + merge.info.name);
}

assert merge.estimatedMergeBytes == 0;
for(SegmentInfo info : merge.segments) {
if (info.docCount > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.docCount;
final double delRatio = ((double) delCount)/info.docCount;
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
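// e.g. a 100 MB segment with 25% of its docs deleted
// contributes an estimated 75 MB to the merged size.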
// TODO: I think this should no longer be needed (we
// now build CFS before adding segment to the infos);
// however, on removing it, tests fail for some reason!

// Also enroll the merged segment into mergingSegments;
// this prevents it from getting selected for a merge
// after our merge is done but while we are building the
// CFS:
mergingSegments.add(merge.info);
}

private void setDiagnostics(SegmentInfo info, String source) {
setDiagnostics(info, source, null);
}

private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
Map<String,String> diagnostics = new HashMap<String,String>();
diagnostics.put("source", source);
diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
diagnostics.put("os", Constants.OS_NAME);
diagnostics.put("os.arch", Constants.OS_ARCH);
diagnostics.put("os.version", Constants.OS_VERSION);
diagnostics.put("java.version", Constants.JAVA_VERSION);
diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
if (details != null) {
diagnostics.putAll(details);
}
info.setDiagnostics(diagnostics);
/** Does finishing for a merge, which is fast but holds
* the synchronized lock on the IndexWriter instance. */
final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {

// forceMerge, addIndexes or finishMerges may be waiting
// on merges to finish.
notifyAll();

// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
final List<SegmentInfo> sourceSegments = merge.segments;
for(SegmentInfo info : sourceSegments) {
mergingSegments.remove(info);
}
// TODO: if we remove the add in _mergeInit, we should
// also remove this:
mergingSegments.remove(merge.info);
merge.registerDone = false;
}

runningMerges.remove(merge);
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
final int numSegments = merge.readers.size();
Throwable th = null;

boolean anyChanges = false;
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
if (merge.readers.get(i) != null) {
anyChanges |= readerPool.release(merge.readers.get(i), drop);
} catch (Throwable t) {

merge.readers.set(i, null);

if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {

merge.readerClones.get(i).close();
} catch (Throwable t) {

// This was a private clone and we had the
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
merge.readerClones.set(i, null);

if (suppressExceptions && anyChanges) {

// If any error occurred, throw it.
if (!suppressExceptions && th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
/** Does the actual (time-consuming) work of the merge,
* but without holding the synchronized lock on the
* IndexWriter instance. */
final private int mergeMiddle(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {

merge.checkAborted(directory);

final String mergedName = merge.info.name;

int mergedDocCount = 0;

List<SegmentInfo> sourceSegments = merge.segments;

SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
payloadProcessorProvider,
((FieldInfos) docWriter.getFieldInfos().clone()));

if (infoStream != null) {
message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getHasVectors());
}

merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();

// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;

int totDocCount = 0;
int segUpto = 0;
while(segUpto < sourceSegments.size()) {
final SegmentInfo info = sourceSegments.get(segUpto);

// Hold onto the "live" reader; we will use this to
// commit merged deletes
final SegmentReader reader = readerPool.get(info, true,
MERGE_READ_BUFFER_SIZE,
-config.getReaderTermsIndexDivisor());
merge.readers.add(reader);

// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
final SegmentReader clone = (SegmentReader) reader.clone(true);
merge.readerClones.add(clone);

if (clone.numDocs() > 0) {
merger.add(clone);
totDocCount += clone.numDocs();
}
segUpto++;
}

if (infoStream != null) {
message("merge: total " + totDocCount + " docs");
}

merge.checkAborted(directory);

// This is where all the work happens:
mergedDocCount = merge.info.docCount = merger.merge();

// LUCENE-3403: set hasVectors after merge(), so that it is properly set.
merge.info.setHasVectors(merger.fieldInfos().hasVectors());

assert mergedDocCount == totDocCount;

if (infoStream != null) {
message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
}

anyNonBulkMerges |= merger.getAnyNonBulkMerges();

assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
// Very important to do this before opening the reader
// because SegmentReader must know if prox was written for
// this segment:
merge.info.setHasProx(merger.fieldInfos().hasProx());

boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}

if (useCompoundFile) {

final String compoundFileName = IndexFileNames.segmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);

if (infoStream != null) {
message("create compound file " + compoundFileName);
}
merger.createCompoundFile(compoundFileName, merge.info);

} catch (IOException ioe) {
synchronized(this) {
if (merge.isAborted()) {
// This can happen if rollback or close(false)
// is called -- fall through to logic below to
// remove the partially created CFS:
handleMergeException(ioe, merge);

} catch (Throwable t) {
handleMergeException(t, merge);

if (infoStream != null) {
message("hit exception creating compound file during merge");
}
synchronized(this) {
deleter.deleteFile(compoundFileName);
deleter.deleteNewFiles(merge.info.files());

synchronized(this) {

// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(merge.info.files());

if (merge.isAborted()) {
if (infoStream != null) {
message("abort merge after building CFS");
}
deleter.deleteFile(compoundFileName);

merge.info.setUseCompoundFile(true);

if (infoStream != null) {
message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();

final int termsIndexDivisor;
final boolean loadDocStores;

if (mergedSegmentWarmer != null) {
// Load terms index & doc stores so the segment
// warmer can run searches, load documents/term
// vectors:
termsIndexDivisor = config.getReaderTermsIndexDivisor();
loadDocStores = true;
} else {
termsIndexDivisor = -1;
loadDocStores = false;
}

// TODO: in the non-realtime case, we may want to only
// keep deletes (it's costly to open entire reader
// when we just need deletes)

final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);

if (poolReaders && mergedSegmentWarmer != null) {
mergedSegmentWarmer.warm(mergedReader);
}

if (!commitMerge(merge, mergedReader)) {
// commitMerge will return false if this merge was aborted
return 0;
}

synchronized(this) {
if (readerPool.release(mergedReader)) {
// Must checkpoint after releasing the
// mergedReader since it may have written a new
// deletes file:
checkpoint();
}
}

// Readers are already closed in commitMerge if we didn't hit
// an exc:
closeMergeReaders(merge, true);

return mergedDocCount;
synchronized void addMergeException(MergePolicy.OneMerge merge) {
assert merge.getException() != null;
if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
mergeExceptions.add(merge);
}

// For test purposes.
final int getBufferedDeleteTermsSize() {
return docWriter.getPendingDeletes().terms.size();
}

// For test purposes.
final int getNumBufferedDeleteTerms() {
return docWriter.getPendingDeletes().numTermDeletes.get();
}

// utility routines for tests
synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}

/** @lucene.internal */
public synchronized String segString() throws IOException {
return segString(segmentInfos);
}

/** @lucene.internal */
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfo s : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(s));
}
return buffer.toString();
}

/** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info);
try {
if (reader != null) {
buffer.append(reader.toString());
} else {
buffer.append(info.toString(directory, 0));
if (info.dir != directory) {
buffer.append("**");
}
}
} finally {
if (reader != null) {
readerPool.release(reader);
}
}
return buffer.toString();
private synchronized void doWait() {
// NOTE: the callers of this method should in theory
// be able to do simply wait(), but, as a defense
// against thread timing hazards where notifyAll()
// fails to be called, we wait for at most 1 second
// and then return so caller can check if wait
// conditions are satisfied:
try {
wait(1000);
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}

private boolean keepFullyDeletedSegments;

/** Only for testing.
*
* @lucene.internal */
void keepFullyDeletedSegments() {
keepFullyDeletedSegments = true;
}

boolean getKeepFullyDeletedSegments() {
return keepFullyDeletedSegments;
}

// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
for(final String fileName: files) {
assert directory.fileExists(fileName): "file " + fileName + " does not exist";
// If this trips it means we are missing a call to
// .checkpoint somewhere, because by the time we
// are called, deleter should know about every
// file referenced by the current head
// segmentInfos:
assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
}
return true;
}

/** Walk through all files referenced by the current
* segmentInfos and ask the Directory to sync each file,
* if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit
* it. */
private void startCommit(SegmentInfos toSync, Map<String,String> commitUserData) throws IOException {

assert testPoint("startStartCommit");
assert pendingCommit == null;

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}

try {

if (infoStream != null)
message("startCommit(): start");

synchronized(this) {

assert lastCommitChangeCount <= changeCount;

if (pendingCommitChangeCount == lastCommitChangeCount) {
if (infoStream != null) {
message(" skip startCommit(): no changes pending");
}
deleter.decRef(toSync);
return;
}

// First, we clone & incref the segmentInfos we intend
// to sync, then, without locking, we sync() all files
// referenced by toSync, in the background.

if (infoStream != null)
message("startCommit index=" + segString(toSync) + " changeCount=" + changeCount);

assert filesExist(toSync);

if (commitUserData != null) {
toSync.setUserData(commitUserData);
}
}

assert testPoint("midStartCommit");

boolean pendingCommitSet = false;

try {
// This call can take a long time -- 10s of seconds
// or more. We do it without sync:
directory.sync(toSync.files(directory, false));

assert testPoint("midStartCommit2");

synchronized(this) {

assert pendingCommit == null;

assert segmentInfos.getGeneration() == toSync.getGeneration();

// Exception here means nothing is prepared
// (this method unwinds everything it did on
// an exception)
toSync.prepareCommit(directory);

pendingCommitSet = true;
pendingCommit = toSync;
}

if (infoStream != null) {
message("done all syncs");
}

assert testPoint("midStartCommitSuccess");

} finally {
synchronized(this) {

// Have our master segmentInfos record the
// generations we just prepared. We do this
// on error or success so we don't
// double-write a segments_N file.
segmentInfos.updateGeneration(toSync);

if (!pendingCommitSet) {
if (infoStream != null) {
message("hit exception committing segments file");
}

deleter.decRef(toSync);
}
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "startCommit");
}
assert testPoint("finishStartCommit");
/**
* Returns <code>true</code> iff the index in the named directory is
* currently locked.
* @param directory the directory to check for a lock
* @throws IOException if there is a low-level IO error
*/
public static boolean isLocked(Directory directory) throws IOException {
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}

/**
* Forcibly unlocks the index in the named directory.
* <p>
* Caution: this should only be used by failure recovery code,
* when it is known that no other process nor thread is in fact
* currently accessing this index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
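/* A hedged recovery-time sketch, safe only when no other
 * process or thread can be writing to this index:
 *
 *   if (IndexWriter.isLocked(directory)) {
 *     IndexWriter.unlock(directory); // clear a stale write lock
 *   }
 */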
/**
* Specifies maximum field length (in number of tokens/terms) in
* {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
* the value set by the constructor.
*
* @deprecated use {@link LimitTokenCountAnalyzer} instead.
*/
public static final class MaxFieldLength {

private int limit;
private String name;

/**
* Private type-safe-enum-pattern constructor.
*
* @param name instance name
* @param limit maximum field length
*/
private MaxFieldLength(String name, int limit) {
this.name = name;
this.limit = limit;
}

/**
* Public constructor to allow users to specify the maximum field size limit.
*
* @param limit The maximum field length
*/
public MaxFieldLength(int limit) {
this("User-specified", limit);
}

public int getLimit() {
return limit;
}

public String toString()
{
return name + ":" + limit;
}

/** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
public static final MaxFieldLength UNLIMITED
= new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);

/**
* Sets the maximum field length to
* {@link #DEFAULT_MAX_FIELD_LENGTH}
*/
public static final MaxFieldLength LIMITED
= new MaxFieldLength("LIMITED", 10000);
/** If {@link #getReader} has been called (ie, this writer
* is in near real-time mode), then after a merge
* completes, this class can be invoked to warm the
* reader on the newly merged segment, before the merge
* commits. This is not required for near real-time
* search, but will reduce search latency on opening a
* new near real-time reader after a merge completes.
*
* @lucene.experimental
*
* <p><b>NOTE</b>: warm is called before any deletes have
* been carried over to the merged segment. */
public static abstract class IndexReaderWarmer {
public abstract void warm(IndexReader reader) throws IOException;
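/* A minimal warmer sketch (the config variable and the query
 * are illustrative): run a cheap search against the newly
 * merged segment so its data structures are loaded before the
 * merge commits.
 *
 *   config.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
 *     @Override
 *     public void warm(IndexReader reader) throws IOException {
 *       IndexSearcher searcher = new IndexSearcher(reader);
 *       try {
 *         searcher.search(new TermQuery(new Term("id", "0")), 1);
 *       } finally {
 *         searcher.close();
 *       }
 *     }
 *   });
 */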
/**
* Set the merged segment warmer. See {@link IndexReaderWarmer}.
*
* @deprecated use
* {@link IndexWriterConfig#setMergedSegmentWarmer}
* instead.
*/
public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
config.setMergedSegmentWarmer(warmer);
}

/**
* Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
*
* @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
*/
public IndexReaderWarmer getMergedSegmentWarmer() {
return config.getMergedSegmentWarmer();
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream != null) {
message("hit OutOfMemoryError inside " + location);
}
hitOOM = true;
throw oom;
}

// Used only by assert for testing. Current points:
// midStartCommitSuccess
// finishStartCommit
// startCommitMergeDeletes
// DocumentsWriter.ThreadState.init start
boolean testPoint(String name) {
return true;
}

synchronized boolean nrtIsCurrent(SegmentInfos infos) {
//System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}

synchronized boolean isClosed() {
return closed;
}

/** Expert: remove any index files that are no longer
* used.
*
* <p> IndexWriter normally deletes unused files itself,
* during indexing. However, on Windows, which disallows
* deletion of open files, if there is a reader open on
* the index then those files cannot be deleted. This is
* fine, because IndexWriter will periodically retry
* the deletion.</p>
*
* <p> However, IndexWriter doesn't try that often: only
* on open, close, flushing a new segment, and finishing
* a merge. If you don't do any of these actions with your
* IndexWriter, you'll see the unused files linger. If
* that's a problem, call this method to delete them
* (once you've closed the open readers that were
* preventing their deletion).
*
* <p> In addition, you can call this method to delete
* unreferenced index commits. This might be useful if you
* are using an {@link IndexDeletionPolicy} which holds
* onto index commits until some criteria are met, but those
* commits are no longer needed. Otherwise, those commits will
* be deleted the next time commit() is called.
*/
public synchronized void deleteUnusedFiles() throws IOException {
ensureOpen(false);
deleter.deletePendingFiles();
deleter.revisitPolicy();
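// Typical sequence (hedged sketch; reader and writer are
// illustrative): close the reader that was pinning old files,
// then ask the writer to retry the deletions:
//
//   reader.close();
//   writer.deleteUnusedFiles();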
// Called by DirectoryReader.doClose
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
}

/**
* Sets the {@link PayloadProcessorProvider} to use when merging payloads.
* Note that the given <code>pcp</code> will be invoked for every segment that
* is merged, not only external ones that are given through
* {@link #addIndexes}. If you want only the payloads of the external segments
* to be processed, you can return <code>null</code> whenever a
* {@link DirPayloadProcessor} is requested for the {@link Directory} of the
* {@link IndexWriter}.
* <p>
* The default is <code>null</code> which means payloads are processed
* normally (copied) during segment merges. You can also unset it by passing
* <code>null</code>.
* <p>
* <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
* immediately, potentially for already running merges too. If you want to be
* sure it is used for further operations only, such as {@link #addIndexes} or
* {@link #forceMerge}, you can call {@link #waitForMerges()} before.
*/
public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
ensureOpen();
payloadProcessorProvider = pcp;
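/* A hedged sketch of the "external segments only" pattern the
 * javadoc above describes, assuming this class's
 * getDirProcessor(Directory) hook; myDirProcessor is
 * hypothetical:
 *
 *   writer.setPayloadProcessorProvider(new PayloadProcessorProvider() {
 *     @Override
 *     public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
 *       // skip payload processing for this writer's own directory
 *       return dir == writer.getDirectory() ? null : myDirProcessor;
 *     }
 *   });
 */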
/**
* Returns the {@link PayloadProcessorProvider} that is used during segment
* merges to process payloads.
*/
public PayloadProcessorProvider getPayloadProcessorProvider() {
ensureOpen();
return payloadProcessorProvider;
}

// decides when flushes happen
final class FlushControl {

private boolean flushPending;
private boolean flushDeletes;
private int delCount;
private int docCount;
private boolean flushing;

private synchronized boolean setFlushPending(String reason, boolean doWait) {
if (flushPending || flushing) {
if (doWait) {
while(flushPending || flushing) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);

if (infoStream != null) {
message("now trigger flush reason=" + reason);
}
flushPending = true;
return flushPending;

public synchronized void setFlushPendingNoWait(String reason) {
setFlushPending(reason, false);
}

public synchronized boolean getFlushPending() {
return flushPending;
}

public synchronized boolean getFlushDeletes() {
return flushDeletes;
}

public synchronized void clearFlushPending() {
if (infoStream != null) {
message("clearFlushPending");
}
flushPending = false;
flushDeletes = false;
public synchronized void clearDeletes() {
flushDeletes = false;
}

public synchronized boolean waitUpdate(int docInc, int delInc) {
return waitUpdate(docInc, delInc, false);
}

public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
while(flushPending) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}

docCount += docInc;
delCount += delInc;

// skipWait is only used when a thread is BOTH adding
// a doc and buffering a del term, and the adding of
// the doc already triggered a flush
if (skipWait) {
return false;
}

final int maxBufferedDocs = config.getMaxBufferedDocs();
if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
docCount >= maxBufferedDocs) {
return setFlushPending("maxBufferedDocs", true);
}

final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
delCount >= maxBufferedDeleteTerms) {
flushDeletes = true;
return setFlushPending("maxBufferedDeleteTerms", true);
}

return flushByRAMUsage("add delete/doc");
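// Illustrative trigger values (examples only): with
// config.setMaxBufferedDocs(1000) a flush becomes pending at
// the 1000th buffered doc; with setMaxBufferedDeleteTerms(500)
// at the 500th buffered delete term; otherwise the decision
// falls through to the RAM check in flushByRAMUsage below.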
public synchronized boolean flushByRAMUsage(String reason) {
final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final long limit = (long) (ramBufferSizeMB*1024*1024);
long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {

// DocumentsWriter may be able to free up some
// RAM:
// Lock order: FC -> DW
docWriter.balanceRAM();

used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
return setFlushPending("ram full: " + reason, false);

final FlushControl flushControl = new FlushControl();