1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.Closeable;
22
import java.io.FileOutputStream;
23
import java.io.IOException;
24
import java.util.Collection;
26
import java.util.concurrent.atomic.AtomicInteger;
28
import org.apache.lucene.document.Document;
29
import org.apache.lucene.document.FieldSelector;
30
import org.apache.lucene.search.FieldCache; // javadocs
31
import org.apache.lucene.search.Similarity;
32
import org.apache.lucene.store.*;
33
import org.apache.lucene.util.ArrayUtil;
34
import org.apache.lucene.util.ReaderUtil; // for javadocs
35
import org.apache.lucene.util.VirtualMethod;
37
/** IndexReader is an abstract class, providing an interface for accessing an
38
index. Search of an index is done entirely through this abstract interface,
39
so that any subclass which implements it is searchable.
41
<p> Concrete subclasses of IndexReader are usually constructed with a call to
42
one of the static <code>open()</code> methods, e.g. {@link
43
#open(Directory, boolean)}.
45
<p> For efficiency, in this API documents are often referred to via
46
<i>document numbers</i>, non-negative integers which each name a unique
47
document in the index. These document numbers are ephemeral--they may change
48
as documents are added to and deleted from an index. Clients should thus not
49
rely on a given document having the same number between sessions.
51
<p> An IndexReader can be opened on a directory for which an IndexWriter is
52
opened already, but it cannot be used to delete documents from the index then.
55
<b>NOTE</b>: for backwards API compatibility, several methods are not listed
56
as abstract, but have no useful implementations in this base class and
57
instead always throw UnsupportedOperationException. Subclasses are
58
strongly encouraged to override these methods, but in many cases may not
64
<b>NOTE</b>: as of 2.4, it's possible to open a read-only
65
IndexReader using the static open methods that accept the
66
boolean readOnly parameter. Such a reader has better
67
concurrency as it's not necessary to synchronize on the
68
isDeleted method. You must specify false if you want to
69
make changes with the resulting IndexReader.
72
<a name="thread-safety"></a><p><b>NOTE</b>: {@link
73
IndexReader} instances are completely thread
74
safe, meaning multiple threads can call any of its methods,
75
concurrently. If your application requires external
76
synchronization, you should <b>not</b> synchronize on the
77
<code>IndexReader</code> instance; use your own
78
(non-Lucene) objects instead.
80
public abstract class IndexReader implements Cloneable,Closeable {
83
* A custom listener that's invoked when the IndexReader
86
* <p>For a SegmentReader, this listener is called only
87
* once all SegmentReaders sharing the same core are
88
* closed. At this point it is safe for apps to evict
89
* this reader from any caches keyed on {@link
90
* #getCoreCacheKey}. This is the same interface that
91
* {@link FieldCache} uses, internally, to evict
94
* <p>For other readers, this listener is called when they
97
* @lucene.experimental
99
public static interface ReaderFinishedListener {
100
/** Callback invoked with the finished reader; {@code notifyReaderFinishedListeners()} passes the notifying reader itself as the argument. */
public void finished(IndexReader reader);
103
// Impls must set this if they may call add/removeReaderFinishedListener:
104
protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
106
/** Expert: adds a {@link ReaderFinishedListener}. The
107
* provided listener is also added to any sub-readers, if
108
* this is a composite reader. Also, any reader reopened
109
* or cloned from this one will also copy the listeners at
110
* the time of reopen.
112
* @lucene.experimental */
113
public void addReaderFinishedListener(ReaderFinishedListener listener) {
115
readerFinishedListeners.add(listener);
118
/** Expert: remove a previously added {@link ReaderFinishedListener}.
120
* @lucene.experimental */
121
public void removeReaderFinishedListener(ReaderFinishedListener listener) {
123
readerFinishedListeners.remove(listener);
126
/**
 * Calls {@link ReaderFinishedListener#finished} with this reader on every
 * listener currently held in {@code readerFinishedListeners}. Does nothing
 * when that collection is {@code null}.
 */
protected void notifyReaderFinishedListeners() {
127
// Defensive (should never be null -- all impls must set
// readerFinishedListeners if they may add/remove listeners):
129
if (readerFinishedListeners != null) {
130
for(ReaderFinishedListener listener : readerFinishedListeners) {
131
listener.finished(this);
136
/** Notifies the registered {@link ReaderFinishedListener}s by delegating to {@link #notifyReaderFinishedListeners()}. */
protected void readerFinished() {
137
notifyReaderFinishedListeners();
141
* Constants describing field properties, for example used for
142
* {@link IndexReader#getFieldNames(FieldOption)}.
144
public static enum FieldOption {
147
/** All indexed fields */
149
/** All fields that store payloads */
151
/** All fields that omit term frequencies (tf) and positions */
152
OMIT_TERM_FREQ_AND_POSITIONS,
153
/** All fields that omit positions */
155
/** All fields which are not indexed */
157
/** All fields which are indexed with termvectors enabled */
158
INDEXED_WITH_TERMVECTOR,
159
/** All fields which are indexed but don't have termvectors enabled */
160
INDEXED_NO_TERMVECTOR,
161
/** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
163
/** All fields with termvectors with position values enabled */
164
TERMVECTOR_WITH_POSITION,
165
/** All fields with termvectors with offset values enabled */
166
TERMVECTOR_WITH_OFFSET,
167
/** All fields with termvectors with offset values and position values enabled */
168
TERMVECTOR_WITH_POSITION_OFFSET,
171
// NOTE(review): no read or write of this flag is visible nearby; presumably it
// records that the reader has been closed -- confirm against its users.
private volatile boolean closed;
172
// NOTE(review): presumably true while this reader holds uncommitted changes
// (deletions, norms) -- confirm; no assignment to it is visible nearby.
protected boolean hasChanges;
174
// Reference count: read by getRefCount() and ensureOpen(), raised by
// incRef()/tryIncRef(), lowered by decRef().
private final AtomicInteger refCount = new AtomicInteger();
176
// Terms index divisor passed along by the open(...) overloads that take no
// explicit termInfosIndexDivisor argument.
static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
178
/** Expert: returns the current refCount for this reader */
179
public int getRefCount() {
180
return refCount.get();
184
* Expert: increments the refCount of this IndexReader
185
* instance. RefCounts are used to determine when a
186
* reader can be closed safely, i.e. as soon as there are
187
* no more references. Be sure to always call a
188
* corresponding {@link #decRef}, in a finally clause;
189
* otherwise the reader may never be closed. Note that
190
* {@link #close} simply calls decRef(), which means that
191
* the IndexReader will not really be closed until {@link
192
* #decRef} has been called for all outstanding
198
public void incRef() {
200
refCount.incrementAndGet();
204
* Expert: increments the refCount of this IndexReader
205
* instance only if the IndexReader has not been closed yet
206
* and returns <code>true</code> iff the refCount was
207
* successfully incremented, otherwise <code>false</code>.
208
* If this method returns <code>false</code> the reader is either
209
* already closed or is currently being closed. Either way this
210
* reader instance shouldn't be used by an application unless
211
* <code>true</code> is returned.
213
* RefCounts are used to determine when a
214
* reader can be closed safely, i.e. as soon as there are
215
* no more references. Be sure to always call a
216
* corresponding {@link #decRef}, in a finally clause;
217
* otherwise the reader may never be closed. Note that
218
* {@link #close} simply calls decRef(), which means that
219
* the IndexReader will not really be closed until {@link
220
* #decRef} has been called for all outstanding
226
public boolean tryIncRef() {
228
// Compare-and-set retry loop: the increment is only attempted while the
// observed count is still positive, so a count that has already dropped to
// zero is never raised again. A failed compareAndSet means another thread
// changed the count in between, so the count is re-read and the attempt repeated.
while ((count = refCount.get()) > 0) {
229
if (refCount.compareAndSet(count, count+1)) {
238
// Builds a description containing this reader's simple class name and, when
// getSequentialSubReaders() returns a non-empty array, the string form of each
// sub-reader with a single space between consecutive entries.
public String toString() {
239
final StringBuilder buffer = new StringBuilder();
243
buffer.append(getClass().getSimpleName());
245
final IndexReader[] subReaders = getSequentialSubReaders();
246
if ((subReaders != null) && (subReaders.length > 0)) {
247
buffer.append(subReaders[0]);
248
for (int i = 1; i < subReaders.length; ++i) {
249
buffer.append(" ").append(subReaders[i]);
253
return buffer.toString();
257
* Expert: decreases the refCount of this IndexReader
258
* instance. If the refCount drops to 0, then pending
259
* changes (if any) are committed to the index and this
260
* reader is closed. If an exception is hit, the refCount
263
* @throws IOException in case an IOException occurs in commit() or doClose()
267
public void decRef() throws IOException {
269
// getAndDecrement returns the previous value, so rc is the count as it was
// *before* this call lowered it.
final int rc = refCount.getAndDecrement();
271
// Tracks whether the work guarded by this flag completed; consulted to decide
// whether the reference has to be restored.
boolean success = false;
278
// Put reference back on failure
279
refCount.incrementAndGet();
283
// The count was already zero or negative before this call, i.e. decRef() was
// called more often than references were taken.
} else if (rc <= 0) {
284
throw new IllegalStateException("too many decRef calls: refCount was " + rc + " before decrement");
288
protected IndexReader() {
293
* @throws AlreadyClosedException if this IndexReader is closed
295
protected final void ensureOpen() throws AlreadyClosedException {
296
// A reference count of zero or less is what this check treats as "closed".
if (refCount.get() <= 0) {
297
throw new AlreadyClosedException("this IndexReader is closed");
301
/** Returns an IndexReader reading the index in the given
302
* Directory, with readOnly=true.
303
* @param directory the index directory
304
* @throws CorruptIndexException if the index is corrupt
305
* @throws IOException if there is a low-level IO error
307
public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
308
return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR);
311
/** Returns an IndexReader reading the index in the given
312
* Directory. You should pass readOnly=true, since it
313
* gives much better concurrent performance, unless you
314
* intend to do write operations (delete documents or
315
* change norms) with the reader.
316
* @param directory the index directory
317
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
318
* @throws CorruptIndexException if the index is corrupt
319
* @throws IOException if there is a low-level IO error
321
public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
322
return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
326
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
328
* @param writer The IndexWriter to open from
329
* @param applyAllDeletes If true, all buffered deletes will
330
* be applied (made visible) in the returned reader. If
331
* false, the deletes are not applied but remain buffered
332
* (in IndexWriter) so that they will be applied in the
333
* future. Applying deletes can be costly, so if your app
334
* can tolerate deleted documents being returned you might
335
* gain some performance by passing false.
336
* @return The new IndexReader
337
* @throws CorruptIndexException
338
* @throws IOException if there is a low-level IO error
340
* @see #openIfChanged(IndexReader,IndexWriter,boolean)
342
* @lucene.experimental
344
public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
345
return writer.getReader(applyAllDeletes);
348
/** Expert: returns an IndexReader reading the index in the given
349
* {@link IndexCommit}. You should pass readOnly=true, since it
350
* gives much better concurrent performance, unless you
351
* intend to do write operations (delete documents or
352
* change norms) with the reader.
353
* @param commit the commit point to open
354
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
355
* @throws CorruptIndexException if the index is corrupt
356
* @throws IOException if there is a low-level IO error
358
public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException {
359
return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
362
/** Expert: returns an IndexReader reading the index in
363
* the given Directory, with a custom {@link
364
* IndexDeletionPolicy}. You should pass readOnly=true,
365
* since it gives much better concurrent performance,
366
* unless you intend to do write operations (delete
367
* documents or change norms) with the reader.
368
* @param directory the index directory
369
* @param deletionPolicy a custom deletion policy (only used
370
* if you use this reader to perform deletes or to set
371
* norms); see {@link IndexWriter} for details.
372
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
373
* @throws CorruptIndexException if the index is corrupt
374
* @throws IOException if there is a low-level IO error
376
public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
377
return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
380
/** Expert: returns an IndexReader reading the index in
381
* the given Directory, with a custom {@link
382
* IndexDeletionPolicy}. You should pass readOnly=true,
383
* since it gives much better concurrent performance,
384
* unless you intend to do write operations (delete
385
* documents or change norms) with the reader.
386
* @param directory the index directory
387
* @param deletionPolicy a custom deletion policy (only used
388
* if you use this reader to perform deletes or to set
389
* norms); see {@link IndexWriter} for details.
390
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
391
* @param termInfosIndexDivisor Subsamples which indexed
392
* terms are loaded into RAM. This has the same effect as {@link
393
* IndexWriter#setTermIndexInterval} except that setting
394
* must be done at indexing time while this setting can be
395
* set per reader. When set to N, then one in every
396
* N*termIndexInterval terms in the index is loaded into
397
* memory. By setting this to a value > 1 you can reduce
398
* memory usage, at the expense of higher latency when
399
* loading a TermInfo. The default value is 1. Set this
400
* to -1 to skip loading the terms index entirely.
401
* @throws CorruptIndexException if the index is corrupt
402
* @throws IOException if there is a low-level IO error
404
public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
405
return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
408
/** Expert: returns an IndexReader reading the index in
409
* the given Directory, using a specific commit and with
410
* a custom {@link IndexDeletionPolicy}. You should pass
411
* readOnly=true, since it gives much better concurrent
412
* performance, unless you intend to do write operations
413
* (delete documents or change norms) with the reader.
414
* @param commit the specific {@link IndexCommit} to open;
415
* see {@link IndexReader#listCommits} to list all commits
417
* @param deletionPolicy a custom deletion policy (only used
418
* if you use this reader to perform deletes or to set
419
* norms); see {@link IndexWriter} for details.
420
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
421
* @throws CorruptIndexException if the index is corrupt
422
* @throws IOException if there is a low-level IO error
424
public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
425
return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
428
/** Expert: returns an IndexReader reading the index in
429
* the given Directory, using a specific commit and with
430
* a custom {@link IndexDeletionPolicy}. You should pass
431
* readOnly=true, since it gives much better concurrent
432
* performance, unless you intend to do write operations
433
* (delete documents or change norms) with the reader.
434
* @param commit the specific {@link IndexCommit} to open;
435
* see {@link IndexReader#listCommits} to list all commits
437
* @param deletionPolicy a custom deletion policy (only used
438
* if you use this reader to perform deletes or to set
439
* norms); see {@link IndexWriter} for details.
440
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
441
* @param termInfosIndexDivisor Subsamples which indexed
442
* terms are loaded into RAM. This has the same effect as {@link
443
* IndexWriter#setTermIndexInterval} except that setting
444
* must be done at indexing time while this setting can be
445
* set per reader. When set to N, then one in every
446
* N*termIndexInterval terms in the index is loaded into
447
* memory. By setting this to a value > 1 you can reduce
448
* memory usage, at the expense of higher latency when
449
* loading a TermInfo. The default value is 1. Set this
450
* to -1 to skip loading the terms index entirely. This is only useful in
451
* advanced situations when you will only .next() through all terms;
452
* attempts to seek will hit an exception.
454
* @throws CorruptIndexException if the index is corrupt
455
* @throws IOException if there is a low-level IO error
457
public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
458
return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
461
// Common target of the Directory- and IndexCommit-based public open(...)
// overloads: forwards all five arguments unchanged to DirectoryReader.open.
private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
462
return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
466
* If the index has changed since the provided reader was
467
* opened, open and return a new reader; else, return
468
* null. The new reader, if not null, will be the same
469
* type of reader as the previous one, ie an NRT reader
470
* will open a new NRT reader, a MultiReader will open a
471
* new MultiReader, etc.
473
* <p>This method is typically far less costly than opening a
474
* fully new <code>IndexReader</code> as it shares
475
* resources (for example sub-readers) with the provided
476
* <code>IndexReader</code>, when possible.
478
* <p>The provided reader is not closed (you are responsible
479
* for doing so); if a new reader is returned you also
480
* must eventually close it. Be sure to never close a
481
* reader while other threads are still using it; see
482
* <code>SearcherManager</code> in
483
* <code>contrib/misc</code> to simplify managing this.
485
* <p>If a new reader is returned, it's safe to make changes
486
* (deletions, norms) with it. All shared mutable state
487
* with the old reader uses "copy on write" semantics to
488
* ensure the changes are not seen by other readers.
490
* @throws CorruptIndexException if the index is corrupt
491
* @throws IOException if there is a low-level IO error
492
* @return null if there are no changes; else, a new
493
* IndexReader instance which you must eventually close
495
public static IndexReader openIfChanged(IndexReader oldReader) throws IOException {
496
if (oldReader.hasNewReopenAPI1) {
497
final IndexReader newReader = oldReader.doOpenIfChanged();
498
assert newReader != oldReader;
501
final IndexReader newReader = oldReader.reopen();
502
if (newReader == oldReader) {
511
* If the index has changed since the provided reader was
512
* opened, open and return a new reader, with the
513
* specified <code>readOnly</code>; else, return
516
* @see #openIfChanged(IndexReader)
518
public static IndexReader openIfChanged(IndexReader oldReader, boolean readOnly) throws IOException {
519
if (oldReader.hasNewReopenAPI2) {
520
final IndexReader newReader = oldReader.doOpenIfChanged(readOnly);
521
assert newReader != oldReader;
524
final IndexReader newReader = oldReader.reopen(readOnly);
525
if (newReader == oldReader) {
534
* If the IndexCommit differs from what the
535
* provided reader is searching, or the provided reader is
536
* not already read-only, open and return a new
537
* <code>readOnly=true</code> reader; else, return null.
539
* @see #openIfChanged(IndexReader)
541
// TODO: should you be able to specify readOnly?
542
public static IndexReader openIfChanged(IndexReader oldReader, IndexCommit commit) throws IOException {
543
if (oldReader.hasNewReopenAPI3) {
544
final IndexReader newReader = oldReader.doOpenIfChanged(commit);
545
assert newReader != oldReader;
548
final IndexReader newReader = oldReader.reopen(commit);
549
if (newReader == oldReader) {
558
* Expert: If there are changes (committed or not) in the
559
* {@link IndexWriter} versus what the provided reader is
560
* searching, then open and return a new read-only
561
* IndexReader searching both committed and uncommitted
562
* changes from the writer; else, return null (though, the
563
* current implementation never returns null).
565
* <p>This provides "near real-time" searching, in that
566
* changes made during an {@link IndexWriter} session can be
567
* quickly made available for searching without closing
568
* the writer nor calling {@link #commit}.
570
* <p>It's <i>near</i> real-time because there is no hard
571
* guarantee on how quickly you can get a new reader after
572
* making changes with IndexWriter. You'll have to
573
* experiment in your situation to determine if it's
574
* fast enough. As this is a new and experimental
575
* feature, please report back on your findings so we can
576
* learn, improve and iterate.</p>
578
* <p>The very first time this method is called, this
579
* writer instance will make every effort to pool the
580
* readers that it opens for doing merges, applying
581
* deletes, etc. This means additional resources (RAM,
582
* file descriptors, CPU time) will be consumed.</p>
584
* <p>For lower latency on reopening a reader, you should
585
* call {@link IndexWriterConfig#setMergedSegmentWarmer} to
586
* pre-warm a newly merged segment before it's committed
587
* to the index. This is important for minimizing
588
* index-to-search delay after a large merge. </p>
590
* <p>If an addIndexes* call is running in another thread,
591
* then this reader will only search those segments from
592
* the foreign index that have been successfully copied
595
* <p><b>NOTE</b>: Once the writer is closed, any
596
* outstanding readers may continue to be used. However,
597
* if you attempt to reopen any of those readers, you'll
598
* hit an {@link AlreadyClosedException}.</p>
600
* @return IndexReader that covers entire index plus all
601
* changes made so far by this IndexWriter instance, or
602
* null if there are no new changes
604
* @param writer The IndexWriter to open from
606
* @param applyAllDeletes If true, all buffered deletes will
607
* be applied (made visible) in the returned reader. If
608
* false, the deletes are not applied but remain buffered
609
* (in IndexWriter) so that they will be applied in the
610
* future. Applying deletes can be costly, so if your app
611
* can tolerate deleted documents being returned you might
612
* gain some performance by passing false.
614
* @throws IOException
616
* @lucene.experimental
618
public static IndexReader openIfChanged(IndexReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException {
619
if (oldReader.hasNewReopenAPI4) {
620
final IndexReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes);
621
assert newReader != oldReader;
624
final IndexReader newReader = oldReader.reopen(writer, applyAllDeletes);
625
if (newReader == oldReader) {
634
* Refreshes an IndexReader if the index has changed since this instance
637
* Opening an IndexReader is an expensive operation. This method can be used
638
* to refresh an existing IndexReader to reduce these costs. This method
639
* tries to only load segments that have changed or were created after the
640
* IndexReader was (re)opened.
642
* If the index has not changed since this instance was (re)opened, then this
643
* call is a NOOP and returns this instance. Otherwise, a new instance is
644
* returned. The old instance is <b>not</b> closed and remains usable.<br>
646
* If the reader is reopened, even though they share
647
* resources internally, it's safe to make changes
648
* (deletions, norms) with the new reader. All shared
649
* mutable state obeys "copy on write" semantics to ensure
650
* the changes are not seen by other readers.
652
* You can determine whether a reader was actually reopened by comparing the
653
* old instance with the instance returned by this method:
655
* IndexReader reader = ...
657
* IndexReader newReader = reader.reopen();
658
* if (newReader != reader) {
659
* ... // reader was reopened
662
* reader = newReader;
666
* Be sure to synchronize that code so that other threads,
667
* if present, can never use reader after it has been
668
* closed and before it's switched to newReader.
670
* <p><b>NOTE</b>: If this reader is a near real-time
671
* reader (obtained from {@link IndexWriter#getReader()}),
672
* reopen() will simply call writer.getReader() again for
673
* you, though this may change in the future.
675
* @throws CorruptIndexException if the index is corrupt
676
* @throws IOException if there is a low-level IO error
677
* @deprecated Use IndexReader#openIfChanged(IndexReader) instead
680
public IndexReader reopen() throws CorruptIndexException, IOException {
681
final IndexReader newReader = IndexReader.openIfChanged(this);
682
if (newReader == null) {
689
/** Just like {@link #reopen()}, except you can change the
690
* readOnly of the original reader. If the index is
691
* unchanged but readOnly is different then a new reader
694
* IndexReader#openIfChanged(IndexReader,boolean) instead */
696
public IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
697
final IndexReader newReader = IndexReader.openIfChanged(this, openReadOnly);
698
if (newReader == null) {
705
/** Expert: reopen this reader on a specific commit point.
706
* This always returns a readOnly reader. If the
707
* specified commit point matches what this reader is
708
* already on, and this reader is already readOnly, then
709
* this same instance is returned; if it is not already
710
* readOnly, a readOnly clone is returned.
711
* @deprecated Use IndexReader#openIfChanged(IndexReader,IndexCommit) instead
714
public IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException {
715
final IndexReader newReader = IndexReader.openIfChanged(this, commit);
716
if (newReader == null) {
724
* Expert: returns a readonly reader, covering all
725
* committed as well as un-committed changes to the index.
726
* This provides "near real-time" searching, in that
727
* changes made during an IndexWriter session can be
728
* quickly made available for searching without closing
729
* the writer nor calling {@link #commit}.
731
* <p>Note that this is functionally equivalent to calling
732
* {@code flush} (an internal IndexWriter operation) and then using {@link IndexReader#open} to
733
* open a new reader. But the turnaround time of this
734
* method should be faster since it avoids the potentially
735
* costly {@link #commit}.</p>
737
* <p>You must close the {@link IndexReader} returned by
738
* this method once you are done using it.</p>
740
* <p>It's <i>near</i> real-time because there is no hard
741
* guarantee on how quickly you can get a new reader after
742
* making changes with IndexWriter. You'll have to
743
* experiment in your situation to determine if it's
744
* fast enough. As this is a new and experimental
745
* feature, please report back on your findings so we can
746
* learn, improve and iterate.</p>
748
* <p>The resulting reader supports {@link
749
* IndexReader#reopen}, but that call will simply forward
750
* back to this method (though this may change in the
753
* <p>The very first time this method is called, this
754
* writer instance will make every effort to pool the
755
* readers that it opens for doing merges, applying
756
* deletes, etc. This means additional resources (RAM,
757
* file descriptors, CPU time) will be consumed.</p>
759
* <p>For lower latency on reopening a reader, you should
760
* call {@link IndexWriterConfig#setMergedSegmentWarmer} to
761
* pre-warm a newly merged segment before it's committed
762
* to the index. This is important for minimizing
763
* index-to-search delay after a large merge. </p>
765
* <p>If an addIndexes* call is running in another thread,
766
* then this reader will only search those segments from
767
* the foreign index that have been successfully copied
770
* <p><b>NOTE</b>: Once the writer is closed, any
771
* outstanding readers may continue to be used. However,
772
* if you attempt to reopen any of those readers, you'll
773
* hit an {@link AlreadyClosedException}.</p>
775
* @return IndexReader that covers entire index plus all
776
* changes made so far by this IndexWriter instance
778
* @param writer The IndexWriter to open from
779
* @param applyAllDeletes If true, all buffered deletes will
780
* be applied (made visible) in the returned reader. If
781
* false, the deletes are not applied but remain buffered
782
* (in IndexWriter) so that they will be applied in the
783
* future. Applying deletes can be costly, so if your app
784
* can tolerate deleted documents being returned you might
785
* gain some performance by passing false.
787
* @throws IOException
789
* @lucene.experimental
790
* @deprecated Use IndexReader#openIfChanged(IndexReader,IndexWriter,boolean) instead
793
public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
794
final IndexReader newReader = IndexReader.openIfChanged(this, writer, applyAllDeletes);
795
if (newReader == null) {
803
* If the index has changed since it was opened, open and return a new reader;
804
* else, return {@code null}.
806
* @see #openIfChanged(IndexReader)
808
protected IndexReader doOpenIfChanged() throws CorruptIndexException, IOException {
809
// Base-class behaviour: reopening is not supported here, so this always
// throws; a reader that supports reopening has to override this method.
throw new UnsupportedOperationException("This reader does not support reopen().");
813
* If the index has changed since it was opened, open and return a new reader;
814
* else, return {@code null}.
816
* @see #openIfChanged(IndexReader, boolean)
818
protected IndexReader doOpenIfChanged(boolean openReadOnly) throws CorruptIndexException, IOException {
819
throw new UnsupportedOperationException("This reader does not support reopen().");
823
* If the index has changed since it was opened, open and return a new reader;
824
* else, return {@code null}.
826
* @see #openIfChanged(IndexReader, IndexCommit)
828
protected IndexReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
829
throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit).");
833
* If the index has changed since it was opened, open and return a new reader;
834
* else, return {@code null}.
836
* @see #openIfChanged(IndexReader, IndexWriter, boolean)
838
protected IndexReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
839
return writer.getReader(applyAllDeletes);
843
* Efficiently clones the IndexReader (sharing most
846
* On cloning a reader with pending changes (deletions,
847
* norms), the original reader transfers its write lock to
848
* the cloned reader. This means only the cloned reader
849
* may make further changes to the index, and commit the
850
* changes to the index on close, but the old reader still
851
* reflects all changes made up until it was cloned.
853
* Like {@link #openIfChanged(IndexReader)}, it's safe to make changes to
854
* either the original or the cloned reader: all shared
855
* mutable state obeys "copy on write" semantics to ensure
856
* the changes are not seen by other readers.
860
public synchronized Object clone() {
861
throw new UnsupportedOperationException("This reader does not implement clone()");
865
* Clones the IndexReader and optionally changes readOnly. A readOnly
866
* reader cannot open a writeable reader.
867
* @throws CorruptIndexException if the index is corrupt
868
* @throws IOException if there is a low-level IO error
870
public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
871
throw new UnsupportedOperationException("This reader does not implement clone()");
875
* Returns the directory associated with this index. The Default
876
* implementation returns the directory specified by subclasses when
877
* delegating to the IndexReader(Directory) constructor, or throws an
878
* UnsupportedOperationException if one was not specified.
879
* @throws UnsupportedOperationException if no directory
881
public Directory directory() {
883
throw new UnsupportedOperationException("This reader does not support this method.");
887
* Returns the time the index in the named directory was last modified.
888
* Do not use this to check whether the reader is still up-to-date, use
889
* {@link #isCurrent()} instead.
890
* @throws CorruptIndexException if the index is corrupt
891
* @throws IOException if there is a low-level IO error
893
public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
894
return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
896
public Object doBody(String segmentFileName) throws IOException {
897
return Long.valueOf(directory2.fileModified(segmentFileName));
899
}.run()).longValue();
903
* Reads version number from segments files. The version number is
904
* initialized with a timestamp and then increased by one for each change of
907
* @param directory where the index resides.
908
* @return version number.
909
* @throws CorruptIndexException if the index is corrupt
910
* @throws IOException if there is a low-level IO error
912
public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
913
return SegmentInfos.readCurrentVersion(directory);
917
* Reads commitUserData, previously passed to {@link
918
* IndexWriter#commit(Map)}, from current index
919
* segments file. This will return null if {@link
920
* IndexWriter#commit(Map)} has never been called for
923
* @param directory where the index resides.
924
* @return commit userData.
925
* @throws CorruptIndexException if the index is corrupt
926
* @throws IOException if there is a low-level IO error
928
* @see #getCommitUserData()
930
public static Map<String,String> getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
931
return SegmentInfos.readCurrentUserData(directory);
935
* Version number when this IndexReader was opened. Not
936
* implemented in the IndexReader base class.
938
* <p>If this reader is based on a Directory (ie, was
939
* created by calling {@link #open}, or {@link #openIfChanged} on
940
* a reader based on a Directory), then this method
941
* returns the version recorded in the commit that the
942
* reader opened. This version is advanced every time
943
* {@link IndexWriter#commit} is called.</p>
945
* <p>If instead this reader is a near real-time reader
946
* (ie, obtained by a call to {@link
947
* IndexWriter#getReader}, or by calling {@link #openIfChanged}
948
* on a near real-time reader), then this method returns
949
* the version of the last commit done by the writer.
950
* Note that even as further changes are made with the
951
* writer, the version will not changed until a commit is
952
* completed. Thus, you should not rely on this method to
953
* determine when a near real-time reader should be
954
* opened. Use {@link #isCurrent} instead.</p>
956
* @throws UnsupportedOperationException unless overridden in subclass
958
public long getVersion() {
959
throw new UnsupportedOperationException("This reader does not support this method.");
963
* Retrieve the String userData optionally passed to
964
* IndexWriter#commit. This will return null if {@link
965
* IndexWriter#commit(Map)} has never been called for
968
* @see #getCommitUserData(Directory)
970
public Map<String,String> getCommitUserData() {
971
throw new UnsupportedOperationException("This reader does not support this method.");
976
* Check whether any new changes have occurred to the
977
* index since this reader was opened.
979
* <p>If this reader is based on a Directory (ie, was
980
* created by calling {@link #open}, or {@link #openIfChanged} on
981
* a reader based on a Directory), then this method checks
982
* if any further commits (see {@link IndexWriter#commit}
983
* have occurred in that directory).</p>
985
* <p>If instead this reader is a near real-time reader
986
* (ie, obtained by a call to {@link
987
* IndexWriter#getReader}, or by calling {@link #openIfChanged}
988
* on a near real-time reader), then this method checks if
989
* either a new commmit has occurred, or any new
990
* uncommitted changes have taken place via the writer.
991
* Note that even if the writer has only performed
992
* merging, this method will still return false.</p>
994
* <p>In any event, if this returns false, you should call
995
* {@link #openIfChanged} to get a new reader that sees the
998
* @throws CorruptIndexException if the index is corrupt
999
* @throws IOException if there is a low-level IO error
1000
* @throws UnsupportedOperationException unless overridden in subclass
1002
public boolean isCurrent() throws CorruptIndexException, IOException {
1003
throw new UnsupportedOperationException("This reader does not support this method.");
1006
/** @deprecated Check segment count using {@link
1007
* #getSequentialSubReaders} instead. */
1009
public boolean isOptimized() {
1010
throw new UnsupportedOperationException("This reader does not support this method.");
1014
* Return an array of term frequency vectors for the specified document.
1015
* The array contains a vector for each vectorized field in the document.
1016
* Each vector contains terms and frequencies for all terms in a given vectorized field.
1017
* If no such fields existed, the method returns null. The term vectors that are
1018
* returned may either be of type {@link TermFreqVector}
1019
* or of type {@link TermPositionVector} if
1020
* positions or offsets have been stored.
1022
* @param docNumber document for which term frequency vectors are returned
1023
* @return array of term frequency vectors. May be null if no term vectors have been
1024
* stored for the specified document.
1025
* @throws IOException if index cannot be accessed
1026
* @see org.apache.lucene.document.Field.TermVector
1028
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
1033
* Return a term frequency vector for the specified document and field. The
1034
* returned vector contains terms and frequencies for the terms in
1035
* the specified field of this document, if the field had the storeTermVector
1036
* flag set. If termvectors had been stored with positions or offsets, a
1037
* {@link TermPositionVector} is returned.
1039
* @param docNumber document for which the term frequency vector is returned
1040
* @param field field for which the term frequency vector is returned.
1041
* @return term frequency vector. May be null if the field does not exist in the specified
1042
* document or term vector was not stored.
1043
* @throws IOException if index cannot be accessed
1044
* @see org.apache.lucene.document.Field.TermVector
1046
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
1050
* Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
1051
* the {@link TermFreqVector}.
1052
* @param docNumber The number of the document to load the vector for
1053
* @param field The name of the field to load
1054
* @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
1055
* @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
1058
abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
1061
* Map all the term vectors for all fields in a Document
1062
* @param docNumber The number of the document to load the vector for
1063
* @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
1064
* @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
1066
abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
1069
* Returns <code>true</code> if an index exists at the specified directory.
1070
* @param directory the directory to check for an index
1071
* @return <code>true</code> if an index exists; <code>false</code> otherwise
1072
* @throws IOException if there is a problem with accessing the index
1074
public static boolean indexExists(Directory directory) throws IOException {
1076
new SegmentInfos().read(directory);
1078
} catch (IOException ioe) {
1083
/** Returns the number of documents in this index. */
1084
public abstract int numDocs();
1086
/** Returns one greater than the largest possible document number.
1087
* This may be used to, e.g., determine how big to allocate an array which
1088
* will have an element for every document number in an index.
1090
public abstract int maxDoc();
1092
/** Returns the number of deleted documents. */
1093
public int numDeletedDocs() {
1094
return maxDoc() - numDocs();
1098
* Returns the stored fields of the <code>n</code><sup>th</sup>
1099
* <code>Document</code> in this index.
1101
* <b>NOTE:</b> for performance reasons, this method does not check if the
1102
* requested document is deleted, and therefore asking for a deleted document
1103
* may yield unspecified results. Usually this is not required, however you
1104
* can call {@link #isDeleted(int)} with the requested document ID to verify
1105
* the document is not deleted.
1107
* @throws CorruptIndexException if the index is corrupt
1108
* @throws IOException if there is a low-level IO error
1110
public Document document(int n) throws CorruptIndexException, IOException {
1112
if (n < 0 || n >= maxDoc()) {
1113
throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")");
1115
return document(n, null);
1119
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
1120
* <sup>th</sup> position. The {@link FieldSelector} may be used to determine
1121
* what {@link org.apache.lucene.document.Field}s to load and how they should
1122
* be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
1123
* <code>FieldsReader</code>) is closed before the lazy
1124
* {@link org.apache.lucene.document.Field} is loaded an exception may be
1125
* thrown. If you want the value of a lazy
1126
* {@link org.apache.lucene.document.Field} to be available after closing you
1127
* must explicitly load it or fetch the Document again with a new loader.
1129
* <b>NOTE:</b> for performance reasons, this method does not check if the
1130
* requested document is deleted, and therefore asking for a deleted document
1131
* may yield unspecified results. Usually this is not required, however you
1132
* can call {@link #isDeleted(int)} with the requested document ID to verify
1133
* the document is not deleted.
1135
* @param n Get the document at the <code>n</code><sup>th</sup> position
1136
* @param fieldSelector The {@link FieldSelector} to use to determine what
1137
* Fields should be loaded on the Document. May be null, in which case
1138
* all Fields will be loaded.
1139
* @return The stored fields of the
1140
* {@link org.apache.lucene.document.Document} at the nth position
1141
* @throws CorruptIndexException if the index is corrupt
1142
* @throws IOException if there is a low-level IO error
1143
* @see org.apache.lucene.document.Fieldable
1144
* @see org.apache.lucene.document.FieldSelector
1145
* @see org.apache.lucene.document.SetBasedFieldSelector
1146
* @see org.apache.lucene.document.LoadFirstFieldSelector
1148
// TODO (1.5): When we convert to JDK 1.5 make this Set<String>
1149
public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
1151
/** Returns true if document <i>n</i> has been deleted */
1152
public abstract boolean isDeleted(int n);
1154
/** Returns true if any documents have been deleted */
1155
public abstract boolean hasDeletions();
1157
/** Returns true if there are norms stored for this field. */
1158
public boolean hasNorms(String field) throws IOException {
1159
// backward compatible implementation.
1160
// SegmentReader has an efficient implementation.
1162
return norms(field) != null;
1165
/** Returns the byte-encoded normalization factor for the named field of
1166
* every document. This is used by the search code to score documents.
1167
* Returns null if norms were not indexed for this field.
1169
* @see org.apache.lucene.document.Field#setBoost(float)
1171
public abstract byte[] norms(String field) throws IOException;
1173
/** Reads the byte-encoded normalization factor for the named field of every
1174
* document. This is used by the search code to score documents.
1176
* @see org.apache.lucene.document.Field#setBoost(float)
1178
public abstract void norms(String field, byte[] bytes, int offset)
1181
/** Expert: Resets the normalization factor for the named field of the named
1182
* document. The norm represents the product of the field's {@link
1183
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
1184
* int) length normalization}. Thus, to preserve the length normalization
1185
* values when resetting this, one should base the new value upon the old.
1187
* <b>NOTE:</b> If this field does not index norms, then
1188
* this method throws {@link IllegalStateException}.
1190
* @see #norms(String)
1191
* @see Similarity#decodeNormValue(byte)
1192
* @throws StaleReaderException if the index has changed
1193
* since this reader was opened
1194
* @throws CorruptIndexException if the index is corrupt
1195
* @throws LockObtainFailedException if another writer
1196
* has this index open (<code>write.lock</code> could not
1198
* @throws IOException if there is a low-level IO error
1199
* @throws IllegalStateException if the field does not index norms
1201
public synchronized void setNorm(int doc, String field, byte value)
1202
throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1206
doSetNorm(doc, field, value);
1209
/** Implements setNorm in subclass.*/
1210
protected abstract void doSetNorm(int doc, String field, byte value)
1211
throws CorruptIndexException, IOException;
1213
/** Expert: Resets the normalization factor for the named field of the named
1216
* @see #norms(String)
1217
* @see Similarity#decodeNormValue(byte)
1219
* @throws StaleReaderException if the index has changed
1220
* since this reader was opened
1221
* @throws CorruptIndexException if the index is corrupt
1222
* @throws LockObtainFailedException if another writer
1223
* has this index open (<code>write.lock</code> could not
1225
* @throws IOException if there is a low-level IO error
1226
* @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the
1227
* float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}.
1228
* This method will be removed in Lucene 4.0
1231
public void setNorm(int doc, String field, float value)
1232
throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1234
setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
1237
/** Returns an enumeration of all the terms in the index. The
1238
* enumeration is ordered by Term.compareTo(). Each term is greater
1239
* than all that precede it in the enumeration. Note that after
1240
* calling terms(), {@link TermEnum#next()} must be called
1241
* on the resulting enumeration before calling other methods such as
1242
* {@link TermEnum#term()}.
1243
* @throws IOException if there is a low-level IO error
1245
public abstract TermEnum terms() throws IOException;
1247
/** Returns an enumeration of all terms starting at a given term. If
1248
* the given term does not exist, the enumeration is positioned at the
1249
* first term greater than the supplied term. The enumeration is
1250
* ordered by Term.compareTo(). Each term is greater than all that
1251
* precede it in the enumeration.
1252
* @throws IOException if there is a low-level IO error
1254
public abstract TermEnum terms(Term t) throws IOException;
1256
/** Returns the number of documents containing the term <code>t</code>.
1257
* @throws IOException if there is a low-level IO error
1259
public abstract int docFreq(Term t) throws IOException;
1261
/** Returns an enumeration of all the documents which contain
1262
* <code>term</code>. For each document, the document number, the frequency of
1263
* the term in that document is also provided, for use in
1264
* search scoring. If term is null, then all non-deleted
1265
* docs are returned with freq=1.
1266
* Thus, this method implements the mapping:
1268
* Term =&gt; &lt;docNum, freq&gt;<sup>*</sup>
1270
* <p>The enumeration is ordered by document number. Each document number
1271
* is greater than all that precede it in the enumeration.
1272
* @throws IOException if there is a low-level IO error
1274
public TermDocs termDocs(Term term) throws IOException {
1276
TermDocs termDocs = termDocs();
1277
termDocs.seek(term);
1281
/** Returns an unpositioned {@link TermDocs} enumerator.
1283
* Note: the TermDocs returned is unpositioned. Before using it, ensure
1284
* that you first position it with {@link TermDocs#seek(Term)} or
1285
* {@link TermDocs#seek(TermEnum)}.
1287
* @throws IOException if there is a low-level IO error
1289
public abstract TermDocs termDocs() throws IOException;
1291
/** Returns an enumeration of all the documents which contain
1292
* <code>term</code>. For each document, in addition to the document number
1293
* and frequency of the term in that document, a list of all of the ordinal
1294
* positions of the term in the document is available. Thus, this method
1295
* implements the mapping:
1298
* Term =&gt; &lt;docNum, freq,
* &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
* pos<sub>freq-1</sub>&gt;
1303
* <p> This positional information facilitates phrase and proximity searching.
1304
* <p>The enumeration is ordered by document number. Each document number is
1305
* greater than all that precede it in the enumeration.
1306
* @throws IOException if there is a low-level IO error
1308
public TermPositions termPositions(Term term) throws IOException {
1310
TermPositions termPositions = termPositions();
1311
termPositions.seek(term);
1312
return termPositions;
1315
/** Returns an unpositioned {@link TermPositions} enumerator.
1316
* @throws IOException if there is a low-level IO error
1318
public abstract TermPositions termPositions() throws IOException;
1322
/** Deletes the document numbered <code>docNum</code>. Once a document is
1323
* deleted it will not appear in TermDocs or TermPositions enumerations.
1324
* Attempts to read its field with the {@link #document}
1325
* method will result in an error. The presence of this document may still be
1326
* reflected in the {@link #docFreq} statistic, though
1327
* this will be corrected eventually as the index is further modified.
1329
* @throws StaleReaderException if the index has changed
1330
* since this reader was opened
1331
* @throws CorruptIndexException if the index is corrupt
1332
* @throws LockObtainFailedException if another writer
1333
* has this index open (<code>write.lock</code> could not
1335
* @throws IOException if there is a low-level IO error
1337
public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1345
/** Implements deletion of the document numbered <code>docNum</code>.
1346
* Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
1348
protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException;
1351
/** Deletes all documents that have a given <code>term</code> indexed.
1352
* This is useful if one uses a document field to hold a unique ID string for
1353
* the document. Then to delete such a document, one merely constructs a
1354
* term with the appropriate field and the unique ID string as its text and
1355
* passes it to this method.
1356
* See {@link #deleteDocument(int)} for information about when this deletion will
1359
* @return the number of documents deleted
1360
* @throws StaleReaderException if the index has changed
1361
* since this reader was opened
1362
* @throws CorruptIndexException if the index is corrupt
1363
* @throws LockObtainFailedException if another writer
1364
* has this index open (<code>write.lock</code> could not
1366
* @throws IOException if there is a low-level IO error
1368
public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1370
TermDocs docs = termDocs(term);
1371
if (docs == null) return 0;
1374
while (docs.next()) {
1375
deleteDocument(docs.doc());
1384
/** Undeletes all documents currently marked as deleted in
1387
* <p>NOTE: this method can only recover documents marked
1388
* for deletion but not yet removed from the index; when
1389
* and how Lucene removes deleted documents is an
1390
* implementation detail, subject to change from release
1391
* to release. However, you can use {@link
1392
* #numDeletedDocs} on the current IndexReader instance to
1393
* see how many documents will be un-deleted.
1395
* @throws StaleReaderException if the index has changed
1396
* since this reader was opened
1397
* @throws LockObtainFailedException if another writer
1398
* has this index open (<code>write.lock</code> could not
1400
* @throws CorruptIndexException if the index is corrupt
1401
* @throws IOException if there is a low-level IO error
1403
public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1410
/** Implements actual undeleteAll() in subclass. */
1411
protected abstract void doUndeleteAll() throws CorruptIndexException, IOException;
1413
/** Does nothing by default. Subclasses that require a write lock for
1414
* index modifications must implement this method. */
1415
protected synchronized void acquireWriteLock() throws IOException {
1421
* @throws IOException
1423
public final synchronized void flush() throws IOException {
1429
* @param commitUserData Opaque Map (String -> String)
1430
* that's recorded into the segments file in the index,
1431
* and retrievable by {@link
1432
* IndexReader#getCommitUserData}.
1433
* @throws IOException
1435
public final synchronized void flush(Map<String, String> commitUserData) throws IOException {
1437
commit(commitUserData);
1441
* Commit changes resulting from delete, undeleteAll, or
1442
* setNorm operations
1444
* If an exception is hit, then either no changes or all
1445
* changes will have been committed to the index
1446
* (transactional semantics).
1447
* @throws IOException if there is a low-level IO error
1449
protected final synchronized void commit() throws IOException {
1454
* Commit changes resulting from delete, undeleteAll, or
1455
* setNorm operations
1457
* If an exception is hit, then either no changes or all
1458
* changes will have been committed to the index
1459
* (transactional semantics).
1460
* @throws IOException if there is a low-level IO error
1462
public final synchronized void commit(Map<String, String> commitUserData) throws IOException {
1463
// Don't call ensureOpen since we commit() on close
1464
doCommit(commitUserData);
1468
/** Implements commit. */
1469
protected abstract void doCommit(Map<String, String> commitUserData) throws IOException;
1472
* Closes files associated with this index.
1473
* Also saves any new deletions to disk.
1474
* No other methods should be called after this has been called.
1475
* @throws IOException if there is a low-level IO error
1477
public final synchronized void close() throws IOException {
1484
/** Implements close. */
1485
protected abstract void doClose() throws IOException;
1489
* Get a list of unique field names that exist in this index and have the specified
1490
* field option information.
1491
* @param fldOption specifies which field option should be available for the returned fields
1492
* @return Collection of Strings indicating the names of the fields.
1493
* @see IndexReader.FieldOption
1495
public abstract Collection<String> getFieldNames(FieldOption fldOption);
1498
* Expert: return the IndexCommit that this reader has
1499
* opened. This method is only implemented by those
1500
* readers that correspond to a Directory with its own
1503
* @lucene.experimental
1505
public IndexCommit getIndexCommit() throws IOException {
1506
throw new UnsupportedOperationException("This reader does not support this method.");
1510
* Prints the filename and size of each file within a given compound file.
1511
* Add the -extract flag to extract files to the current working directory.
1512
* In order to make the extracted version of the index work, you have to copy
1513
* the segments file from the compound index into the directory where the extracted files are stored.
1514
* @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
1516
public static void main(String [] args) {
1517
String filename = null;
1518
boolean extract = false;
1520
for (int i = 0; i < args.length; ++i) {
1521
if (args[i].equals("-extract")) {
1523
} else if (filename == null) {
1528
if (filename == null) {
1529
System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
1533
Directory dir = null;
1534
CompoundFileReader cfr = null;
1537
File file = new File(filename);
1538
String dirname = file.getAbsoluteFile().getParent();
1539
filename = file.getName();
1540
dir = FSDirectory.open(new File(dirname));
1541
cfr = new CompoundFileReader(dir, filename);
1543
String [] files = cfr.listAll();
1544
ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable
1546
for (int i = 0; i < files.length; ++i) {
1547
long len = cfr.fileLength(files[i]);
1550
System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
1551
IndexInput ii = cfr.openInput(files[i]);
1553
FileOutputStream f = new FileOutputStream(files[i]);
1555
// read and write with a small buffer, which is more effective than reading byte by byte
1556
byte[] buffer = new byte[1024];
1557
int chunk = buffer.length;
1559
final int bufLen = (int) Math.min(chunk, len);
1560
ii.readBytes(buffer, 0, bufLen);
1561
f.write(buffer, 0, bufLen);
1569
System.out.println(files[i] + ": " + len + " bytes");
1571
} catch (IOException ioe) {
1572
ioe.printStackTrace();
1581
catch (IOException ioe) {
1582
ioe.printStackTrace();
1587
/** Returns all commit points that exist in the Directory.
1588
* Normally, because the default is {@link
1589
* KeepOnlyLastCommitDeletionPolicy}, there would be only
1590
* one commit point. But if you're using a custom {@link
1591
* IndexDeletionPolicy} then there could be many commits.
1592
* Once you have a given commit, you can open a reader on
1593
* it by calling {@link IndexReader#open(IndexCommit,boolean)}
1594
* There must be at least one commit in
1595
* the Directory, else this method throws {@link
1596
* IndexNotFoundException}. Note that if a commit is in
1597
* progress while this method is running, that commit
1598
* may or may not be returned.
1600
* @return a sorted list of {@link IndexCommit}s, from oldest
1602
public static Collection<IndexCommit> listCommits(Directory dir) throws IOException {
1603
return DirectoryReader.listCommits(dir);
1606
/** Expert: returns the sequential sub readers that this
1607
* reader is logically composed of. For example,
1608
* IndexSearcher uses this API to drive searching by one
1609
* sub reader at a time. If this reader is not composed
1610
* of sequential child readers, it should return null.
1611
* If this method returns an empty array, that means this
1612
* reader is a null reader (for example a MultiReader
1613
* that has no sub readers).
1615
* NOTE: You should not try using sub-readers returned by
1616
* this method to make any changes (setNorm, deleteDocument,
1617
* etc.). While this might succeed for one composite reader
1618
* (like MultiReader), it will most likely lead to index
1619
* corruption for other readers (like DirectoryReader obtained
1620
* through {@link #open}. Use the parent reader directly. */
1621
public IndexReader[] getSequentialSubReaders() {
1627
public Object getCoreCacheKey() {
1628
// Don't call ensureOpen since FC calls this (to evict)
1633
/** Expert. Warning: this returns null if the reader has
1635
public Object getDeletesCacheKey() {
1639
/** Returns the number of unique terms (across all fields)
1642
* This method returns long, even though internally
1643
* Lucene cannot handle more than 2^31 unique terms, for
1644
* a possible future when this limitation is removed.
1646
* @throws UnsupportedOperationException if this count
1647
* cannot be easily determined (eg Multi*Readers).
1648
* Instead, you should call {@link
1649
* #getSequentialSubReaders} and ask each sub reader for
1650
* its unique term count. */
1651
public long getUniqueTermCount() throws IOException {
1652
throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
1655
// Back compat for reopen()
1657
private static final VirtualMethod<IndexReader> reopenMethod1 =
1658
new VirtualMethod<IndexReader>(IndexReader.class, "reopen");
1660
private static final VirtualMethod<IndexReader> doOpenIfChangedMethod1 =
1661
new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged");
1663
private final boolean hasNewReopenAPI1 =
1664
VirtualMethod.compareImplementationDistance(getClass(),
1665
doOpenIfChangedMethod1, reopenMethod1) >= 0; // its ok for both to be overridden
1667
// Back compat for reopen(boolean openReadOnly)
1669
private static final VirtualMethod<IndexReader> reopenMethod2 =
1670
new VirtualMethod<IndexReader>(IndexReader.class, "reopen", boolean.class);
1672
private static final VirtualMethod<IndexReader> doOpenIfChangedMethod2 =
1673
new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", boolean.class);
1675
private final boolean hasNewReopenAPI2 =
1676
VirtualMethod.compareImplementationDistance(getClass(),
1677
doOpenIfChangedMethod2, reopenMethod2) >= 0; // its ok for both to be overridden
1679
// Back compat for reopen(IndexCommit commit)
1681
private static final VirtualMethod<IndexReader> reopenMethod3 =
1682
new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexCommit.class);
1684
private static final VirtualMethod<IndexReader> doOpenIfChangedMethod3 =
1685
new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexCommit.class);
1687
private final boolean hasNewReopenAPI3 =
1688
VirtualMethod.compareImplementationDistance(getClass(),
1689
doOpenIfChangedMethod3, reopenMethod3) >= 0; // its ok for both to be overridden
1691
// Back compat for reopen(IndexWriter writer, boolean applyDeletes)
1693
private static final VirtualMethod<IndexReader> reopenMethod4 =
1694
new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexWriter.class, boolean.class);
1696
private static final VirtualMethod<IndexReader> doOpenIfChangedMethod4 =
1697
new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexWriter.class, boolean.class);
1699
private final boolean hasNewReopenAPI4 =
1700
VirtualMethod.compareImplementationDistance(getClass(),
1701
doOpenIfChangedMethod4, reopenMethod4) >= 0; // its ok for both to be overridden
1703
/** For IndexReader implementations that use
1704
* TermInfosReader to read terms, this returns the
1705
* current indexDivisor as specified when the reader was
1708
public int getTermInfosIndexDivisor() {
1709
throw new UnsupportedOperationException("This reader does not support this method.");