package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.TwoPhaseCommit;

/**
  An <code>IndexWriter</code> creates and maintains an index.

  <p>The <code>create</code> argument to the {@link
  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
  whether a new index is created, or whether an existing index is
  opened. Note that you can open an index with <code>create=true</code>
  even while readers are using the index. The old readers will
  continue to search the "point in time" snapshot they had opened,
  and won't see the newly created index until they re-open. There are
  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
  with no <code>create</code> argument which will create a new index
  if there is not already an index at the provided path and otherwise
  open the existing index.</p>

  <p>In either case, documents are added with {@link #addDocument(Document)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}. A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  and then adds the entire document). When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>
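
  <p>For example, a minimal sketch (<code>dir</code> and
  <code>analyzer</code> stand for a {@link Directory} and an
  {@link Analyzer} created elsewhere):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);                           // buffer a new document
  writer.updateDocument(new Term("id", "1"), doc);   // delete + re-add by term
  writer.deleteDocuments(new Term("id", "2"));       // delete by term
  writer.close();                                    // commit changes and release the lock
  </pre>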

  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls). A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner. For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB. For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer. Note that flushing just moves the
  internal buffered state in IndexWriter into the index, but
  these changes are not visible to IndexReader until either
  {@link #commit()} or {@link #close} is called. A flush may
  also trigger one or more segment merges which by default
  run with a background thread so as not to block the
  addDocument calls (see <a href="#mergePolicy">below</a>
  for changing the {@link MergeScheduler}).</p>
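
  <p>A sketch of tuning the flush trigger through {@link
  IndexWriterConfig} (the 48&nbsp;MB figure is only an
  illustration, not a recommendation):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  conf.setRAMBufferSizeMB(48.0);                                  // flush by RAM usage
  conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // not by doc count
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>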

  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
  another <code>IndexWriter</code> on the same directory will lead to a
  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
  is also thrown if an IndexReader on the same directory is used to delete documents
  from the index.</p>

  <a name="deletionPolicy"></a>
  <p>Expert: <code>IndexWriter</code> allows an optional
  {@link IndexDeletionPolicy} implementation to be
  specified. You can use this to control when prior commits
  are deleted from the index. The default policy is {@link
  KeepOnlyLastCommitDeletionPolicy} which removes all prior
  commits as soon as a new commit is done (this matches
  behavior before 2.2). Creating your own policy can allow
  you to explicitly keep previous "point in time" commits
  alive in the index for some time, to allow readers to
  refresh to the new commit without having the old commit
  deleted out from under them. This is necessary on
  filesystems like NFS that do not support "delete on last
  close" semantics, which Lucene's "point in time" search
  normally relies on.</p>
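
  <p>A sketch of installing a non-default policy; here the
  library's own {@link SnapshotDeletionPolicy}, which wraps another
  policy so that a commit can be held open while a backup runs:</p>

  <pre>
  SnapshotDeletionPolicy policy =
      new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer)
      .setIndexDeletionPolicy(policy);
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>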

  <a name="mergePolicy"></a> <p>Expert:
  <code>IndexWriter</code> allows you to separately change
  the {@link MergePolicy} and the {@link MergeScheduler}.
  The {@link MergePolicy} is invoked whenever there are
  changes to the segments in the index. Its role is to
  select which merges to do, if any, and return a {@link
  MergePolicy.MergeSpecification} describing the merges.
  The default is {@link LogByteSizeMergePolicy}. Then, the {@link
  MergeScheduler} is invoked with the requested merges and
  it decides when and how to run the merges. The default is
  {@link ConcurrentMergeScheduler}.</p>
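
  <p>For example, a sketch of swapping in different merge components
  (the values are illustrative only):</p>

  <pre>
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
  LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
  mp.setMergeFactor(20);                               // merge more segments at once
  conf.setMergePolicy(mp);
  conf.setMergeScheduler(new SerialMergeScheduler());  // merge on the calling thread
  IndexWriter writer = new IndexWriter(dir, conf);
  </pre>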

  <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  OutOfMemoryError then IndexWriter will quietly record this
  fact and block all future segment commits. This is a
  defensive measure in case any internal state (buffered
  documents and deletions) were corrupted. Any subsequent
  calls to {@link #commit()} will throw an
  IllegalStateException. The only course of action is to
  call {@link #close()}, which internally will call {@link
  #rollback()}, to undo any changes to the index since the
  last commit. You can also just call {@link #rollback()}
  directly.</p>

  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  IndexWriter} instances are completely thread
  safe, meaning multiple threads can call any of its
  methods, concurrently. If your application requires
  external synchronization, you should <b>not</b>
  synchronize on the <code>IndexWriter</code> instance as
  this may cause deadlock; use your own (non-Lucene) objects
  instead.</p>

  <p><b>NOTE</b>: If you call
  <code>Thread.interrupt()</code> on a thread that's within
  IndexWriter, IndexWriter will try to catch this (eg, if
  it's in a wait() or Thread.sleep()), and will then throw
  the unchecked exception {@link ThreadInterruptedException}
  and <b>clear</b> the interrupt status on the thread.</p>
*/

/*
 * Clarification: Check Points (and commits)
 * IndexWriter writes new index files to the directory without writing a new segments_N
 * file which references these new files. It also means that the state of
 * the in-memory SegmentInfos object is different than the most recent
 * segments_N file written to the directory.
 *
 * Each time the SegmentInfos is changed, and matches the (possibly
 * modified) directory files, we have a new "check point".
 * If the modified/new SegmentInfos is written to disk - as a new
 * (generation of) segments_N file - this check point is also an
 * IndexCommit.
 *
 * A new checkpoint always replaces the previous checkpoint and
 * becomes the new "front" of the index. This allows the IndexFileDeleter
 * to delete files that are referenced only by stale checkpoints
 * (files that were created since the last commit, but are no longer
 * referenced by the "front" of the index). For this, IndexFileDeleter
 * keeps track of the last non-commit checkpoint.
 */

public class IndexWriter implements Closeable, TwoPhaseCommit {

  /**
   * Default value for the write lock timeout (1,000 milliseconds).
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
   */
  @Deprecated
  public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;

  private long writeLockTimeout;

  /**
   * Name of the write lock in the index.
   */
  public static final String WRITE_LOCK_NAME = "write.lock";

  /**
   * Value to denote a flush trigger is disabled.
   * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
   */
  @Deprecated
  public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDocs(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;

  /**
   * Default value is 16 MB (which means flush when buffered
   * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
   */
  @Deprecated
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  /**
   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
   * @deprecated see {@link IndexWriterConfig}
   */
  @Deprecated
  public final static int DEFAULT_MAX_FIELD_LENGTH = MaxFieldLength.UNLIMITED.getLimit();

  /**
   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
   */
  @Deprecated
  public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /**
   * Absolute hard maximum length for a term. If a term
   * arrives from the analyzer longer than this length, it
   * is skipped and a message is printed to infoStream, if
   * set (see {@link #setInfoStream}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;

  // The normal read buffer size defaults to 1024, but
  // increasing this during merging seems to yield
  // performance gains. However we don't want to increase
  // it too much because there are quite a few
  // BufferedIndexInputs created during merging. See
  // LUCENE-888 for details.
  private final static int MERGE_READ_BUFFER_SIZE = 4096;

  // Used for printing messages
  private static final AtomicInteger MESSAGE_ID = new AtomicInteger();
  private int messageID = MESSAGE_ID.getAndIncrement();
  volatile private boolean hitOOM;

  private final Directory directory;  // where this index resides
  private final Analyzer analyzer;    // how to analyze text

  // TODO 4.0: this should be made final once the setter is out
  private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize

  private volatile long changeCount; // increments every time a change is completed
  private long lastCommitChangeCount; // last changeCount that was committed

  private List<SegmentInfo> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails

  volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
  volatile long pendingCommitChangeCount;

  final SegmentInfos segmentInfos = new SegmentInfos();       // the segments

  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  // used by forceMerge to note those needing merging
  private Map<SegmentInfo,Boolean> segmentsToMerge = new HashMap<SegmentInfo,Boolean>();
  private int mergeMaxNumSegments;

  private Lock writeLock;

  private volatile boolean closed;
  private volatile boolean closing;

  // Holds all SegmentInfo instances currently involved in
  // merges
  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

  private MergePolicy mergePolicy;
  // TODO 4.0: this should be made final once the setter is removed
  private /*final*/MergeScheduler mergeScheduler;
  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  private long mergeGen;
  private boolean stopMerges;

  private final AtomicInteger flushCount = new AtomicInteger();
  private final AtomicInteger flushDeletesCount = new AtomicInteger();

  final ReaderPool readerPool = new ReaderPool();
  final BufferedDeletesStream bufferedDeletesStream;

  // This is a "write once" variable (like the organic dye
  // on a DVD-R that may or may not be heated by a laser and
  // then cooled to permanently record the event): it's
  // false, until getReader() is called for the first time,
  // at which point it's switched to true and never changes
  // back to false. Once this is true, we hold open and
  // reuse SegmentReader instances internally for applying
  // deletes, doing merges, and reopening near real-time
  // readers.
  private volatile boolean poolReaders;

  // The instance that was passed to the constructor. It is saved only in order
  // to allow users to query an IndexWriter settings.
  private final IndexWriterConfig config;

  // The PayloadProcessorProvider to use when segments are merged
  private PayloadProcessorProvider payloadProcessorProvider;

  boolean anyNonBulkMerges;

  /**
   * Expert: returns a readonly reader, covering all
   * committed as well as un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * <code>flush</code> and then using {@link IndexReader#open} to
   * open a new reader. But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
   *
   * <p>It's <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter. You'll have to
   * experiment in your situation to determine if it's
   * fast enough. As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * IndexReader#reopen}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc. This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link #setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it's committed
   * to the index. This is important for minimizing
   * index-to-search delay after a large merge.</p>
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used. However,
   * if you attempt to reopen any of those readers, you'll
   * hit an {@link AlreadyClosedException}.</p>
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @deprecated Please use {@link
   * IndexReader#open(IndexWriter,boolean)} instead.
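   *
   * <p>A sketch of the suggested replacement, which returns a
   * near real-time reader from an open <code>writer</code>
   * (<code>true</code> means apply buffered deletes):</p>
   *
   * <pre>
   * IndexReader reader = IndexReader.open(writer, true);
   * try {
   *   // run searches against reader
   * } finally {
   *   reader.close();
   * }
   * </pre>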
   *
   * @throws IOException
   */
  @Deprecated
  public IndexReader getReader() throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), true);
  }

  IndexReader getReader(boolean applyAllDeletes) throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
  }

  /** Expert: like {@link #getReader}, except you can
   *  specify which termInfosIndexDivisor should be used for
   *  any newly opened readers.
   *  @param termInfosIndexDivisor Subsamples which indexed
   *  terms are loaded into RAM. This has the same effect as {@link
   *  IndexWriter#setTermIndexInterval} except that setting
   *  must be done at indexing time while this setting can be
   *  set per reader. When set to N, then one in every
   *  N*termIndexInterval terms in the index is loaded into
   *  memory. By setting this to a value > 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo. The default value is 1. Set this
   *  to -1 to skip loading the terms index entirely.
   *
   *  @deprecated Please use {@link
   *  IndexReader#open(IndexWriter,boolean)} instead. Furthermore,
   *  this method cannot guarantee the reader (and its
   *  sub-readers) will be opened with the
   *  termInfosIndexDivisor setting because some of them may
   *  have already been opened according to {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor}. You
   *  should set the requested termInfosIndexDivisor through
   *  {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
   *  {@link #getReader()}. */
  @Deprecated
  public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
    return getReader(termInfosIndexDivisor, true);
  }

  IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
    ensureOpen();

    final long tStart = System.currentTimeMillis();

    if (infoStream != null) {
      message("flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    poolReaders = true;

    // Prevent segmentInfos from changing while opening the
    // reader; in theory we could do similar retry logic,
    // just like we do when loading segments_N
    IndexReader r;
    synchronized(this) {
      flush(false, applyAllDeletes);
      r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
      if (infoStream != null) {
        message("return reader version=" + r.getVersion() + " reader=" + r);
      }
    }

    if (infoStream != null) {
      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
    }
    return r;
  }

  // Used for all SegmentReaders we open
  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());

  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
    return readerFinishedListeners;
  }

  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader. This pool
   *  reuses instances of the SegmentReaders in all these
   *  places if it is in "near real-time mode" (getReader()
   *  has been called on this instance). */
  class ReaderPool {

    private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();

    /** Forcefully clear changes for the specified segments. This is called on successful merge. */
    synchronized void clear(List<SegmentInfo> infos) throws IOException {
      if (infos == null) {
        for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
          ent.getValue().hasChanges = false;
        }
      } else {
        for (final SegmentInfo info: infos) {
          final SegmentReader r = readerMap.get(info);
          if (r != null) {
            r.hasChanges = false;
          }
        }
      }
    }

    // used only by asserts
    public synchronized boolean infoIsLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      assert idx != -1: "info=" + info + " isn't in pool";
      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
      return true;
    }

    public synchronized SegmentInfo mapToLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      if (idx != -1) {
        info = segmentInfos.info(idx);
      }
      return info;
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed). Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr) throws IOException {
      return release(sr, false);
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed). Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr, boolean drop) throws IOException {

      final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());

      assert !pooled || readerMap.get(sr.getSegmentInfo()) == sr;

      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      sr.decRef();

      if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {

        // We invoke deleter.checkpoint below, so we must be
        // sync'd on IW if there are changes:
        assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge.
        sr.hasChanges &= !drop;

        final boolean hasChanges = sr.hasChanges;

        // Drop our ref -- this will commit any pending
        // changes to the dir
        sr.close();

        // We are the last ref to this reader; since we're
        // not pooling readers, we release it:
        readerMap.remove(sr.getSegmentInfo());

        return hasChanges;
      }

      return false;
    }

    public synchronized void drop(List<SegmentInfo> infos) throws IOException {
      for(SegmentInfo info : infos) {
        drop(info);
      }
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.hasChanges = false;
        readerMap.remove(info);
        sr.close();
      }
    }

    public synchronized void dropAll() throws IOException {
      for(SegmentReader reader : readerMap.values()) {
        reader.hasChanges = false;

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader using this SR is still open
        reader.decRef();
      }
      readerMap.clear();
    }

    /** Remove all our references to readers, and commits
     *  any pending changes. */
    synchronized void close() throws IOException {
      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for(Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {

        SegmentReader sr = ent.getValue();
        if (sr.hasChanges) {
          assert infoIsLive(sr.getSegmentInfo());
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader is kept open after the
        // IndexWriter instance is closed
        sr.decRef();
      }

      readerMap.clear();
    }

    /**
     * Commit all segment readers in the pool.
     * @throws IOException
     */
    synchronized void commit(SegmentInfos infos) throws IOException {

      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for (SegmentInfo info : infos) {

        final SegmentReader sr = readerMap.get(info);
        if (sr != null && sr.hasChanges) {
          assert infoIsLive(info);
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }
      }
    }

    /**
     * Returns a ref to a clone. NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
      SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
      try {
        return (SegmentReader) sr.clone(true);
      } finally {
        sr.decRef();
      }
    }

    /**
     * Obtain a SegmentReader from the readerPool. The reader
     * must be returned by calling {@link #release(SegmentReader)}.
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
      return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, config.getReaderTermsIndexDivisor());
    }

    /**
     * Obtain a SegmentReader from the readerPool. The reader
     * must be returned by calling {@link #release(SegmentReader)}.
     *
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @param readBufferSize
     * @param termsIndexDivisor
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {

      if (poolReaders) {
        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
      }

      SegmentReader sr = readerMap.get(info);
      if (sr == null) {
        // TODO: we may want to avoid doing this while
        // synchronized
        // Returns a ref, which we xfer to readerMap:
        sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
        sr.readerFinishedListeners = readerFinishedListeners;

        if (info.dir == directory) {
          // Only pool if reader is not external
          readerMap.put(info, sr);
        }
      } else {
        if (doOpenStores) {
          sr.openDocStores();
        }
        if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
          // If this reader was originally opened because we
          // needed to merge it, we didn't load the terms
          // index. But now, if the caller wants the terms
          // index (eg because it's doing deletes, or an NRT
          // reader is being opened) we ask the reader to
          // load its terms index.
          sr.loadTermsIndex(termsIndexDivisor);
        }
      }

      // Return a ref to our caller
      if (info.dir == directory) {
        // Only incRef if we pooled (reader is not external)
        sr.incRef();
      }
      return sr;
    }

    public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
      SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.incRef();
      }
      return sr;
    }
  }

  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's
   * delCount is returned.
   */
  public int numDeletedDocs(SegmentInfo info) throws IOException {
    ensureOpen(false);
    SegmentReader reader = readerPool.getIfExists(info);
    try {
      if (reader != null) {
        return reader.numDeletedDocs();
      } else {
        return info.getDelCount();
      }
    } finally {
      if (reader != null) {
        readerPool.release(reader);
      }
    }
  }

  /**
   * Used internally to throw an {@link
   * AlreadyClosedException} if this IndexWriter has been
   * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed
   */
  protected final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
    if (closed || (includePendingClose && closing)) {
      throw new AlreadyClosedException("this IndexWriter is closed");
    }
  }

  protected final void ensureOpen() throws AlreadyClosedException {
    ensureOpen(true);
  }

  /**
   * Prints a message to the infoStream (if non-null),
   * prefixed with the identifying information for this
   * writer and the thread that's calling it.
   */
  public void message(String message) {
    if (infoStream != null)
      infoStream.println("IW " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
  }

  /**
   * Casts current mergePolicy to LogMergePolicy, and throws
   * an exception if the mergePolicy is not a LogMergePolicy.
   */
  private LogMergePolicy getLogMergePolicy() {
    if (mergePolicy instanceof LogMergePolicy)
      return (LogMergePolicy) mergePolicy;
    else
      throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  }

  /** <p>Get the current setting of whether newly flushed
   *  segments will use the compound file format. Note that
   *  this just returns the value previously set with
   *  setUseCompoundFile(boolean), or the default value
   *  (true). You cannot use this to query the status of
   *  previously flushed segments.</p>
   *
   *  <p>Note that this method is a convenience method: it
   *  just calls mergePolicy.getUseCompoundFile as long as
   *  mergePolicy is an instance of {@link LogMergePolicy}.
   *  Otherwise an IllegalArgumentException is thrown.</p>
   *
   *  @see #setUseCompoundFile(boolean)
   *  @deprecated use {@link LogMergePolicy#getUseCompoundFile()}
   */
  @Deprecated
  public boolean getUseCompoundFile() {
    return getLogMergePolicy().getUseCompoundFile();
  }

  /**
   * Setting to turn on usage of a compound file. When on, multiple files for
   * each segment are merged into a single file when a new segment is flushed.
   *
   * <p>Note that this method is a convenience method: it just calls
   * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
   * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.</p>
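   *
   * <p>A sketch of the non-deprecated route through the merge policy
   * (<code>conf</code> is an {@link IndexWriterConfig}):</p>
   *
   * <pre>
   * LogMergePolicy mp = new LogByteSizeMergePolicy();
   * mp.setUseCompoundFile(false);  // keep per-segment files separate
   * conf.setMergePolicy(mp);
   * </pre>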
   *
   * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}.
   */
  @Deprecated
  public void setUseCompoundFile(boolean value) {
    getLogMergePolicy().setUseCompoundFile(value);
  }

  /** Expert: Set the Similarity implementation used by this IndexWriter.
   *
   * @see Similarity#setDefault(Similarity)
   * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
   */
  @Deprecated
  public void setSimilarity(Similarity similarity) {
    ensureOpen();
    this.similarity = similarity;
    docWriter.setSimilarity(similarity);
    // Required so config.getSimilarity returns the right value. But this will
    // go away together with the method in 4.0.
    config.setSimilarity(similarity);
  }

  /** Expert: Return the Similarity implementation used by this IndexWriter.
   *
   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
   * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
   */
  @Deprecated
  public Similarity getSimilarity() {
    ensureOpen();
    return similarity;
  }

  /** Expert: Set the interval between indexed terms. Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms. Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term. In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed. In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data. In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
   *
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
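   *
   * <p>For example (illustrative arithmetic only): with the default
   * interval of 128, an index with 12,800,000 unique terms keeps
   * roughly 12,800,000/128 = 100,000 terms in memory, and a random
   * term lookup scans about 128/2 = 64 terms on average.</p>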
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
   */
  @Deprecated
  public void setTermIndexInterval(int interval) {
    ensureOpen();
    config.setTermIndexInterval(interval);
  }

  /** Expert: Return the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
   */
  @Deprecated
  public int getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return config.getTermIndexInterval();
  }

  /**
   * Constructs an IndexWriter for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>. If <code>create</code>
   * is true, then a new, empty index will be created in
   * <code>d</code>, replacing the index already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Constructs an IndexWriter for the index in
   * <code>d</code>, first creating it if it does not
   * already exist. Text will be analyzed with
   * <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>,
   * first creating it if it does not already exist. Text
   * will be analyzed with <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>. If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter on specific commit
   * point, with a custom {@link IndexDeletionPolicy}, for
   * the index in <code>d</code>. Text will be analyzed
   * with <code>a</code>.
   *
   * <p> This is only meaningful if you've used a {@link
   * IndexDeletionPolicy} in the past that keeps more than
   * just the last commit.
   *
   * <p>This operation is similar to {@link #rollback()},
   * except that method can only rollback what's been done
   * with the current instance of IndexWriter since its last
   * commit, whereas this method can rollback to an
   * arbitrary commit point from the past, assuming the
   * {@link IndexDeletionPolicy} has preserved past
   * commits.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
   * @param commit which commit to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a)
        .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Constructs a new IndexWriter per the settings given in <code>conf</code>.
   * Note that the passed in {@link IndexWriterConfig} is
   * privately cloned; if you need to make subsequent "live"
   * changes to the configuration use {@link #getConfig}.
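   *
   * <p>A sketch of both routes (whether a particular setter takes
   * effect "live" is documented on each {@link IndexWriterConfig}
   * setter; the value below is illustrative):</p>
   *
   * <pre>
   * IndexWriter writer = new IndexWriter(d, conf);
   * // later changes must go through the writer's private clone:
   * writer.getConfig().setRAMBufferSizeMB(32.0);
   * </pre>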
   *
   * @param d
   *          the index directory. The index is either created or appended
   *          according to <code>conf.getOpenMode()</code>.
   * @param conf
   *          the configuration settings according to which IndexWriter should
   *          be initialized.
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws LockObtainFailedException
   *           if another writer has this index open (<code>write.lock</code>
   *           could not be obtained)
   * @throws IOException
   *           if the directory cannot be read/written to, or if it does not
   *           exist and <code>conf.getOpenMode()</code> is
   *           <code>OpenMode.APPEND</code> or if there is any other low-level
   *           IO error
   */
  public IndexWriter(Directory d, IndexWriterConfig conf)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    config = (IndexWriterConfig) conf.clone();
    directory = d;
    analyzer = conf.getAnalyzer();
    infoStream = defaultInfoStream;
    writeLockTimeout = conf.getWriteLockTimeout();
    similarity = conf.getSimilarity();
    mergePolicy = conf.getMergePolicy();
    mergePolicy.setIndexWriter(this);
    mergeScheduler = conf.getMergeScheduler();
    bufferedDeletesStream = new BufferedDeletesStream(messageID);
    bufferedDeletesStream.setInfoStream(infoStream);
    poolReaders = conf.getReaderPooling();

    writeLock = directory.makeLock(WRITE_LOCK_NAME);

    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);

    OpenMode mode = conf.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !IndexReader.indexExists(directory);
    }

    boolean success = false;

    // TODO: we should check whether this index is too old,
    // and throw an IndexFormatTooOldExc up front, here,
    // instead of later when merge, applyDeletes, getReader
    // is attempted. I think to do this we should store the
    // oldest segment's version in segments_N.
    try {
      if (create) {
        // Try to read first. This is to allow create
        // against an index that's currently open for
        // searching. In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }

        // Record that we have a change (zero out all
        // segments) pending:
        changeCount++;
        segmentInfos.changed();
      } else {
        segmentInfos.read(directory);

        IndexCommit commit = conf.getIndexCommit();
        if (commit != null) {
          // Swap out all segments, but, keep metadata in
          // SegmentInfos, like version & generation, to
          // preserve write-once. This is important if
          // readers are open against the future commit
          // points.
          if (commit.getDirectory() != directory)
            throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
          SegmentInfos oldInfos = new SegmentInfos();
          oldInfos.read(directory, commit.getSegmentsFileName());
          segmentInfos.replace(oldInfos);
          changeCount++;
          segmentInfos.changed();
          if (infoStream != null)
            message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

      docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      synchronized(this) {
        deleter = new IndexFileDeleter(directory,
                                       conf.getIndexDeletionPolicy(),
                                       segmentInfos, infoStream,
                                       this);
      }

      if (deleter.startingCommitDeleted) {
        // Deletion policy deleted the "head" commit point.
        // We have to mark ourself as changed so that if we
        // are closed w/o any further changes we write a new
        // segments_N file.
        changeCount++;
        segmentInfos.changed();
      }

      if (infoStream != null) {
        messageState();
      }

      success = true;
    } finally {
      if (!success) {
        if (infoStream != null) {
          message("init: hit exception on init; releasing write lock");
        }
        try {
          writeLock.release();
        } catch (Throwable t) {
          // don't mask the original exception
        } finally {
          writeLock = null;
        }
      }
    }
  }

  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
    Directory cfsDir = null;
    try {
      if (info.getUseCompoundFile()) {
        cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
      } else {
        cfsDir = directory;
      }
      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
    } finally {
      if (info.getUseCompoundFile() && cfsDir != null) {
        ((CompoundFileReader) cfsDir).close();
      }
    }
  }

  private FieldInfos getCurrentFieldInfos() throws IOException {
    final FieldInfos fieldInfos;
    if (segmentInfos.size() > 0) {
      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
        // Pre-3.1 index. In this case we sweep all
        // segments, merging their FieldInfos:
        fieldInfos = new FieldInfos();
        for(SegmentInfo info : segmentInfos) {
          final FieldInfos segFieldInfos = getFieldInfos(info);
          final int fieldCount = segFieldInfos.size();
          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
          }
        }
      } else {
        // Already a 3.1 index; just seed the FieldInfos
        // from the last segment
        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
      }
    } else {
      fieldInfos = new FieldInfos();
    }
    return fieldInfos;
  }

  /**
   * Returns the private {@link IndexWriterConfig}, cloned
   * from the {@link IndexWriterConfig} passed to
   * {@link #IndexWriter(Directory, IndexWriterConfig)}.
   * <p>
   * <b>NOTE:</b> some settings may be changed on the
   * returned {@link IndexWriterConfig}, and will take
   * effect in the current IndexWriter instance. See the
   * javadocs for the specific setters in {@link
   * IndexWriterConfig} for details.
   */
  public IndexWriterConfig getConfig() {
    ensureOpen(false);
    return config;
  }

  /**
   * Expert: set the merge policy used by this writer.
   *
   * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
   */
  @Deprecated
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    mergePolicy.setIndexWriter(this);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
    // Required so config.getMergePolicy returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergePolicy(mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   *
   * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
   */
  @Deprecated
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }

  /**
   * Expert: set the merge scheduler used by this writer.
   * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
   */
  @Deprecated
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
    // Required so config.getMergeScheduler returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergeScheduler(mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler(MergeScheduler)
   *
   * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
   */
  @Deprecated
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }

  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds. Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
   */
  @Deprecated
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
   */
  @Deprecated
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document. This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory. This setting refers to the number of running terms,
   * not to the number of different terms.
   *
   * <strong>Note:</strong> this silently truncates large documents, excluding
   * from the index all terms that occur further in the document. If you know
   * your source documents are large, be sure to set this value high enough to
   * accommodate the expected size. If you set it to Integer.MAX_VALUE, then the
   * only limit is your memory, but you should anticipate an OutOfMemoryError.
   *
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be
   * indexed for a field.
   *
   * @deprecated use {@link LimitTokenCountAnalyzer} instead. Note that the
   *             behavior slightly changed - the analyzer limits the number of
   *             tokens per token stream created, while this setting limits the
   *             total number of tokens to index. This only matters if you index
   *             many multi-valued fields though.
   */
  @Deprecated
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   * @deprecated use {@link LimitTokenCountAnalyzer} to limit number of tokens.
   */
  @Deprecated
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public void setReaderTermsIndexDivisor(int divisor) {
    ensureOpen();
    config.setReaderTermsIndexDivisor(divisor);
    if (infoStream != null) {
      message("setReaderTermsIndexDivisor " + divisor);
    }
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#getReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public int getReaderTermsIndexDivisor() {
    ensureOpen();
    return config.getReaderTermsIndexDivisor();
  }

  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment. Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents. Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents. Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
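   *
   * <p>A sketch of flushing strictly by document count through the
   * non-deprecated API (the count is illustrative):</p>
   *
   * <pre>
   * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
   * conf.setMaxBufferedDocs(1000);                                  // flush every 1000 docs
   * conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // not by RAM
   * </pre>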
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    pushMaxBufferedDocs();
    if (infoStream != null) {
      message("setMaxBufferedDocs " + maxBufferedDocs);
    }
    // Required so config.getMaxBufferedDocs returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMaxBufferedDocs(maxBufferedDocs);
  }

  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (config.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = config.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
   */
  @Deprecated
  public int getMaxBufferedDocs() {
    ensureOpen();
    return config.getMaxBufferedDocs();
  }

  /** Determines the amount of RAM that may be used for
   * buffering added documents and deletions before they are
   * flushed to the Directory. Generally for faster
   * indexing performance it's best to flush by RAM usage
   * instead of document count and use as large a RAM buffer
   * as you can.
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents and deletions use this much RAM.
   * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to RAM usage. Note that if
   * flushing by document count is also enabled, then the
   * flush will be triggered by whichever comes first.</p>
   *
   * <p> <b>NOTE</b>: the account of RAM usage for pending
   * deletions is only approximate. Specifically, if you
   * delete by Query, Lucene currently has no way to measure
   * the RAM usage of individual Queries so the accounting
   * will under-estimate and you should compensate by either
   * calling commit() periodically yourself, or by using
   * {@link #setMaxBufferedDeleteTerms} to flush by count
   * instead of RAM usage (each buffered delete Query counts
   * as one).
   *
   * <p> <b>NOTE</b>: because IndexWriter uses
   * <code>int</code>s when managing its internal storage,
   * the absolute maximum value for this setting is somewhat
   * less than 2048 MB. The precise limit depends on
   * various factors, such as how large your documents are,
   * how many fields have norms, etc., so it's best to set
   * this value comfortably under 2048.</p>
   *
   * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
   */
  @Deprecated
  public void setRAMBufferSizeMB(double mb) {
    if (infoStream != null) {
      message("setRAMBufferSizeMB " + mb);
    }
    // Required so config.getRAMBufferSizeMB returns the right value. But this
    // will go away together with the method in 4.0.
    config.setRAMBufferSizeMB(mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
   */
  @Deprecated
  public double getRAMBufferSizeMB() {
    return config.getRAMBufferSizeMB();
  }

  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDeleteTerms
   * is enabled but smaller than 1
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    ensureOpen();
    if (infoStream != null)
      message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
    // Required so config.getMaxBufferedDeleteTerms returns the right value. But
    // this will go away together with the method in 4.0.
    config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
  }
1595
* Returns the number of buffered deleted terms that will
1596
* trigger a flush if enabled.
1597
* @see #setMaxBufferedDeleteTerms
1598
* @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
1601
public int getMaxBufferedDeleteTerms() {
1603
return config.getMaxBufferedDeleteTerms();
1606
  /** Determines how often segment indices are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower.  With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMergeFactor as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>This must never be less than 2.  The default value is 10.
   * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
   */
  @Deprecated
  public void setMergeFactor(int mergeFactor) {
    getLogMergePolicy().setMergeFactor(mergeFactor);
  }

  /**
   * <p>Returns the number of segments that are merged at
   * once and also controls the total number of segments
   * allowed to accumulate in the index.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMergeFactor as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMergeFactor
   * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
   */
  @Deprecated
  public int getMergeFactor() {
    return getLogMergePolicy().getMergeFactor();
  }

  /** If non-null, this will be the default infoStream used
   * by a newly instantiated IndexWriter.
   * @see #setInfoStream
   */
  public static void setDefaultInfoStream(PrintStream infoStream) {
    IndexWriter.defaultInfoStream = infoStream;
  }

  /**
   * Returns the current default infoStream for newly
   * instantiated IndexWriters.
   * @see #setDefaultInfoStream
   */
  public static PrintStream getDefaultInfoStream() {
    return IndexWriter.defaultInfoStream;
  }

  /** If non-null, information about merges, deletes and a
   * message when maxFieldLength is reached will be printed
   * to this.
   */
  public void setInfoStream(PrintStream infoStream) throws IOException {
    ensureOpen();
    this.infoStream = infoStream;
    docWriter.setInfoStream(infoStream);
    deleter.setInfoStream(infoStream);
    bufferedDeletesStream.setInfoStream(infoStream);
    if (infoStream != null)
      messageState();
  }

  private void messageState() throws IOException {
    message("\ndir=" + directory + "\n" +
            "index=" + segString() + "\n" +
            "version=" + Constants.LUCENE_VERSION + "\n" +
            config.toString());
  }

  /**
   * Returns the current infoStream in use by this writer.
   * @see #setInfoStream
   */
  public PrintStream getInfoStream() {
    ensureOpen();
    return infoStream;
  }

  /** Returns true if verbose output is enabled (i.e., infoStream != null). */
  public boolean verbose() {
    return infoStream != null;
  }

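  // Illustrative sketch (assumes "writer" is an open IndexWriter): route the
  // writer's diagnostic messages about flushes, merges and deletes to stdout.
  //
  //   writer.setInfoStream(System.out);
  //   if (writer.verbose()) {
  //     // infoStream is non-null; internal activity will now be logged
  //   }
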
  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
   * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
   * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
   */
  @Deprecated
  public void setWriteLockTimeout(long writeLockTimeout) {
    ensureOpen();
    this.writeLockTimeout = writeLockTimeout;
    // Required so config.getWriteLockTimeout returns the right value. But this
    // will go away together with the method in 4.0.
    config.setWriteLockTimeout(writeLockTimeout);
  }

  /**
   * Returns allowed timeout when acquiring the write lock.
   * @see #setWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
   */
  @Deprecated
  public long getWriteLockTimeout() {
    ensureOpen();
    return writeLockTimeout;
  }

  /**
   * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
   * milliseconds).
   * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
   */
  @Deprecated
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
  }

  /**
   * Returns default write lock timeout for newly
   * instantiated IndexWriters.
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
   */
  @Deprecated
  public static long getDefaultWriteLockTimeout() {
    return IndexWriterConfig.getDefaultWriteLockTimeout();
  }

  /**
   * Commits all changes to an index and closes all
   * associated files.  Note that this may be a costly
   * operation, so, try to re-use a single writer instead of
   * closing and opening a new one.  See {@link #commit()} for
   * caveats about write caching done by some IO devices.
   *
   * <p> If an Exception is hit during close, eg due to disk
   * full or some other reason, then both the on-disk index
   * and the internal state of the IndexWriter instance will
   * be consistent.  However, the close will not be complete
   * even though part of it (flushing buffered documents)
   * may have succeeded, so the write lock will still be
   * held.</p>
   *
   * <p> If you can correct the underlying cause (eg free up
   * some disk space) then you can call close() again.
   * Failing that, if you want to force the write lock to be
   * released (dangerous, because you may then lose buffered
   * docs in the IndexWriter instance) then you can do
   * something like this:</p>
   *
   * <pre>
   * try {
   *   writer.close();
   * } finally {
   *   if (IndexWriter.isLocked(directory)) {
   *     IndexWriter.unlock(directory);
   *   }
   * }
   * </pre>
   *
   * after which, you must be certain not to use the writer
   * instance anymore.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer, again.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void close() throws CorruptIndexException, IOException {
    close(true);
  }

  /**
   * Closes the index with or without waiting for currently
   * running merges to finish.  This is only meaningful when
   * using a MergeScheduler that runs merges in background
   * threads.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer, again.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: it is dangerous to always call
   * close(false), especially when IndexWriter is not open
   * for very long, because this can result in "merge
   * starvation" whereby long merges will never have a
   * chance to finish.  This will cause too many segments in
   * your index over time.</p>
   *
   * @param waitForMerges if true, this call will block
   * until all merges complete; else, it will ask all
   * running merges to abort, wait until those merges have
   * finished (which should be at most a few seconds), and
   * then return.
   */
  public void close(boolean waitForMerges) throws CorruptIndexException, IOException {

    // Ensure that only one thread actually gets to do the closing:
    if (shouldClose()) {
      // If any methods have hit OutOfMemoryError, then abort
      // on close, in case the internal state of IndexWriter
      // or DocumentsWriter is corrupt
      if (hitOOM)
        rollbackInternal();
      else
        closeInternal(waitForMerges);
    }
  }

  // Returns true if this thread should attempt to close, or
  // false if IndexWriter is now closed; else, waits until
  // another thread finishes closing
  synchronized private boolean shouldClose() {
    while(true) {
      if (!closed) {
        if (!closing) {
          closing = true;
          return true;
        } else {
          // Another thread is presently trying to close;
          // wait until it finishes one way (closes
          // successfully) or another (fails to close)
          doWait();
        }
      } else {
        return false;
      }
    }
  }

  private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {

    try {

      if (infoStream != null) {
        message("now flush at close waitForMerges=" + waitForMerges);
      }

      docWriter.close();

      // Only allow a new merge to be triggered if we are
      // going to wait for merges:
      if (!hitOOM) {
        flush(waitForMerges, true);
      }

      if (waitForMerges)
        // Give merge scheduler last chance to run, in case
        // any pending merges are waiting:
        mergeScheduler.merge(this);

      mergePolicy.close();

      synchronized(this) {
        finishMerges(waitForMerges);
        stopMerges = true;
      }

      mergeScheduler.close();

      if (infoStream != null)
        message("now call final commit()");

      if (!hitOOM) {
        commitInternal(null);
      }

      if (infoStream != null)
        message("at close: " + segString());

      synchronized(this) {
        readerPool.close();
        docWriter = null;
        deleter.close();
      }

      if (writeLock != null) {
        writeLock.release();                          // release write lock
        writeLock = null;
      }
      synchronized(this) {
        closed = true;
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "closeInternal");
    } finally {
      synchronized(this) {
        closing = false;
        notifyAll();
        if (!closed) {
          if (infoStream != null)
            message("hit exception while closing");
        }
      }
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    // Pass false because the flush during closing calls getDirectory
    ensureOpen(false);
    return directory;
  }

  /** Returns the analyzer used by this index. */
  public Analyzer getAnalyzer() {
    ensureOpen();
    return analyzer;
  }

  /** Returns total number of docs in this index, including
   *  docs not yet flushed (still in the RAM buffer),
   *  not counting deletions.
   */
  public synchronized int maxDoc() {
    int count;
    if (docWriter != null)
      count = docWriter.getNumDocs();
    else
      count = 0;

    count += segmentInfos.totalDocCount();
    return count;
  }

  /** Returns total number of docs in this index, including
   *  docs not yet flushed (still in the RAM buffer), and
   *  including deletions.  <b>NOTE:</b> buffered deletions
   *  are not counted.  If you really need these to be
   *  counted you should call {@link #commit()} first.
   */
  public synchronized int numDocs() throws IOException {
    int count;
    if (docWriter != null)
      count = docWriter.getNumDocs();
    else
      count = 0;

    for (final SegmentInfo info : segmentInfos) {
      count += info.docCount - numDeletedDocs(info);
    }
    return count;
  }

  public synchronized boolean hasDeletions() throws IOException {
    ensureOpen();
    if (bufferedDeletesStream.any()) {
      return true;
    }
    if (docWriter.anyDeletions()) {
      return true;
    }
    for (final SegmentInfo info : segmentInfos) {
      if (info.hasDeletions()) {
        return true;
      }
    }
    return false;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   *
   * @see MaxFieldLength
   * @deprecated remove in 4.0
   */
  @Deprecated
  private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

  /**
   * Adds a document to this index.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p> Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p> This method periodically flushes pending documents
   * to the Directory (see <a href="#flush">above</a>), and
   * also periodically triggers segment merges in the index
   * according to the {@link MergePolicy} in use.</p>
   *
   * <p>Merges temporarily consume space in the
   * directory. The amount of space required is up to 1X the
   * size of all segments being merged, when no
   * readers/searchers are open against the index, and up to
   * 2X the size of all segments being merged when
   * readers/searchers are open against the index (see
   * {@link #forceMerge(int)} for details). The sequence of
   * primitive merge operations performed is governed by the
   * merge policy.
   *
   * <p>Note that each term in the document can be no longer
   * than 16383 characters, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * <p>Note that it's possible to create an invalid Unicode
   * string in java if a UTF16 surrogate pair is malformed.
   * In this case, the invalid characters are silently
   * replaced with the Unicode replacement character
   * U+FFFD.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }

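  // Illustrative sketch (not from the original source): building and adding a
  // document.  The field names ("id", "body") are made up for the example.
  //
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
  //   doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
  //   writer.addDocument(doc);
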
  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    boolean doFlush = false;
    boolean success = false;
    try {
      try {
        doFlush = docWriter.updateDocument(doc, analyzer, null);
        success = true;
      } finally {
        if (!success && infoStream != null)
          message("hit exception adding document");
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addDocument");
    }
  }

  /**
   * Atomically adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * <p><b>WARNING</b>: the index does not currently record
   * which documents were added as a block.  Today this is
   * fine, because merging will preserve the block (as long
   * as none of them were deleted).  But it's possible in the
   * future that Lucene may more aggressively re-order
   * documents (for example, perhaps to obtain better index
   * compression), in which case you may need to fully
   * re-index your documents at that time.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * <p><b>NOTE</b>: tools that do offline splitting of an index
   * (for example, IndexSplitter in contrib) or
   * re-sorting of documents (for example, IndexSorter in
   * contrib) are not aware of these atomically added documents
   * and will likely break them up.  Use such tools at your
   * own risk!
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Collection<Document> docs) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    addDocuments(docs, analyzer);
  }

  /**
   * Atomically adds a block of documents, analyzed using the
   * provided analyzer, with sequentially assigned document
   * IDs, such that an external reader will see all or none
   * of the documents.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    updateDocuments(null, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Collection)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Collection<Document> docs) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    updateDocuments(delTerm, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents, analyzed using
   * the provided analyzer, with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Collection)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    // TODO: if we backport DWPT we should change arg to Iterable<Document>
    ensureOpen();
    try {
      boolean success = false;
      boolean doFlush = false;
      try {
        doFlush = docWriter.updateDocuments(docs, analyzer, delTerm);
        success = true;
      } finally {
        if (!success && infoStream != null) {
          message("hit exception updating document");
        }
      }
      if (doFlush) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "updateDocuments");
    }
  }

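  // Illustrative sketch: atomically replacing a block of documents keyed by a
  // shared term ("blockId" and the docs are hypothetical).  A reader sees
  // either the old block or the new one, never a mix.
  //
  //   List<Document> block = new ArrayList<Document>();
  //   block.add(parentDoc);
  //   block.add(childDoc);
  //   writer.updateDocuments(new Term("blockId", "7"), block);
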
  /**
   * Deletes the document(s) containing <code>term</code>.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteTerm(term, false)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Term)");
    }
  }

  /**
   * Deletes the document(s) containing any of the
   * terms. All deletes are flushed at the same time.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteTerms(terms)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Term..)");
    }
  }

  /**
   * Deletes the document(s) matching the provided query.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param query the query to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteQuery(query)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Query)");
    }
  }

  /**
   * Deletes the document(s) matching any of the provided queries.
   * All deletes are flushed at the same time.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param queries array of queries to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      if (docWriter.deleteQueries(queries)) {
        flush(true, false);
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteDocuments(Query..)");
    }
  }

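  // Illustrative sketch: deleting by exact term versus by query.  The field
  // name "id" and the range bounds are made-up example values.
  //
  //   writer.deleteDocuments(new Term("id", "42"));   // delete one doc by its key
  //   writer.deleteDocuments(new TermRangeQuery("id", "100", "200", true, true));
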
  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = false;
      boolean success = false;
      try {
        doFlush = docWriter.updateDocument(doc, analyzer, term);
        success = true;
      } finally {
        if (!success && infoStream != null)
          message("hit exception updating document");
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "updateDocument");
    }
  }

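  // Illustrative sketch: updateDocument is the delete-then-add idiom in one
  // atomic call; the term should uniquely identify the document ("id" is a
  // hypothetical key field).
  //
  //   writer.updateDocument(new Term("id", "42"), newVersionOfDoc);
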
  // for test purpose
  final synchronized int getSegmentCount(){
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getNumBufferedDocuments(){
    return docWriter.getNumDocs();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  // for test purpose
  final int getFlushCount() {
    return flushCount.get();
  }

  // for test purpose
  final int getFlushDeletesCount() {
    return flushDeletesCount.get();
  }

  final String newSegmentName() {
    // Cannot synchronize on IndexWriter because that causes
    // deadlock
    synchronized(segmentInfos) {
      // Important to increment changeCount so that the
      // segmentInfos is written on close.  Otherwise we
      // could close, re-open and re-return the same segment
      // name that was previously returned which can cause
      // problems at least with ConcurrentMergeScheduler.
      changeCount++;
      segmentInfos.changed();
      return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
    }
  }

  /** If non-null, information about merges will be printed to this.
   */
  private PrintStream infoStream;
  private static PrintStream defaultInfoStream;

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize() throws CorruptIndexException, IOException {
    forceMerge(1, true);
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
    forceMerge(maxNumSegments, true);
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void optimize(boolean doWait) throws CorruptIndexException, IOException {
    forceMerge(1, doWait);
  }

  /**
   * Forces merge policy to merge segments until there's <=
   * maxNumSegments.  The actual merges to be
   * executed are determined by the {@link MergePolicy}.
   *
   * <p>This is a horribly costly operation, especially when
   * you pass a small {@code maxNumSegments}; usually you
   * should only call this if the index is static (will no
   * longer be changed).</p>
   *
   * <p>Note that this requires up to 2X the index size free
   * space in your Directory (3X if you're using compound
   * file format).  For example, if your index size is 10 MB
   * then you need up to 20 MB free for this to complete (30
   * MB if you're using compound file format).  Also,
   * it's best to call {@link #commit()} afterwards,
   * to allow IndexWriter to free up disk space.</p>
   *
   * <p>If some but not all readers re-open while merging
   * is underway, this will cause > 2X temporary
   * space to be consumed as those new readers will then
   * hold open the temporary segments at that time.  It is
   * best not to re-open readers while merging is running.</p>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>In general, once this completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit, for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost.  However, it may have
   * been partially merged (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format.  This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * <p>This call will merge those segments present in
   * the index when the call started.  If other threads are
   * still adding documents and flushing segments, those
   * newly created segments will not be merged unless you
   * call forceMerge again.</p>
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @see MergePolicy#findMerges
   *
   * @param maxNumSegments maximum number of segments left
   * in the index after merging finishes
   */
  public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException {
    forceMerge(maxNumSegments, true);
  }

  /** Just like {@link #forceMerge(int)}, except you can
   *  specify whether the call should block until
   *  all merging completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads.
   *
   *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   *  you should immediately close the writer.  See <a
   *  href="#OOME">above</a> for details.</p>
   */
  public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
    ensureOpen();

    if (maxNumSegments < 1)
      throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);

    if (infoStream != null) {
      message("forceMerge: index now " + segString());
      message("now flush at forceMerge");
    }

    flush(true, true);

    synchronized(this) {
      resetMergeExceptions();
      segmentsToMerge.clear();
      for(SegmentInfo info : segmentInfos) {
        segmentsToMerge.put(info, Boolean.TRUE);
      }
      mergeMaxNumSegments = maxNumSegments;

      // Now mark all pending & running merges as isMaxNumSegments:
      for(final MergePolicy.OneMerge merge : pendingMerges) {
        merge.maxNumSegments = maxNumSegments;
        segmentsToMerge.put(merge.info, Boolean.TRUE);
      }

      for (final MergePolicy.OneMerge merge : runningMerges) {
        merge.maxNumSegments = maxNumSegments;
        segmentsToMerge.put(merge.info, Boolean.TRUE);
      }
    }

    maybeMerge(maxNumSegments);

    if (doWait) {
      synchronized(this) {
        while(true) {

          if (hitOOM) {
            throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
          }

          if (mergeExceptions.size() > 0) {
            // Forward any exceptions in background merge
            // threads to the current thread:
            final int size = mergeExceptions.size();
            for(int i=0;i<size;i++) {
              final MergePolicy.OneMerge merge = mergeExceptions.get(i);
              if (merge.maxNumSegments != -1) {
                IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
                final Throwable t = merge.getException();
                if (t != null)
                  err.initCause(t);
                throw err;
              }
            }
          }

          if (maxNumSegmentsMergesPending())
            doWait();
          else
            break;
        }
      }

      // If close is called while we are still
      // running, throw an exception so the calling
      // thread will know merging did not
      // complete
      ensureOpen();
    }

    // NOTE: in the ConcurrentMergeScheduler case, when
    // doWait is false, we can return immediately while
    // background threads accomplish the merging
  }

  /** Returns true if any merges in pendingMerges or
   *  runningMerges are maxNumSegments merges. */
  private synchronized boolean maxNumSegmentsMergesPending() {
    for (final MergePolicy.OneMerge merge : pendingMerges) {
      if (merge.maxNumSegments != -1)
        return true;
    }

    for (final MergePolicy.OneMerge merge : runningMerges) {
      if (merge.maxNumSegments != -1)
        return true;
    }

    return false;
  }

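  // Illustrative sketch: force-merging a static index down to one segment,
  // then committing so the deleter can reclaim the merged-away files.
  //
  //   writer.forceMerge(1);   // blocks until the merge completes
  //   writer.commit();
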
  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void expungeDeletes(boolean doWait) throws CorruptIndexException, IOException {
    forceMergeDeletes(doWait);
  }

  /** Just like {@link #forceMergeDeletes()}, except you can
   *  specify whether the call should block until the
   *  operation completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   */
  public void forceMergeDeletes(boolean doWait)
    throws CorruptIndexException, IOException {
    ensureOpen();

    flush(true, true);

    if (infoStream != null)
      message("forceMergeDeletes: index now " + segString());

    MergePolicy.MergeSpecification spec;

    synchronized(this) {
      spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
      if (spec != null) {
        final int numMerges = spec.merges.size();
        for(int i=0;i<numMerges;i++)
          registerMerge(spec.merges.get(i));
      }
    }

    mergeScheduler.merge(this);

    if (spec != null && doWait) {
      final int numMerges = spec.merges.size();
      synchronized(this) {
        boolean running = true;
        while(running) {

          if (hitOOM) {
            throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
          }

          // Check each merge that MergePolicy asked us to
          // do, to see if any of them are still running and
          // if any of them have hit an exception.
          running = false;
          for(int i=0;i<numMerges;i++) {
            final MergePolicy.OneMerge merge = spec.merges.get(i);
            if (pendingMerges.contains(merge) || runningMerges.contains(merge))
              running = true;
            Throwable t = merge.getException();
            if (t != null) {
              IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
              ioe.initCause(t);
              throw ioe;
            }
          }

          // If any of our merges are still running, wait:
          if (running)
            doWait();
        }
      }
    }

    // NOTE: in the ConcurrentMergeScheduler case, when
    // doWait is false, we can return immediately while
    // background threads accomplish the merging
  }

  /** This method has been deprecated, as it is horribly
   *  inefficient and very rarely justified.  Lucene's
   *  multi-segment search performance has improved over
   *  time, and the default TieredMergePolicy now targets
   *  segments with deletions.
   *
   * @deprecated */
  @Deprecated
  public void expungeDeletes() throws CorruptIndexException, IOException {
    forceMergeDeletes();
  }

  /**
   * Forces merging of all segments that have deleted
   * documents.  The actual merges to be executed are
   * determined by the {@link MergePolicy}.  For example,
   * the default {@link TieredMergePolicy} will only
   * pick a segment if the percentage of
   * deleted docs is over 10%.
   *
   * <p>This is often a horribly costly operation; rarely
   * is it warranted.</p>
   *
   * <p>To see how
   * many deletions you have pending in your index, call
   * {@link IndexReader#numDeletedDocs}.</p>
   *
   * <p><b>NOTE</b>: this method first flushes a new
   * segment (if there are indexed documents), and applies
   * all buffered deletes.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   */
  public void forceMergeDeletes() throws CorruptIndexException, IOException {
    forceMergeDeletes(true);
  }

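  // Illustrative sketch: reclaiming space held by deleted docs.  Rarely
  // warranted; the merge policy normally handles this over time.  The field
  // name "state" is a made-up example.
  //
  //   writer.deleteDocuments(new Term("state", "expired"));
  //   writer.forceMergeDeletes();   // blocks until the merges complete
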
  /**
   * Expert: asks the mergePolicy whether any merges are
   * necessary now and if so, runs the requested merges and
   * then iterates (testing again whether merges are needed) until no
   * more merges are returned by the mergePolicy.
   *
   * Explicit calls to maybeMerge() are usually not
   * necessary. The most common case is when merge policy
   * parameters have changed.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   */
  public final void maybeMerge() throws CorruptIndexException, IOException {
    maybeMerge(-1);
  }

  private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
    ensureOpen(false);
    updatePendingMerges(maxNumSegments);
    mergeScheduler.merge(this);
  }

  private synchronized void updatePendingMerges(int maxNumSegments)
    throws CorruptIndexException, IOException {
    assert maxNumSegments == -1 || maxNumSegments > 0;

    if (stopMerges) {
      return;
    }

    // Do not start new merges if we've hit OOME
    if (hitOOM) {
      return;
    }

    final MergePolicy.MergeSpecification spec;
    if (maxNumSegments != -1) {
      spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
      if (spec != null) {
        final int numMerges = spec.merges.size();
        for(int i=0;i<numMerges;i++) {
          final MergePolicy.OneMerge merge = spec.merges.get(i);
          merge.maxNumSegments = maxNumSegments;
        }
      }
    } else {
      spec = mergePolicy.findMerges(segmentInfos);
    }

    if (spec != null) {
      final int numMerges = spec.merges.size();
      for(int i=0;i<numMerges;i++) {
        registerMerge(spec.merges.get(i));
      }
    }
  }

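  // Illustrative sketch: after changing merge policy parameters, nudging the
  // writer to re-evaluate pending merges.  Assumes the configured policy is a
  // LogMergePolicy; the cast would otherwise fail.
  //
  //   ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4);
  //   writer.maybeMerge();
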
  /** Expert: to be used by a {@link MergePolicy} to avoid
   *  selecting merges for segments already being merged.
   *  The returned collection is not cloned, and thus is
   *  only safe to access if you hold IndexWriter's lock
   *  (which you do when IndexWriter invokes the
   *  MergePolicy).
   *
   *  <p>Do not alter the returned collection! */
  public synchronized Collection<SegmentInfo> getMergingSegments() {
    return mergingSegments;
  }

  /** Expert: the {@link MergeScheduler} calls this method
   *  to retrieve the next merge requested by the
   *  MergePolicy.
   *
   * @lucene.experimental
   */
  public synchronized MergePolicy.OneMerge getNextMerge() {
    if (pendingMerges.size() == 0)
      return null;
    else {
      // Advance the merge from pending to running
      MergePolicy.OneMerge merge = pendingMerges.removeFirst();
      runningMerges.add(merge);
      return merge;
    }
  }

  /**
   * Close the <code>IndexWriter</code> without committing
   * any changes that have occurred since the last commit
   * (or since it was opened, if commit hasn't been called).
   * This removes any temporary files that had been created,
   * after which the state of the index will be the same as
   * it was when commit() was last called or when this
   * writer was first opened.  This also clears a previous
   * call to {@link #prepareCommit}.
   * @throws IOException if there is a low-level IO error
   */
  public void rollback() throws IOException {
    ensureOpen();

    // Ensure that only one thread actually gets to do the closing:
    if (shouldClose())
      rollbackInternal();
  }

  private void rollbackInternal() throws IOException {

    boolean success = false;

    if (infoStream != null ) {
      message("rollback");
    }

    try {
      synchronized(this) {
        finishMerges(false);
        stopMerges = true;
      }

      if (infoStream != null ) {
        message("rollback: done finish merges");
      }

      // Must pre-close these two, in case they increment
      // changeCount so that we can then set it to false
      // before calling closeInternal
      mergePolicy.close();
      mergeScheduler.close();

      bufferedDeletesStream.clear();

      synchronized(this) {

        if (pendingCommit != null) {
          pendingCommit.rollbackCommit(directory);
          deleter.decRef(pendingCommit);
          pendingCommit = null;
          notifyAll();
        }

        // Keep the same segmentInfos instance but replace all
        // of its SegmentInfo instances.  This is so the next
        // attempt to commit using this instance of IndexWriter
        // will always write to a new generation ("write
        // once").
        segmentInfos.rollbackSegmentInfos(rollbackSegments);
        if (infoStream != null ) {
          message("rollback: infos=" + segString(segmentInfos));
        }

        docWriter.abort();

        assert testPoint("rollback before checkpoint");

        // Ask deleter to locate unreferenced files & remove
        // them:
        deleter.checkpoint(segmentInfos, false);
        deleter.refresh();
      }

      // Don't bother saving any changes in our segmentInfos
      readerPool.clear(null);

      lastCommitChangeCount = changeCount;

      success = true;
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "rollbackInternal");
    } finally {
      synchronized(this) {
        if (!success) {
          closing = false;
          notifyAll();
          if (infoStream != null)
            message("hit exception during rollback");
        }
      }
    }

    closeInternal(false);
  }

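  // Illustrative sketch: discarding everything since the last commit.
  //
  //   writer.addDocument(doc);   // a buffered, uncommitted change
  //   writer.rollback();         // index reverts to the last commit; the writer is now closed
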
  /**
   * Delete all documents in the index.
   *
   * <p>This method will drop all buffered documents and will
   *    remove all segments from the index. This change will not be
   *    visible until a {@link #commit()} has been called. This method
   *    can be rolled back using {@link #rollback()}.</p>
   *
   * <p>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).</p>
   *
   * <p>NOTE: this method will forcefully abort all merges
   *    in progress.  If other threads are running {@link
   *    #forceMerge}, {@link #addIndexes(IndexReader[])} or
   *    {@link #forceMergeDeletes} methods, they may receive
   *    {@link MergePolicy.MergeAbortedException}s.
   */
  public synchronized void deleteAll() throws IOException {
    ensureOpen();
    try {
      // Abort any running merges
      finishMerges(false);

      // Remove any buffered docs
      docWriter.abort();

      // Remove all segments
      segmentInfos.clear();

      // Ask deleter to locate unreferenced files & remove them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      // Don't bother saving any changes in our segmentInfos
      readerPool.dropAll();

      // Mark that the index has changed
      ++changeCount;
      segmentInfos.changed();
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "deleteAll");
    } finally {
      if (infoStream != null) {
        message("hit exception during deleteAll");
      }
    }
  }

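  // Illustrative sketch: atomically emptying an index while keeping the writer
  // open; nothing becomes visible to readers until commit().
  //
  //   writer.deleteAll();
  //   writer.commit();   // or writer.rollback() to restore the previous commit
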
  private synchronized void finishMerges(boolean waitForMerges) throws IOException {
    if (!waitForMerges) {

      stopMerges = true;

      // Abort all pending & running merges:
      for (final MergePolicy.OneMerge merge : pendingMerges) {
        if (infoStream != null)
          message("now abort pending merge " + merge.segString(directory));
        merge.abort();
        mergeFinish(merge);
      }
      pendingMerges.clear();

      for (final MergePolicy.OneMerge merge : runningMerges) {
        if (infoStream != null)
          message("now abort running merge " + merge.segString(directory));
        merge.abort();
      }

      // These merges periodically check whether they have
      // been aborted, and stop if so.  We wait here to make
      // sure they all stop.  It should not take very long
      // because the merge threads periodically check if
      // they are aborted.
      while(runningMerges.size() > 0) {
        if (infoStream != null)
          message("now wait for " + runningMerges.size() + " running merge to abort");
        doWait();
      }

      stopMerges = false;
      notifyAll();

      assert 0 == mergingSegments.size();

      if (infoStream != null)
        message("all running merges have aborted");

    } else {
      // waitForMerges() will ensure any running addIndexes finishes.
      // It's fine if a new one attempts to start because from our
      // caller above the call will see that we are in the
      // process of closing, and will throw an
      // AlreadyClosedException.
      waitForMerges();
    }
  }

  /**
   * Wait for any currently outstanding merges to finish.
   *
   * <p>It is guaranteed that any merges started prior to calling this method
   *    will have completed once this method completes.</p>
   */
  public synchronized void waitForMerges() {
    ensureOpen(false);
    if (infoStream != null) {
      message("waitForMerges");
    }
    while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
      doWait();
    }

    // sanity check
    assert 0 == mergingSegments.size();

    if (infoStream != null) {
      message("waitForMerges done");
    }
  }

  /**
   * Called whenever the SegmentInfos has been updated and
   * the index files referenced exist (correctly) in the
   * index directory.
   */
  synchronized void checkpoint() throws IOException {
    changeCount++;
    segmentInfos.changed();
    deleter.checkpoint(segmentInfos, false);
  }

  private synchronized void resetMergeExceptions() {
    mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  }

  private void noDupDirs(Directory... dirs) {
    HashSet<Directory> dups = new HashSet<Directory>();
    for (Directory dir : dirs) {
      if (dups.contains(dir))
        throw new IllegalArgumentException("Directory " + dir + " appears more than once");
      if (dir == directory)
        throw new IllegalArgumentException("Cannot add directory to itself");
      dups.add(dir);
    }
  }

  /**
   * @deprecated use {@link #addIndexes(Directory...)} instead
   */
  @Deprecated
  public void addIndexesNoOptimize(Directory... dirs)
      throws CorruptIndexException, IOException {
    addIndexes(dirs);
  }

  /**
   * Adds all segments from an array of indexes into this index.
   *
   * <p>This may be used to parallelize batch indexing.  A large document
   * collection can be broken into sub-collections.  Each sub-collection can be
   * indexed in parallel, on a different thread, process or machine.  The
   * complete index can then be created by merging sub-collection indexes
   * with this method.
   *
   * <p>
   * <b>NOTE:</b> the index in each {@link Directory} must not be
   * changed (opened by a writer) while this method is
   * running.  This method does not acquire a write lock in
   * each input Directory, so it is up to the caller to
   * enforce this.
   *
   * <p>This method is transactional in how Exceptions are
   * handled: it does not commit a new segments_N file until
   * all indexes are added.  This means if an Exception
   * occurs (for example disk full), then either no indexes
   * will have been added or they all will have been.
   *
   * <p>Note that this requires temporary free space in the
   * {@link Directory} up to 2X the sum of all input indexes
   * (including the starting index).  If readers/searchers
   * are open against the starting index, then temporary
   * free space required will be higher by the size of the
   * starting index (see {@link #forceMerge(int)} for details).
   *
   * <p>
   * <b>NOTE:</b> this method only copies the segments of the incoming indexes
   * and does not merge them. Therefore deleted documents are not removed and
   * the new segments are not merged with the existing ones. Also, if the merge
   * policy allows compound files, then any segment that is not compound is
   * converted to such. However, if the segment is compound, it is copied as-is
   * even if the merge policy does not allow compound files.
   *
   * <p>This requires this index not be among those to be added.
   *
   * <p>
   * <b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
    ensureOpen();

    noDupDirs(dirs);

    try {
      if (infoStream != null)
        message("flush at addIndexes(Directory...)");
      flush(false, true);

      int docCount = 0;
      List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
      Comparator<String> versionComparator = StringHelper.getVersionComparator();
      for (Directory dir : dirs) {
        if (infoStream != null) {
          message("addIndexes: process directory " + dir);
        }
        SegmentInfos sis = new SegmentInfos(); // read infos from dir
        sis.read(dir);
        final Set<String> dsFilesCopied = new HashSet<String>();
        final Map<String, String> dsNames = new HashMap<String, String>();
        for (SegmentInfo info : sis) {
          assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;

          docCount += info.docCount;
          String newSegName = newSegmentName();
          String dsName = info.getDocStoreSegment();

          if (infoStream != null) {
            message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
          }

          // create CFS only if the source segment is not CFS, and MP agrees it
          // should be CFS.
          boolean createCFS;
          synchronized (this) { // Guard segmentInfos
            createCFS = !info.getUseCompoundFile()
                && mergePolicy.useCompoundFile(segmentInfos, info)
                // optimize case only for segments that don't share doc stores
                && versionComparator.compare(info.getVersion(), "3.1") >= 0;
          }

          if (createCFS) {
            copySegmentIntoCFS(info, newSegName);
          } else {
            copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
          }

          infos.add(info);
        }
      }

      synchronized (this) {
        ensureOpen();
        segmentInfos.addAll(infos);
        checkpoint();
      }

    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addIndexes(Directory...)");
    }
  }

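  // Illustrative sketch: merging per-shard indexes built in parallel into one
  // index.  "shard1Dir" and "shard2Dir" are assumed to hold finished,
  // writer-free indexes.
  //
  //   writer.addIndexes(shard1Dir, shard2Dir);
  //   writer.commit();
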
  /**
   * Merges the provided indexes into this index. This method is useful
   * if you use extensions of {@link IndexReader}. Otherwise, using
   * {@link #addIndexes(Directory...)} is highly recommended for performance
   * reasons. It uses the {@link MergeScheduler} and {@link MergePolicy} set
   * on this writer, which may perform merges in parallel.
   *
   * <p>The provided IndexReaders are not closed.
   *
   * <p><b>NOTE:</b> this method does not merge the current segments,
   * only the incoming ones.
   *
   * <p>See {@link #addIndexes(Directory...)} for details on transactional
   * semantics, temporary free space required in the Directory,
   * and non-CFS segments on an Exception.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.
   *
   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
   * with <tt>false</tt>, which aborts all running merges,
   * then any thread still running this method might hit a
   * {@link MergePolicy.MergeAbortedException}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
    ensureOpen();

    try {
      if (infoStream != null)
        message("flush at addIndexes(IndexReader...)");
      flush(false, true);

      String mergedName = newSegmentName();
      // TODO: somehow we should fix this merge so it's
      // abortable so that IW.close(false) is able to stop it
      SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
                                               mergedName, null, payloadProcessorProvider,
                                               ((FieldInfos) docWriter.getFieldInfos().clone()));

      for (IndexReader reader : readers)      // add new indexes
        merger.add(reader);

      int docCount = merger.merge();                // merge 'em

      SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
                                         false, true,
                                         merger.fieldInfos().hasProx(),
                                         merger.fieldInfos().hasVectors());
      setDiagnostics(info, "addIndexes(IndexReader...)");

      boolean useCompoundFile;
      synchronized(this) { // Guard segmentInfos
        if (stopMerges) {
          deleter.deleteNewFiles(info.files());
          return;
        }
        ensureOpen();
        useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
      }

      // Now create the compound file if needed
      if (useCompoundFile) {
        merger.createCompoundFile(mergedName + ".cfs", info);

        // delete new non cfs files directly: they were never
        // registered with IFD
        synchronized(this) {
          deleter.deleteNewFiles(info.files());
        }
        info.setUseCompoundFile(true);
      }

      // Register the new segment
      synchronized(this) {
        if (stopMerges) {
          deleter.deleteNewFiles(info.files());
          return;
        }
        ensureOpen();
        segmentInfos.add(info);
        checkpoint();
      }

    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "addIndexes(IndexReader...)");
    }
  }

  /** Copies the segment into the IndexWriter's directory, as a compound segment. */
  private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
    String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
    Collection<String> files = info.files();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
    for (String file : files) {
      String newFileName = segName + IndexFileNames.stripSegmentName(file);
      if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
          && !IndexFileNames.isSeparateNormsFile(file)) {
        cfsWriter.addFile(file, info.dir);
      } else {
        assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
        info.dir.copy(directory, file, newFileName);
      }
    }

    // Create the .cfs
    cfsWriter.close();

    info.dir = directory;
    info.name = segName;
    info.setUseCompoundFile(true);
  }

  /** Copies the segment files as-is into the IndexWriter's directory. */
  private void copySegmentAsIs(SegmentInfo info, String segName,
      Map<String, String> dsNames, Set<String> dsFilesCopied)
      throws IOException {
    // Determine if the doc store of this segment needs to be copied. It's
    // only relevant for segments that share doc store with others,
    // because the DS might have been copied already, in which case we
    // just want to update the DS name of this SegmentInfo.
    // NOTE: pre-3x segments include a null DSName if they don't share doc
    // store. The following code ensures we don't accidentally insert
    // 'null' to the map.
    String dsName = info.getDocStoreSegment();
    final String newDsName;
    if (dsName != null) {
      if (dsNames.containsKey(dsName)) {
        newDsName = dsNames.get(dsName);
      } else {
        dsNames.put(dsName, segName);
        newDsName = segName;
      }
    } else {
      newDsName = segName;
    }

    // Copy the segment files
    for (String file: info.files()) {
      final String newFileName;
      if (IndexFileNames.isDocStoreFile(file)) {
        newFileName = newDsName + IndexFileNames.stripSegmentName(file);
        if (dsFilesCopied.contains(newFileName)) {
          continue;
        }
        dsFilesCopied.add(newFileName);
      } else {
        newFileName = segName + IndexFileNames.stripSegmentName(file);
      }

      assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
      info.dir.copy(directory, file, newFileName);
    }

    info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
    info.dir = directory;
    info.name = segName;
  }

  /**
   * A hook for extending classes to execute operations after pending added and
   * deleted documents have been flushed to the Directory but before the change
   * is committed (new segments_N file written).
   */
  protected void doAfterFlush() throws IOException {}

  /**
   * A hook for extending classes to execute operations before pending added and
   * deleted documents are flushed to the Directory.
   */
  protected void doBeforeFlush() throws IOException {}

  /** Expert: prepare for commit.
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer.  See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @see #prepareCommit(Map) */
  public final void prepareCommit() throws CorruptIndexException, IOException {
    ensureOpen();
    prepareCommit(null);
  }

  /** <p>Expert: prepare for commit, specifying
   *  commitUserData Map (String -> String).  This does the
   *  first phase of 2-phase commit. This method does all
   *  steps necessary to commit changes since this writer
   *  was opened: flushes pending added and deleted docs,
   *  syncs the index files, writes most of next segments_N
   *  file.  After calling this you must call either {@link
   *  #commit()} to finish the commit, or {@link
   *  #rollback()} to revert the commit and undo all changes
   *  done since the writer was opened.</p>
   *
   *  You can also just call {@link #commit(Map)} directly
   *  without prepareCommit first in which case that method
   *  will internally call prepareCommit.
   *
   *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   *  you should immediately close the writer.  See <a
   *  href="#OOME">above</a> for details.</p>
   *
   *  @param commitUserData Opaque Map (String->String)
   *  that's recorded into the segments file in the index,
   *  and retrievable by {@link
   *  IndexReader#getCommitUserData}.  Note that when
   *  IndexWriter commits itself during {@link #close}, the
   *  commitUserData is unchanged (just carried over from
   *  the prior commit).  If this is null then the previous
   *  commitUserData is kept.  Also, the commitUserData will
   *  only "stick" if there are actually changes in the
   *  index to commit.
   */
  public final void prepareCommit(Map<String, String> commitUserData)
      throws CorruptIndexException, IOException {
    ensureOpen(false);

    if (hitOOM) {
      throw new IllegalStateException(
          "this writer hit an OutOfMemoryError; cannot commit");
    }

    if (pendingCommit != null)
      throw new IllegalStateException(
          "prepareCommit was already called with no corresponding call to commit");

    if (infoStream != null)
      message("prepareCommit: flush");

    boolean anySegmentsFlushed = false;
    SegmentInfos toCommit = null;
    boolean success = false;
    try {
      try {
        synchronized (this) {
          anySegmentsFlushed = doFlush(true);
          readerPool.commit(segmentInfos);
          toCommit = (SegmentInfos) segmentInfos.clone();
          pendingCommitChangeCount = changeCount;
          // This protects the segmentInfos we are now going
          // to commit.  This is important in case, eg, while
          // we are trying to sync all referenced files, a
          // merge completes which would otherwise have
          // removed the files we are now syncing.
          deleter.incRef(toCommit, false);
        }
        success = true;
      } finally {
        if (!success && infoStream != null) {
          message("hit exception during prepareCommit");
        }
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "prepareCommit");
    }

    success = false;
    try {
      if (anySegmentsFlushed) {
        maybeMerge();
      }
      success = true;
    } finally {
      if (!success) {
        synchronized (this) {
          deleter.decRef(toCommit);
        }
      }
    }

    startCommit(toCommit, commitUserData);
  }

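  // Illustrative sketch of the two-phase commit protocol this method enables,
  // coordinating Lucene with another transactional resource ("db" is
  // hypothetical):
  //
  //   writer.prepareCommit();     // phase 1: index changes synced, not yet visible
  //   try {
  //     db.prepareCommit();       // phase 1 on the other resource
  //     writer.commit();          // phase 2: make the index changes visible
  //     db.commit();
  //   } catch (Exception e) {
  //     writer.rollback();        // abandon the prepared commit
  //   }
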
// Used only by commit, below; lock order is commitLock -> IW
3457
private final Object commitLock = new Object();
3460
* <p>Commits all pending changes (added & deleted
3461
* documents, segment merges, added
3462
* indexes, etc.) to the index, and syncs all referenced
3463
* index files, such that a reader will see the changes
3464
* and the index updates will survive an OS or machine
3465
* crash or power loss. Note that this does not wait for
3466
* any running background merges to finish. This may be a
3467
* costly operation, so you should test the cost in your
3468
* application and do it only when really necessary.</p>
3470
* <p> Note that this operation calls Directory.sync on
3471
* the index files. That call should not return until the
3472
* file contents & metadata are on stable storage. For
3473
* FSDirectory, this calls the OS's fsync. But, beware:
3474
* some hardware devices may in fact cache writes even
3475
* during fsync, and return before the bits are actually
3476
* on stable storage, to give the appearance of faster
3477
* performance. If you have such a device, and it does
3478
* not have a battery backup (for example) then on power
3479
* loss it may still lose data. Lucene cannot guarantee
3480
* consistency on such devices. </p>
3482
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3483
* you should immediately close the writer. See <a
3484
* href="#OOME">above</a> for details.</p>
* @see #prepareCommit
*/
public final void commit() throws CorruptIndexException, IOException {
commit(null);
}

/** Commits all changes to the index, specifying a
* commitUserData Map (String -> String). This just
* calls {@link #prepareCommit(Map)} (if you didn't
* already call it) and then {@link #finishCommit}.
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
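*
* <p>A hedged example (the key and value are purely
* illustrative): an application can record how far it has
* consumed an external event stream, so that after a crash
* it knows where to resume:</p>
*
* <pre>
* Map&lt;String,String&gt; userData = new HashMap&lt;String,String&gt;();
* userData.put("lastProcessedId", "12345");
* writer.commit(userData);
* </pre>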
*/
public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {

commitInternal(commitUserData);
}

private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {

if (infoStream != null) {
message("commit: start");
}

synchronized(commitLock) {
if (infoStream != null) {
message("commit: enter lock");
}

if (pendingCommit == null) {
if (infoStream != null) {
message("commit: now prepare");
}
prepareCommit(commitUserData);
} else if (infoStream != null) {
message("commit: already prepared");
}

private synchronized final void finishCommit() throws CorruptIndexException, IOException {

if (pendingCommit != null) {

if (infoStream != null)
message("commit: pendingCommit != null");
pendingCommit.finishCommit(directory);
if (infoStream != null)
message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
rollbackSegments = pendingCommit.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true);

// Matches the incRef done in startCommit:
deleter.decRef(pendingCommit);
pendingCommit = null;

} else if (infoStream != null) {
message("commit: pendingCommit == null; skip");
}

if (infoStream != null) {
message("commit: done");
}
/** NOTE: flushDocStores is ignored now (hardwired to
* true); this method is only here for backwards
* compatibility. */
protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
flush(triggerMerge, flushDeletes);
}

/**
* Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
* @param applyAllDeletes whether pending deletes should also
* be applied
*/
protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {

// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
// in turn can take a long time to run and we don't want
// to hold the lock for that. In the case of
// ConcurrentMergeScheduler this can lead to deadlock
// when it stalls due to too many running merges.

// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}

// TODO: this method should not have to be entirely
// synchronized, ie, merges should be allowed to commit
// even while a flush is happening
private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
assert testPoint("startDoFlush");

// We may be flushing because it was triggered by doc
// count, del count, ram usage (in which case flush
// pending is already set), or we may be flushing
// due to external event eg getReader or commit is
// called (in which case we now set it, and this will
// pause all threads):
flushControl.setFlushPendingNoWait("explicit flush");

boolean success = false;

if (infoStream != null) {
message(" start flush: applyAllDeletes=" + applyAllDeletes);
message(" index before flush " + segString());
}

final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
if (newSegment != null) {
setDiagnostics(newSegment, "flush");
segmentInfos.add(newSegment);
}

if (!applyAllDeletes) {
// If deletes alone are consuming > 1/2 our RAM
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
if (applyAllDeletes) {
if (infoStream != null) {
message("apply all deletes during flush");
}

flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream != null) {
message("drop 100% deleted segments: " + result.allDeleted);
}
for (SegmentInfo info : result.allDeleted) {
// If a merge has already registered for this
// segment, we leave it in the readerPool; the
// merge will skip merging it and will then drop
// it once it's done:
if (!mergingSegments.contains(info)) {
segmentInfos.remove(info);
if (readerPool != null) {
readerPool.drop(info);

bufferedDeletesStream.prune(segmentInfos);

assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
} else if (infoStream != null) {
message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}

flushCount.incrementAndGet();

return newSegment != null;

} catch (OutOfMemoryError oom) {
handleOOM(oom, "doFlush");
} finally {
flushControl.clearFlushPending();
if (!success && infoStream != null)
message("hit exception during flush");
}

/** Expert: Return the total size of all index files currently cached in memory.
* Useful when managing flushing by RAM usage. */
public final long ramSizeInBytes() {
return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
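// A hedged caller-side sketch (the writer variable is
// illustrative): applications that manage flushing manually
// can poll this value, e.g.:
//
//   if (writer.ramSizeInBytes() > 32 * 1024 * 1024) {
//     writer.commit(); // flush once ~32 MB is buffered
//   }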
/** Expert: Return the number of documents currently
* buffered in RAM. */
public final synchronized int numRamDocs() {
return docWriter.getNumDocs();

private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);

/** Carefully merges deletes for the segments we just
* merged. This is tricky because, although merging will
* clear all deletes (compacts the documents), new
* deletes may have been flushed to the segments since
* the merge was started. This method "carries over"
* such new deletes onto the newly merged segment, and
* saves the resulting deletes file (incrementing the
* delete generation for merge.info). If no deletes were
* flushed, no new deletes file is saved. */
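// Worked example of the carry-over (all numbers are
// illustrative): merging segment A (100 docs, 10 deleted at
// merge start) with segment B (50 docs, none deleted) yields
// a merged segment whose first 90 docs come from A. If a
// delete of B's doc 7 is flushed while the merge runs, it
// must be re-applied to the merged segment at doc 90 + 7 = 97.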
synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {

assert testPoint("startCommitMergeDeletes");

final List<SegmentInfo> sourceSegments = merge.segments;

if (infoStream != null)
message("commitMergeDeletes " + merge.segString(directory));

// Carefully merge deletes that occurred after we
// started merging:
int docUpto = 0;
int delCount = 0;
long minGen = Long.MAX_VALUE;

for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
final SegmentReader previousReader = merge.readerClones.get(i);
if (previousReader == null) {
// Reader was skipped because it was 100% deletions
continue;
}
final SegmentReader currentReader = merge.readers.get(i);
if (previousReader.hasDeletions()) {

// There were deletes on this segment when the merge
// started. The merge has collapsed away those
// deletes, but, if new deletes were flushed since
// the merge started, we must now carefully keep any
// newly flushed deletes, mapping them to the new
// docIDs:

if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
// This means this segment has had new deletes
// committed since we started the merge, so we
// must merge them:
for(int j=0;j<docCount;j++) {
if (previousReader.isDeleted(j))
assert currentReader.isDeleted(j);

if (currentReader.isDeleted(j)) {
mergedReader.doDelete(docUpto);

docUpto += docCount - previousReader.numDeletedDocs();

} else if (currentReader.hasDeletions()) {
// This segment had no deletes before but now it
// does:
for(int j=0; j<docCount; j++) {
if (currentReader.isDeleted(j)) {
mergedReader.doDelete(docUpto);

// No deletes before or after
docUpto += info.docCount;

assert mergedReader.numDeletedDocs() == delCount;

mergedReader.hasChanges = delCount > 0;

// If new deletes were applied while we were merging
// (which happens if eg commit() or getReader() is
// called during our merge), then it better be the case
// that the delGen has increased for all our merged
// segments:
assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();

mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {

assert testPoint("startCommitMerge");

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
}

if (infoStream != null)
message("commitMerge: " + merge.segString(directory) + " index=" + segString());

assert merge.registerDone;

// If merge was explicitly aborted, or, if rollback() or
// rollbackTransaction() had been called since our merge
// started (which results in an unqualified
// deleter.refresh() call that will remove any index
// file that current segments does not reference), we
// abort this merge:
if (merge.isAborted()) {
if (infoStream != null)
message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");

commitMergedDeletes(merge, mergedReader);

// If the doc store we are using has been closed and
// is now in compound format (but wasn't when we
// started), then we will switch to the compound
// format as well:

assert !segmentInfos.contains(merge.info);

final boolean allDeleted = mergedReader.numDocs() == 0;

if (infoStream != null && allDeleted) {
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}

final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
segmentInfos.applyMergeChanges(merge, dropSegment);

readerPool.drop(merge.info);

if (infoStream != null) {
message("after commit: " + segString());
}

closeMergeReaders(merge, false);

// Must note the change to segmentInfos so any commits
// in-flight don't lose it:
checkpoint();

// If the merged segments had pending changes, clear
// them so that they don't bother writing them to
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);

if (merge.maxNumSegments != -1) {
// cascade the forceMerge:
if (!segmentsToMerge.containsKey(merge.info)) {
segmentsToMerge.put(merge.info, Boolean.FALSE);
final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {

if (infoStream != null) {
message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
}

// Set the exception on the merge, so if
// forceMerge is waiting on us it sees the root
// cause exc:
merge.setException(t);
addMergeException(merge);

if (t instanceof MergePolicy.MergeAbortedException) {
// We can ignore this exception (it happens when
// close(false) or rollback is called), unless the
// merge involves segments from external directories,
// in which case we must throw it so, for example, the
// rollbackTransaction code in addIndexes* is
// executed.
if (merge.isExternal)
throw (MergePolicy.MergeAbortedException) t;
} else if (t instanceof IOException)
throw (IOException) t;
else if (t instanceof RuntimeException)
throw (RuntimeException) t;
else if (t instanceof Error)
throw (Error) t;
else
// Should not get here
throw new RuntimeException(t);
/**
* Merges the indicated segments, replacing them in the stack with a
* new segment.
*
* @lucene.experimental
*/
public void merge(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {

boolean success = false;

final long t0 = System.currentTimeMillis();
//System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());

if (infoStream != null)
message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());

mergeSuccess(merge);

} catch (Throwable t) {
handleMergeException(t, merge);

synchronized(this) {
if (infoStream != null)
message("hit exception during merge");
if (merge.info != null && !segmentInfos.contains(merge.info))
deleter.refresh(merge.info.name);

// This merge (and, generally, any change to the
// segments) may now enable new merges, so we call
// merge policy & update pending merges.
if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
updatePendingMerges(merge.maxNumSegments);
}

} catch (OutOfMemoryError oom) {
handleOOM(oom, "merge");
}
if (infoStream != null && merge.info != null) {
message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
}
//System.out.println(Thread.currentThread().getName() + ": merge end");
/** Hook that's called when the specified merge is complete. */
void mergeSuccess(MergePolicy.OneMerge merge) {
}

/** Checks whether this merge involves any segments
* already participating in a merge. If not, this merge
* is "registered", meaning we record that its segments
* are now participating in a merge, and true is
* returned. Else (the merge conflicts) false is
* returned. */
final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {

if (merge.registerDone)
return true;

if (stopMerges) {
merge.abort();
throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
}

boolean isExternal = false;
for(SegmentInfo info : merge.segments) {
if (mergingSegments.contains(info)) {
return false;
}
if (!segmentInfos.contains(info)) {
return false;
}
if (info.dir != directory) {
isExternal = true;
}
if (segmentsToMerge.containsKey(info)) {
merge.maxNumSegments = mergeMaxNumSegments;
}
}

ensureValidMerge(merge);

pendingMerges.add(merge);

if (infoStream != null)
message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");

merge.mergeGen = mergeGen;
merge.isExternal = isExternal;

// OK it does not conflict; now record that this merge
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
message("registerMerge merging=" + mergingSegments);
for(SegmentInfo info : merge.segments) {
message("registerMerge info=" + info);
mergingSegments.add(info);
}

// Merge is now registered
merge.registerDone = true;
/** Does initial setup for a merge, which is fast but holds
* the synchronized lock on the IndexWriter instance. */
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;

if (infoStream != null) {
message("hit exception in mergeInit");
}

synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {

assert testPoint("startMergeInit");

assert merge.registerDone;
assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}

// TODO: is there any perf benefit to sorting
// merged segments? eg biggest to smallest?

if (merge.info != null)
// mergeInit already done
return;

if (merge.isAborted())
return;

boolean hasVectors = false;
for (SegmentInfo sourceSegment : merge.segments) {
if (sourceSegment.getHasVectors()) {
hasVectors = true;
}
}

// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names:
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, true, false, hasVectors);

// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);

if (result.anyDeletes) {

if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream != null) {
message("drop 100% deleted segments: " + result.allDeleted);
}
for(SegmentInfo info : result.allDeleted) {
segmentInfos.remove(info);
if (merge.segments.contains(info)) {
mergingSegments.remove(info);
merge.segments.remove(info);
}
}
if (readerPool != null) {
readerPool.drop(result.allDeleted);
}

merge.info.setBufferedDeletesGen(result.gen);

// Lock order: IW -> BD
bufferedDeletesStream.prune(segmentInfos);

Map<String,String> details = new HashMap<String,String>();
details.put("mergeMaxNumSegments", ""+merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
setDiagnostics(merge.info, "merge", details);

if (infoStream != null) {
message("merge seg=" + merge.info.name);
}

assert merge.estimatedMergeBytes == 0;
for(SegmentInfo info : merge.segments) {
if (info.docCount > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.docCount;
final double delRatio = ((double) delCount)/info.docCount;
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
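// e.g. a 100 MB segment with 25% of its docs deleted
// contributes an estimated 75 MB to the merged size.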
// TODO: I think this should no longer be needed (we
// now build CFS before adding segment to the infos);
// however, on removing it, tests fail for some reason!

// Also enroll the merged segment into mergingSegments;
// this prevents it from getting selected for a merge
// after our merge is done but while we are building the
// CFS:
mergingSegments.add(merge.info);
}

private void setDiagnostics(SegmentInfo info, String source) {
setDiagnostics(info, source, null);
}

private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
Map<String,String> diagnostics = new HashMap<String,String>();
diagnostics.put("source", source);
diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
diagnostics.put("os", Constants.OS_NAME);
diagnostics.put("os.arch", Constants.OS_ARCH);
diagnostics.put("os.version", Constants.OS_VERSION);
diagnostics.put("java.version", Constants.JAVA_VERSION);
diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
if (details != null) {
diagnostics.putAll(details);
}
info.setDiagnostics(diagnostics);
/** Does finishing for a merge, which is fast but holds
* the synchronized lock on the IndexWriter instance. */
final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {

// forceMerge, addIndexes or finishMerges may be waiting
// on merges to finish.
notifyAll();

// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
final List<SegmentInfo> sourceSegments = merge.segments;
for(SegmentInfo info : sourceSegments) {
mergingSegments.remove(info);
}
// TODO: if we remove the add in _mergeInit, we should
// also remove this:
mergingSegments.remove(merge.info);
merge.registerDone = false;
}

runningMerges.remove(merge);
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
final int numSegments = merge.readers.size();
Throwable th = null;

boolean anyChanges = false;
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
if (merge.readers.get(i) != null) {
anyChanges |= readerPool.release(merge.readers.get(i), drop);
} catch (Throwable t) {

merge.readers.set(i, null);

if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {

merge.readerClones.get(i).close();
} catch (Throwable t) {

// This was a private clone and we had the
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
merge.readerClones.set(i, null);

if (suppressExceptions && anyChanges) {

// If any error occurred, throw it.
if (!suppressExceptions && th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
/** Does the actual (time-consuming) work of the merge,
* but without holding the synchronized lock on the
* IndexWriter instance. */
final private int mergeMiddle(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {

merge.checkAborted(directory);

final String mergedName = merge.info.name;

int mergedDocCount = 0;

List<SegmentInfo> sourceSegments = merge.segments;

SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
payloadProcessorProvider,
((FieldInfos) docWriter.getFieldInfos().clone()));

if (infoStream != null) {
message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getHasVectors());
}

merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();

// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;

int totDocCount = 0;
int segUpto = 0;
while(segUpto < sourceSegments.size()) {
final SegmentInfo info = sourceSegments.get(segUpto);

// Hold onto the "live" reader; we will use this to
// commit merged deletes
final SegmentReader reader = readerPool.get(info, true,
MERGE_READ_BUFFER_SIZE,
-config.getReaderTermsIndexDivisor());
merge.readers.add(reader);

// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
final SegmentReader clone = (SegmentReader) reader.clone(true);
merge.readerClones.add(clone);

if (clone.numDocs() > 0) {
merger.add(clone);
totDocCount += clone.numDocs();
}
segUpto++;
}

if (infoStream != null) {
message("merge: total " + totDocCount + " docs");
}

merge.checkAborted(directory);

// This is where all the work happens:
mergedDocCount = merge.info.docCount = merger.merge();

// LUCENE-3403: set hasVectors after merge(), so that it is properly set.
merge.info.setHasVectors(merger.fieldInfos().hasVectors());

assert mergedDocCount == totDocCount;

if (infoStream != null) {
message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
}

anyNonBulkMerges |= merger.getAnyNonBulkMerges();

assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
// Very important to do this before opening the reader
// because SegmentReader must know if prox was written for
// this segment:
merge.info.setHasProx(merger.fieldInfos().hasProx());

boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}

if (useCompoundFile) {

final String compoundFileName = IndexFileNames.segmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);

if (infoStream != null) {
message("create compound file " + compoundFileName);
}
merger.createCompoundFile(compoundFileName, merge.info);

} catch (IOException ioe) {
synchronized(this) {
if (merge.isAborted()) {
// This can happen if rollback or close(false)
// is called -- fall through to logic below to
// remove the partially created CFS:
handleMergeException(ioe, merge);

} catch (Throwable t) {
handleMergeException(t, merge);

if (infoStream != null) {
message("hit exception creating compound file during merge");
}
synchronized(this) {
deleter.deleteFile(compoundFileName);
deleter.deleteNewFiles(merge.info.files());

synchronized(this) {

// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(merge.info.files());

if (merge.isAborted()) {
if (infoStream != null) {
message("abort merge after building CFS");
}
deleter.deleteFile(compoundFileName);

merge.info.setUseCompoundFile(true);

if (infoStream != null) {
message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();

final int termsIndexDivisor;
final boolean loadDocStores;

if (mergedSegmentWarmer != null) {
// Load terms index & doc stores so the segment
// warmer can run searches, load documents/term
// vectors:
termsIndexDivisor = config.getReaderTermsIndexDivisor();
loadDocStores = true;
} else {
termsIndexDivisor = -1;
loadDocStores = false;
}

// TODO: in the non-realtime case, we may want to only
// keep deletes (it's costly to open entire reader
// when we just need deletes)

final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);

if (poolReaders && mergedSegmentWarmer != null) {
mergedSegmentWarmer.warm(mergedReader);
}

if (!commitMerge(merge, mergedReader)) {
// commitMerge will return false if this merge was aborted
return 0;
}

synchronized(this) {
if (readerPool.release(mergedReader)) {
// Must checkpoint after releasing the
// mergedReader since it may have written a new
// deletes file:
checkpoint();
}
}

// Readers are already closed in commitMerge if we didn't hit
// an exc:
closeMergeReaders(merge, true);

return mergedDocCount;
synchronized void addMergeException(MergePolicy.OneMerge merge) {
assert merge.getException() != null;
if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
mergeExceptions.add(merge);
}

// For test purposes.
final int getBufferedDeleteTermsSize() {
return docWriter.getPendingDeletes().terms.size();
}

// For test purposes.
final int getNumBufferedDeleteTerms() {
return docWriter.getPendingDeletes().numTermDeletes.get();
}

// utility routines for tests
synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}

/** @lucene.internal */
public synchronized String segString() throws IOException {
return segString(segmentInfos);
}

/** @lucene.internal */
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfo s : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(s));
}
return buffer.toString();
}

/** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info);
try {
if (reader != null) {
buffer.append(reader.toString());
} else {
buffer.append(info.toString(directory, 0));
if (info.dir != directory) {
buffer.append("**");
}
}
} finally {
if (reader != null) {
readerPool.release(reader);
}
}
return buffer.toString();
private synchronized void doWait() {
// NOTE: the callers of this method should in theory
// be able to do simply wait(), but, as a defense
// against thread timing hazards where notifyAll()
// fails to be called, we wait for at most 1 second
// and then return so caller can check if wait
// conditions are satisfied:
try {
wait(1000);
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}

private boolean keepFullyDeletedSegments;

/** Only for testing.
*
* @lucene.internal */
void keepFullyDeletedSegments() {
keepFullyDeletedSegments = true;
}

boolean getKeepFullyDeletedSegments() {
return keepFullyDeletedSegments;
}

// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
for(final String fileName: files) {
assert directory.fileExists(fileName): "file " + fileName + " does not exist";
// If this trips it means we are missing a call to
// .checkpoint somewhere, because by the time we
// are called, deleter should know about every
// file referenced by the current head
// segmentInfos:
assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
}
return true;
}

/** Walk through all files referenced by the current
* segmentInfos and ask the Directory to sync each file,
* if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit
* it. */
private void startCommit(SegmentInfos toSync, Map<String,String> commitUserData) throws IOException {

assert testPoint("startStartCommit");
assert pendingCommit == null;

if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}

try {

if (infoStream != null)
message("startCommit(): start");

synchronized(this) {

assert lastCommitChangeCount <= changeCount;

if (pendingCommitChangeCount == lastCommitChangeCount) {
if (infoStream != null) {
message(" skip startCommit(): no changes pending");
}
deleter.decRef(toSync);
return;
}

// First, we clone & incref the segmentInfos we intend
// to sync, then, without locking, we sync() all files
// referenced by toSync, in the background.

if (infoStream != null)
message("startCommit index=" + segString(toSync) + " changeCount=" + changeCount);

assert filesExist(toSync);

if (commitUserData != null) {
toSync.setUserData(commitUserData);
}
}

assert testPoint("midStartCommit");

boolean pendingCommitSet = false;

try {
// This call can take a long time -- 10s of seconds
// or more. We do it without sync:
directory.sync(toSync.files(directory, false));

assert testPoint("midStartCommit2");

synchronized(this) {

assert pendingCommit == null;

assert segmentInfos.getGeneration() == toSync.getGeneration();

// Exception here means nothing is prepared
// (this method unwinds everything it did on
// an exception)
toSync.prepareCommit(directory);

pendingCommitSet = true;
pendingCommit = toSync;
}

if (infoStream != null) {
message("done all syncs");
}

assert testPoint("midStartCommitSuccess");

} finally {
synchronized(this) {

// Have our master segmentInfos record the
// generations we just prepared. We do this
// on error or success so we don't
// double-write a segments_N file.
segmentInfos.updateGeneration(toSync);

if (!pendingCommitSet) {
if (infoStream != null) {
message("hit exception committing segments file");
}

deleter.decRef(toSync);
}
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "startCommit");
}
assert testPoint("finishStartCommit");
/**
* Returns <code>true</code> iff the index in the named directory is
* currently locked.
* @param directory the directory to check for a lock
* @throws IOException if there is a low-level IO error
*/
public static boolean isLocked(Directory directory) throws IOException {
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}

/**
* Forcibly unlocks the index in the named directory.
* <p>
* Caution: this should only be used by failure recovery code,
* when it is known that no other process nor thread is in fact
* currently accessing this index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
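/* A hedged recovery-time sketch, safe only when no other
 * process or thread can be writing to this index:
 *
 *   if (IndexWriter.isLocked(directory)) {
 *     IndexWriter.unlock(directory); // clear a stale write lock
 *   }
 */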
/**
* Specifies maximum field length (in number of tokens/terms) in
* {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
* the value set by the constructor.
*
* @deprecated use {@link LimitTokenCountAnalyzer} instead.
*/
public static final class MaxFieldLength {

private int limit;
private String name;

/**
* Private type-safe-enum-pattern constructor.
*
* @param name instance name
* @param limit maximum field length
*/
private MaxFieldLength(String name, int limit) {
this.name = name;
this.limit = limit;
}

/**
* Public constructor to allow users to specify the maximum field size limit.
*
* @param limit The maximum field length
*/
public MaxFieldLength(int limit) {
this("User-specified", limit);
}

public int getLimit() {
return limit;
}

public String toString()
{
return name + ":" + limit;
}

/** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
public static final MaxFieldLength UNLIMITED
= new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);

/**
* Sets the maximum field length to
* {@link #DEFAULT_MAX_FIELD_LENGTH}
*/
public static final MaxFieldLength LIMITED
= new MaxFieldLength("LIMITED", 10000);
/** If {@link #getReader} has been called (ie, this writer
* is in near real-time mode), then after a merge
* completes, this class can be invoked to warm the
* reader on the newly merged segment, before the merge
* commits. This is not required for near real-time
* search, but will reduce search latency on opening a
* new near real-time reader after a merge completes.
*
* @lucene.experimental
*
* <p><b>NOTE</b>: warm is called before any deletes have
* been carried over to the merged segment. */
public static abstract class IndexReaderWarmer {
public abstract void warm(IndexReader reader) throws IOException;
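/* A minimal warmer sketch (the config variable and the query
 * are illustrative): run a cheap search against the newly
 * merged segment so its data structures are loaded before the
 * merge commits.
 *
 *   config.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
 *     @Override
 *     public void warm(IndexReader reader) throws IOException {
 *       IndexSearcher searcher = new IndexSearcher(reader);
 *       try {
 *         searcher.search(new TermQuery(new Term("id", "0")), 1);
 *       } finally {
 *         searcher.close();
 *       }
 *     }
 *   });
 */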
/**
* Set the merged segment warmer. See {@link IndexReaderWarmer}.
*
* @deprecated use
* {@link IndexWriterConfig#setMergedSegmentWarmer}
* instead.
*/
public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
config.setMergedSegmentWarmer(warmer);
}

/**
* Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
*
* @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
*/
public IndexReaderWarmer getMergedSegmentWarmer() {
return config.getMergedSegmentWarmer();
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream != null) {
message("hit OutOfMemoryError inside " + location);
}
hitOOM = true;
throw oom;
}

// Used only by assert for testing. Current points:
// midStartCommitSuccess
// finishStartCommit
// startCommitMergeDeletes
// DocumentsWriter.ThreadState.init start
boolean testPoint(String name) {
return true;
}

synchronized boolean nrtIsCurrent(SegmentInfos infos) {
//System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}

synchronized boolean isClosed() {
return closed;
}

/** Expert: remove any index files that are no longer
* used.
*
* <p> IndexWriter normally deletes unused files itself,
* during indexing. However, on Windows, which disallows
* deletion of open files, if there is a reader open on
* the index then those files cannot be deleted. This is
* fine, because IndexWriter will periodically retry
* the deletion.</p>
*
* <p> However, IndexWriter doesn't try that often: only
* on open, close, flushing a new segment, and finishing
* a merge. If you don't do any of these actions with your
* IndexWriter, you'll see the unused files linger. If
* that's a problem, call this method to delete them
* (once you've closed the open readers that were
* preventing their deletion).
*
* <p> In addition, you can call this method to delete
* unreferenced index commits. This might be useful if you
* are using an {@link IndexDeletionPolicy} which holds
* onto index commits until some criteria are met, but those
* commits are no longer needed. Otherwise, those commits will
* be deleted the next time commit() is called.
*/
public synchronized void deleteUnusedFiles() throws IOException {
ensureOpen(false);
deleter.deletePendingFiles();
deleter.revisitPolicy();
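// Typical sequence (hedged sketch; reader and writer are
// illustrative): close the reader that was pinning old files,
// then ask the writer to retry the deletions:
//
//   reader.close();
//   writer.deleteUnusedFiles();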
// Called by DirectoryReader.doClose
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
}

/**
* Sets the {@link PayloadProcessorProvider} to use when merging payloads.
* Note that the given <code>pcp</code> will be invoked for every segment that
* is merged, not only external ones that are given through
* {@link #addIndexes}. If you want only the payloads of the external segments
* to be processed, you can return <code>null</code> whenever a
* {@link DirPayloadProcessor} is requested for the {@link Directory} of the
* {@link IndexWriter}.
* <p>
* The default is <code>null</code> which means payloads are processed
* normally (copied) during segment merges. You can also unset it by passing
* <code>null</code>.
* <p>
* <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
* immediately, potentially for already running merges too. If you want to be
* sure it is used for further operations only, such as {@link #addIndexes} or
* {@link #forceMerge}, you can call {@link #waitForMerges()} before.
*/
public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
ensureOpen();
payloadProcessorProvider = pcp;
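/* A hedged sketch of the "external segments only" pattern the
 * javadoc above describes, assuming this class's
 * getDirProcessor(Directory) hook; myDirProcessor is
 * hypothetical:
 *
 *   writer.setPayloadProcessorProvider(new PayloadProcessorProvider() {
 *     @Override
 *     public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
 *       // skip payload processing for this writer's own directory
 *       return dir == writer.getDirectory() ? null : myDirProcessor;
 *     }
 *   });
 */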
/**
* Returns the {@link PayloadProcessorProvider} that is used during segment
* merges to process payloads.
*/
public PayloadProcessorProvider getPayloadProcessorProvider() {
ensureOpen();
return payloadProcessorProvider;
}

// decides when flushes happen
final class FlushControl {

private boolean flushPending;
private boolean flushDeletes;
private int delCount;
private int docCount;
private boolean flushing;

private synchronized boolean setFlushPending(String reason, boolean doWait) {
if (flushPending || flushing) {
if (doWait) {
while(flushPending || flushing) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);

if (infoStream != null) {
message("now trigger flush reason=" + reason);
}
flushPending = true;
return flushPending;

public synchronized void setFlushPendingNoWait(String reason) {
setFlushPending(reason, false);
}

public synchronized boolean getFlushPending() {
return flushPending;
}

public synchronized boolean getFlushDeletes() {
return flushDeletes;
}

public synchronized void clearFlushPending() {
if (infoStream != null) {
message("clearFlushPending");
}
flushPending = false;
flushDeletes = false;
public synchronized void clearDeletes() {
flushDeletes = false;
}

public synchronized boolean waitUpdate(int docInc, int delInc) {
return waitUpdate(docInc, delInc, false);
}

public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
while(flushPending) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}

docCount += docInc;
delCount += delInc;

// skipWait is only used when a thread is BOTH adding
// a doc and buffering a del term, and the adding of
// the doc already triggered a flush
if (skipWait) {
return false;
}

final int maxBufferedDocs = config.getMaxBufferedDocs();
if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
docCount >= maxBufferedDocs) {
return setFlushPending("maxBufferedDocs", true);
}

final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
delCount >= maxBufferedDeleteTerms) {
flushDeletes = true;
return setFlushPending("maxBufferedDeleteTerms", true);
}

return flushByRAMUsage("add delete/doc");
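// Illustrative trigger values (examples only): with
// config.setMaxBufferedDocs(1000) a flush becomes pending at
// the 1000th buffered doc; with setMaxBufferedDeleteTerms(500)
// at the 500th buffered delete term; otherwise the decision
// falls through to the RAM check in flushByRAMUsage below.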
public synchronized boolean flushByRAMUsage(String reason) {
final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final long limit = (long) (ramBufferSizeMB*1024*1024);
long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {

// DocumentsWriter may be able to free up some
// RAM:
// Lock order: FC -> DW
docWriter.balanceRAM();

used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
return setFlushPending("ram full: " + reason, false);

final FlushControl flushControl = new FlushControl();