package org.apache.lucene.index;

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import junit.framework.Assert;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util._TestUtil;
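/**
 * Stress test: several IndexingThreads concurrently add, update and delete
 * documents in one Directory while a second Directory is built serially
 * from the surviving documents; the two indexes are then verified to be
 * equivalent (stored fields, postings and term vectors).
 */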
public class TestStressIndexing2 extends LuceneTestCase {
  static int maxFields=4;
  static int bigFieldSize=10;
  static boolean sameFieldOrder=false;
  static int mergeFactor=3;
  static int maxBufferedDocs=3;
  static int seed=0;
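  /**
   * IndexWriter that randomly calls Thread.yield() from IndexWriter's
   * internal testPoint hook, shaking up thread scheduling so races in
   * concurrent indexing are more likely to surface.
   */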
  public class MockIndexWriter extends IndexWriter {

    public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
      super(dir, conf);
    }

    @Override
    boolean testPoint(String name) {
      //      if (name.equals("startCommit")) {
      if (random.nextInt(4) == 2)
        Thread.yield();
      return true;
    }
  }
  public void testRandomIWReader() throws Throwable {
    Directory dir = newDirectory();

    // TODO: verify equals using IW.getReader
    DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
    IndexReader reader = dw.writer.getReader();
    dw.writer.commit();
    verifyEquals(random, reader, dir, "id");
    reader.close();
    dw.writer.close();
    dir.close();
  }
  public void testRandom() throws Throwable {
    Directory dir1 = newDirectory();
    Directory dir2 = newDirectory();
    // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
    int maxThreadStates = 1+random.nextInt(10);
    boolean doReaderPooling = random.nextBoolean();
    Map<String,Document> docs = indexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling);
    indexSerial(random, docs, dir2);

    // verifyEquals(dir1, dir1, "id");
    // verifyEquals(dir2, dir2, "id");

    verifyEquals(dir1, dir2, "id");
    dir1.close();
    dir2.close();
  }
  public void testMultiConfig() throws Throwable {
    // test lots of smaller different params together
    int num = 3 * RANDOM_MULTIPLIER; // assumption: original iteration count was elided
    for (int i = 0; i < num; i++) { // increase iterations for better testing
      if (VERBOSE) {
        System.out.println("\n\nTEST: top iter=" + i);
      }
      sameFieldOrder=random.nextBoolean();
      mergeFactor=random.nextInt(3)+2;
      maxBufferedDocs=random.nextInt(3)+2;
      int maxThreadStates = 1+random.nextInt(10);
      boolean doReaderPooling = random.nextBoolean();
      seed++;

      int nThreads=random.nextInt(5)+1;
      int iter=random.nextInt(5)+1;
      int range=random.nextInt(20)+1;
      Directory dir1 = newDirectory();
      Directory dir2 = newDirectory();
      if (VERBOSE) {
        System.out.println("  nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor);
      }
      Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling);
      if (VERBOSE) {
        System.out.println("TEST: index serial");
      }
      indexSerial(random, docs, dir2);
      if (VERBOSE) {
        System.out.println("TEST: verify");
      }
      verifyEquals(dir1, dir2, "id");
      dir1.close();
      dir2.close();
    }
  }
  static Term idTerm = new Term("id","");
  IndexingThread[] threads;
  static Comparator<Fieldable> fieldNameComparator = new Comparator<Fieldable>() {
    public int compare(Fieldable o1, Fieldable o2) {
      return o1.name().compareTo(o2.name());
    }
  };
  // This test avoids using any extra synchronization in the multiple
  // indexing threads to test that IndexWriter does correctly synchronize
  // everything.

  public static class DocsAndWriter {
    Map<String,Document> docs;
    IndexWriter writer;
  }
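  /**
   * Runs nThreads IndexingThreads against a single MockIndexWriter and
   * returns both the writer (still open, so the caller can use getReader())
   * and the merged map of documents each thread expects to have survived.
   */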
  public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
    IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB(
        0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()));
    w.setInfoStream(VERBOSE ? System.out : null);
    w.commit();
    setUseCompoundFile(w.getConfig().getMergePolicy(), false);
    setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);
    /***
    w.setMaxMergeDocs(Integer.MAX_VALUE);
    w.setMaxFieldLength(10000);
    w.setRAMBufferSizeMB(1);
    w.setMergeFactor(10);
    ***/

    threads = new IndexingThread[nThreads];
    for (int i=0; i<threads.length; i++) {
      IndexingThread th = new IndexingThread();
      th.w = w;
      th.base = 1000000*i;
      th.range = range;
      th.iterations = iterations;
      threads[i] = th;
    }

    for (int i=0; i<threads.length; i++) {
      threads[i].start();
    }
    for (int i=0; i<threads.length; i++) {
      threads[i].join();
    }

    for (int i=0; i<threads.length; i++) {
      IndexingThread th = threads[i];
      synchronized(th) {
        docs.putAll(th.docs);
      }
    }

    _TestUtil.checkIndex(dir);
    DocsAndWriter dw = new DocsAndWriter();
    dw.docs = docs;
    dw.writer = w;
    return dw;
  }
  public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates,
                                          boolean doReaderPooling) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
    for(int iter=0;iter<3;iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }
      IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)
          .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
          .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
      w.setInfoStream(VERBOSE ? System.out : null);
      setUseCompoundFile(w.getConfig().getMergePolicy(), false);
      setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);

      threads = new IndexingThread[nThreads];
      for (int i=0; i<threads.length; i++) {
        IndexingThread th = new IndexingThread();
        th.w = w;
        th.base = 1000000*i;
        th.range = range;
        th.iterations = iterations;
        threads[i] = th;
      }

      for (int i=0; i<threads.length; i++) {
        threads[i].start();
      }
      for (int i=0; i<threads.length; i++) {
        threads[i].join();
      }

      w.close();

      for (int i=0; i<threads.length; i++) {
        IndexingThread th = threads[i];
        synchronized(th) {
          docs.putAll(th.docs);
        }
      }
    }

    _TestUtil.checkIndex(dir);

    return docs;
  }
  public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
    IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy()));

    // index all docs in a single thread
    Iterator<Document> iter = docs.values().iterator();
    while (iter.hasNext()) {
      Document d = iter.next();
      ArrayList<Fieldable> fields = new ArrayList<Fieldable>();
      fields.addAll(d.getFields());
      // put fields in same order each time
      Collections.sort(fields, fieldNameComparator);

      Document d1 = new Document();
      d1.setBoost(d.getBoost());
      for (int i=0; i<fields.size(); i++) {
        d1.add(fields.get(i));
      }
      w.addDocument(d1);
      // System.out.println("indexing "+d1);
    }

    w.close();
  }
  public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
    IndexReader r2 = IndexReader.open(dir2);
    verifyEquals(r1, r2, idField);
    r2.close();
  }

  public static void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
    IndexReader r1 = IndexReader.open(dir1, true);
    IndexReader r2 = IndexReader.open(dir2, true);
    verifyEquals(r1, r2, idField);
    r1.close();
    r2.close();
  }
  private static void printDocs(IndexReader r) throws Throwable {
    IndexReader[] subs = r.getSequentialSubReaders();
    for(IndexReader sub : subs) {
      System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
      for(int docID=0;docID<sub.maxDoc();docID++) {
        Document doc = sub.document(docID);
        if (!sub.isDeleted(docID)) {
          System.out.println("    docID=" + docID + " id:" + doc.get("id"));
        } else {
          System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
        }
      }
    }
  }
  public static void verifyEquals(IndexReader r1, IndexReader r2, String idField) throws Throwable {
    if (VERBOSE) {
      System.out.println("\nr1 docs:");
      printDocs(r1);
      System.out.println("\nr2 docs:");
      printDocs(r2);
    }
    if (r1.numDocs() != r2.numDocs()) {
      assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs();
    }
    boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc());

    int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping

    TermDocs termDocs1 = r1.termDocs();
    TermDocs termDocs2 = r2.termDocs();

    // create mapping from id2 space to id1 based on idField
    idField = StringHelper.intern(idField);
    TermEnum termEnum = r1.terms (new Term (idField, ""));
    do {
      Term term = termEnum.term();
      if (term==null || term.field() != idField) break;

      termDocs1.seek (termEnum);
      if (!termDocs1.next()) {
        // This doc is deleted and wasn't replaced
        termDocs2.seek(termEnum);
        assertFalse(termDocs2.next());
        continue;
      }

      int id1 = termDocs1.doc();
      assertFalse(termDocs1.next());

      termDocs2.seek(termEnum);
      assertTrue(termDocs2.next());
      int id2 = termDocs2.doc();
      assertFalse(termDocs2.next());

      r2r1[id2] = id1;

      // verify stored fields are equivalent
      try {
        verifyEquals(r1.document(id1), r2.document(id2));
      } catch (Throwable t) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term);
        System.out.println("  d1=" + r1.document(id1));
        System.out.println("  d2=" + r2.document(id2));
        throw t;
      }

      try {
        // verify term vectors are equivalent
        verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
      } catch (Throwable e) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
        TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
        System.out.println("  d1=" + tv1);
        if (tv1 != null)
          for(int i=0;i<tv1.length;i++)
            System.out.println("    " + i + ": " + tv1[i]);

        TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
        System.out.println("  d2=" + tv2);
        if (tv2 != null)
          for(int i=0;i<tv2.length;i++)
            System.out.println("    " + i + ": " + tv2[i]);

        throw e;
      }
    } while (termEnum.next());

    // verify postings
    TermEnum termEnum1 = r1.terms (new Term ("", ""));
    TermEnum termEnum2 = r2.terms (new Term ("", ""));

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.numDocs()];
    long[] info2 = new long[r2.numDocs()];

    for(;;) {
      Term term1, term2;

      // iterate until we get some docs
      int len1;
      for(;;) {
        len1 = 0;
        term1 = termEnum1.term();
        if (term1==null) break;
        termDocs1.seek(termEnum1);
        while (termDocs1.next()) {
          int d1 = termDocs1.doc();
          int f1 = termDocs1.freq();
          info1[len1] = (((long)d1)<<32) | f1;
          len1++;
        }
        if (len1 > 0) break;
        if (!termEnum1.next()) break;
      }

      // iterate until we get some docs
      int len2;
      for(;;) {
        len2 = 0;
        term2 = termEnum2.term();
        if (term2==null) break;
        termDocs2.seek(termEnum2);
        while (termDocs2.next()) {
          int d2 = termDocs2.doc();
          int f2 = termDocs2.freq();
          // remap the r2 docid into r1 space before packing
          info2[len2] = (((long)r2r1[d2])<<32) | f2;
          len2++;
        }
        if (len2 > 0) break;
        if (!termEnum2.next()) break;
      }

      if (!hasDeletes)
        assertEquals(termEnum1.docFreq(), termEnum2.docFreq());

      assertEquals(len1, len2);
      if (len1==0) break; // no more terms

      assertEquals(term1, term2);

      // sort info2 to get it into ascending docid order
      Arrays.sort(info2, 0, len2);

      for (int i=0; i<len1; i++) {
        assertEquals(info1[i], info2[i]);
      }

      termEnum1.next();
      termEnum2.next();
    }
  }
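  /**
   * Stored-field equivalence: both field lists are sorted by field name so
   * the comparison is independent of the order the fields were added in.
   */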
  public static void verifyEquals(Document d1, Document d2) {
    List<Fieldable> ff1 = d1.getFields();
    List<Fieldable> ff2 = d2.getFields();

    Collections.sort(ff1, fieldNameComparator);
    Collections.sort(ff2, fieldNameComparator);

    assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());

    for (int i=0; i<ff1.size(); i++) {
      Fieldable f1 = ff1.get(i);
      Fieldable f2 = ff2.get(i);
      if (f1.isBinary()) {
        assert(f2.isBinary());
      } else {
        String s1 = f1.stringValue();
        String s2 = f2.stringValue();
        assertEquals(ff1 + " : " + ff2, s1, s2);
      }
    }
  }
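  /**
   * Term-vector equivalence: same terms and frequencies per vector and, for
   * TermPositionVectors, matching positions and (when present) offsets.
   */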
  public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
    if (d1 == null) {
      assertTrue(d2 == null);
      return;
    }
    assertTrue(d2 != null);

    assertEquals(d1.length, d2.length);
    for(int i=0;i<d1.length;i++) {
      TermFreqVector v1 = d1[i];
      TermFreqVector v2 = d2[i];
      if (v1 == null || v2 == null)
        System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
      assertEquals(v1.size(), v2.size());
      int numTerms = v1.size();
      String[] terms1 = v1.getTerms();
      String[] terms2 = v2.getTerms();
      int[] freq1 = v1.getTermFrequencies();
      int[] freq2 = v2.getTermFrequencies();
      for(int j=0;j<numTerms;j++) {
        if (!terms1[j].equals(terms2[j]))
          assertEquals(terms1[j], terms2[j]);
        assertEquals(freq1[j], freq2[j]);
      }
      if (v1 instanceof TermPositionVector) {
        assertTrue(v2 instanceof TermPositionVector);
        TermPositionVector tpv1 = (TermPositionVector) v1;
        TermPositionVector tpv2 = (TermPositionVector) v2;
        for(int j=0;j<numTerms;j++) {
          int[] pos1 = tpv1.getTermPositions(j);
          int[] pos2 = tpv2.getTermPositions(j);
          assertEquals(pos1.length, pos2.length);
          TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
          TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
          if (offsets1 == null)
            assertTrue(offsets2 == null);
          else
            assertTrue(offsets2 != null);
          for(int k=0;k<pos1.length;k++) {
            assertEquals(pos1[k], pos2[k]);
            if (offsets1 != null) {
              assertEquals(offsets1[k].getStartOffset(),
                           offsets2[k].getStartOffset());
              assertEquals(offsets1[k].getEndOffset(),
                           offsets2[k].getEndOffset());
            }
          }
        }
      }
    }
  }
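  /**
   * One indexing thread. Each thread owns a disjoint id range (the callers
   * above set base to 1000000*i) and a deterministic Random, so runs are
   * reproducible and threads never touch each other's documents. Per
   * iteration it deletes by term (~5%), deletes by query (~5%), or adds or
   * replaces a document with randomized fields (~90%), mirroring every
   * change into its private docs map for later verification.
   */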
  private class IndexingThread extends Thread {
    IndexWriter w;
    int base;
    int range;
    int iterations;
    Map<String,Document> docs = new HashMap<String,Document>();
    Random r;

    public int nextInt(int lim) {
      return r.nextInt(lim);
    }

    // start is inclusive and end is exclusive
    public int nextInt(int start, int end) {
      return start + r.nextInt(end-start);
    }

    char[] buffer = new char[100];

    private int addUTF8Token(int start) {
      final int end = start + nextInt(20);
      if (buffer.length < 1+end) {
        char[] newBuffer = new char[(int) ((1+end)*1.25)];
        System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
        buffer = newBuffer;
      }

      for(int i=start;i<end;i++) {
        int t = nextInt(6);
        if (0 == t && i < end-1) {
          // Make a surrogate pair
          // High surrogate
          buffer[i++] = (char) nextInt(0xd800, 0xdc00);
          // Low surrogate
          buffer[i] = (char) nextInt(0xdc00, 0xe000);
        } else if (t <= 1)
          buffer[i] = (char) nextInt(0x80);
        else if (2 == t)
          buffer[i] = (char) nextInt(0x80, 0x800);
        else if (3 == t)
          buffer[i] = (char) nextInt(0x800, 0xd800);
        else if (4 == t)
          buffer[i] = (char) nextInt(0xe000, 0xffff);
        else {
          // Illegal unpaired surrogate
          if (r.nextBoolean())
            buffer[i] = (char) nextInt(0xd800, 0xdc00);
          else
            buffer[i] = (char) nextInt(0xdc00, 0xe000);
        }
      }
      buffer[end] = ' ';
      return 1+end;
    }

    public String getString(int nTokens) {
      nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1;

      // Half the time make a random UTF8 string
      if (r.nextBoolean())
        return getUTF8String(nTokens);

      // avoid StringBuffer because it adds extra synchronization.
      char[] arr = new char[nTokens*2];
      for (int i=0; i<nTokens; i++) {
        arr[i*2] = (char)('A' + r.nextInt(10));
        arr[i*2+1] = ' ';
      }
      return new String(arr);
    }

    public String getUTF8String(int nTokens) {
      int upto = 0;
      Arrays.fill(buffer, (char) 0);
      for(int i=0;i<nTokens;i++)
        upto = addUTF8Token(upto);
      return new String(buffer, 0, upto);
    }

    public String getIdString() {
      return Integer.toString(base + nextInt(range));
    }

    public void indexDoc() throws IOException {
      Document d = new Document();

      ArrayList<Field> fields = new ArrayList<Field>();
      String idString = getIdString();
      Field idField = newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
      fields.add(idField);

      int nFields = nextInt(maxFields);
      for (int i=0; i<nFields; i++) {

        Field.TermVector tvVal = Field.TermVector.NO;
        switch (nextInt(4)) {
        case 0:
          tvVal = Field.TermVector.NO;
          break;
        case 1:
          tvVal = Field.TermVector.YES;
          break;
        case 2:
          tvVal = Field.TermVector.WITH_POSITIONS;
          break;
        case 3:
          tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
          break;
        }

        switch (nextInt(4)) {
        case 0:
          fields.add(newField("f" + nextInt(100), getString(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, tvVal));
          break;
        case 1:
          fields.add(newField("f" + nextInt(100), getString(0), Field.Store.NO, Field.Index.ANALYZED, tvVal));
          break;
        case 2:
          fields.add(newField("f" + nextInt(100), getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
          break;
        case 3:
          fields.add(newField("f" + nextInt(100), getString(bigFieldSize), Field.Store.YES, Field.Index.ANALYZED, tvVal));
          break;
        }
      }

      if (sameFieldOrder) {
        Collections.sort(fields, fieldNameComparator);
      } else {
        // random placement of id field also
        Collections.swap(fields, nextInt(fields.size()), 0);
      }

      for (int i=0; i<fields.size(); i++) {
        d.add(fields.get(i));
      }
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
      }
      w.updateDocument(idTerm.createTerm(idString), d);
      //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
      docs.put(idString, d);
    }

    public void deleteDoc() throws IOException {
      String idString = getIdString();
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": del id:" + idString);
      }
      w.deleteDocuments(idTerm.createTerm(idString));
      docs.remove(idString);
    }

    public void deleteByQuery() throws IOException {
      String idString = getIdString();
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": del query id:" + idString);
      }
      w.deleteDocuments(new TermQuery(idTerm.createTerm(idString)));
      docs.remove(idString);
    }

    @Override
    public void run() {
      try {
        r = new Random(base+range+seed);
        for (int i=0; i<iterations; i++) {
          int what = nextInt(100);
          if (what < 5) {
            deleteDoc();
          } else if (what < 10) {
            deleteByQuery();
          } else {
            indexDoc();
          }
        }
      } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.toString());
      }

      // pairs with the synchronized(th) blocks in indexRandom* so the main
      // thread sees this thread's writes to docs after join()
      synchronized (this) {
        docs.size();
      }
    }
  }
}