package org.apache.lucene.index;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
21
import java.io.Reader;
22
import java.util.Random;
24
import org.apache.lucene.analysis.Analyzer;
25
import org.apache.lucene.analysis.ReusableAnalyzerBase;
26
import org.apache.lucene.analysis.TokenStream;
27
import org.apache.lucene.analysis.Tokenizer;
28
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
29
import org.apache.lucene.document.Document;
30
import org.apache.lucene.document.Field;
31
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
32
import org.apache.lucene.store.Directory;
33
import org.apache.lucene.util.LuceneTestCase;
35
class RepeatingTokenStream extends Tokenizer {
37
private final Random random;
38
private final float percentDocs;
39
private final int maxTF;
41
CharTermAttribute termAtt;
44
public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
47
this.percentDocs = percentDocs;
49
this.termAtt = addAttribute(CharTermAttribute.class);
53
public boolean incrementToken() throws IOException {
57
termAtt.append(value);
64
public void reset() throws IOException {
66
if (random.nextFloat() < percentDocs) {
67
num = random.nextInt(maxTF) + 1;
75
public class TestTermdocPerf extends LuceneTestCase {
77
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
78
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
80
Analyzer analyzer = new Analyzer() {
82
public TokenStream tokenStream(String fieldName, Reader reader) {
87
Document doc = new Document();
88
doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
89
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
90
TEST_VERSION_CURRENT, analyzer)
91
.setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100));
92
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
94
for (int i=0; i<ndocs; i++) {
95
writer.addDocument(doc);
103
public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
104
Directory dir = newDirectory();
106
long start = System.currentTimeMillis();
107
addDocs(random, dir, ndocs, "foo", "val", maxTF, percentDocs);
108
long end = System.currentTimeMillis();
109
if (VERBOSE) System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));
111
IndexReader reader = IndexReader.open(dir, true);
112
TermEnum tenum = reader.terms(new Term("foo","val"));
113
TermDocs tdocs = reader.termDocs();
115
start = System.currentTimeMillis();
118
for (int i=0; i<iter; i++) {
120
while (tdocs.next()) {
125
end = System.currentTimeMillis();
126
if (VERBOSE) System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end-start));
131
public void testTermDocPerf() throws IOException {
132
// performance test for 10% of documents containing a term
133
// doTest(100000, 10000,3,.1f);