/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18
package org.apache.lucene.benchmark.byTask;
20
import java.io.StringReader;
22
import java.io.FileReader;
23
import java.io.BufferedReader;
24
import java.util.List;
25
import java.util.Iterator;
27
import org.apache.lucene.benchmark.byTask.Benchmark;
28
import org.apache.lucene.benchmark.byTask.feeds.DocData;
29
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
30
import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
31
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
32
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
33
import org.apache.lucene.index.IndexReader;
34
import org.apache.lucene.index.IndexWriter;
35
import org.apache.lucene.index.TermEnum;
36
import org.apache.lucene.index.TermDocs;
38
import junit.framework.TestCase;
/**
 * Test very simply that perf tasks - simple algorithms - are doing what they should.
 */
public class TestPerfTasksLogic extends TestCase {
45
private static final boolean DEBUG = false;
46
static final String NEW_LINE = System.getProperty("line.separator");
48
// properties in effect in all tests here
49
static final String propLines [] = {
50
"directory=RAMDirectory",
55
* @param name test name
57
public TestPerfTasksLogic(String name) {
62
* Test index creation logic
64
public void testIndexAndSearchTasks() throws Exception {
65
// 1. alg definition (required in every "logic" test)
73
"{ CountingSearchTest } : 200",
75
"[ CountingSearchTest > : 70",
76
"[ CountingSearchTest > : 9",
79
// 2. we test this value later
80
CountingSearchTestTask.numSearches = 0;
82
// 3. execute the algorithm (required in every "logic" test)
83
Benchmark benchmark = execBenchmark(algLines);
85
// 4. test specific checks after the benchmark run completed.
86
assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);
87
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
88
// now we should be able to open the index for write.
89
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
91
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
92
assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
/**
 * Test Exhausting Doc Maker logic
 */
99
public void testExhaustDocMaker() throws Exception {
100
// 1. alg definition (required in every "logic" test)
101
String algLines[] = {
102
"# ----- properties ",
103
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker",
104
"doc.add.log.step=1",
105
"doc.term.vector=false",
106
"doc.maker.forever=false",
107
"directory=RAMDirectory",
109
"doc.tokenized=false",
116
"{ CountingSearchTest } : 100",
118
"[ CountingSearchTest > : 30",
119
"[ CountingSearchTest > : 9",
122
// 2. we test this value later
123
CountingSearchTestTask.numSearches = 0;
125
// 3. execute the algorithm (required in every "logic" test)
126
Benchmark benchmark = execBenchmark(algLines);
128
// 4. test specific checks after the benchmark run completed.
129
assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
130
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
131
// now we should be able to open the index for write.
132
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
134
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
135
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
140
* Test Parallel Doc Maker logic (for LUCENE-940)
142
public void testParallelDocMaker() throws Exception {
143
// 1. alg definition (required in every "logic" test)
144
String algLines[] = {
145
"# ----- properties ",
146
"doc.maker="+Reuters20DocMaker.class.getName(),
147
"doc.add.log.step=3",
148
"doc.term.vector=false",
149
"doc.maker.forever=false",
150
"directory=FSDirectory",
152
"doc.tokenized=false",
155
"[ { AddDoc } : * ] : 4 ",
159
// 2. execute the algorithm (required in every "logic" test)
160
Benchmark benchmark = execBenchmark(algLines);
162
// 3. test number of docs in the index
163
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
164
int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
165
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
170
* Test WriteLineDoc and LineDocMaker.
172
public void testLineDocFile() throws Exception {
173
File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");
175
// We will call WriteLineDocs this many times
176
final int NUM_TRY_DOCS = 500;
178
// Creates a line file with first 500 docs from reuters
179
String algLines1[] = {
180
"# ----- properties ",
181
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
182
"doc.maker.forever=false",
183
"line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
185
"{WriteLineDoc()}:" + NUM_TRY_DOCS,
189
Benchmark benchmark = execBenchmark(algLines1);
191
// Verify we got somewhere between 1-500 lines (some
192
// Reuters docs have no body, which WriteLineDoc task
194
BufferedReader r = new BufferedReader(new FileReader(lineFile));
196
while(r.readLine() != null)
199
assertTrue("did not see the right number of docs; should be > 0 and <= " + NUM_TRY_DOCS + " but was " + numLines, numLines > 0 && numLines <= NUM_TRY_DOCS);
201
// Index the line docs
202
String algLines2[] = {
203
"# ----- properties ",
204
"analyzer=org.apache.lucene.analysis.SimpleAnalyzer",
205
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",
206
"docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
207
"doc.maker.forever=false",
218
benchmark = execBenchmark(algLines2);
220
// now we should be able to open the index for write.
221
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
224
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
225
assertEquals(numLines + " lines were were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
232
* Test ReadTokensTask
234
public void testReadTokens() throws Exception {
236
// We will call ReadTokens on this many docs
237
final int NUM_DOCS = 100;
239
// Read tokens from first NUM_DOCS docs from Reuters and
240
// then build index from the same docs
241
String algLines1[] = {
242
"# ----- properties ",
243
"analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
244
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
246
"{ReadTokens}: " + NUM_DOCS,
249
"{AddDoc}: " + NUM_DOCS,
254
Benchmark benchmark = execBenchmark(algLines1);
256
List stats = benchmark.getRunData().getPoints().taskStats();
258
// Count how many tokens all ReadTokens saw
259
int totalTokenCount1 = 0;
260
for (Iterator it = stats.iterator(); it.hasNext();) {
261
TaskStats stat = (TaskStats) it.next();
262
if (stat.getTask().getName().equals("ReadTokens")) {
263
totalTokenCount1 += stat.getCount();
267
// Separately count how many tokens are actually in the index:
268
IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory());
269
assertEquals(NUM_DOCS, reader.numDocs());
271
TermEnum terms = reader.terms();
272
TermDocs termDocs = reader.termDocs();
273
int totalTokenCount2 = 0;
274
while(terms.next()) {
275
termDocs.seek(terms.term());
276
while(termDocs.next())
277
totalTokenCount2 += termDocs.freq();
281
// Make sure they are the same
282
assertEquals(totalTokenCount1, totalTokenCount2);
/**
 * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
 */
288
public void testParallelExhausted() throws Exception {
289
// 1. alg definition (required in every "logic" test)
290
String algLines[] = {
291
"# ----- properties ",
292
"doc.maker="+Reuters20DocMaker.class.getName(),
293
"doc.add.log.step=3",
294
"doc.term.vector=false",
295
"doc.maker.forever=false",
296
"directory=RAMDirectory",
298
"doc.tokenized=false",
302
"{ [ AddDoc]: 4} : * ",
304
"{ [ AddDoc]: 4} : * ",
308
// 2. execute the algorithm (required in every "logic" test)
309
Benchmark benchmark = execBenchmark(algLines);
311
// 3. test number of docs in the index
312
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
313
int ndocsExpected = 2 * 20; // Reuters20DocMaker exhausts after 20 docs.
314
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
318
// create the benchmark and execute it.
319
public static Benchmark execBenchmark(String[] algLines) throws Exception {
320
String algText = algLinesToText(algLines);
321
logTstLogic(algText);
322
Benchmark benchmark = new Benchmark(new StringReader(algText));
327
// catenate alg lines to make the alg text
328
private static String algLinesToText(String[] algLines) {
330
StringBuffer sb = new StringBuffer();
331
for (int i = 0; i < propLines.length; i++) {
332
sb.append(indent).append(propLines[i]).append(NEW_LINE);
334
for (int i = 0; i < algLines.length; i++) {
335
sb.append(indent).append(algLines[i]).append(NEW_LINE);
337
return sb.toString();
340
private static void logTstLogic (String txt) {
343
System.out.println("Test logic of:");
344
System.out.println(txt);
347
/** use reuters and the exhaust mechanism, but to be faster, add 20 docs only... */
348
public static class Reuters20DocMaker extends ReutersDocMaker {
350
protected synchronized DocData getNextDocData() throws Exception {
351
if (nDocs>=20 && !forever) {
352
throw new NoMoreDataException();
355
return super.getNextDocData();
357
public synchronized void resetInputs() {
364
* Test that exhaust in loop works as expected (LUCENE-1115).
366
public void testExhaustedLooped() throws Exception {
367
// 1. alg definition (required in every "logic" test)
368
String algLines[] = {
369
"# ----- properties ",
370
"doc.maker="+Reuters20DocMaker.class.getName(),
371
"doc.add.log.step=3",
372
"doc.term.vector=false",
373
"doc.maker.forever=false",
374
"directory=RAMDirectory",
376
"doc.tokenized=false",
382
" { \"AddDocs\" AddDoc > : * ",
387
// 2. execute the algorithm (required in every "logic" test)
388
Benchmark benchmark = execBenchmark(algLines);
390
// 3. test number of docs in the index
391
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
392
int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
393
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());