1
package org.apache.lucene.benchmark.byTask.tasks;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.IOException;
21
import java.util.Collection;
22
import java.util.HashSet;
23
import java.util.List;
26
import org.apache.lucene.analysis.Analyzer;
27
import org.apache.lucene.benchmark.byTask.PerfRunData;
28
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
29
import org.apache.lucene.document.Document;
30
import org.apache.lucene.document.Fieldable;
31
import org.apache.lucene.index.IndexReader;
32
import org.apache.lucene.search.Collector;
33
import org.apache.lucene.search.IndexSearcher;
34
import org.apache.lucene.search.Query;
35
import org.apache.lucene.search.ScoreDoc;
36
import org.apache.lucene.search.Sort;
37
import org.apache.lucene.search.TopDocs;
38
import org.apache.lucene.search.TopFieldCollector;
39
import org.apache.lucene.search.TopScoreDocCollector;
40
import org.apache.lucene.search.Weight;
41
import org.apache.lucene.store.Directory;
45
* Read index (abstract) task.
46
* Subclasses implement withSearch(), withWarm(), withTraverse() and withRetrieve()
47
* methods to configure the actual action.
49
* <p>Note: All ReadTasks reuse the reader if it is already open.
50
* Otherwise a reader is opened at start and closed at the end.
52
* The <code>search.num.hits</code> config parameter sets
53
* the top number of hits to collect during searching. If
54
* <code>print.hits.field</code> is set, then each hit is
55
* printed along with the value of that field.</p>
57
* <p>Other side effects: none.
59
public abstract class ReadTask extends PerfTask {
61
private final QueryMaker queryMaker;
63
// Builds the task for the given run data and captures the query maker it will use.
public ReadTask(PerfRunData runData) {
66
// NOTE(review): getQueryMaker() is declared abstract in this class, so this call
// runs a subclass override before the subclass's own field initializers and
// constructor body have executed — confirm implementations do not rely on
// their own instance state.
queryMaker = getQueryMaker();
72
// Runs one pass of this read task. From the statements visible here it: obtains an
// IndexSearcher/IndexReader pair (shared if the run data has one, private otherwise),
// loads every non-deleted document as a warm-up, runs one query produced by
// queryMaker, prints the hits when "print.hits.field" is configured, and walks the
// hit list retrieving and highlighting documents. Work performed is tallied in `res`.
// NOTE(review): several lines of this method (the declarations of res/reader/hits/doc,
// some branch headers, and the closing statements) are not visible in this view;
// the comments below describe only what is shown.
public int doLogic() throws Exception {
75
// open reader or use existing one
76
IndexSearcher searcher = getRunData().getIndexSearcher();
80
final boolean closeSearcher;
81
// No searcher is available from the run data, so build a private one.
if (searcher == null) {
82
// open our own reader
83
Directory dir = getRunData().getDirectory();
84
// NOTE(review): the second argument is presumably the read-only flag — confirm
// against the IndexReader API in use.
reader = IndexReader.open(dir, true);
85
searcher = new IndexSearcher(reader);
88
// use existing one; this passes +1 ref to us
89
reader = searcher.getIndexReader();
90
closeSearcher = false;
93
// optionally warm and add num docs traversed to count
96
// Visit every doc id below maxDoc(), skipping deleted ones; each document that
// loads as non-null adds one to res.
for (int m = 0; m < reader.maxDoc(); m++) {
97
if (!reader.isDeleted(m)) {
98
doc = reader.document(m);
99
res += (doc == null ? 0 : 1);
106
Query q = queryMaker.makeQuery();
107
Sort sort = getSort();
109
final int numHits = numHits();
111
// withCollector() == false: the search is run here without createCollector().
if (withCollector() == false) {
113
// Sorted search: collect the top numHits entries ordered by `sort`.
// NOTE(review): presumably guarded by a sort != null test on a line not visible here.
Weight w = searcher.createNormalizedWeight(q);
114
TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
117
!w.scoresDocsOutOfOrder());
118
searcher.search(w, null, collector);
119
hits = collector.topDocs();
121
// Plain top-numHits search.
hits = searcher.search(q, numHits);
124
// Custom-collector path: the visible code does not assign `hits` here
// (the topDocs() call below is commented out).
Collector collector = createCollector();
125
searcher.search(q, null, collector);
126
//hits = collector.topDocs();
129
final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
130
// Print only when there are hits and a non-empty field name was configured.
if (hits != null && printHitsField != null && printHitsField.length() > 0) {
131
System.out.println("totalHits = " + hits.totalHits);
132
System.out.println("maxDoc() = " + reader.maxDoc());
133
System.out.println("numDocs() = " + reader.numDocs());
134
for(int i=0;i<hits.scoreDocs.length;i++) {
135
final int docID = hits.scoreDocs[i].doc;
136
final Document doc = reader.document(docID);
137
System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
141
if (withTraverse()) {
142
// NOTE(review): `hits` is dereferenced here without a visible null check, while
// the custom-collector path above leaves it unassigned by any visible statement.
// If withCollector() and withTraverse() can both be true this looks like a
// NullPointerException — confirm whether a guard exists outside this view.
final ScoreDoc[] scoreDocs = hits.scoreDocs;
143
// Never walk more entries than were actually collected.
int traversalSize = Math.min(scoreDocs.length, traversalSize());
145
if (traversalSize > 0) {
146
boolean retrieve = withRetrieve();
147
// Cap the highlight count at the number of hits available.
int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
148
Analyzer analyzer = getRunData().getAnalyzer();
149
BenchmarkHighlighter highlighter = null;
150
// The highlighter is only obtained when at least one hit will be highlighted.
if (numHighlight > 0) {
151
highlighter = getBenchmarkHighlighter(q);
153
for (int m = 0; m < traversalSize; m++) {
154
int id = scoreDocs[m].doc;
157
Document document = retrieveDoc(reader, id);
158
// A successfully retrieved (non-null) document adds one to res.
res += document != null ? 1 : 0;
159
// Only the first numHighlight hits are highlighted.
if (numHighlight > 0 && m < numHighlight) {
160
// NOTE(review): `document` is used below without a null check even though the
// line above allows for retrieveDoc() returning null — confirm it cannot.
Collection<String> fieldsToHighlight = getFieldsToHighlight(document);
161
for (final String field : fieldsToHighlight) {
162
String text = document.get(field);
163
// Whatever doHighlight() returns is added to the work tally.
res += highlighter.doHighlight(reader, id, field, document, analyzer, text);
177
// Release our +1 ref from above
183
protected Collector createCollector() throws Exception {
184
return TopScoreDocCollector.create(numHits(), true);
188
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
189
return ir.document(id);
193
* Return query maker used for this task.
195
// Invoked by the constructor, which stores the result in the queryMaker field.
public abstract QueryMaker getQueryMaker();
198
* Return true if search should be performed.
200
// Abstract hook: every concrete task must state whether it performs a search.
public abstract boolean withSearch();
202
// Consulted by doLogic(): when this is false the search is run there directly;
// NOTE(review): otherwise it appears to use createCollector() — the branch header
// is not visible in this view, confirm.
public boolean withCollector(){
208
* Return true if warming should be performed.
210
// Abstract hook: every concrete task must state whether it warms.
// NOTE(review): presumably gates the warm-up loop in doLogic(); the call site is
// not visible in this view.
public abstract boolean withWarm();
213
* Return true if, with search, results should be traversed.
215
// Checked in doLogic() before the hit list is walked.
public abstract boolean withTraverse();
217
/** Whether scores should be computed (only useful with
219
public boolean withScore() {
223
/** Whether maxScores should be computed (only useful with
225
public boolean withMaxScore() {
230
* Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
231
* of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
233
* Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
235
* @return Integer.MAX_VALUE
237
public int traversalSize() {
238
return Integer.MAX_VALUE;
241
static final int DEFAULT_SEARCH_NUM_HITS = 10;
245
// Caches the hit limit for this task: reads "search.num.hits" from the run
// configuration, with DEFAULT_SEARCH_NUM_HITS passed as the default, and stores
// it in numHits (declared outside the lines visible here).
public void setup() throws Exception {
247
numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
251
* Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
252
* of hits that are collected during searching. Must be greater than 0.
254
* @return 10 by default, or search.num.hits config if set.
256
public int numHits() {
261
* Return true if, with search and results traversing, docs should be retrieved.
263
// Read once by doLogic() into a local before the traversal loop starts.
public abstract boolean withRetrieve();
266
* Set to the number of documents to highlight.
268
* @return The number of the results to highlight. 0 means no docs will be highlighted.
270
public int numToHighlight() {
275
* Return an appropriate highlighter to be used with
278
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
282
protected Sort getSort() {
287
* Define the fields to highlight. Base implementation returns all fields
288
* @param document The Document
289
* @return A Collection of Field names (Strings)
291
protected Collection<String> getFieldsToHighlight(Document document) {
292
List<Fieldable> fieldables = document.getFields();
293
Set<String> result = new HashSet<String>(fieldables.size());
294
for (final Fieldable fieldable : fieldables) {
295
result.add(fieldable.name());