package org.apache.lucene.facet;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;

import org.junit.AfterClass;
import org.junit.BeforeClass;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
67
/** Base faceted search test. */
68
public abstract class FacetTestBase extends LuceneTestCase {
70
/** Holds a search and taxonomy Directories pair. */
71
private static final class SearchTaxoDirPair {
72
Directory searchDir, taxoDir;
73
SearchTaxoDirPair() {}
76
private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
77
private static File TEST_DIR;
79
/** Documents text field. */
80
protected static final String CONTENT_FIELD = "content";
82
/** taxonomy Reader for the test. */
83
protected TaxonomyReader taxoReader;
85
/** Index Reader for the test. */
86
protected IndexReader indexReader;
88
/** Searcher for the test. */
89
protected IndexSearcher searcher;
92
public static void beforeClassFacetTestBase() throws Exception {
93
TEST_DIR = _TestUtil.getTempDir("facets");
94
dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
98
public static void afterClassFacetTestBase() throws Exception {
99
for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
100
IOUtils.close(pair.searchDir, pair.taxoDir);
104
/** documents text (for the text field). */
105
private static final String[] DEFAULT_CONTENT = {
106
"the white car is the one I want.",
107
"the white dog does not belong to anyone.",
110
/** Facets: facets[D][F] == category-path no. F for document no. D. */
111
private static final CategoryPath[][] DEFAULT_CATEGORIES = {
112
{ new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
113
{ new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
116
/** categories to be added to specified doc */
117
protected List<CategoryPath> getCategories(int doc) {
118
return Arrays.asList(DEFAULT_CATEGORIES[doc]);
121
/** Number of documents to index */
122
protected int numDocsToIndex() {
123
return DEFAULT_CONTENT.length;
126
/** content to be added to specified doc */
127
protected String getContent(int doc) {
128
return DEFAULT_CONTENT[doc];
131
/** Prepare index (in RAM) with single partition */
132
protected final void initIndex() throws Exception {
133
initIndex(Integer.MAX_VALUE);
136
/** Prepare index (in RAM) with some documents and some facets */
137
protected final void initIndex(int partitionSize) throws Exception {
138
initIndex(partitionSize, false);
141
/** Prepare index (in RAM/Disk) with some documents and some facets */
142
protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
144
System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
147
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
149
pair = new SearchTaxoDirPair();
151
pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
152
pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
154
pair.searchDir = newDirectory();
155
pair.taxoDir = newDirectory();
158
RandomIndexWriter iw = new RandomIndexWriter(random, pair.searchDir, getIndexWriterConfig(getAnalyzer()));
159
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
161
populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
163
// commit changes (taxonomy prior to search index for consistency)
169
dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
172
// prepare for searching
173
taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
174
indexReader = IndexReader.open(pair.searchDir);
175
searcher = newSearcher(indexReader);
178
/** Returns indexing params for the main index */
179
protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
180
return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
183
/** Returns a default facet indexing params */
184
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
185
return new DefaultFacetIndexingParams() {
187
protected int fixedPartitionSize() {
194
* Faceted Search Params for the test.
195
* Sub classes should override in order to test with different faceted search params.
197
protected FacetSearchParams getFacetedSearchParams() {
198
return getFacetedSearchParams(Integer.MAX_VALUE);
202
* Faceted Search Params with specified partition size.
203
* @see #getFacetedSearchParams()
205
protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
206
FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
211
* Populate the test index+taxonomy for this test.
212
* <p>Subclasses can override this to test different scenarios
214
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
215
throws IOException, CorruptIndexException {
216
// add test documents
217
int numDocsToIndex = numDocsToIndex();
218
for (int doc=0; doc<numDocsToIndex; doc++) {
219
indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
222
// also add a document that would be deleted, so that all tests are also working against deletions in the index
223
String content4del = "ContentOfDocToDelete";
224
indexDoc(iParams, iw, taxo, content4del, getCategories(0));
225
iw.commit(); // commit it
226
iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
229
/** Close all indexes */
230
protected void closeAll() throws Exception {
231
// close and nullify everything
232
IOUtils.close(taxoReader, indexReader, searcher);
239
* Analyzer to use for the test.
240
* Sub classes should override in order to test with different analyzer.
242
protected Analyzer getAnalyzer() {
243
return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
246
/** convenience method: convert sub results to an array */
247
protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
248
ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
249
for (FacetResultNode frn : parentRes.getSubResults()) {
252
return a.toArray(new FacetResultNode[0]);
255
/** utility Create a dummy document with specified categories and content */
256
protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
257
TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
258
CorruptIndexException {
259
Document d = new Document();
260
CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
261
builder.setCategoryPaths(categories);
263
d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
267
/** Build the "truth" with ALL the facets enumerating indexes content. */
268
protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
269
FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
270
String delim = String.valueOf(iParams.getFacetDelimChar());
271
Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
272
HashSet<Term> handledTerms = new HashSet<Term>();
273
for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
274
Term baseTerm = clp.getTerm().createTerm("");
275
if (!handledTerms.add(baseTerm)) {
276
continue; // already handled this term (for another list)
278
TermEnum te = indexReader.terms(baseTerm);
281
if (!t.field().equals(baseTerm.field())) {
282
break; // hit a different field
284
TermDocs tp = indexReader.termDocs(t);
287
if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
291
res.put(new CategoryPath(t.text().split(delim)), cnt);
297
/** Validate counts for returned facets, and that there are not too many results */
298
protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
299
for (FacetResult fr : facetResults) {
300
FacetResultNode topResNode = fr.getFacetResultNode();
301
FacetRequest freq = fr.getFacetRequest();
303
System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
305
assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
309
/** Validate counts for returned facets, and that there are not too many results */
310
private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
311
int actualNumResults = resNode.getNumSubResults();
313
System.out.println("NumResults: " + actualNumResults);
315
assertTrue("Too many results!", actualNumResults <= reqNumResults);
316
for (FacetResultNode subRes : resNode.getSubResults()) {
317
assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
318
assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
322
/** Validate results equality */
323
protected static void assertSameResults(List<FacetResult> expected,
324
List<FacetResult> actual) {
325
String expectedResults = resStringValueOnly(expected);
326
String actualResults = resStringValueOnly(actual);
327
if (!expectedResults.equals(actualResults)) {
328
System.err.println("Results are not the same!");
329
System.err.println("Expected:\n" + expectedResults);
330
System.err.println("Actual" + actualResults);
331
throw new NotSameResultError();
335
/** exclude the residue and numDecendants because it is incorrect in sampling */
336
private static final String resStringValueOnly(List<FacetResult> results) {
337
StringBuilder sb = new StringBuilder();
338
for (FacetResult facetRes : results) {
339
sb.append(facetRes.toString()).append('\n');
341
return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
344
/** Special Error class for ability to ignore only this error and retry... */
345
public static class NotSameResultError extends Error {
346
public NotSameResultError() {
347
super("Results are not the same!");