1
package org.apache.lucene.facet.search;
3
import java.io.IOException;
4
import java.util.ArrayList;
7
import org.apache.lucene.analysis.MockAnalyzer;
8
import org.apache.lucene.document.Document;
9
import org.apache.lucene.document.Field;
10
import org.apache.lucene.document.Field.Index;
11
import org.apache.lucene.document.Field.Store;
12
import org.apache.lucene.document.Field.TermVector;
13
import org.apache.lucene.index.CorruptIndexException;
14
import org.apache.lucene.index.IndexReader;
15
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
16
import org.apache.lucene.index.RandomIndexWriter;
17
import org.apache.lucene.index.Term;
18
import org.apache.lucene.search.IndexSearcher;
19
import org.apache.lucene.search.Query;
20
import org.apache.lucene.search.TermQuery;
21
import org.apache.lucene.store.Directory;
22
import org.junit.Test;
24
import org.apache.lucene.util.LuceneTestCase;
25
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
26
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
27
import org.apache.lucene.facet.search.FacetsAccumulator;
28
import org.apache.lucene.facet.search.FloatArrayAllocator;
29
import org.apache.lucene.facet.search.IntArrayAllocator;
30
import org.apache.lucene.facet.search.ScoredDocIdCollector;
31
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
32
import org.apache.lucene.facet.search.params.CountFacetRequest;
33
import org.apache.lucene.facet.search.params.FacetSearchParams;
34
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
35
import org.apache.lucene.facet.search.results.FacetResult;
36
import org.apache.lucene.facet.search.results.FacetResultNode;
37
import org.apache.lucene.facet.taxonomy.CategoryPath;
38
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
39
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
40
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
41
import org.apache.lucene.facet.util.PartitionsUtils;
44
* Licensed to the Apache Software Foundation (ASF) under one or more
45
* contributor license agreements. See the NOTICE file distributed with
46
* this work for additional information regarding copyright ownership.
47
* The ASF licenses this file to You under the Apache License, Version 2.0
48
* (the "License"); you may not use this file except in compliance with
49
* the License. You may obtain a copy of the License at
51
* http://www.apache.org/licenses/LICENSE-2.0
53
* Unless required by applicable law or agreed to in writing, software
54
* distributed under the License is distributed on an "AS IS" BASIS,
55
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
56
* See the License for the specific language governing permissions and
57
* limitations under the License.
60
public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
62
// TODO (Facet): extend BaseTestTopK and split this test into several smaller test methods - see TestTopKResultsHandler
65
public void testSimple() throws Exception {
67
int[] partitionSizes = new int[] {
68
2,3,4, 5, 6, 7, 10, 1000,
71
for (int partitionSize : partitionSizes) {
72
Directory iDir = newDirectory();
73
Directory tDir = newDirectory();
76
System.out.println("Partition Size: " + partitionSize);
79
final int pSize = partitionSize;
80
DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams() {
82
protected int fixedPartitionSize() {
87
RandomIndexWriter iw = new RandomIndexWriter(random, iDir,
88
newIndexWriterConfig(TEST_VERSION_CURRENT,
89
new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
90
TaxonomyWriter tw = new DirectoryTaxonomyWriter(tDir);
91
prvt_add(iParams, iw, tw, "a", "b");
92
prvt_add(iParams, iw, tw, "a", "b", "1");
93
prvt_add(iParams, iw, tw, "a", "b", "1");
94
prvt_add(iParams, iw, tw, "a", "b", "2");
95
prvt_add(iParams, iw, tw, "a", "b", "2");
96
prvt_add(iParams, iw, tw, "a", "b", "2");
97
prvt_add(iParams, iw, tw, "a", "b", "3");
98
prvt_add(iParams, iw, tw, "a", "b", "4");
99
prvt_add(iParams, iw, tw, "a", "c");
100
prvt_add(iParams, iw, tw, "a", "c");
101
prvt_add(iParams, iw, tw, "a", "c");
102
prvt_add(iParams, iw, tw, "a", "c");
103
prvt_add(iParams, iw, tw, "a", "c");
104
prvt_add(iParams, iw, tw, "a", "c", "1");
105
prvt_add(iParams, iw, tw, "a", "d");
106
prvt_add(iParams, iw, tw, "a", "e");
108
IndexReader ir = iw.getReader();
113
IndexSearcher is = newSearcher(ir);
114
DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tDir);
116
// Get all of the documents and run the query, then do different
117
// facet counts and compare to control
118
Query q = new TermQuery(new Term("content", "alpha"));
119
ScoredDocIdCollector scoredDoc = ScoredDocIdCollector.create(is.maxDoc(), true);
121
// Collector collector = new MultiCollector(scoredDoc);
122
is.search(q, scoredDoc);
124
CountFacetRequest cfra23 = new CountFacetRequest(
125
new CategoryPath("a"), 2);
127
cfra23.setResultMode(ResultMode.PER_NODE_IN_TREE);
129
CountFacetRequest cfra22 = new CountFacetRequest(
130
new CategoryPath("a"), 2);
132
cfra22.setResultMode(ResultMode.PER_NODE_IN_TREE);
134
CountFacetRequest cfra21 = new CountFacetRequest(
135
new CategoryPath("a"), 2);
137
cfra21.setResultMode(ResultMode.PER_NODE_IN_TREE);
139
CountFacetRequest cfrb22 = new CountFacetRequest(
140
new CategoryPath("a", "b"), 2);
142
cfrb22.setResultMode(ResultMode.PER_NODE_IN_TREE);
144
CountFacetRequest cfrb23 = new CountFacetRequest(
145
new CategoryPath("a", "b"), 2);
147
cfrb23.setResultMode(ResultMode.PER_NODE_IN_TREE);
149
CountFacetRequest cfrb21 = new CountFacetRequest(
150
new CategoryPath("a", "b"), 2);
152
cfrb21.setResultMode(ResultMode.PER_NODE_IN_TREE);
154
CountFacetRequest doctor = new CountFacetRequest(
155
new CategoryPath("Doctor"), 2);
157
doctor.setResultMode(ResultMode.PER_NODE_IN_TREE);
159
CountFacetRequest cfrb20 = new CountFacetRequest(
160
new CategoryPath("a", "b"), 2);
162
cfrb20.setResultMode(ResultMode.PER_NODE_IN_TREE);
164
FacetSearchParams facetSearchParams = new FacetSearchParams(iParams);
165
facetSearchParams.addFacetRequest(cfra23);
166
facetSearchParams.addFacetRequest(cfra22);
167
facetSearchParams.addFacetRequest(cfra21);
168
facetSearchParams.addFacetRequest(cfrb23);
169
facetSearchParams.addFacetRequest(cfrb22);
170
facetSearchParams.addFacetRequest(cfrb21);
171
facetSearchParams.addFacetRequest(doctor);
172
facetSearchParams.addFacetRequest(cfrb20);
174
IntArrayAllocator iaa = new IntArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1);
175
FloatArrayAllocator faa = new FloatArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1);
176
FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, iaa, faa);
177
fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
178
long start = System.currentTimeMillis();
180
List<FacetResult> facetResults = fctExtrctr.accumulate(scoredDoc.getScoredDocIDs());
182
long end = System.currentTimeMillis();
184
System.out.println("Time: " + (end - start));
187
FacetResult fr = facetResults.get(0); // a, depth=3, K=2
188
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
189
assertEquals(9, fr.getNumValidDescendants());
190
FacetResultNode parentRes = fr.getFacetResultNode();
191
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
192
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
193
assertEquals(2, parentRes.getNumSubResults());
194
// two nodes sorted by descending values: a/b with 8 and a/c with 6
195
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
196
// a/c has residue 0, and one child a/c/1 with value 1.
197
double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 };
199
for (FacetResultNode node : parentRes.getSubResults()) {
200
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
201
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
202
for (FacetResultNode node2 : node.getSubResults()) {
203
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
204
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
208
// now just change the value of the first child of the root to 5, and then rearrange
209
// expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1
210
// then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2.
211
for (FacetResultNode node : parentRes.getSubResults()) {
216
double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 };
217
fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr);
219
for (FacetResultNode node : parentRes.getSubResults()) {
220
assertEquals(expectedValues00[i++], node.getValue(), Double.MIN_VALUE);
221
assertEquals(expectedValues00[i++], node.getResidue(), Double.MIN_VALUE);
222
for (FacetResultNode node2 : node.getSubResults()) {
223
assertEquals(expectedValues00[i++], node2.getValue(), Double.MIN_VALUE);
224
assertEquals(expectedValues00[i++], node2.getResidue(), Double.MIN_VALUE);
228
fr = facetResults.get(1); // a, depth=2, K=2. same result as before
229
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
230
assertEquals(9, fr.getNumValidDescendants());
231
parentRes = fr.getFacetResultNode();
232
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
233
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
234
assertEquals(2, parentRes.getNumSubResults());
235
// two nodes sorted by descending values: a/b with 8 and a/c with 6
236
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
237
// a/c has residue 0, and one child a/c/1 with value 1.
239
for (FacetResultNode node : parentRes.getSubResults()) {
240
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
241
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
242
for (FacetResultNode node2 : node.getSubResults()) {
243
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
244
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
248
fr = facetResults.get(2); // a, depth=1, K=2
249
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
250
// Compare the descendant count exactly; a third numeric argument would act as a tolerance and let wrong counts pass.
assertEquals(4, fr.getNumValidDescendants());
251
parentRes = fr.getFacetResultNode();
252
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
253
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
254
assertEquals(2, parentRes.getNumSubResults());
255
// two nodes sorted by descending values:
256
// a/b with value 8 and residue 0 (because no children considered),
257
// and a/c with value 6 and residue 0 (because no children considered)
258
double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 };
260
for (FacetResultNode node : parentRes.getSubResults()) {
261
assertEquals(expectedValues2[i++], node.getValue(), Double.MIN_VALUE);
262
assertEquals(expectedValues2[i++], node.getResidue(), Double.MIN_VALUE);
263
// Expected value goes first so a failure reports "expected 0 but was N" correctly.
assertEquals(0, node.getNumSubResults());
266
fr = facetResults.get(3); // a/b, depth=3, K=2
267
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
268
assertEquals(4, fr.getNumValidDescendants());
269
parentRes = fr.getFacetResultNode();
270
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
271
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
272
assertEquals(2, parentRes.getNumSubResults());
273
double [] expectedValues3 = { 3.0, 2.0 };
275
for (FacetResultNode node : parentRes.getSubResults()) {
276
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
277
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
278
assertEquals(0, node.getNumSubResults());
281
fr = facetResults.get(4); // a/b, depth=2, K=2
282
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
283
assertEquals(4, fr.getNumValidDescendants());
284
parentRes = fr.getFacetResultNode();
285
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
286
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
287
assertEquals(2, parentRes.getNumSubResults());
289
for (FacetResultNode node : parentRes.getSubResults()) {
290
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
291
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
292
assertEquals(0, node.getNumSubResults());
295
fr = facetResults.get(5); // a/b, depth=1, K=2
296
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
297
assertEquals(4, fr.getNumValidDescendants());
298
parentRes = fr.getFacetResultNode();
299
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
300
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
301
assertEquals(2, parentRes.getNumSubResults());
303
for (FacetResultNode node : parentRes.getSubResults()) {
304
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
305
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
306
assertEquals(0, node.getNumSubResults());
309
fr = facetResults.get(6); // a/b, depth=0, K=2
310
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
311
assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
312
parentRes = fr.getFacetResultNode();
313
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
314
assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
315
assertEquals(0, parentRes.getNumSubResults());
316
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
318
// doctor, depth=1, K=2
319
assertFalse("Shouldn't have found anything for a FacetRequest " +
320
"of a facet that doesn't exist in the index.", hasDoctor);
321
assertEquals("Shouldn't have found more than seven request.", 7, facetResults.size());
330
private void prvt_add(DefaultFacetIndexingParams iParams, RandomIndexWriter iw,
331
TaxonomyWriter tw, String... strings) throws IOException,
332
CorruptIndexException {
333
ArrayList<CategoryPath> cps = new ArrayList<CategoryPath>();
334
CategoryPath cp = new CategoryPath(strings);
336
Document d = new Document();
337
new CategoryDocumentBuilder(tw, iParams).setCategoryPaths(cps).build(d);
338
d.add(new Field("content", "alpha", Store.YES, Index.ANALYZED, TermVector.NO));