1
package org.apache.lucene.facet.taxonomy;
3
import java.io.IOException;
4
import java.io.PrintWriter;
5
import java.io.StringWriter;
6
import java.util.ArrayList;
7
import java.util.Arrays;
9
import org.apache.lucene.store.Directory;
10
import org.apache.lucene.store.LockObtainFailedException;
11
import org.apache.lucene.store.RAMDirectory;
12
import org.junit.Ignore;
13
import org.junit.Test;
15
import org.apache.lucene.util.LuceneTestCase;
16
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
17
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
18
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
19
import org.apache.lucene.util.SlowRAMDirectory;
22
* Licensed to the Apache Software Foundation (ASF) under one or more
23
* contributor license agreements. See the NOTICE file distributed with
24
* this work for additional information regarding copyright ownership.
25
* The ASF licenses this file to You under the Apache License, Version 2.0
26
* (the "License"); you may not use this file except in compliance with
27
* the License. You may obtain a copy of the License at
29
* http://www.apache.org/licenses/LICENSE-2.0
31
* Unless required by applicable law or agreed to in writing, software
32
* distributed under the License is distributed on an "AS IS" BASIS,
33
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34
* See the License for the specific language governing permissions and
35
* limitations under the License.
38
public class TestTaxonomyCombined extends LuceneTestCase {
40
/** The following categories will be added to the taxonomy by
41
fillTaxonomy(), and tested by all tests below:
43
private final static String[][] categories = {
44
{ "Author", "Tom Clancy" },
45
{ "Author", "Richard Dawkins" },
46
{ "Author", "Richard Adams" },
47
{ "Price", "10", "11" },
48
{ "Price", "10", "12" },
49
{ "Price", "20", "27" },
50
{ "Date", "2006", "05" },
53
{ "Subject", "Nonfiction", "Children", "Animals" },
54
{ "Author", "Stephen Jay Gould" },
55
{ "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
58
/** When adding the above categories with TaxonomyWriter.addCategory(),
59
the following paths are expected to be returned:
60
(note that currently the full path is not returned, and therefore
61
not tested - rather, just the last component, the ordinal, is returned
64
private final static int[][] expectedPaths = {
79
/** The taxonomy index is expected to then contain the following
80
generated categories, with increasing ordinals (note how parent
81
categories are be added automatically when subcategories are added).
83
private final static String[][] expectedCategories = {
84
{ }, // the root category
86
{ "Author", "Tom Clancy" },
87
{ "Author", "Richard Dawkins" },
88
{ "Author", "Richard Adams" },
91
{ "Price", "10", "11" },
92
{ "Price", "10", "12" },
94
{ "Price", "20", "27" },
97
{ "Date", "2006", "05" },
100
{ "Subject", "Nonfiction" },
101
{ "Subject", "Nonfiction", "Children" },
102
{ "Subject", "Nonfiction", "Children", "Animals" },
103
{ "Author", "Stephen Jay Gould" },
104
{ "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
107
/** fillTaxonomy adds the categories in the categories[] array, and asserts
108
that the additions return exactly the ordinals (in the past - paths)
109
specified in expectedPaths[].
110
Note that this assumes that fillTaxonomy() is called on an empty taxonomy
111
index. Calling it after something else was already added to the taxonomy
112
index will surely have this method fail.
114
public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
115
for (int i = 0; i < categories.length; i++) {
116
int ordinal = tw.addCategory(new CategoryPath(categories[i]));
117
int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
118
if (ordinal!=expectedOrdinal) {
119
fail("For category "+showcat(categories[i])+" expected ordinal "+
120
expectedOrdinal+", but got "+ordinal);
125
public static String showcat(String[] path) {
129
if (path.length==0) {
132
if (path.length==1 && path[0].length()==0) {
135
StringBuilder sb = new StringBuilder(path[0]);
136
for (int i=1; i<path.length; i++) {
140
return sb.toString();
143
private String showcat(CategoryPath path) {
147
if (path.length()==0) {
150
return "<"+path.toString('/')+">";
153
/** Basic tests for TaxonomyWriter. Basically, we test that
154
IndexWriter.addCategory works, i.e. returns the expected ordinals
155
(this is tested by calling the fillTaxonomy() method above).
156
We do not test here that after writing the index can be read -
157
this will be done in more tests below.
160
public void testWriter() throws Exception {
161
Directory indexDir = newDirectory();
162
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
164
// Also check TaxonomyWriter.getSize() - see that the taxonomy's size
165
// is what we expect it to be.
166
assertEquals(expectedCategories.length, tw.getSize());
171
/** testWriterTwice is exactly like testWriter, except that after adding
172
all the categories, we add them again, and see that we get the same
173
old ids again - not new categories.
176
public void testWriterTwice() throws Exception {
177
Directory indexDir = newDirectory();
178
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
180
// run fillTaxonomy again - this will try to add the same categories
181
// again, and check that we see the same ordinal paths again, not
184
// Let's check the number of categories again, to see that no
185
// extraneous categories were created:
186
assertEquals(expectedCategories.length, tw.getSize());
191
/** testWriterTwice2 is similar to testWriterTwice, except that the index
192
is closed and reopened before attempting to write to it the same
193
categories again. While testWriterTwice can get along with writing
194
and reading correctly just to the cache, testWriterTwice2 checks also
195
the actual disk read part of the writer:
198
public void testWriterTwice2() throws Exception {
199
Directory indexDir = newDirectory();
200
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
203
tw = new DirectoryTaxonomyWriter(indexDir);
204
// run fillTaxonomy again - this will try to add the same categories
205
// again, and check that we see the same ordinals again, not different
206
// ones, and that the number of categories hasn't grown by the new
209
assertEquals(expectedCategories.length, tw.getSize());
215
* testWriterTwice3 is yet another test which tests creating a taxonomy
216
* in two separate writing sessions. This test used to fail because of
217
* a bug involving commit(), explained below, and now should succeed.
222
public void testWriterTwice3() throws Exception {
223
Directory indexDir = newDirectory();
224
// First, create and fill the taxonomy
225
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
228
// Now, open the same taxonomy and add the same categories again.
229
// After a few categories, the LuceneTaxonomyWriter implementation
230
// will stop looking for each category on disk, and rather read them
231
// all into memory and close it's reader. The bug was that it closed
232
// the reader, but forgot that it did (because it didn't set the reader
233
// reference to null).
234
tw = new DirectoryTaxonomyWriter(indexDir);
236
// Add one new category, just to make commit() do something:
237
tw.addCategory(new CategoryPath("hi"));
238
// Do a commit(). Here was a bug - if tw had a reader open, it should
239
// be reopened after the commit. However, in our case the reader should
240
// not be open (as explained above) but because it was not set to null,
241
// we forgot that, tried to reopen it, and got an AlreadyClosedException.
243
assertEquals(expectedCategories.length+1, tw.getSize());
248
/** Another set of tests for the writer, which don't use an array and
249
* try to distill the different cases, and therefore may be more helpful
250
* for debugging a problem than testWriter() which is hard to know why
251
* or where it failed.
254
public void testWriterSimpler() throws Exception {
255
Directory indexDir = newDirectory();
256
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
257
assertEquals(1, tw.getSize()); // the root only
258
// Test that adding a new top-level category works
259
assertEquals(1, tw.addCategory(new CategoryPath("a")));
260
assertEquals(2, tw.getSize());
261
// Test that adding the same category again is noticed, and the
262
// same ordinal (and not a new one) is returned.
263
assertEquals(1, tw.addCategory(new CategoryPath("a")));
264
assertEquals(2, tw.getSize());
265
// Test that adding another top-level category returns a new ordinal,
267
assertEquals(2, tw.addCategory(new CategoryPath("b")));
268
assertEquals(3, tw.getSize());
269
// Test that adding a category inside one of the above adds just one
271
assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
272
assertEquals(4, tw.getSize());
273
// Test that adding the same second-level category doesn't do anything:
274
assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
275
assertEquals(4, tw.getSize());
276
// Test that adding a second-level category with two new components
277
// indeed adds two categories
278
assertEquals(5, tw.addCategory(new CategoryPath("d","e")));
279
assertEquals(6, tw.getSize());
280
// Verify that the parents were added above in the order we expected
281
assertEquals(4, tw.addCategory(new CategoryPath("d")));
282
// Similar, but inside a category that already exists:
283
assertEquals(7, tw.addCategory(new CategoryPath("b", "d","e")));
284
assertEquals(8, tw.getSize());
285
// And now inside two levels of categories that already exist:
286
assertEquals(8, tw.addCategory(new CategoryPath("b", "d","f")));
287
assertEquals(9, tw.getSize());
293
/** Test writing an empty index, and seeing that a reader finds in it
294
the root category, and only it. We check all the methods on that
295
root category return the expected results.
298
public void testRootOnly() throws Exception {
299
Directory indexDir = newDirectory();
300
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
301
// right after opening the index, it should already contain the
302
// root, so have size 1:
303
assertEquals(1, tw.getSize());
305
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
306
assertEquals(1, tr.getSize());
307
assertEquals(0, tr.getPath(0).length());
308
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
309
assertEquals(0, tr.getOrdinal(new CategoryPath()));
314
/** The following test is exactly the same as testRootOnly, except we
315
* do not close the writer before opening the reader. We want to see
316
* that the root is visible to the reader not only after the writer is
317
* closed, but immediately after it is created.
320
public void testRootOnly2() throws Exception {
321
Directory indexDir = newDirectory();
322
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
324
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
325
assertEquals(1, tr.getSize());
326
assertEquals(0, tr.getPath(0).length());
327
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
328
assertEquals(0, tr.getOrdinal(new CategoryPath()));
334
/** Basic tests for TaxonomyReader's category <=> ordinal transformations
335
(getSize(), getCategory() and getOrdinal()).
336
We test that after writing the index, it can be read and all the
337
categories and ordinals are there just as we expected them to be.
340
public void testReaderBasic() throws Exception {
341
Directory indexDir = newDirectory();
342
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
345
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
347
// test TaxonomyReader.getSize():
348
assertEquals(expectedCategories.length, tr.getSize());
350
// test round trips of ordinal => category => ordinal
351
for (int i=0; i<tr.getSize(); i++) {
352
assertEquals(i, tr.getOrdinal(tr.getPath(i)));
355
// test TaxonomyReader.getCategory():
356
for (int i=0; i<tr.getSize(); i++) {
357
CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
358
CategoryPath category = tr.getPath(i);
359
if (!expectedCategory.equals(category)) {
360
fail("For ordinal "+i+" expected category "+
361
showcat(expectedCategory)+", but got "+showcat(category));
364
// (also test invalid ordinals:)
365
assertNull(tr.getPath(-1));
366
assertNull(tr.getPath(tr.getSize()));
367
assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));
369
// test TaxonomyReader.getOrdinal():
370
for (int i=0; i<expectedCategories.length; i++) {
371
int expectedOrdinal = i;
372
int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
373
if (expectedOrdinal != ordinal) {
374
fail("For category "+showcat(expectedCategories[i])+" expected ordinal "+
375
expectedOrdinal+", but got "+ordinal);
378
// (also test invalid categories:)
379
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("non-existant")));
380
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("Author", "Jules Verne")));
386
/** Tests for TaxonomyReader's getParent() method.
387
We check it by comparing its results to those we could have gotten by
388
looking at the category string paths (where the parentage is obvious).
389
Note that after testReaderBasic(), we already know we can trust the
390
ordinal <=> category conversions.
392
Note: At the moment, the parent methods in the reader are deprecated,
393
but this does not mean they should not be tested! Until they are
394
removed (*if* they are removed), these tests should remain to see
395
that they still work correctly.
399
public void testReaderParent() throws Exception {
400
Directory indexDir = newDirectory();
401
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
404
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
406
// check that the parent of the root ordinal is the invalid ordinal:
407
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
409
// check parent of non-root ordinals:
410
for (int ordinal=1; ordinal<tr.getSize(); ordinal++) {
411
CategoryPath me = tr.getPath(ordinal);
412
int parentOrdinal = tr.getParent(ordinal);
413
CategoryPath parent = tr.getPath(parentOrdinal);
415
fail("Parent of "+ordinal+" is "+parentOrdinal+
416
", but this is not a valid category.");
418
// verify that the parent is indeed my parent, according to the strings
419
if (!new CategoryPath(me, me.length()-1).equals(parent)) {
420
fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
421
" but categories are "+showcat(parent)+" and "+showcat(me)+
426
// check parent of of invalid ordinals:
429
fail("getParent for -1 should throw exception");
430
} catch (ArrayIndexOutOfBoundsException e) {
434
tr.getParent(TaxonomyReader.INVALID_ORDINAL);
435
fail("getParent for INVALID_ORDINAL should throw exception");
436
} catch (ArrayIndexOutOfBoundsException e) {
440
int parent = tr.getParent(tr.getSize());
441
fail("getParent for getSize() should throw exception, but returned "+parent);
442
} catch (ArrayIndexOutOfBoundsException e) {
451
* Tests for TaxonomyWriter's getParent() method. We check it by comparing
452
* its results to those we could have gotten by looking at the category
453
* string paths using a TaxonomyReader (where the parentage is obvious).
454
* Note that after testReaderBasic(), we already know we can trust the
455
* ordinal <=> category conversions from TaxonomyReader.
457
* The difference between testWriterParent1 and testWriterParent2 is that
458
* the former closes the taxonomy writer before reopening it, while the
461
* This test code is virtually identical to that of testReaderParent().
464
public void testWriterParent1() throws Exception {
465
Directory indexDir = newDirectory();
466
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
469
tw = new DirectoryTaxonomyWriter(indexDir);
470
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
472
checkWriterParent(tr, tw);
480
public void testWriterParent2() throws Exception {
481
Directory indexDir = newDirectory();
482
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
485
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
487
checkWriterParent(tr, tw);
494
private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
495
// check that the parent of the root ordinal is the invalid ordinal:
496
assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));
498
// check parent of non-root ordinals:
499
for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
500
CategoryPath me = tr.getPath(ordinal);
501
int parentOrdinal = tw.getParent(ordinal);
502
CategoryPath parent = tr.getPath(parentOrdinal);
503
if (parent == null) {
504
fail("Parent of " + ordinal + " is " + parentOrdinal
505
+ ", but this is not a valid category.");
507
// verify that the parent is indeed my parent, according to the
509
if (!new CategoryPath(me, me.length() - 1).equals(parent)) {
510
fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
511
+ " but categories are " + showcat(parent) + " and "
512
+ showcat(me) + " respectively.");
516
// check parent of of invalid ordinals:
519
fail("getParent for -1 should throw exception");
520
} catch (ArrayIndexOutOfBoundsException e) {
524
tw.getParent(TaxonomyReader.INVALID_ORDINAL);
525
fail("getParent for INVALID_ORDINAL should throw exception");
526
} catch (ArrayIndexOutOfBoundsException e) {
530
int parent = tw.getParent(tr.getSize());
531
fail("getParent for getSize() should throw exception, but returned "
533
} catch (ArrayIndexOutOfBoundsException e) {
538
/** Tests TaxonomyReader's getParentArray() method. We do not test this
539
method directly, but rather just compare its results to those from
540
other methods (which we have already tested above).
543
public void testReaderParentArray() throws Exception {
544
Directory indexDir = newDirectory();
545
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
548
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
549
int[] parents = tr.getParentArray();
550
assertEquals(tr.getSize(), parents.length);
551
for (int i=0; i<tr.getSize(); i++) {
552
assertEquals(tr.getParent(i), parents[i]);
559
* Test TaxonomyReader's child browsing method, getChildrenArrays()
560
* This only tests for correctness of the data on one example - we have
561
* below further tests on data refresh etc.
564
public void testChildrenArrays() throws Exception {
565
Directory indexDir = newDirectory();
566
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
569
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
570
ChildrenArrays ca = tr.getChildrenArrays();
571
int[] youngestChildArray = ca.getYoungestChildArray();
572
assertEquals(tr.getSize(), youngestChildArray.length);
573
int[] olderSiblingArray = ca.getOlderSiblingArray();
574
assertEquals(tr.getSize(), olderSiblingArray.length);
575
for (int i=0; i<expectedCategories.length; i++) {
576
// find expected children by looking at all expectedCategories
578
ArrayList<Integer> expectedChildren = new ArrayList<Integer>();
579
for (int j=expectedCategories.length-1; j>=0; j--) {
580
if (expectedCategories[j].length != expectedCategories[i].length+1) {
581
continue; // not longer by 1, so can't be a child
583
boolean ischild=true;
584
for (int k=0; k<expectedCategories[i].length; k++) {
585
if (!expectedCategories[j][k].equals(expectedCategories[i][k])) {
591
expectedChildren.add(j);
594
// check that children and expectedChildren are the same, with the
595
// correct reverse (youngest to oldest) order:
596
if (expectedChildren.size()==0) {
597
assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
599
int child = youngestChildArray[i];
600
assertEquals(expectedChildren.get(0).intValue(),
602
for (int j=1; j<expectedChildren.size(); j++) {
603
child = olderSiblingArray[child];
604
assertEquals(expectedChildren.get(j).intValue(),
606
// if child is INVALID_ORDINAL we should stop, but
607
// assertEquals would fail in this case anyway.
609
// When we're done comparing, olderSiblingArray should now point
610
// to INVALID_ORDINAL, saying there are no more children. If it
611
// doesn't, we found too many children...
612
assertEquals(-1, olderSiblingArray[child]);
620
* Similar to testChildrenArrays, except rather than look at
621
* expected results, we test for several "invariants" that the results
622
* should uphold, e.g., that a child of a category indeed has this category
623
* as its parent. This sort of test can more easily be extended to larger
624
* example taxonomies, because we do not need to build the expected list
625
* of categories like we did in the above test.
628
public void testChildrenArraysInvariants() throws Exception {
629
Directory indexDir = newDirectory();
630
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
633
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
634
ChildrenArrays ca = tr.getChildrenArrays();
635
int[] youngestChildArray = ca.getYoungestChildArray();
636
assertEquals(tr.getSize(), youngestChildArray.length);
637
int[] olderSiblingArray = ca.getOlderSiblingArray();
638
assertEquals(tr.getSize(), olderSiblingArray.length);
640
// test that the "youngest child" of every category is indeed a child:
641
for (int i=0; i<tr.getSize(); i++) {
642
int youngestChild = youngestChildArray[i];
643
if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
644
assertEquals(i, tr.getParent(youngestChild));
648
// test that the "older sibling" of every category is indeed older (lower)
649
// (it can also be INVALID_ORDINAL, which is lower than any ordinal)
650
for (int i=0; i<tr.getSize(); i++) {
651
assertTrue("olderSiblingArray["+i+"] should be <"+i, olderSiblingArray[i] < i);
654
// test that the "older sibling" of every category is indeed a sibling
655
// (they share the same parent)
656
for (int i=0; i<tr.getSize(); i++) {
657
int sibling = olderSiblingArray[i];
658
if (sibling == TaxonomyReader.INVALID_ORDINAL) {
661
assertEquals(tr.getParent(i), tr.getParent(sibling));
664
// And now for slightly more complex (and less "invariant-like"...)
667
// test that the "youngest child" is indeed the youngest (so we don't
668
// miss the first children in the chain)
669
for (int i=0; i<tr.getSize(); i++) {
670
// Find the really youngest child:
672
for (j=tr.getSize()-1; j>i; j--) {
673
if (tr.getParent(j)==i) {
674
break; // found youngest child
677
if (j==i) { // no child found
678
j=TaxonomyReader.INVALID_ORDINAL;
680
assertEquals(j, youngestChildArray[i]);
683
// test that the "older sibling" is indeed the least oldest one - and
684
// not a too old one or -1 (so we didn't miss some children in the
685
// middle or the end of the chain).
686
for (int i=0; i<tr.getSize(); i++) {
687
// Find the youngest older sibling:
689
for (j=i-1; j>=0; j--) {
690
if (tr.getParent(j)==tr.getParent(i)) {
691
break; // found youngest older sibling
694
if (j<0) { // no sibling found
695
j=TaxonomyReader.INVALID_ORDINAL;
697
assertEquals(j, olderSiblingArray[i]);
705
* Test how getChildrenArrays() deals with the taxonomy's growth:
708
public void testChildrenArraysGrowth() throws Exception {
709
Directory indexDir = newDirectory();
710
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
711
tw.addCategory(new CategoryPath("hi", "there"));
713
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
714
ChildrenArrays ca = tr.getChildrenArrays();
715
assertEquals(3, tr.getSize());
716
assertEquals(3, ca.getOlderSiblingArray().length);
717
assertEquals(3, ca.getYoungestChildArray().length);
718
assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.getYoungestChildArray()));
719
assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.getOlderSiblingArray()));
720
tw.addCategory(new CategoryPath("hi", "ho"));
721
tw.addCategory(new CategoryPath("hello"));
723
// Before refresh, nothing changed..
724
ChildrenArrays newca = tr.getChildrenArrays();
725
assertSame(newca, ca); // we got exactly the same object
726
assertEquals(3, tr.getSize());
727
assertEquals(3, ca.getOlderSiblingArray().length);
728
assertEquals(3, ca.getYoungestChildArray().length);
729
// After the refresh, things change:
731
ca = tr.getChildrenArrays();
732
assertEquals(5, tr.getSize());
733
assertEquals(5, ca.getOlderSiblingArray().length);
734
assertEquals(5, ca.getYoungestChildArray().length);
735
assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.getYoungestChildArray()));
736
assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.getOlderSiblingArray()));
743
* Test that getParentArrays is valid when retrieved during refresh
747
public void testTaxonomyReaderRefreshRaces() throws Exception {
748
// compute base child arrays - after first chunk, and after the other
749
Directory indexDirBase = newDirectory();
750
TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
751
twBase.addCategory(new CategoryPath("a", "0"));
752
final CategoryPath abPath = new CategoryPath("a", "b");
753
twBase.addCategory(abPath);
755
TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
757
final ChildrenArrays ca1 = trBase.getChildrenArrays();
759
final int abOrd = trBase.getOrdinal(abPath);
760
final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd];
762
for (int i=0; i < 1<<10; i++) { //1024 facets
763
twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
769
final ChildrenArrays ca2 = trBase.getChildrenArrays();
770
final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
772
for (int retry=0; retry<100; retry++) {
773
assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry);
775
indexDirBase.close();
778
private void assertConsistentYoungestChild(final CategoryPath abPath,
779
final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
781
SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization
782
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
783
tw.addCategory(new CategoryPath("a", "0"));
784
tw.addCategory(abPath);
787
final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
788
for (int i=0; i < 1<<10; i++) { //1024 facets
789
final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
791
assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
795
final boolean[] stop = new boolean[] { false };
796
final Throwable[] error = new Throwable[] { null };
797
final int retrieval[] = { 0 };
799
Thread thread = new Thread("Child Arrays Verifier") {
802
setPriority(1+getPriority());
805
int lastOrd = tr.getParentArray().length-1;
806
assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd));
807
assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++);
809
} catch (Throwable e) {
815
private void assertChildrenArrays(ChildrenArrays ca, int retry, int retrieval) {
816
final int abYoungChild = ca.getYoungestChildArray()[abOrd];
818
"Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
819
") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
820
abYoungChildBase1==abYoungChild ||
821
abYoungChildBase2==ca.getYoungestChildArray()[abOrd]);
826
indexDir.setSleepMillis(1); // some delay for refresh
831
assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
837
/** Grab the stack trace into a string since the exception was thrown in a thread and we want the assert
838
* outside the thread to show the stack trace in case of failure. */
839
private String stackTraceStr(final Throwable error) {
843
StringWriter sw = new StringWriter();
844
PrintWriter pw = new PrintWriter(sw);
845
error.printStackTrace(pw);
847
return sw.toString();
850
/** Test that if separate reader and writer objects are opened, new
851
categories written into the writer are available to a reader only
853
Note that this test obviously doesn't cover all the different
854
concurrency scenarios, all different methods, and so on. We may
855
want to write more tests of this sort.
857
This test simulates what would happen when there are two separate
858
processes, one doing indexing, and the other searching, and each opens
859
its own object (with obviously no connection between the objects) using
860
the same disk files. Note, though, that this test does not test what
861
happens when the two processes do their actual work at exactly the same
863
It also doesn't test multi-threading.
866
public void testSeparateReaderAndWriter() throws Exception {
867
Directory indexDir = newDirectory();
868
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
870
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
874
// getParent() and getSize() test:
876
tr.getParent(author);
877
fail("Initially, getParent for "+author+" should throw exception");
878
} catch (ArrayIndexOutOfBoundsException e) {
881
assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
882
tw.addCategory(new CategoryPath("Author"));
884
tr.getParent(author);
885
fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
886
} catch (ArrayIndexOutOfBoundsException e) {
889
assertEquals(1, tr.getSize()); // still root only...
890
tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
892
tr.getParent(author);
893
fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
894
} catch (ArrayIndexOutOfBoundsException e) {
897
assertEquals(1, tr.getSize()); // still root only...
900
tr.getParent(author);
901
fail("Before refresh(), getParent for "+author+" should still throw exception");
902
} catch (ArrayIndexOutOfBoundsException e) {
905
assertEquals(1, tr.getSize()); // still root only...
908
assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
910
} catch (ArrayIndexOutOfBoundsException e) {
911
fail("After category addition, commit() and refresh(), getParent for "+author+" should NOT throw exception");
913
assertEquals(2, tr.getSize()); // finally, see there are two categories
915
// now, add another category, and verify that after commit and refresh
916
// the parent of this category is correct (this requires the reader
917
// to correctly update its prefetched parent vector), and that the
918
// old information also wasn't ruined:
919
tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
923
assertEquals(author, tr.getParent(dawkins));
924
assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
925
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL));
926
assertEquals(3, tr.getSize());
933
public void testSeparateReaderAndWriter2() throws Exception {
934
Directory indexDir = newDirectory();
935
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
937
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
939
// Test getOrdinal():
940
CategoryPath author = new CategoryPath("Author");
942
assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
943
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
944
tw.addCategory(author);
945
// before commit and refresh, no change:
946
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
947
assertEquals(1, tr.getSize()); // still root only...
948
tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
949
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
950
assertEquals(1, tr.getSize()); // still root only...
952
// still not enough before refresh:
953
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
954
assertEquals(1, tr.getSize()); // still root only...
955
tr.refresh(); // finally
956
assertEquals(1, tr.getOrdinal(author));
957
assertEquals(2, tr.getSize()); // still root only...
964
* Test what happens if we try to write to a locked taxonomy writer,
965
* and see that we can unlock it and continue.
968
public void testWriterLock() throws Exception {
969
// native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
970
Directory indexDir = new RAMDirectory();
971
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
972
tw.addCategory(new CategoryPath("hi", "there"));
974
// we deliberately not close the write now, and keep it open and
976
// Verify that the writer worked:
977
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
978
assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
979
// Try to open a second writer, with the first one locking the directory.
980
// We expect to get a LockObtainFailedException.
982
new DirectoryTaxonomyWriter(indexDir);
983
fail("should have failed to write in locked directory");
984
} catch (LockObtainFailedException e) {
985
// this is what we expect to happen.
987
// Remove the lock, and now the open should succeed, and we can
988
// write to the new writer.
989
DirectoryTaxonomyWriter.unlock(indexDir);
990
TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
991
tw2.addCategory(new CategoryPath("hey"));
993
// See that the writer indeed wrote:
995
assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
1002
* fillTaxonomyCheckPaths adds the categories in the categories[] array,
1003
* and asserts that the additions return exactly paths specified in
1004
* expectedPaths[]. This is the same add fillTaxonomy() but also checks
1005
* the correctness of getParent(), not just addCategory().
1006
* Note that this assumes that fillTaxonomyCheckPaths() is called on an empty
1007
* taxonomy index. Calling it after something else was already added to the
1008
* taxonomy index will surely have this method fail.
1010
public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
1011
for (int i = 0; i < categories.length; i++) {
1012
int ordinal = tw.addCategory(new CategoryPath(categories[i]));
1013
int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
1014
if (ordinal!=expectedOrdinal) {
1015
fail("For category "+showcat(categories[i])+" expected ordinal "+
1016
expectedOrdinal+", but got "+ordinal);
1018
for (int j=expectedPaths[i].length-2; j>=0; j--) {
1019
ordinal = tw.getParent(ordinal);
1020
expectedOrdinal = expectedPaths[i][j];
1021
if (ordinal!=expectedOrdinal) {
1022
fail("For category "+showcat(categories[i])+" expected ancestor level "+
1023
(expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
1024
", but got "+ordinal);
1030
// After fillTaxonomy returned successfully, checkPaths() checks that
1031
// the getParent() calls return as expected, from the table
1032
public static void checkPaths(TaxonomyWriter tw) throws IOException {
1033
for (int i = 0; i < categories.length; i++) {
1034
int ordinal = expectedPaths[i][expectedPaths[i].length-1];
1035
for (int j=expectedPaths[i].length-2; j>=0; j--) {
1036
ordinal = tw.getParent(ordinal);
1037
int expectedOrdinal = expectedPaths[i][j];
1038
if (ordinal!=expectedOrdinal) {
1039
fail("For category "+showcat(categories[i])+" expected ancestor level "+
1040
(expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
1041
", but got "+ordinal);
1044
assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[i][0]));
1046
assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
1050
* Basic test for TaxonomyWriter.getParent(). This is similar to testWriter
1051
* above, except we also check the parents of the added categories, not just
1052
* the categories themselves.
1055
public void testWriterCheckPaths() throws Exception {
1056
Directory indexDir = newDirectory();
1057
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
1058
fillTaxonomyCheckPaths(tw);
1059
// Also check TaxonomyWriter.getSize() - see that the taxonomy's size
1060
// is what we expect it to be.
1061
assertEquals(expectedCategories.length, tw.getSize());
1067
* testWriterCheckPaths2 is the path-checking variant of testWriterTwice
1068
* and testWriterTwice2. After adding all the categories, we add them again,
1069
* and see that we get the same old ids and paths. We repeat the path checking
1070
* yet again after closing and opening the index for writing again - to see
1071
* that the reading of existing data from disk works as well.
1074
public void testWriterCheckPaths2() throws Exception {
1075
Directory indexDir = newDirectory();
1076
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
1083
tw = new DirectoryTaxonomyWriter(indexDir);
1091
// TODO (Facet): test multiple readers, one writer. Have the multiple readers
1092
// using the same object (simulating threads) or different objects
1093
// (simulating processes).