1
package org.apache.lucene.util.fst;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.BufferedReader;
22
import java.io.FileInputStream;
23
import java.io.FileOutputStream;
24
import java.io.IOException;
25
import java.io.InputStreamReader;
26
import java.io.OutputStreamWriter;
27
import java.io.Writer;
30
import org.apache.lucene.analysis.MockAnalyzer;
31
import org.apache.lucene.document.Document;
32
import org.apache.lucene.index.IndexReader;
33
import org.apache.lucene.index.IndexWriter;
34
import org.apache.lucene.index.IndexWriterConfig;
35
import org.apache.lucene.index.Term;
36
import org.apache.lucene.index.TermEnum;
37
import org.apache.lucene.store.Directory;
38
import org.apache.lucene.store.FSDirectory;
39
import org.apache.lucene.store.IndexInput;
40
import org.apache.lucene.store.IndexOutput;
41
import org.apache.lucene.store.MockDirectoryWrapper;
42
import org.apache.lucene.util.BytesRef;
43
import org.apache.lucene.util.IntsRef;
44
import org.apache.lucene.util.LineFileDocs;
45
import org.apache.lucene.util.LuceneTestCase;
46
import org.apache.lucene.util.UnicodeUtil;
47
import org.apache.lucene.util._TestUtil;
48
import org.apache.lucene.util.fst.FST.Arc;
50
public class TestFSTs extends LuceneTestCase {
52
private MockDirectoryWrapper dir;
55
public void setUp() throws Exception {
58
dir.setPreventDoubleWrite(false);
62
public void tearDown() throws Exception {
67
private static BytesRef toBytesRef(IntsRef ir) {
68
BytesRef br = new BytesRef(ir.length);
69
for(int i=0;i<ir.length;i++) {
70
int x = ir.ints[ir.offset+i];
71
assert x >= 0 && x <= 255;
72
br.bytes[i] = (byte) x;
74
br.length = ir.length;
78
private static IntsRef toIntsRef(String s, int inputMode) {
79
return toIntsRef(s, inputMode, new IntsRef(10));
82
private static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
85
return toIntsRef(new BytesRef(s), ir);
88
return toIntsRefUTF32(s, ir);
92
private static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
93
final int charLength = s.length();
96
while(charIdx < charLength) {
97
if (intIdx == ir.ints.length) {
100
final int utf32 = s.codePointAt(charIdx);
101
ir.ints[intIdx] = utf32;
102
charIdx += Character.charCount(utf32);
109
private static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
110
if (br.length > ir.ints.length) {
113
for(int i=0;i<br.length;i++) {
114
ir.ints[i] = br.bytes[br.offset+i]&0xFF;
116
ir.length = br.length;
120
public void testBasicFSA() throws IOException {
121
String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};
122
String[] strings2 = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
123
IntsRef[] terms = new IntsRef[strings.length];
124
IntsRef[] terms2 = new IntsRef[strings2.length];
125
for(int inputMode=0;inputMode<2;inputMode++) {
127
System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
130
for(int idx=0;idx<strings.length;idx++) {
131
terms[idx] = toIntsRef(strings[idx], inputMode);
133
for(int idx=0;idx<strings2.length;idx++) {
134
terms2[idx] = toIntsRef(strings2[idx], inputMode);
138
doTest(inputMode, terms);
140
// Test pre-determined FST sizes to make sure we haven't lost minimality (at least on this trivial set of terms):
144
final Outputs<Object> outputs = NoOutputs.getSingleton();
145
final Object NO_OUTPUT = outputs.getNoOutput();
146
final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms2.length);
147
for(IntsRef term : terms2) {
148
pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
150
FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
152
assertEquals(22, fst.getNodeCount());
153
assertEquals(27, fst.getArcCount());
158
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
159
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
160
for(int idx=0;idx<terms2.length;idx++) {
161
pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
163
final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
165
assertEquals(22, fst.getNodeCount());
166
assertEquals(27, fst.getArcCount());
169
// FST byte sequence ord
171
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
172
final BytesRef NO_OUTPUT = outputs.getNoOutput();
173
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms2.length);
174
for(int idx=0;idx<terms2.length;idx++) {
175
final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
176
pairs.add(new FSTTester.InputOutput<BytesRef>(terms2[idx], output));
178
final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
180
assertEquals(24, fst.getNodeCount());
181
assertEquals(30, fst.getArcCount());
186
private static String simpleRandomString(Random r) {
187
final int end = r.nextInt(10);
192
final char[] buffer = new char[end];
193
for (int i = 0; i < end; i++) {
194
buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
196
return new String(buffer, 0, end);
199
// given set of terms, test the different outputs for them
200
private void doTest(int inputMode, IntsRef[] terms) throws IOException {
203
// NoOutputs (simple FSA)
205
final Outputs<Object> outputs = NoOutputs.getSingleton();
206
final Object NO_OUTPUT = outputs.getNoOutput();
207
final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
208
for(IntsRef term : terms) {
209
pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
211
new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
214
// PositiveIntOutput (ord)
216
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
217
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
218
for(int idx=0;idx<terms.length;idx++) {
219
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
221
new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
224
// PositiveIntOutput (random monotonically increasing positive number)
226
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
227
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
229
for(int idx=0;idx<terms.length;idx++) {
230
final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
232
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(value)));
234
new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
237
// PositiveIntOutput (random positive number)
239
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
240
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
241
for(int idx=0;idx<terms.length;idx++) {
242
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(random.nextLong()) & Long.MAX_VALUE));
244
new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
247
// Pair<ord, (random monotonically increasing positive number>
249
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(random.nextBoolean());
250
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(random.nextBoolean());
251
final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
252
final List<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>> pairs = new ArrayList<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>>(terms.length);
254
for(int idx=0;idx<terms.length;idx++) {
255
final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
257
pairs.add(new FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>(terms[idx],
258
outputs.get(o1.get(idx),
261
new FSTTester<PairOutputs.Pair<Long,Long>>(random, dir, inputMode, pairs, outputs).doTest();
266
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
267
final BytesRef NO_OUTPUT = outputs.getNoOutput();
268
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
269
for(int idx=0;idx<terms.length;idx++) {
270
final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
271
pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
273
new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest();
278
final IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
279
final List<FSTTester.InputOutput<IntsRef>> pairs = new ArrayList<FSTTester.InputOutput<IntsRef>>(terms.length);
280
for(int idx=0;idx<terms.length;idx++) {
281
final String s = Integer.toString(idx);
282
final IntsRef output = new IntsRef(s.length());
283
output.length = s.length();
284
for(int idx2=0;idx2<output.length;idx2++) {
285
output.ints[idx2] = s.charAt(idx2);
287
pairs.add(new FSTTester.InputOutput<IntsRef>(terms[idx], output));
289
new FSTTester<IntsRef>(random, dir, inputMode, pairs, outputs).doTest();
292
// Up to two positive ints, shared, generally but not
293
// monotonically increasing
296
System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
298
final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
299
final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
301
for(int idx=0;idx<terms.length;idx++) {
302
// Sometimes go backwards
303
long value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
305
value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
308
if (random.nextInt(5) == 3) {
309
long value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
311
value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
313
output = outputs.get(value, value2);
315
output = outputs.get(value);
317
pairs.add(new FSTTester.InputOutput<Object>(terms[idx], output));
319
new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
323
private static class FSTTester<T> {
326
final List<InputOutput<T>> pairs;
328
final Outputs<T> outputs;
331
public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
332
this.random = random;
334
this.inputMode = inputMode;
336
this.outputs = outputs;
339
private static class InputOutput<T> implements Comparable<InputOutput<T>> {
340
public final IntsRef input;
341
public final T output;
343
public InputOutput(IntsRef input, T output) {
345
this.output = output;
348
public int compareTo(InputOutput<T> other) {
349
if (other instanceof InputOutput) {
350
return input.compareTo((other).input);
352
throw new IllegalArgumentException();
357
public void doTest() throws IOException {
361
if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
363
doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true);
366
doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true);
370
// runs the term, returning the output, or null if term
371
// isn't accepted. if prefixLength is non-null it must be
372
// length 1 int array; prefixLength[0] is set to the length
373
// of the term prefix that matches
374
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
375
assert prefixLength == null || prefixLength.length == 1;
376
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
377
final T NO_OUTPUT = fst.outputs.getNoOutput();
378
T output = NO_OUTPUT;
380
for(int i=0;i<=term.length;i++) {
382
if (i == term.length) {
383
label = FST.END_LABEL;
385
label = term.ints[term.offset+i];
387
//System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
388
if (fst.findTargetArc(label, arc, arc) == null) {
389
if (prefixLength != null) {
396
output = fst.outputs.add(output, arc.output);
399
if (prefixLength != null) {
400
prefixLength[0] = term.length;
406
private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
407
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
409
final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
412
final T NO_OUTPUT = fst.outputs.getNoOutput();
413
T output = NO_OUTPUT;
417
fst.readFirstTargetArc(arc, arc);
418
arcs.add(new FST.Arc<T>().copyFrom(arc));
419
while(!arc.isLast()) {
420
fst.readNextArc(arc);
421
arcs.add(new FST.Arc<T>().copyFrom(arc));
425
arc = arcs.get(random.nextInt(arcs.size()));
429
output = fst.outputs.add(output, arc.output);
432
if (arc.label == FST.END_LABEL) {
436
if (in.ints.length == in.length) {
437
in.grow(1+in.length);
439
in.ints[in.length++] = arc.label;
446
FST<T> doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException {
448
System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
451
final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
453
prune1==0 && prune2==0,
454
allowRandomSuffixSharing ? random.nextBoolean() : true,
455
allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
458
for(InputOutput<T> pair : pairs) {
459
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
460
final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
461
final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
462
@SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
463
builderObject.add(pair.input, _outputs.get(twoLongs.first));
464
builderObject.add(pair.input, _outputs.get(twoLongs.second));
466
builder.add(pair.input, pair.output);
469
FST<T> fst = builder.finish();
471
if (random.nextBoolean() && fst != null) {
472
IndexOutput out = dir.createOutput("fst.bin");
475
IndexInput in = dir.openInput("fst.bin");
477
fst = new FST<T>(in, outputs);
480
dir.deleteFile("fst.bin");
484
if (VERBOSE && pairs.size() <= 20 && fst != null) {
485
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
486
Util.toDot(fst, w, false, false);
488
System.out.println("SAVED out.dot");
493
System.out.println(" fst has 0 nodes (fully pruned)");
495
System.out.println(" fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs");
499
if (prune1 == 0 && prune2 == 0) {
500
verifyUnPruned(inputMode, fst);
502
verifyPruned(inputMode, fst, prune1, prune2);
509
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
511
if (pairs.size() == 0) {
517
System.out.println("TEST: now verify " + pairs.size() + " terms");
518
for(InputOutput<T> pair : pairs) {
520
assertNotNull(pair.input);
521
assertNotNull(pair.output);
522
System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
528
// visit valid paris in order -- make sure all words
529
// are accepted, and FSTEnum's next() steps through
532
System.out.println("TEST: check valid terms/next()");
535
IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
536
for(InputOutput<T> pair : pairs) {
537
IntsRef term = pair.input;
539
System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
541
Object output = run(fst, term, null);
543
assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
544
assertEquals(pair.output, output);
546
// verify enum's next
547
IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
549
assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
550
assertEquals(pair.output, t.output);
552
assertNull(fstEnum.next());
555
final Map<IntsRef,T> termsMap = new HashMap<IntsRef,T>();
556
for(InputOutput<T> pair : pairs) {
557
termsMap.put(pair.input, pair.output);
560
// find random matching word and make sure it's valid
562
System.out.println("TEST: verify random accepted terms");
564
final IntsRef scratch = new IntsRef(10);
565
int num = atLeast(500);
566
for(int iter=0;iter<num;iter++) {
567
T output = randomAcceptedWord(fst, scratch);
568
assertTrue("accepted word " + inputToString(inputMode, scratch) + " is not valid", termsMap.containsKey(scratch));
569
assertEquals(termsMap.get(scratch), output);
572
// test IntsRefFSTEnum.seek:
574
System.out.println("TEST: verify seek");
576
IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
578
for(int iter=0;iter<num;iter++) {
580
System.out.println("TEST: iter=" + iter);
582
if (random.nextBoolean()) {
583
// seek to term that doesn't exist:
585
final IntsRef term = toIntsRef(getRandomString(), inputMode);
586
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
590
//System.out.println(" seek " + inputToString(inputMode, term));
591
final IntsRefFSTEnum.InputOutput<T> seekResult;
592
if (random.nextBoolean()) {
594
System.out.println(" do non-exist seekFloor term=" + inputToString(inputMode, term));
596
seekResult = fstEnum.seekFloor(term);
600
System.out.println(" do non-exist seekCeil term=" + inputToString(inputMode, term));
602
seekResult = fstEnum.seekCeil(term);
605
if (pos != -1 && pos < pairs.size()) {
606
//System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
607
assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
609
System.out.println(" got " + inputToString(inputMode, seekResult.input));
611
assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
612
assertEquals(pairs.get(pos).output, seekResult.output);
614
// seeked before start or beyond end
615
//System.out.println("seek=" + seekTerm);
616
assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
618
System.out.println(" got null");
626
// seek to term that does exist:
627
InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
628
final IntsRefFSTEnum.InputOutput<T> seekResult;
629
if (random.nextBoolean()) {
631
System.out.println(" do exists seekFloor " + inputToString(inputMode, pair.input));
633
seekResult = fstEnum.seekFloor(pair.input);
636
System.out.println(" do exists seekCeil " + inputToString(inputMode, pair.input));
638
seekResult = fstEnum.seekCeil(pair.input);
640
assertNotNull(seekResult);
641
assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
642
assertEquals(pair.output, seekResult.output);
647
System.out.println("TEST: mixed next/seek");
650
// test mixed next/seek
652
for(int iter=0;iter<num;iter++) {
654
System.out.println("TEST: iter " + iter);
657
fstEnum = new IntsRefFSTEnum<T>(fst);
660
boolean isDone = false;
661
if (upto == pairs.size()-1 || random.nextBoolean()) {
665
System.out.println(" do next");
667
isDone = fstEnum.next() == null;
668
} else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) {
670
for(;attempt<10;attempt++) {
671
IntsRef term = toIntsRef(getRandomString(), inputMode);
672
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
673
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
677
if (random.nextBoolean()) {
679
assertTrue(upto != -1);
681
System.out.println(" do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
683
isDone = fstEnum.seekFloor(term) == null;
686
System.out.println(" do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
688
isDone = fstEnum.seekCeil(term) == null;
699
final int inc = random.nextInt(pairs.size() - upto - 1);
705
if (random.nextBoolean()) {
707
System.out.println(" do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
709
isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
712
System.out.println(" do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
714
isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
719
System.out.println(" got " + inputToString(inputMode, fstEnum.current().input));
721
System.out.println(" got null");
725
if (upto == pairs.size()) {
730
assertEquals(pairs.get(upto).input, fstEnum.current().input);
731
assertEquals(pairs.get(upto).output, fstEnum.current().output);
734
if (upto < pairs.size()-1) {
736
while(tryCount < 10) {
737
final IntsRef t = toIntsRef(getRandomString(), inputMode);
738
if (pairs.get(upto).input.compareTo(t) < 0) {
739
final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
741
System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
743
assertEquals(expected, fstEnum.beforeNext(t));
755
private static class CountMinOutput<T> {
759
boolean isLeaf = true;
764
private void verifyPruned(int inputMode, FST<T> fst, int prune1, int prune2) throws IOException {
767
System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs);
768
for(InputOutput<T> pair : pairs) {
769
System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
773
// To validate the FST, we brute-force compute all prefixes
774
// in the terms, matched to their "common" outputs, prune that
775
// set according to the prune thresholds, then assert the FST
776
// matches that same set.
778
// NOTE: Crazy RAM intensive!!
780
//System.out.println("TEST: tally prefixes");
782
// build all prefixes
783
final Map<IntsRef,CountMinOutput<T>> prefixes = new HashMap<IntsRef,CountMinOutput<T>>();
784
final IntsRef scratch = new IntsRef(10);
785
for(InputOutput<T> pair: pairs) {
786
scratch.copy(pair.input);
787
for(int idx=0;idx<=pair.input.length;idx++) {
788
scratch.length = idx;
789
CountMinOutput<T> cmo = prefixes.get(scratch);
791
cmo = new CountMinOutput<T>();
793
cmo.output = pair.output;
794
prefixes.put(new IntsRef(scratch), cmo);
797
cmo.output = outputs.common(cmo.output, pair.output);
799
if (idx == pair.input.length) {
801
cmo.finalOutput = cmo.output;
807
System.out.println("TEST: now prune");
811
final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
812
while(it.hasNext()) {
813
Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
814
final IntsRef prefix = ent.getKey();
815
final CountMinOutput<T> cmo = ent.getValue();
817
System.out.println(" term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
821
keep = cmo.count >= prune1;
824
if (prune2 > 1 && cmo.count >= prune2) {
826
} else if (prefix.length > 0) {
827
// consult our parent
828
scratch.length = prefix.length-1;
829
System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length);
830
final CountMinOutput<T> cmo2 = prefixes.get(scratch);
831
//System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count));
832
keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
833
} else if (cmo.count >= prune2) {
842
//System.out.println(" remove");
844
// clear isLeaf for all ancestors
845
//System.out.println(" keep");
846
scratch.copy(prefix);
848
while(scratch.length >= 0) {
849
final CountMinOutput<T> cmo2 = prefixes.get(scratch);
851
//System.out.println(" clear isLeaf " + inputToString(inputMode, scratch));
859
//System.out.println("TEST: after prune");
861
for(Map.Entry<BytesRef,CountMinOutput> ent : prefixes.entrySet()) {
862
System.out.println(" " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
863
if (ent.getValue().isFinal) {
864
System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
869
if (prefixes.size() <= 1) {
876
// make sure FST only enums valid prefixes
878
System.out.println("TEST: check pruned enum");
880
IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
881
IntsRefFSTEnum.InputOutput<T> current;
882
while((current = fstEnum.next()) != null) {
884
System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output));
886
final CountMinOutput cmo = prefixes.get(current.input);
888
assertTrue(cmo.isLeaf || cmo.isFinal);
889
//if (cmo.isFinal && !cmo.isLeaf) {
891
assertEquals(cmo.finalOutput, current.output);
893
assertEquals(cmo.output, current.output);
897
// make sure all non-pruned prefixes are present in the FST
899
System.out.println("TEST: verify all prefixes");
901
final int[] stopNode = new int[1];
902
for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
903
if (ent.getKey().length > 0) {
904
final CountMinOutput<T> cmo = ent.getValue();
905
final T output = run(fst, ent.getKey(), stopNode);
907
System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output));
909
// if (cmo.isFinal && !cmo.isLeaf) {
911
assertEquals(cmo.finalOutput, output);
913
assertEquals(cmo.output, output);
915
assertEquals(ent.getKey().length, stopNode[0]);
921
public void testRandomWords() throws IOException {
922
testRandomWords(1000, atLeast(2));
923
//testRandomWords(20, 100);
926
private String inputModeToString(int mode) {
934
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
935
for(int iter=0;iter<numIter;iter++) {
937
System.out.println("\nTEST: iter " + iter);
939
for(int inputMode=0;inputMode<2;inputMode++) {
940
final int numWords = random.nextInt(maxNumWords+1);
941
Set<IntsRef> termsSet = new HashSet<IntsRef>();
942
IntsRef[] terms = new IntsRef[numWords];
943
while(termsSet.size() < numWords) {
944
final String term = getRandomString();
945
termsSet.add(toIntsRef(term, inputMode));
947
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
952
static String getRandomString() {
954
if (random.nextBoolean()) {
955
term = _TestUtil.randomRealisticUnicodeString(random);
957
// we want to mix in limited-alphabet symbols so
958
// we get more sharing of the nodes given how few
959
// terms we are testing...
960
term = simpleRandomString(random);
966
public void testBigSet() throws IOException {
967
testRandomWords(_TestUtil.nextInt(random, 50000, 60000), 1);
970
private static String inputToString(int inputMode, IntsRef term) {
971
return inputToString(inputMode, term, true);
974
private static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
975
if (!isValidUnicode) {
976
return term.toString();
977
} else if (inputMode == 0) {
979
return toBytesRef(term).utf8ToString() + " " + term;
982
return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term;
986
private static IntsRef toIntsRef(String s) {
987
final int charCount = s.length();
988
IntsRef ir = new IntsRef(charCount);
989
for(int charIDX=0;charIDX<charCount;charIDX++) {
990
ir.ints[charIDX] = s.charAt(charIDX);
992
ir.length = charCount;
996
private static String toString(IntsRef ints) {
997
char[] chars = new char[ints.length];
998
for(int charIDX=0;charIDX<ints.length;charIDX++) {
999
final int ch = ints.ints[ints.offset+charIDX];
1000
assertTrue(ch >= 0 && ch < 65536);
1001
chars[charIDX] = (char) ch;
1003
return new String(chars);
1006
// Build FST for all unique terms in the test line docs
1007
// file, up until a time limit
1008
public void testRealTerms() throws Exception {
1011
if (CodecProvider.getDefault().getDefaultFieldCodec().equals("SimpleText")) {
1013
CodecProvider.getDefault().setDefaultFieldCodec("Standard");
1017
final LineFileDocs docs = new LineFileDocs(random);
1018
final int RUN_TIME_MSEC = atLeast(500);
1019
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
1020
final File tempDir = _TestUtil.getTempDir("fstlines");
1021
final MockDirectoryWrapper dir = newFSDirectory(tempDir);
1022
final IndexWriter writer = new IndexWriter(dir, conf);
1023
writer.setInfoStream(VERBOSE ? System.out : null);
1024
final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
1027
while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
1028
writer.addDocument(doc);
1031
IndexReader r = IndexReader.open(writer, true);
1033
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
1034
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, outputs);
1036
boolean storeOrd = false;
1039
System.out.println("FST stores ord");
1041
System.out.println("FST stores docFreq");
1044
TermEnum termEnum = r.terms(new Term("body", ""));
1046
System.out.println("TEST: got termEnum=" + termEnum);
1050
final Term term = termEnum.term();
1051
if (term == null || !"body".equals(term.field())) {
1060
} catch (UnsupportedOperationException uoe) {
1062
System.out.println("TEST: codec doesn't support ord; FST stores docFreq");
1072
output = termEnum.docFreq();
1074
//System.out.println("ADD: " + term.text() + " ch[0]=" + (term.text().length() == 0 ? -1 : term.text().charAt(0)));
1075
builder.add(toIntsRef(term.text()), outputs.get(output));
1077
if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
1078
System.out.println(ord + " terms...");
1082
final FST<Long> fst = builder.finish();
1084
System.out.println("FST: " + docCount + " docs; " + ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs;" + " " + fst.sizeInBytes() + " bytes");
1088
// Now confirm BytesRefFSTEnum and TermEnum act the
1090
final IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
1091
int num = atLeast(1000);
1092
for(int iter=0;iter<num;iter++) {
1093
final String randomTerm = getRandomString();
1096
System.out.println("TEST: seek " + randomTerm + " ch[0]=" + (randomTerm.length() == 0 ? -1 : randomTerm.charAt(0)));
1099
termEnum = r.terms(new Term("body", randomTerm));
1100
final IntsRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(toIntsRef(randomTerm));
1102
if (termEnum.term() == null || !"body".equals(termEnum.term().field())) {
1103
assertNull("got " + (fstSeekResult == null ? "null" : toString(fstSeekResult.input) + " but expected null"), fstSeekResult);
1105
assertSame(termEnum, fstEnum, storeOrd);
1106
for(int nextIter=0;nextIter<10;nextIter++) {
1108
System.out.println("TEST: next");
1110
//System.out.println(" ord=" + termEnum.ord());
1114
if (termEnum.term() != null && "body".equals(termEnum.term().field())) {
1116
System.out.println(" term=" + termEnum.term());
1118
assertNotNull(fstEnum.next());
1119
assertSame(termEnum, fstEnum, storeOrd);
1122
System.out.println(" end!");
1124
IntsRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
1125
if (nextResult != null) {
1126
System.out.println("expected null but got: input=" + toString(nextResult.input) + " output=" + outputs.outputToString(nextResult.output));
1140
private void assertSame(TermEnum termEnum, IntsRefFSTEnum fstEnum, boolean storeOrd) throws Exception {
1141
if (termEnum.term() == null || !"body".equals(termEnum.term().field())) {
1142
if (fstEnum.current() != null) {
1143
fail("fstEnum.current().input=" + toString(fstEnum.current().input));
1146
assertNotNull(fstEnum.current());
1147
assertEquals(termEnum.term() + " != " + toString(fstEnum.current().input), termEnum.term().text(), toString(fstEnum.current().input));
1149
// fst stored the ord
1151
// assertEquals(termEnum.ord(), ((Long) fstEnum.current().output).longValue());
1153
// fst stored the docFreq
1154
assertEquals(termEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue()));
1159
private static abstract class VisitTerms<T> {
1160
private final String dirOut;
1161
private final String wordsFileIn;
1162
private int inputMode;
1163
private final Outputs<T> outputs;
1164
private final Builder<T> builder;
1166
public VisitTerms(String dirOut, String wordsFileIn, int inputMode, int prune, Outputs<T> outputs) {
1167
this.dirOut = dirOut;
1168
this.wordsFileIn = wordsFileIn;
1169
this.inputMode = inputMode;
1170
this.outputs = outputs;
1172
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs);
1175
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
1177
public void run(int limit, boolean verify) throws IOException {
1178
BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
1180
final IntsRef intsRef = new IntsRef(10);
1181
long tStart = System.currentTimeMillis();
1184
String w = is.readLine();
1188
toIntsRef(w, inputMode, intsRef);
1189
builder.add(intsRef,
1190
getOutput(intsRef, ord));
1193
if (ord % 500000 == 0) {
1195
String.format(Locale.ENGLISH,
1196
"%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
1203
assert builder.getTermCount() == ord;
1204
final FST<T> fst = builder.finish();
1206
System.out.println("FST was fully pruned!");
1213
System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
1214
if (fst.getNodeCount() < 100) {
1215
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
1216
Util.toDot(fst, w, false, false);
1218
System.out.println("Wrote FST to out.dot");
1221
Directory dir = FSDirectory.open(new File(dirOut));
1222
IndexOutput out = dir.createOutput("fst.bin");
1226
System.out.println("Saved FST to fst.bin.");
1232
System.out.println("\nNow verify...");
1235
is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
1238
tStart = System.currentTimeMillis();
1240
String w = is.readLine();
1244
toIntsRef(w, inputMode, intsRef);
1245
T expected = getOutput(intsRef, ord);
1246
T actual = Util.get(fst, intsRef);
1247
if (actual == null) {
1248
throw new RuntimeException("unexpected null output on input=" + w);
1250
if (!actual.equals(expected)) {
1251
throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
1255
if (ord % 500000 == 0) {
1256
System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
1263
double totSec = ((System.currentTimeMillis() - tStart)/1000.0);
1264
System.out.println("Verify took " + totSec + " sec + (" + (int) ((totSec*1000000000/ord)) + " nsec per lookup)");
1272
// java -cp build/classes/test:build/classes/java:build/classes/test-framework:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
1273
public static void main(String[] args) throws IOException {
1275
int limit = Integer.MAX_VALUE;
1276
int inputMode = 0; // utf8
1277
boolean storeOrds = false;
1278
boolean storeDocFreqs = false;
1279
boolean verify = true;
1281
String wordsFileIn = null;
1282
String dirOut = null;
1285
while (idx < args.length) {
1286
if (args[idx].equals("-prune")) {
1287
prune = Integer.valueOf(args[1 + idx]);
1289
} else if (args[idx].equals("-limit")) {
1290
limit = Integer.valueOf(args[1 + idx]);
1292
} else if (args[idx].equals("-utf8")) {
1294
} else if (args[idx].equals("-utf32")) {
1296
} else if (args[idx].equals("-docFreq")) {
1297
storeDocFreqs = true;
1298
} else if (args[idx].equals("-ords")) {
1300
} else if (args[idx].equals("-noverify")) {
1302
} else if (args[idx].startsWith("-")) {
1303
System.err.println("Unrecognized option: " + args[idx]);
1306
if (wordsFileIn == null) {
1307
wordsFileIn = args[idx];
1308
} else if (dirOut == null) {
1311
System.err.println("Too many arguments, expected: input [output]");
1318
if (wordsFileIn == null) {
1319
System.err.println("No input file.");
1323
// ord benefits from share, docFreqs don't:
1325
if (storeOrds && storeDocFreqs) {
1326
// Store both ord & docFreq:
1327
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
1328
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
1329
final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
1330
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1333
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
1335
rand = new Random(17);
1337
return new PairOutputs.Pair<Long,Long>(o1.get(ord),
1338
o2.get(_TestUtil.nextInt(rand, 1, 5000)));
1340
}.run(limit, verify);
1341
} else if (storeOrds) {
1343
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1344
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1346
public Long getOutput(IntsRef input, int ord) {
1347
return outputs.get(ord);
1349
}.run(limit, verify);
1350
} else if (storeDocFreqs) {
1351
// Store only docFreq
1352
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
1353
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1356
public Long getOutput(IntsRef input, int ord) {
1358
rand = new Random(17);
1360
return outputs.get(_TestUtil.nextInt(rand, 1, 5000));
1362
}.run(limit, verify);
1365
final NoOutputs outputs = NoOutputs.getSingleton();
1366
final Object NO_OUTPUT = outputs.getNoOutput();
1367
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1369
public Object getOutput(IntsRef input, int ord) {
1372
}.run(limit, verify);
1376
public void testSingleString() throws Exception {
1377
final Outputs<Object> outputs = NoOutputs.getSingleton();
1378
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
1379
b.add(new BytesRef("foobar"), outputs.getNoOutput());
1380
final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
1381
assertNull(fstEnum.seekFloor(new BytesRef("foo")));
1382
assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
1385
public void testSimple() throws Exception {
1387
// Get outputs -- passing true means FST will share
1388
// (delta code) the outputs. This should result in
1389
// smaller FST if the outputs grow monotonically. But
1390
// if numbers are "random", false should give smaller
1392
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1394
// Build an FST mapping BytesRef -> Long
1395
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1397
final BytesRef a = new BytesRef("a");
1398
final BytesRef b = new BytesRef("b");
1399
final BytesRef c = new BytesRef("c");
1401
builder.add(a, outputs.get(17));
1402
builder.add(b, outputs.get(42));
1403
builder.add(c, outputs.get(13824324872317238L));
1405
final FST<Long> fst = builder.finish();
1407
assertEquals(13824324872317238L, (long) Util.get(fst, c));
1408
assertEquals(42, (long) Util.get(fst, b));
1409
assertEquals(17, (long) Util.get(fst, a));
1411
BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
1412
BytesRefFSTEnum.InputOutput<Long> seekResult;
1413
seekResult = fstEnum.seekFloor(a);
1414
assertNotNull(seekResult);
1415
assertEquals(17, (long) seekResult.output);
1418
seekResult = fstEnum.seekFloor(new BytesRef("aa"));
1419
assertNotNull(seekResult);
1420
assertEquals(17, (long) seekResult.output);
1423
seekResult = fstEnum.seekCeil(new BytesRef("aa"));
1424
assertNotNull(seekResult);
1425
assertEquals(b, seekResult.input);
1426
assertEquals(42, (long) seekResult.output);
1430
* Test state expansion (array format) on close-to-root states. Creates
1431
* synthetic input that has one expanded state on each level.
1433
* @see "https://issues.apache.org/jira/browse/LUCENE-2933"
1435
public void testExpandedCloseToRoot() throws Exception {
1436
class SyntheticData {
1437
FST<Object> compile(String[] lines) throws IOException {
1438
final NoOutputs outputs = NoOutputs.getSingleton();
1439
final Object nothing = outputs.getNoOutput();
1440
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
1443
final BytesRef term = new BytesRef();
1444
while (line < lines.length) {
1445
String w = lines[line++];
1450
b.add(term, nothing);
1456
void generate(ArrayList<String> out, StringBuilder b, char from, char to,
1458
if (depth == 0 || from == to) {
1459
String seq = b.toString() + "_" + out.size() + "_end";
1462
for (char c = from; c <= to; c++) {
1464
generate(out, b, from, c == to ? to : from, depth - 1);
1465
b.deleteCharAt(b.length() - 1);
1470
public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
1471
throws IOException {
1472
if (fst.targetHasArcs(arc)) {
1474
for (arc = fst.readFirstTargetArc(arc, arc);;
1475
arc = fst.readNextArc(arc), childCount++)
1477
boolean expanded = fst.isExpandedTarget(arc);
1478
int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
1482
(depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE &&
1483
children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
1484
children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
1485
if (arc.isLast()) break;
1495
assertTrue(FST.FIXED_ARRAY_NUM_ARCS_SHALLOW < FST.FIXED_ARRAY_NUM_ARCS_DEEP);
1496
assertTrue(FST.FIXED_ARRAY_SHALLOW_DISTANCE >= 0);
1498
SyntheticData s = new SyntheticData();
1500
ArrayList<String> out = new ArrayList<String>();
1501
StringBuilder b = new StringBuilder();
1502
s.generate(out, b, 'a', 'i', 10);
1503
String[] input = out.toArray(new String[out.size()]);
1505
FST<Object> fst = s.compile(input);
1506
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
1507
s.verifyStateAndBelow(fst, arc, 1);
1510
// Make sure raw FST can differentiate between final vs
1511
// non-final end nodes
1512
public void testNonFinalStopNodes() throws Exception {
1513
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1514
final Long nothing = outputs.getNoOutput();
1515
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1517
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1519
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
1521
// Add final stop node
1523
final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
1524
node.isFinal = true;
1525
rootNode.addArc('a', node);
1526
final Builder.CompiledNode frozen = new Builder.CompiledNode();
1527
frozen.address = fst.addNode(node);
1528
rootNode.arcs[0].nextFinalOutput = outputs.get(17);
1529
rootNode.arcs[0].isFinal = true;
1530
rootNode.arcs[0].output = nothing;
1531
rootNode.arcs[0].target = frozen;
1534
// Add non-final stop node
1536
final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
1537
rootNode.addArc('b', node);
1538
final Builder.CompiledNode frozen = new Builder.CompiledNode();
1539
frozen.address = fst.addNode(node);
1540
rootNode.arcs[1].nextFinalOutput = nothing;
1541
rootNode.arcs[1].output = outputs.get(42);
1542
rootNode.arcs[1].target = frozen;
1545
fst.finish(fst.addNode(rootNode));
1547
checkStopNodes(fst, outputs);
1549
// Make sure it still works after save/load:
1550
Directory dir = newDirectory();
1551
IndexOutput out = dir.createOutput("fst");
1555
IndexInput in = dir.openInput("fst");
1556
final FST<Long> fst2 = new FST<Long>(in, outputs);
1557
checkStopNodes(fst2, outputs);
1562
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
1563
final Long nothing = outputs.getNoOutput();
1564
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
1565
assertEquals(nothing, startArc.output);
1566
assertEquals(nothing, startArc.nextFinalOutput);
1568
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
1569
assertEquals('a', arc.label);
1570
assertEquals(17, arc.nextFinalOutput.longValue());
1571
assertTrue(arc.isFinal());
1573
arc = fst.readNextArc(arc);
1574
assertEquals('b', arc.label);
1575
assertFalse(arc.isFinal());
1576
assertEquals(42, arc.output.longValue());