1
package org.apache.lucene.search.suggest.fst;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.util.ArrayList;
21
import java.util.Arrays;
22
import java.util.List;
23
import java.util.Locale;
24
import java.util.Random;
26
import org.apache.lucene.search.suggest.Lookup.LookupResult;
27
import org.apache.lucene.search.suggest.fst.FSTLookup;
28
import org.apache.lucene.util.LuceneTestCase;
30
import org.apache.lucene.search.suggest.LookupBenchmarkTest;
31
import org.apache.lucene.search.suggest.TermFreq;
32
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
35
* Unit tests for {@link FSTLookup}.
37
public class FSTLookupTest extends LuceneTestCase {
38
public static TermFreq tf(String t, float v) {
39
return new TermFreq(t, v);
42
private FSTLookup lookup;
44
public void setUp() throws Exception {
47
lookup = new FSTLookup();
48
lookup.build(new TermFreqArrayIterator(evalKeys()));
51
private TermFreq[] evalKeys() {
52
final TermFreq[] keys = new TermFreq[] {
75
public void testExactMatchHighPriority() throws Exception {
76
assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0");
79
public void testExactMatchLowPriority() throws Exception {
80
assertMatchEquals(lookup.lookup("one", true, 2),
85
public void testRequestedCount() throws Exception {
86
// 'one' is promoted after collecting two higher ranking results.
87
assertMatchEquals(lookup.lookup("one", true, 2),
91
// 'one' is at the top after collecting all alphabetical results.
92
assertMatchEquals(lookup.lookup("one", false, 2),
96
// 'four' is collected in a bucket and then again as an exact match.
97
assertMatchEquals(lookup.lookup("four", true, 2),
101
// Check reordering of exact matches.
102
assertMatchEquals(lookup.lookup("four", true, 4),
108
lookup = new FSTLookup(10, false);
109
lookup.build(new TermFreqArrayIterator(evalKeys()));
111
// 'one' is not promoted after collecting two higher ranking results.
112
assertMatchEquals(lookup.lookup("one", true, 2),
116
// 'one' is at the top after collecting all alphabetical results.
117
assertMatchEquals(lookup.lookup("one", false, 2),
122
public void testMiss() throws Exception {
123
assertMatchEquals(lookup.lookup("xyz", true, 1));
126
public void testAlphabeticWithWeights() throws Exception {
127
assertEquals(0, lookup.lookup("xyz", false, 1).size());
130
public void testFullMatchList() throws Exception {
131
assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE),
138
public void testMultilingualInput() throws Exception {
139
List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
141
lookup = new FSTLookup();
142
lookup.build(new TermFreqArrayIterator(input));
144
for (TermFreq tf : input) {
145
assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
146
assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
150
public void testEmptyInput() throws Exception {
151
lookup = new FSTLookup();
152
lookup.build(new TermFreqArrayIterator(new TermFreq[0]));
154
assertMatchEquals(lookup.lookup("", true, 10));
157
public void testRandom() throws Exception {
158
List<TermFreq> freqs = new ArrayList<TermFreq>();
160
for (int i = 0; i < 5000; i++) {
161
freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100)));
163
lookup = new FSTLookup();
164
lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
166
for (TermFreq tf : freqs) {
167
final String term = tf.term;
168
for (int i = 1; i < term.length(); i++) {
169
String prefix = term.substring(0, i);
170
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
171
assertTrue(lr.key.startsWith(prefix));
177
private void assertMatchEquals(List<LookupResult> res, String... expected) {
178
String [] result = new String [res.size()];
179
for (int i = 0; i < res.size(); i++)
180
result[i] = res.get(i).toString();
182
if (!Arrays.equals(expected, result)) {
183
int colLen = Math.max(maxLen(expected), maxLen(result));
185
StringBuilder b = new StringBuilder();
186
String format = "%" + colLen + "s " + "%" + colLen + "s\n";
187
b.append(String.format(Locale.ENGLISH, format, "Expected", "Result"));
188
for (int i = 0; i < Math.max(result.length, expected.length); i++) {
189
b.append(String.format(Locale.ENGLISH, format,
190
i < expected.length ? expected[i] : "--",
191
i < result.length ? result[i] : "--"));
194
System.err.println(b.toString());
195
fail("Expected different output:\n" + b.toString());
199
private int maxLen(String[] result) {
201
for (String s : result)
202
len = Math.max(len, s.length());