package org.apache.solr.analysis;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

/**
 * Simple tests to ensure the NGram filter factories are working.
 */
public class TestNGramFilters extends BaseTokenTestCase {
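  /*
   * For context: in a Solr deployment these factories are wired up in
   * schema.xml rather than instantiated directly. An illustrative snippet
   * (the field type name "text_ngram" is hypothetical, not from this test):
   *
   *   <fieldType name="text_ngram" class="solr.TextField">
   *     <analyzer>
   *       <tokenizer class="solr.NGramTokenizerFactory" minGramSize="2" maxGramSize="3"/>
   *     </analyzer>
   *   </fieldType>
   */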
  /**
   * Test NGramTokenizerFactory
   */
  public void testNGramTokenizer() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    NGramTokenizerFactory factory = new NGramTokenizerFactory();
    factory.init(args);
    Tokenizer stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "t", "e", "s", "t", "te", "es", "st" });
  }
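
  // Note on the expected tokens above: with an empty args map the factory
  // falls back to Lucene's NGramTokenizer defaults (minGramSize=1,
  // maxGramSize=2), so "test" yields all unigrams followed by all bigrams.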
  /**
   * Test NGramTokenizerFactory with min and max gram options
   */
  public void testNGramTokenizer2() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    args.put("minGramSize", "2");
    args.put("maxGramSize", "3");
    NGramTokenizerFactory factory = new NGramTokenizerFactory();
    factory.init(args);
    Tokenizer stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "te", "es", "st", "tes", "est" });
  }
  /**
   * Test the NGramFilterFactory
   */
  public void testNGramFilter() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    NGramFilterFactory factory = new NGramFilterFactory();
    factory.init(args);
    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream,
        new String[] { "t", "e", "s", "t", "te", "es", "st" });
  }
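
  // Unlike the tokenizer tests, the filter tests n-gram the tokens emitted by
  // an upstream tokenizer (here MockTokenizer in WHITESPACE mode produces the
  // single token "test") instead of the raw character stream.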
  /**
   * Test the NGramFilterFactory with min and max gram options
   */
  public void testNGramFilter2() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    args.put("minGramSize", "2");
    args.put("maxGramSize", "3");
    NGramFilterFactory factory = new NGramFilterFactory();
    factory.init(args);
    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream,
        new String[] { "te", "es", "st", "tes", "est" });
  }
  /**
   * Test EdgeNGramTokenizerFactory
   */
  public void testEdgeNGramTokenizer() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
    factory.init(args);
    Tokenizer stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "t" });
  }
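
  // With no args, EdgeNGramTokenizer's Lucene defaults apply (minGramSize=1,
  // maxGramSize=1, side="front"), so only the single leading character "t"
  // is produced.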
98
* Test EdgeNGramTokenizerFactory with min and max gram size
100
public void testEdgeNGramTokenizer2() throws Exception {
101
Reader reader = new StringReader("test");
102
Map<String,String> args = new HashMap<String,String>();
103
args.put("minGramSize", "1");
104
args.put("maxGramSize", "2");
105
EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
107
Tokenizer stream = factory.create(reader);
108
assertTokenStreamContents(stream,
109
new String[] { "t", "te" });
  /**
   * Test EdgeNGramTokenizerFactory with side option
   */
  public void testEdgeNGramTokenizer3() throws Exception {
    Reader reader = new StringReader("ready");
    Map<String,String> args = new HashMap<String,String>();
    args.put("side", "back");
    EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
    factory.init(args);
    Tokenizer stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "y" });
  }
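
  // side="back" builds grams from the end of the input, so with the default
  // gram size of 1 the last character of "ready" is the only token.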
  /**
   * Test EdgeNGramFilterFactory
   */
  public void testEdgeNGramFilter() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
    factory.init(args);
    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream,
        new String[] { "t" });
  }
  /**
   * Test EdgeNGramFilterFactory with min and max gram size
   */
  public void testEdgeNGramFilter2() throws Exception {
    Reader reader = new StringReader("test");
    Map<String,String> args = new HashMap<String,String>();
    args.put("minGramSize", "1");
    args.put("maxGramSize", "2");
    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
    factory.init(args);
    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream,
        new String[] { "t", "te" });
  }
  /**
   * Test EdgeNGramFilterFactory with side option
   */
  public void testEdgeNGramFilter3() throws Exception {
    Reader reader = new StringReader("ready");
    Map<String,String> args = new HashMap<String,String>();
    args.put("side", "back");
    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
    factory.init(args);
    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream,
        new String[] { "y" });
  }
}