1
package org.apache.solr.analysis;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.Reader;
21
import java.io.StringReader;
23
import org.apache.lucene.analysis.CharReader;
24
import org.apache.lucene.analysis.TokenStream;
25
import org.apache.lucene.analysis.Tokenizer;
/**
 * Simple tests to ensure the Arabic filter factories are working.
 */
public class TestArabicFilters extends BaseTokenTestCase {
32
* Test ArabicLetterTokenizerFactory
33
* @deprecated (3.1) Remove in Lucene 5.0
36
public void testTokenizer() throws Exception {
37
Reader reader = new StringReader("الذين مَلكت أيمانكم");
38
ArabicLetterTokenizerFactory factory = new ArabicLetterTokenizerFactory();
39
factory.init(DEFAULT_VERSION_PARAM);
40
Tokenizer stream = factory.create(reader);
41
assertTokenStreamContents(stream, new String[] {"الذين", "مَلكت", "أيمانكم"});
45
* Test ArabicNormalizationFilterFactory
47
public void testNormalizer() throws Exception {
48
Reader reader = new StringReader("الذين مَلكت أيمانكم");
49
StandardTokenizerFactory factory = new StandardTokenizerFactory();
50
ArabicNormalizationFilterFactory filterFactory = new ArabicNormalizationFilterFactory();
51
factory.init(DEFAULT_VERSION_PARAM);
52
filterFactory.init(DEFAULT_VERSION_PARAM);
53
Tokenizer tokenizer = factory.create(reader);
54
TokenStream stream = filterFactory.create(tokenizer);
55
assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
59
* Test ArabicStemFilterFactory
61
public void testStemmer() throws Exception {
62
Reader reader = new StringReader("الذين مَلكت أيمانكم");
63
StandardTokenizerFactory factory = new StandardTokenizerFactory();
64
ArabicNormalizationFilterFactory normFactory = new ArabicNormalizationFilterFactory();
65
ArabicStemFilterFactory stemFactory = new ArabicStemFilterFactory();
66
factory.init(DEFAULT_VERSION_PARAM);
67
normFactory.init(DEFAULT_VERSION_PARAM);
68
Tokenizer tokenizer = factory.create(reader);
69
TokenStream stream = normFactory.create(tokenizer);
70
stream = stemFactory.create(stream);
71
assertTokenStreamContents(stream, new String[] {"ذين", "ملكت", "ايمانكم"});
75
* Test PersianCharFilterFactory
77
public void testPersianCharFilter() throws Exception {
78
Reader reader = new StringReader("میخورد");
79
PersianCharFilterFactory charfilterFactory = new PersianCharFilterFactory();
80
StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
81
tokenizerFactory.init(DEFAULT_VERSION_PARAM);
82
TokenStream stream = tokenizerFactory.create(charfilterFactory.create(CharReader.get(reader)));
83
assertTokenStreamContents(stream, new String[] { "می", "خورد" });