1
// Copyright (c) 2014 Couchbase, Inc.
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
21
"github.com/blevesearch/bleve/analysis"
22
"github.com/blevesearch/bleve/registry"
25
func TestArabicAnalyzer(t *testing.T) {
28
output analysis.TokenStream
31
input: []byte("كبير"),
32
output: analysis.TokenStream{
43
input: []byte("كبيرة"),
44
output: analysis.TokenStream{
54
input: []byte("مشروب"),
55
output: analysis.TokenStream{
57
Term: []byte("مشروب"),
66
input: []byte("مشروبات"),
67
output: analysis.TokenStream{
69
Term: []byte("مشروب"),
78
input: []byte("أمريكيين"),
79
output: analysis.TokenStream{
81
Term: []byte("امريك"),
88
// singular with bare alif
90
input: []byte("امريكي"),
91
output: analysis.TokenStream{
93
Term: []byte("امريك"),
101
input: []byte("كتاب"),
102
output: analysis.TokenStream{
104
Term: []byte("كتاب"),
113
input: []byte("الكتاب"),
114
output: analysis.TokenStream{
116
Term: []byte("كتاب"),
124
input: []byte("ما ملكت أيمانكم"),
125
output: analysis.TokenStream{
127
Term: []byte("ملكت"),
133
Term: []byte("ايمانكم"),
142
input: []byte("الذين ملكت أيمانكم"),
143
output: analysis.TokenStream{
145
Term: []byte("ملكت"),
151
Term: []byte("ايمانكم"),
158
// presentation form normalization
160
input: []byte("ﺍﻟﺴﻼﻢ"),
161
output: analysis.TokenStream{
163
Term: []byte("سلام"),
172
cache := registry.NewCache()
173
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
177
for _, test := range tests {
178
actual := analyzer.Analyze(test.input)
179
if !reflect.DeepEqual(actual, test.output) {
180
t.Errorf("expected %v, got %v", test.output, actual)
181
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)