1
// Copyright (c) 2014 Couchbase, Inc.
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
21
"github.com/blevesearch/bleve/analysis"
24
func TestArabicStemmerFilter(t *testing.T) {
26
input analysis.TokenStream
27
output analysis.TokenStream
31
input: analysis.TokenStream{
33
Term: []byte("الحسن"),
36
output: analysis.TokenStream{
44
input: analysis.TokenStream{
46
Term: []byte("والحسن"),
49
output: analysis.TokenStream{
57
input: analysis.TokenStream{
59
Term: []byte("بالحسن"),
62
output: analysis.TokenStream{
70
input: analysis.TokenStream{
72
Term: []byte("كالحسن"),
75
output: analysis.TokenStream{
83
input: analysis.TokenStream{
85
Term: []byte("فالحسن"),
88
output: analysis.TokenStream{
96
input: analysis.TokenStream{
98
Term: []byte("للاخر"),
101
output: analysis.TokenStream{
109
input: analysis.TokenStream{
111
Term: []byte("وحسن"),
114
output: analysis.TokenStream{
122
input: analysis.TokenStream{
124
Term: []byte("زوجها"),
127
output: analysis.TokenStream{
135
input: analysis.TokenStream{
137
Term: []byte("ساهدان"),
140
output: analysis.TokenStream{
142
Term: []byte("ساهد"),
148
input: analysis.TokenStream{
150
Term: []byte("ساهدات"),
153
output: analysis.TokenStream{
155
Term: []byte("ساهد"),
161
input: analysis.TokenStream{
163
Term: []byte("ساهدون"),
166
output: analysis.TokenStream{
168
Term: []byte("ساهد"),
174
input: analysis.TokenStream{
176
Term: []byte("ساهدين"),
179
output: analysis.TokenStream{
181
Term: []byte("ساهد"),
187
input: analysis.TokenStream{
189
Term: []byte("ساهديه"),
192
output: analysis.TokenStream{
194
Term: []byte("ساهد"),
200
input: analysis.TokenStream{
202
Term: []byte("ساهدية"),
205
output: analysis.TokenStream{
207
Term: []byte("ساهد"),
213
input: analysis.TokenStream{
215
Term: []byte("ساهده"),
218
output: analysis.TokenStream{
220
Term: []byte("ساهد"),
226
input: analysis.TokenStream{
228
Term: []byte("ساهدة"),
231
output: analysis.TokenStream{
233
Term: []byte("ساهد"),
239
input: analysis.TokenStream{
241
Term: []byte("ساهدي"),
244
output: analysis.TokenStream{
246
Term: []byte("ساهد"),
252
input: analysis.TokenStream{
254
Term: []byte("وساهدون"),
257
output: analysis.TokenStream{
259
Term: []byte("ساهد"),
265
input: analysis.TokenStream{
267
Term: []byte("ساهدهات"),
270
output: analysis.TokenStream{
272
Term: []byte("ساهد"),
278
input: analysis.TokenStream{
283
output: analysis.TokenStream{
291
input: analysis.TokenStream{
293
Term: []byte("English"),
296
output: analysis.TokenStream{
298
Term: []byte("English"),
303
input: analysis.TokenStream{
305
Term: []byte("سلام"),
308
output: analysis.TokenStream{
310
Term: []byte("سلام"),
315
input: analysis.TokenStream{
317
Term: []byte("السلام"),
320
output: analysis.TokenStream{
322
Term: []byte("سلام"),
327
input: analysis.TokenStream{
329
Term: []byte("سلامة"),
332
output: analysis.TokenStream{
334
Term: []byte("سلام"),
339
input: analysis.TokenStream{
341
Term: []byte("السلامة"),
344
output: analysis.TokenStream{
346
Term: []byte("سلام"),
351
input: analysis.TokenStream{
353
Term: []byte("الوصل"),
356
output: analysis.TokenStream{
363
input: analysis.TokenStream{
365
Term: []byte("والصل"),
368
output: analysis.TokenStream{
376
input: analysis.TokenStream{
381
output: analysis.TokenStream{
389
arabicStemmerFilter := NewArabicStemmerFilter()
390
for _, test := range tests {
391
actual := arabicStemmerFilter.Filter(test.input)
392
if !reflect.DeepEqual(actual, test.output) {
393
t.Errorf("expected %#v, got %#v", test.output, actual)
394
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)