~ubuntu-branches/debian/sid/golang-github-blevesearch-bleve/sid

« back to all changes in this revision

Viewing changes to analysis/lang/en/analyzer_en_test.go

  • Committer: Package Import Robot
  • Author(s): Michael Lustfield
  • Date: 2017-03-30 16:06:03 UTC
  • Revision ID: package-import@ubuntu.com-20170330160603-0oogmb960l7918jx
Tags: upstream-0.5.0+git20170324.202.4702785f
Import upstream version 0.5.0+git20170324.202.4702785f

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
//  Copyright (c) 2014 Couchbase, Inc.
 
2
//
 
3
// Licensed under the Apache License, Version 2.0 (the "License");
 
4
// you may not use this file except in compliance with the License.
 
5
// You may obtain a copy of the License at
 
6
//
 
7
//              http://www.apache.org/licenses/LICENSE-2.0
 
8
//
 
9
// Unless required by applicable law or agreed to in writing, software
 
10
// distributed under the License is distributed on an "AS IS" BASIS,
 
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
12
// See the License for the specific language governing permissions and
 
13
// limitations under the License.
 
14
 
 
15
package en
 
16
 
 
17
import (
 
18
        "reflect"
 
19
        "testing"
 
20
 
 
21
        "github.com/blevesearch/bleve/analysis"
 
22
        "github.com/blevesearch/bleve/registry"
 
23
)
 
24
 
 
25
func TestEnglishAnalyzer(t *testing.T) {
 
26
        tests := []struct {
 
27
                input  []byte
 
28
                output analysis.TokenStream
 
29
        }{
 
30
                // stemming
 
31
                {
 
32
                        input: []byte("books"),
 
33
                        output: analysis.TokenStream{
 
34
                                &analysis.Token{
 
35
                                        Term:     []byte("book"),
 
36
                                        Position: 1,
 
37
                                        Start:    0,
 
38
                                        End:      5,
 
39
                                },
 
40
                        },
 
41
                },
 
42
                {
 
43
                        input: []byte("book"),
 
44
                        output: analysis.TokenStream{
 
45
                                &analysis.Token{
 
46
                                        Term:     []byte("book"),
 
47
                                        Position: 1,
 
48
                                        Start:    0,
 
49
                                        End:      4,
 
50
                                },
 
51
                        },
 
52
                },
 
53
                // stop word removal
 
54
                {
 
55
                        input:  []byte("the"),
 
56
                        output: analysis.TokenStream{},
 
57
                },
 
58
                // possessive removal
 
59
                {
 
60
                        input: []byte("steven's"),
 
61
                        output: analysis.TokenStream{
 
62
                                &analysis.Token{
 
63
                                        Term:     []byte("steven"),
 
64
                                        Position: 1,
 
65
                                        Start:    0,
 
66
                                        End:      8,
 
67
                                },
 
68
                        },
 
69
                },
 
70
                {
 
71
                        input: []byte("steven\u2019s"),
 
72
                        output: analysis.TokenStream{
 
73
                                &analysis.Token{
 
74
                                        Term:     []byte("steven"),
 
75
                                        Position: 1,
 
76
                                        Start:    0,
 
77
                                        End:      10,
 
78
                                },
 
79
                        },
 
80
                },
 
81
                {
 
82
                        input: []byte("steven\uFF07s"),
 
83
                        output: analysis.TokenStream{
 
84
                                &analysis.Token{
 
85
                                        Term:     []byte("steven"),
 
86
                                        Position: 1,
 
87
                                        Start:    0,
 
88
                                        End:      10,
 
89
                                },
 
90
                        },
 
91
                },
 
92
        }
 
93
 
 
94
        cache := registry.NewCache()
 
95
        analyzer, err := cache.AnalyzerNamed(AnalyzerName)
 
96
        if err != nil {
 
97
                t.Fatal(err)
 
98
        }
 
99
        for _, test := range tests {
 
100
                actual := analyzer.Analyze(test.input)
 
101
                if !reflect.DeepEqual(actual, test.output) {
 
102
                        t.Errorf("expected %v, got %v", test.output, actual)
 
103
                }
 
104
        }
 
105
}