1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
15
// verbose is a command-line flag. Per its usage text, setting it to true
// prints the internal tables of matchers.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
17
// TestAddLikelySubtags parses each input tag, expands it with
// addLikelySubtags, and checks that the result has the same string form as
// the parsed expected tag.
func TestAddLikelySubtags(t *testing.T) {
18
tests := []struct{ in, out string }{
20
{"aa-Latn", "aa-Latn-ET"},
21
{"aa-Arab", "aa-Arab-ET"},
22
{"aa-Arab-ER", "aa-Arab-ER"},
24
{"kk-CN", "kk-Arab-CN"},
26
{"zh-AU", "zh-Hant-AU"},
27
{"zh-VN", "zh-Hant-VN"},
28
{"zh-SG", "zh-Hans-SG"},
29
{"zh-Hant", "zh-Hant-TW"},
30
{"zh-Hani", "zh-Hani-CN"},
31
{"und-Hani", "zh-Hani-CN"},
32
{"und", "en-Latn-US"},
33
{"und-GB", "en-Latn-GB"},
34
{"und-CW", "pap-Latn-CW"},
35
{"und-YT", "fr-Latn-YT"},
36
{"und-Arab", "ar-Arab-EG"},
37
{"und-AM", "hy-Armn-AM"},
38
{"und-002", "en-Latn-NG"},
39
{"und-Latn-002", "en-Latn-NG"},
40
{"en-Latn-002", "en-Latn-NG"},
41
{"en-002", "en-Latn-NG"},
42
{"en-001", "en-Latn-US"},
43
{"und-003", "en-Latn-US"},
44
// NOTE(review): duplicates the und-GB case earlier in this table.
{"und-GB", "en-Latn-GB"},
45
{"Latn-001", "en-Latn-US"},
46
// NOTE(review): duplicates the en-001 case earlier in this table.
{"en-001", "en-Latn-US"},
47
{"es-419", "es-Latn-419"},
48
{"he-145", "he-Hebr-IL"},
49
{"ky-145", "ky-Latn-TR"},
51
// Don't specialize duplicate and ambiguous matches.
52
{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
53
{"ku-145", "ku-Latn-TR"}, // Matches IQ, TR, and LB, but ku -> TR.
54
{"und-Arab-CC", "ms-Arab-CC"},
55
{"und-Arab-GB", "ks-Arab-GB"},
56
{"und-Hans-CC", "zh-Hans-CC"},
57
{"und-CC", "en-Latn-CC"},
59
{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
60
// We would like addLikelySubtags to generate the same results if the input
61
// only changes by adding tags that would otherwise have been added
64
// und-AA -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
65
// und-AA -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
66
// und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
67
// und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
68
// xx -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
69
// xx -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
71
// The algorithm specified in
72
// http://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
73
// Section C.10, does not handle the first case. For example,
74
// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but
75
// there is no rule for und-Latn-BJ. According to the spec, und-Latn-BJ
76
// would expand to en-Latn-BJ, violating the aforementioned principle.
77
// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
78
// if a rule of the form und-AA -> xx-Scrp-AA is defined.
79
// Note that as of version 23, CLDR has some explicitly specified
80
// entries that do not conform to these rules. The implementation
81
// will not correct these explicit inconsistencies. A later version of CLDR
82
// is supposed to fix this.
83
{"und-Latn-BJ", "fr-Latn-BJ"},
84
{"und-Bugi-ID", "bug-Bugi-ID"},
85
// regions, scripts and languages without definitions
86
{"und-Arab-AA", "ar-Arab-AA"},
87
{"und-Afak-RE", "fr-Afak-RE"},
88
// NOTE(review): duplicates the und-Arab-GB case earlier in this table.
{"und-Arab-GB", "ks-Arab-GB"},
89
{"abp-Arab-GB", "abp-Arab-GB"},
90
// script has preference over region
91
{"und-Arab-NL", "ar-Arab-NL"},
92
{"zza", "zza-Arab-TR"},
93
// preserve variants and extensions
94
{"de-1901", "de-Latn-DE-1901"},
95
{"de-x-abc", "de-Latn-DE-x-abc"},
96
{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
97
{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
99
// The second results of Parse and addLikelySubtags are discarded; only the
// string forms of the expanded input and the parsed expectation are compared.
for i, tt := range tests {
100
in, _ := Parse(tt.in)
101
out, _ := Parse(tt.out)
102
in, _ = in.addLikelySubtags()
103
if in.String() != out.String() {
104
t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
108
// TestMinimize checks two properties for each case: that minimize turns the
// parsed input into a tag with the expected string form, and that expanding
// the minimized tag with addLikelySubtags gives the same string as expanding
// the original input.
func TestMinimize(t *testing.T) {
109
tests := []struct{ in, out string }{
112
{"aa-Latn-ET", "aa"},
114
{"aa-Arab", "aa-Arab"},
115
{"aa-Arab-ER", "aa-Arab-ER"},
116
{"aa-Arab-ET", "aa-Arab"},
119
{"und-Latn-US", "und"},
120
{"en-Latn-US", "en"},
122
{"cmn-Hans", "cmn-Hans"},
123
{"cmn-Hant", "cmn-Hant"},
127
{"zh-Hant", "zh-Hant"},
128
{"zh-Hant-TW", "zh-TW"},
130
{"zh-Hani", "zh-Hani"},
131
{"und-Hans", "und-Hans"},
132
{"und-Hani", "und-Hani"},
134
{"und-CW", "und-CW"},
135
{"und-YT", "und-YT"},
136
{"und-Arab", "und-Arab"},
137
{"und-AM", "und-AM"},
138
{"und-Arab-CC", "und-Arab-CC"},
139
{"und-CC", "und-CC"},
140
{"und-Latn-BJ", "und-BJ"},
141
{"und-Bugi-ID", "und-Bugi"},
142
{"bug-Bugi-ID", "bug-Bugi"},
143
// regions, scripts and languages without definitions
144
{"und-Arab-AA", "und-Arab-AA"},
145
// preserve variants and extensions
146
{"de-Latn-1901", "de-1901"},
147
{"de-Latn-x-abc", "de-x-abc"},
148
{"de-DE-1901-x-abc", "de-1901-x-abc"},
149
{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
151
// The second results of Parse, minimize and addLikelySubtags are discarded.
// Note that the locals min and max shadow the Go 1.21+ builtins of the same
// names inside this loop.
for i, tt := range tests {
152
in, _ := Parse(tt.in)
153
out, _ := Parse(tt.out)
154
min, _ := in.minimize()
155
if min.String() != out.String() {
156
t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
158
// Round trip: maximizing the minimized tag must match maximizing the input.
max, _ := min.addLikelySubtags()
159
if x, _ := in.addLikelySubtags(); x.String() != max.String() {
160
t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
165
// TestRegionDistance resolves the two region strings of each test case with
// getRegionID and checks that regionDistance returns the expected value d.
func TestRegionDistance(t *testing.T) {
188
for i, tt := range tests {
189
// The second result of getRegionID is discarded for both lookups.
ra, _ := getRegionID([]byte(tt.a))
190
rb, _ := getRegionID([]byte(tt.b))
191
if d := regionDistance(ra, rb); d != tt.d {
192
t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d)
197
// TestParentDistance checks parentDistance for each test case: the region of
// the parsed parent tag and the parsed tag are passed to parentDistance, and
// the result is compared with the expected distance d.
func TestParentDistance(t *testing.T) {
203
{"en-GB", "en-AU", 1},
204
{"pt-PT", "pt-AO", 1},
206
{"en-AU", "en-GB", 255},
207
{"en-NL", "en-AU", 255},
208
// Note that pt-BR and en-US are not automatically minimized.
209
{"pt-BR", "pt-AO", 255},
210
{"en-US", "en-AU", 255},
212
for _, tt := range tests {
213
// Only the region of the parent tag is used; the tag itself is passed whole.
r := Raw.MustParse(tt.parent).region
214
tag := Raw.MustParse(tt.tag)
215
if d := parentDistance(r, tag); d != tt.d {
216
t.Errorf("d(%s, %s) was %d; want %d", r, tag, d, tt.d)
221
// Implementation of String methods for various types for debugging purposes.
223
// String formats the matcher for debugging: first a "Default:" line with
// m.default_, then one indented "tag: header" line per entry of m.index.
// NOTE(review): if m.index is a map, the order of the entry lines varies
// between runs — confirm against the matcher definition.
func (m *matcher) String() string {
225
fmt.Fprintln(w, "Default:", m.default_)
226
for tag, h := range m.index {
227
fmt.Fprintf(w, " %s: %v\n", tag, h)
232
// String formats the header for debugging as "exact: " followed by each
// element of h.exact, then "; max: " followed by each element of h.max.
// Every element is written with %v and followed by ", ".
func (h *matchHeader) String() string {
234
fmt.Fprintf(w, "exact: ")
235
// The loop variable h shadows the receiver inside the loop body.
for _, h := range h.exact {
236
fmt.Fprintf(w, "%v, ", h)
238
fmt.Fprint(w, "; max: ")
239
// As above, the loop variable h shadows the receiver inside the loop body.
for _, h := range h.max {
240
fmt.Fprintf(w, "%v, ", h)
245
// String formats the haveTag for debugging in the layout
// tag:index:conf:maxRegion-maxScript|altScript.
func (t haveTag) String() string {
246
return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
249
// The test set for TestBestMatch is defined in data_test.go.
//
// TestBestMatch builds a matcher from the supported list of each entry in
// matchTests, asks it for the best match for every desired list, and reports
// an error when the string form of the resulting tag differs from tm.match.
249
func TestBestMatch(t *testing.T) {
251
// parse turns a comma-separated list of tag strings into tags, trimming
// surrounding white space from each element before passing it to mk.
parse := func(list string) (out []Tag) {
252
for _, s := range strings.Split(list, ",") {
253
out = append(out, mk(strings.TrimSpace(s)))
257
for i, tt := range matchTests {
258
supported := parse(tt.supported)
259
m := newMatcher(supported)
261
// Print the test comment followed by the matcher itself.
fmt.Printf("%s:\n%v\n", tt.comment, m)
263
for _, tm := range tt.test {
264
desired := parse(tm.desired)
265
id, conf := m.getBest(desired...)
270
if tag.String() != tm.match {
271
t.Errorf("%d:%s: find %s in %q: have %s; want %s (%v)\n", i, tt.comment, desired, tt.supported, tag, tm.match, conf)
277
// benchHave is the list of tags from which the benchmarks below build their
// matcher via newMatcher.
var benchHave = []Tag{
311
// benchWant holds the tag lists that BenchmarkMatch ranges over on every
// iteration.
var benchWant = [][]Tag{
342
// BenchmarkMatch builds one matcher from benchHave and then, on each of the
// b.N iterations, walks every entry of benchWant.
func BenchmarkMatch(b *testing.B) {
343
m := newMatcher(benchHave)
344
for i := 0; i < b.N; i++ {
345
for _, want := range benchWant {
351
// BenchmarkMatchExact builds one matcher from benchHave before entering the
// b.N loop.
// NOTE(review): judging by the name, the loop exercises an exact match —
// confirm against the loop body.
func BenchmarkMatchExact(b *testing.B) {
353
m := newMatcher(benchHave)
354
for i := 0; i < b.N; i++ {
359
// BenchmarkMatchAltLanguagePresent builds one matcher from benchHave before
// entering the b.N loop.
// NOTE(review): judging by the name, the desired tag has an alternative
// language that is present in benchHave — confirm against the loop body.
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
361
m := newMatcher(benchHave)
362
for i := 0; i < b.N; i++ {
367
// BenchmarkMatchAltLanguageNotPresent builds one matcher from benchHave
// before entering the b.N loop.
// NOTE(review): judging by the name, the desired tag has an alternative
// language that is absent from benchHave — confirm against the loop body.
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
369
m := newMatcher(benchHave)
370
for i := 0; i < b.N; i++ {
375
// BenchmarkMatchAltScriptPresent prepares the desired tag zh-Hant-CN and a
// matcher built from benchHave before entering the b.N loop.
func BenchmarkMatchAltScriptPresent(b *testing.B) {
376
want := mk("zh-Hant-CN")
377
m := newMatcher(benchHave)
378
for i := 0; i < b.N; i++ {
383
// BenchmarkMatchAltScriptNotPresent prepares the desired tag fr-Cyrl and a
// matcher built from benchHave before entering the b.N loop.
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
384
want := mk("fr-Cyrl")
385
m := newMatcher(benchHave)
386
for i := 0; i < b.N; i++ {
391
func BenchmarkMatchLimitedExact(b *testing.B) {
392
want := []Tag{mk("he-NL"), mk("iw-NL")}
393
m := newMatcher(benchHave)
394
for i := 0; i < b.N; i++ {