1
// Copyright 2012 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
13
func TestNodeLabel(t *testing.T) {
14
for i, want := range nodeLabels {
15
got := nodeLabel(uint32(i))
17
t.Errorf("%d: got %q, want %q", i, got, want)
22
// TestFind runs find(tc, 0, numTLD) for every string in testCases and reports
// a mismatch between the result and the expected node index.
//
// NOTE(review): the test-case list and the statements that compute want are
// not visible in this view; the loop over nodeLabel(i) for i < numTLD below
// presumably derives want by linear scan — confirm against the full file.
func TestFind(t *testing.T) {
23
testCases := []string{
49
for _, tc := range testCases {
50
// Look the label up among the first numTLD nodes.
got := find(tc, 0, numTLD)
52
// Linear scan over the same node range, comparing labels directly.
for i := uint32(0); i < numTLD; i++ {
53
if tc == nodeLabel(i) {
59
t.Errorf("%q: got %d, want %d", tc, got, want)
64
// TestICANN checks the second (boolean) result of PublicSuffix for each
// domain in testCases.
//
// NOTE(review): presumably that boolean is the "managed by ICANN" flag —
// confirm against PublicSuffix's declaration. Some map entries and the
// got/want comparison are not visible in this view.
func TestICANN(t *testing.T) {
65
testCases := map[string]bool{
68
"foo.dyndns.org": false,
69
"foo.go.dyndns.org": false,
70
"foo.blogspot.co.uk": false,
71
"foo.intranet": false,
73
for domain, want := range testCases {
74
// Only the boolean result is under test; the suffix string is discarded.
_, got := PublicSuffix(domain)
76
t.Errorf("%q: got %v, want %v", domain, got, want)
81
var publicSuffixTestCases = []struct {
98
{"www.pb.ao", "pb.ao"},
99
{"www.xxx.yyy.zzz.pb.ao", "pb.ao"},
101
// The .ar rules are:
116
{"www.nic.ar", "ar"},
117
{"com.ar", "com.ar"},
118
{"www.com.ar", "com.ar"},
119
{"blogspot.com.ar", "blogspot.com.ar"},
120
{"www.blogspot.com.ar", "blogspot.com.ar"},
121
{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
122
{"logspot.com.ar", "com.ar"},
123
{"zlogspot.com.ar", "com.ar"},
124
{"zblogspot.com.ar", "com.ar"},
126
// The .arpa rules are:
135
{"www.arpa", "arpa"},
136
{"urn.arpa", "urn.arpa"},
137
{"www.urn.arpa", "urn.arpa"},
138
{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},
140
// The relevant {kobe,kyoto}.jp rules are:
148
{"c.kobe.jp", "c.kobe.jp"},
149
{"b.c.kobe.jp", "c.kobe.jp"},
150
{"a.b.c.kobe.jp", "c.kobe.jp"},
151
{"city.kobe.jp", "kobe.jp"},
152
{"www.city.kobe.jp", "kobe.jp"},
153
{"kyoto.jp", "kyoto.jp"},
154
{"test.kyoto.jp", "kyoto.jp"},
155
{"ide.kyoto.jp", "ide.kyoto.jp"},
156
{"b.ide.kyoto.jp", "ide.kyoto.jp"},
157
{"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
159
// The .tw rules are:
171
// 網路.tw (xn--zf0ao64a.tw)
172
// 組織.tw (xn--uc0atv.tw)
173
// 商業.tw (xn--czrw28b.tw)
177
{"www.aaa.tw", "tw"},
178
{"xn--czrw28b.aaa.tw", "tw"},
179
{"edu.tw", "edu.tw"},
180
{"www.edu.tw", "edu.tw"},
181
{"xn--czrw28b.edu.tw", "edu.tw"},
182
{"xn--czrw28b.tw", "xn--czrw28b.tw"},
183
{"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
184
{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
185
{"xn--kpry57d.tw", "tw"},
187
// The .uk rules are:
203
{"www.aaa.uk", "uk"},
205
{"www.mod.uk", "uk"},
207
{"mod.sch.uk", "mod.sch.uk"},
208
{"www.sch.uk", "www.sch.uk"},
209
{"blogspot.co.uk", "blogspot.co.uk"},
210
{"blogspot.nic.uk", "uk"},
211
{"blogspot.sch.uk", "blogspot.sch.uk"},
215
{"xn--p1ai", "xn--p1ai"},
216
{"aaa.xn--p1ai", "xn--p1ai"},
217
{"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
219
// The .zw rules are:
222
{"www.zw", "www.zw"},
223
{"zzz.zw", "zzz.zw"},
224
{"www.zzz.zw", "zzz.zw"},
225
{"www.xxx.yyy.zzz.zw", "zzz.zw"},
227
// There are no .nosuchtld rules.
228
{"nosuchtld", "nosuchtld"},
229
{"foo.nosuchtld", "nosuchtld"},
230
{"bar.foo.nosuchtld", "nosuchtld"},
233
func BenchmarkPublicSuffix(b *testing.B) {
234
for i := 0; i < b.N; i++ {
235
for _, tc := range publicSuffixTestCases {
236
List.PublicSuffix(tc.domain)
241
func TestPublicSuffix(t *testing.T) {
242
for _, tc := range publicSuffixTestCases {
243
got := List.PublicSuffix(tc.domain)
245
t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
250
func TestSlowPublicSuffix(t *testing.T) {
251
for _, tc := range publicSuffixTestCases {
252
got := slowPublicSuffix(tc.domain)
254
t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
259
// slowPublicSuffix implements the canonical (but O(number of rules)) public
260
// suffix algorithm described at http://publicsuffix.org/list/.
262
// 1. Match domain against all rules and take note of the matching ones.
263
// 2. If no rules match, the prevailing rule is "*".
264
// 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
265
// 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
266
// 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
267
// 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
268
// 7. The registered or registrable domain is the public suffix plus one additional label.
270
// This function returns the public suffix, not the registrable domain, and so
271
// it stops after step 6.
272
func slowPublicSuffix(domain string) string {
273
match := func(rulePart, domainPart string) bool {
278
return rulePart[1:] == domainPart
280
return rulePart == domainPart
283
domainParts := strings.Split(domain, ".")
284
var matchingRules [][]string
287
for _, rule := range rules {
288
ruleParts := strings.Split(rule, ".")
289
if len(domainParts) < len(ruleParts) {
292
for i := range ruleParts {
293
rulePart := ruleParts[len(ruleParts)-1-i]
294
domainPart := domainParts[len(domainParts)-1-i]
295
if !match(rulePart, domainPart) {
299
matchingRules = append(matchingRules, ruleParts)
301
if len(matchingRules) == 0 {
302
matchingRules = append(matchingRules, []string{"*"})
304
sort.Sort(byPriority(matchingRules))
306
prevailing := matchingRules[0]
307
if prevailing[0][0] == '!' {
308
prevailing = prevailing[1:]
310
if prevailing[0][0] == '*' {
311
replaced := domainParts[len(domainParts)-len(prevailing)]
312
prevailing = append([]string{replaced}, prevailing[1:]...)
314
return strings.Join(prevailing, ".")
317
// byPriority implements sort.Interface over a list of rules, each rule being
// its dot-separated labels. After sorting, the prevailing rule is first:
// exception rules (first label starting with '!') come before all others,
// and otherwise rules with more labels come first.
//
// Every rule must have a non-empty first label.
type byPriority [][]string

func (b byPriority) Len() int      { return len(b) }
func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

// Less reports whether rule i has strictly higher priority than rule j.
func (b byPriority) Less(i, j int) bool {
	iException := b[i][0][0] == '!'
	jException := b[j][0][0] == '!'
	// Only let exception status decide when the two rules differ in it, so
	// that Less(i, j) and Less(j, i) can never both be true.
	if iException != jException {
		return iException
	}
	return len(b[i]) > len(b[j])
}
331
// eTLDPlusOneTestCases come from
332
// http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
333
var eTLDPlusOneTestCases = []struct {
340
{"example.example", "example.example"},
341
{"b.example.example", "example.example"},
342
{"a.b.example.example", "example.example"},
343
// TLD with only 1 rule.
345
{"domain.biz", "domain.biz"},
346
{"b.domain.biz", "domain.biz"},
347
{"a.b.domain.biz", "domain.biz"},
348
// TLD with some 2-level rules.
350
{"example.com", "example.com"},
351
{"b.example.com", "example.com"},
352
{"a.b.example.com", "example.com"},
354
{"example.uk.com", "example.uk.com"},
355
{"b.example.uk.com", "example.uk.com"},
356
{"a.b.example.uk.com", "example.uk.com"},
357
{"test.ac", "test.ac"},
358
// TLD with only 1 (wildcard) rule.
361
{"b.c.cy", "b.c.cy"},
362
{"a.b.c.cy", "b.c.cy"},
365
{"test.jp", "test.jp"},
366
{"www.test.jp", "test.jp"},
368
{"test.ac.jp", "test.ac.jp"},
369
{"www.test.ac.jp", "test.ac.jp"},
371
{"test.kyoto.jp", "test.kyoto.jp"},
372
{"ide.kyoto.jp", ""},
373
{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
374
{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
376
{"b.c.kobe.jp", "b.c.kobe.jp"},
377
{"a.b.c.kobe.jp", "b.c.kobe.jp"},
378
{"city.kobe.jp", "city.kobe.jp"},
379
{"www.city.kobe.jp", "city.kobe.jp"},
380
// TLD with a wildcard rule and exceptions.
383
{"b.test.ck", "b.test.ck"},
384
{"a.b.test.ck", "b.test.ck"},
385
{"www.ck", "www.ck"},
386
{"www.www.ck", "www.ck"},
389
{"test.us", "test.us"},
390
{"www.test.us", "test.us"},
392
{"test.ak.us", "test.ak.us"},
393
{"www.test.ak.us", "test.ak.us"},
395
{"test.k12.ak.us", "test.k12.ak.us"},
396
{"www.test.k12.ak.us", "test.k12.ak.us"},
397
// Punycoded IDN labels
398
{"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
399
{"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
400
{"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
401
{"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
402
{"xn--55qx5d.cn", ""},
403
{"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
404
{"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
405
{"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
409
func TestEffectiveTLDPlusOne(t *testing.T) {
410
for _, tc := range eTLDPlusOneTestCases {
411
got, _ := EffectiveTLDPlusOne(tc.domain)
413
t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)