8
8
# The ASF licenses this file to you under the Apache License, Version 2.0
9
9
# (the "License"); you may not use this file except in compliance with
10
10
# the License. You may obtain a copy of the License at:
12
12
# http://www.apache.org/licenses/LICENSE-2.0
14
14
# Unless required by applicable law or agreed to in writing, software
15
15
# distributed under the License is distributed on an "AS IS" BASIS,
16
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
38
38
# The list of currently-valid TLDs for the DNS system.
40
# When updating domain lists, also modify t/uri_text.t accordingly
40
42
# http://data.iana.org/TLD/tlds-alpha-by-domain.txt
41
43
# Version 2008020601, Last Updated Thu Feb 7 09:07:00 2008 UTC
42
44
# The following have been removed from the list because they are
44
46
# as of 2008-02-08, e.g. http://en.wikipedia.org/wiki/.so_%28domain_name%29
45
47
# bv gb pm sj so um yt
50
# Re-enable .so .pm .yt in 2012-02-21
51
# http://www.afnic.fr/fr/produits-et-services/autres-domaines-de-premier-niveau/
52
# http://www.iana.org/domains/root/db/so.html
54
# Bug 5048: Re-enable .xxx 2012-04-01
47
56
# Remember to also change regexp below when updating!
55
64
in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp kr kw
56
65
ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk ml mm
57
66
mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne net
58
nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pn pr pro ps
59
pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sk sl sm sn
67
nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn pr pro ps
68
pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sk sl sm sn so
60
69
sr st su sv sy sz tc td tel tf tg th tj tk tl tm tn to tp tr travel tt
61
tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xxx ye za
70
tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xxx ye yt za
64
73
$VALID_TLDS{$_} = 1;
67
76
# %VALID_TLDS as Regexp::List optimized regexp, for use in Plugins etc
68
77
# Paste above list to:
69
78
# perl -MRegexp::List -e '$/=undef; $_=<>; $r = Regexp::List->new; push @l, $_ for (split); print $r->list2re(@l)'
70
# Verified up to date 20110501
79
# Verified up to date 20120401
71
80
$VALID_TLDS_RE = qr/
72
(?=[abcdefghijklmnopqrstuvwyz])
81
(?=[abcdefghijklmnopqrstuvwxyz])
73
82
(?:a(?:e(?:ro)?|r(?:pa)?|s(?:ia)?|[cdfgilmnoqtuwxz])|b(?:iz?|[abdefghjmnorstwyz])
74
|c(?:at?|o(?:m|op)?|[cdfghiklmnruvxyz])|d[ejkmoz]|e(?:[cegrst]|d?u)|f[ijkmor]
75
|g(?:[adefghilmnpqrstuwy]|ov)|h[kmnrtu]|i(?:n(?:fo|t)?|[delmoqrst])|j(?:o(?:bs)?|[emp])
76
|k[eghimnprwyz]|l[abcikrstuvy]|m(?:o(?:bi)?|u(?:seum)?|[acdeghkmnpqrstvwxyz]|i?l)
77
|n(?:a(?:me)?|et?|[cfgilopruz])|o(?:m|rg)|p(?:ro?|[aefghklnstwy])|r[eosuw]
78
|s[abcdeghiklmnrtuvyz]|t(?:r(?:avel)?|[cdfghjkmnoptvwz]|e?l)|u[agksyz]
79
|v[aceginu]|w[fs]|z[amw]|qa|ye
83
|c(?:at?|o(?:m|op)?|[cdfghiklmnruvxyz])|d[ejkmoz]|e(?:[cegrst]|d?u)|f[ijkmor]
84
|g(?:[adefghilmnpqrstuwy]|ov)|h[kmnrtu]|i(?:n(?:fo|t)?|[delmoqrst])|j(?:o(?:bs)?|[emp])
85
|k[eghimnprwyz]|l[abcikrstuvy]|m(?:o(?:bi)?|u(?:seum)?|[acdeghkmnpqrstvwxyz]|i?l)
86
|n(?:a(?:me)?|et?|[cfgilopruz])|o(?:m|rg)|p(?:ro?|[aefghklmnstwy])|r[eosuw]
87
|s[abcdeghiklmnortuvyz]|t(?:r(?:avel)?|[cdfghjkmnoptvwz]|e?l)|u[agksyz]
88
|v[aceginu]|w[fs]|y[et]|z[amw]|qa|xxx
89
98
# The freeapp.net site now says that information on the site is obsolete
90
99
# See discussion and sources in comments of bug 5677
91
100
# updated as per bug 5815
101
# cleanup in progress per bug 6795 (axb)
103
# .ua : http://hostmaster.ua
104
# .hu : http://www.domain.hu/domain/English/szabalyzat/sld.html
94
108
com.ac edu.ac gov.ac mil.ac net.ac org.ac
115
129
com.bm edu.bm gov.bm net.bm org.bm
116
130
com.bn edu.bn net.bn org.bn
117
131
com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
118
adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br cng.br cnt.br com.br coop.br dpn.br ecn.br edu.br eng.br esp.br etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br
132
adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br
119
133
com.bs net.bs org.bs
120
134
com.bt edu.bt gov.bt net.bt org.bt
124
138
ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca
125
139
co.ck edu.ck gov.ck net.ck org.ck
126
140
ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn
127
arts.co com.co edu.co firm.co gov.co info.co int.co mil.co net.co nom.co org.co rec.co store.co web.co
141
arts.co com.co edu.co firm.co gov.co info.co int.co mil.co net.co nom.co org.co rec.co web.co
128
142
lkd.co.im ltd.co.im plc.co.im
129
144
au.com br.com cn.com de.com eu.com gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com za.com
130
145
ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr
131
146
com.cu edu.cu gov.cu inf.cu net.cu org.cu
135
150
com.dm edu.dm gov.dm net.dm org.dm
136
151
art.do com.do edu.do gob.do gov.do mil.do net.do org.do sld.do web.do
137
152
art.dz asso.dz com.dz edu.dz gov.dz net.dz org.dz pol.dz
138
com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec net.ec org.ec pro.ec
153
com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec net.ec org.ec pro.ec gob.ec
139
154
co.ee com.ee edu.ee fie.ee med.ee org.ee pri.ee
140
155
com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg
141
156
com.er edu.er gov.er ind.er mil.er net.er org.er
145
160
ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj pro.fj school.fj
146
161
ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk
147
aeroport.fr assedic.fr asso.fr avocat.fr avoues.fr barreau.fr cci.fr chambagri.fr chirurgiens-dentistes.fr com.fr experts-comptables.fr geometre-expert.fr gouv.fr greta.fr huissier-justice.fr medecin.fr nom.fr notaires.fr pharmacien.fr port.fr prd.fr presse.fr tm.fr veterinaire.fr
162
tm.fr asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr
148
163
com.ge edu.ge gov.ge mil.ge net.ge org.ge pvt.ge
149
164
ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg ltd.gg net.gg org.gg sark.gg sch.gg
150
165
com.gh edu.gh gov.gh mil.gh org.gh
158
173
com.hn edu.hn gob.hn mil.hn net.hn org.hn
159
174
com.hr from.hr iz.hr name.hr
160
175
adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht
161
2000.hu ac.hu agrar.hu bolt.hu casino.hu city.hu co.hu edu.hu erotica.hu erotika.hu film.hu forum.hu games.hu gov.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu
176
2000.hu agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu
162
177
ac.id co.id go.id mil.id net.id or.id sch.id web.id
164
179
ac.il co.il gov.il idf.il k12.il muni.il net.il org.il
197
212
edu.mn gov.mn org.mn
198
213
com.mo edu.mo gov.mo net.mo org.mo
199
214
music.mobi weather.mobi
215
co.mp edu.mp gov.mp net.mp org.mp
200
216
com.mt edu.mt gov.mt net.mt org.mt tm.mt uu.mt
202
218
aero.mv biz.mv com.mv coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv org.mv pro.mv
207
223
com.nc net.nc org.nc
208
224
de.net gb.net uk.net
209
225
ac.ng com.ng edu.ng gov.ng net.ng org.ng sch.ng
210
com.ni edu.ni gob.ni net.ni nom.ni org.ni
226
ac.ni biz.ni com.ni edu.ni gob.ni in.ni info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni
211
227
fhs.no folkebibl.no fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no stat.no tel.no vgs.no
212
228
com.np edu.np gov.np mil.np net.np org.np
213
229
biz.nr co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr org.nr tel.nr tlf.nr
220
236
ac.pg com.pg net.pg
221
237
com.ph edu.ph gov.ph mil.ph net.ph ngo.ph org.ph
222
238
biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk gos.pk gov.pk net.pk org.pk web.pk
223
agro.pl aid.pl art.pl atm.pl auto.pl bialystok.pl biz.pl com.pl edu.pl gda.pl gdansk.pl gmina.pl gov.pl gsm.pl info.pl katowice.pl krakow.pl lodz.pl lublin.pl mail.pl media.pl miasta.pl mil.pl net.pl ngo.pl nieruchomosci.pl nom.pl olsztyn.pl opole.pl org.pl pc.pl powiat.pl poznan.pl priv.pl realestate.pl rel.pl sex.pl shop.pl sklep.pl slupsk.pl sos.pl szczecin.pl szkola.pl targi.pl tm.pl torun.pl tourism.pl travel.pl turystyka.pl warszawa.pl waw.pl wroc.pl wroclaw.pl za.pl zgora.pl
239
art.pl biz.pl com.pl edu.pl gov.pl info.pl mil.pl net.pl ngo.pl org.pl
224
240
biz.pr com.pr edu.pr gov.pr info.pr isla.pr name.pr net.pr org.pr pro.pr
225
241
cpa.pro law.pro med.pro
226
242
com.ps edu.ps gov.ps net.ps org.ps plo.ps sec.ps
230
246
asso.re com.re nom.re
231
247
arts.ro com.ro firm.ro info.ro nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro
232
248
ac.rs co.rs edu.rs gov.rs in.rs org.rs
233
ac.ru adygeya.ru altai.ru amur.ru amursk.ru arkhangelsk.ru astrakhan.ru baikal.ru bashkiria.ru belgorod.ru bir.ru bryansk.ru buryatia.ru cbg.ru chel.ru chelyabinsk.ru chita.ru chukotka.ru chuvashia.ru cmw.ru com.ru dagestan.ru dudinka.ru e-burg.ru edu.ru fareast.ru gov.ru grozny.ru int.ru irkutsk.ru ivanovo.ru izhevsk.ru jamal.ru jar.ru joshkar-ola.ru k-uralsk.ru kalmykia.ru kaluga.ru kamchatka.ru karelia.ru kazan.ru kchr.ru kemerovo.ru khabarovsk.ru khakassia.ru khv.ru kirov.ru kms.ru koenig.ru komi.ru kostroma.ru krasnoyarsk.ru kuban.ru kurgan.ru kursk.ru kustanai.ru kuzbass.ru lipetsk.ru magadan.ru magnitka.ru mari-el.ru mari.ru marine.ru mil.ru mordovia.ru mosreg.ru msk.ru murmansk.ru mytis.ru nakhodka.ru nalchik.ru net.ru nkz.ru nnov.ru norilsk.ru nov.ru novosibirsk.ru nsk.ru omsk.ru orenburg.ru org.ru oryol.ru oskol.ru palana.ru penza.ru perm.ru pp.ru pskov.ru ptz.ru pyatigorsk.ru rnd.ru rubtsovsk.ru ryazan.ru sakhalin.ru samara.ru saratov.ru simbirsk.ru smolensk.ru snz.ru spb.ru stavropol.ru stv.ru surgut.ru syzran.ru tambov.ru tatarstan.ru test.ru tom.ru tomsk.ru tsaritsyn.ru tsk.ru tula.ru tuva.ru tver.ru tyumen.ru udm.ru udmurtia.ru ulan-ude.ru vdonsk.ru vladikavkaz.ru vladimir.ru vladivostok.ru volgograd.ru vologda.ru voronezh.ru vrn.ru vyatka.ru yakutia.ru yamal.ru yaroslavl.ru yekaterinburg.ru yuzhno-sakhalinsk.ru zgrad.ru
249
ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
234
250
ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw
235
251
com.sa edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa
236
252
com.sb edu.sb gov.sb net.sb org.sb
267
283
com.ws edu.ws gov.ws net.ws org.ws
268
284
com.ye edu.ye gov.ye mil.ye net.ye org.ye
269
285
ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za mil.za net.za ngo.za nom.za org.za school.za tm.za web.za
270
ac.zm co.zm gov.zm org.zm sch.zm
286
ac.zm co.zm com.zm edu.zm gov.zm org.zm sch.zm
271
287
ac.zw co.zw gov.zw org.zw
281
ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
282
mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi
297
ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
298
mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi
285
301
$US_STATES{$_} = 1;
328
344
while (@domparts > 1) { # go until we find the TLD
329
345
if (@domparts == 4) {
330
if ($domparts[3] eq 'us' &&
331
(($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||
332
($domparts[0] =~ /^c[io]$/)))
346
if ($domparts[3] eq 'us' &&
347
(($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||
348
($domparts[0] =~ /^c[io]$/)))
334
350
# http://www.neustar.us/policies/docs/rfc_1480.txt
335
351
# "Fire-Dept.CI.Los-Angeles.CA.US"
336
352
# "<school-name>.PVT.K12.<state>.US"
337
353
last if ($US_STATES{$domparts[2]});
340
356
elsif (@domparts == 3) {
341
357
# http://www.neustar.us/policies/docs/rfc_1480.txt
344
# [^\.]+\.${US_STATES}\.us
345
if ($domparts[2] eq 'us') {
360
# [^\.]+\.${US_STATES}\.us
361
if ($domparts[2] eq 'us') {
346
362
last if ($US_STATES{$domparts[1]});
349
365
my $temp = join(".", @domparts);
350
366
last if ($THREE_LEVEL_DOMAINS{$temp});
353
369
elsif (@domparts == 2) {
355
my $temp = join(".", @domparts);
371
my $temp = join(".", @domparts);
356
372
last if ($TWO_LEVEL_DOMAINS{$temp});
358
374
push(@hostname, shift @domparts);