3
"""world -- Print mappings between country names and DNS country codes.
6
Email: barry@python.org
7
Version: %(__version__)s
9
This script will take a list of Internet addresses and print out where in the
10
world those addresses originate from, based on the top-level domain country
11
code found in the address. Addresses can be in any of the following forms:
13
xx -- just the country code or top-level domain identifier
14
host.domain.xx -- any Internet host or network name
15
somebody@where.xx -- an Internet email address
17
If no match is found, the address is interpreted as a regular expression and a
18
reverse lookup is attempted. This script will search the country names and
19
print a list of matching entries. You can force reverse mappings with the
20
`-r' flag (see below).
25
tz originated from Tanzania, United Republic of
26
us originated from United States
29
united matches 6 countries:
30
ae: United Arab Emirates
31
uk: United Kingdom (common practice)
32
um: United States Minor Outlying Islands
34
tz: Tanzania, United Republic of
37
Country codes are maintained by the RIPE Network Coordination Centre,
38
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
39
authoritative source of country code mappings is:
41
<url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
43
The latest known change to this information was:
45
Monday, 10 October 2006, 17:59:51 UTC
47
This script also knows about non-geographic top-level domains, and the
48
additional ccTLDs reserved by IANA.
50
Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]
54
Print mapping of all top-level domains.
58
Parse an iso3166-countrycodes file extracting the two letter country
59
code followed by the country name. Note that the three letter country
60
codes and numbers, which are also provided in the standard format
65
When used in conjunction with the `-p' option, output is in the form
66
of a Python dictionary, and country names are normalized
67
w.r.t. capitalization. This makes it appropriate for cutting and
68
pasting back into this file. Output is always to standard out.
72
Force reverse lookup. In this mode the address can be any Python
73
regular expression; this is matched against all country names and a
74
list of matching mappings is printed. In normal mode (i.e. without
75
this flag), reverse lookup is performed on addresses if no matching
76
country code is found.
82
__version__ = '$Revision: 59581 $'
93
def usage(code, msg=''):
94
print(__doc__ % globals())
101
def resolve(rawaddr):
102
parts = rawaddr.split('.')
104
# no top level domain found, bounce it to the next step
108
print(rawaddr, 'is in the', nameorgs[addr], 'top level domain')
110
elif addr in countries:
111
print(rawaddr, 'originated from', countries[addr])
114
# Not resolved, bounce it to the next step
121
cre = re.compile(regexp, re.IGNORECASE)
122
for code, country in all.items():
123
mo = cre.search(country)
128
# not resolved, bounce it to the next step
130
if len(matches) == 1:
132
print(regexp, "matches code `%s', %s" % (code, all[code]))
134
print(regexp, 'matches %d countries:' % len(matches))
136
print(" %s: %s" % (code, all[code]))
141
def parse(file, normalize):
144
except IOError as err:
145
errno, msg = err.args
146
print(msg, ':', file)
149
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
153
print('countries = {')
168
print('Could not parse line:', line)
170
country, code = mo.group(1, 2)
172
words = country.split()
173
for i in range(len(words)):
176
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
178
elif w == 'THE' and i != 1:
180
elif len(w) > 3 and w[1] == "'":
181
words[i] = w[0:3].upper() + w[3:].lower()
182
elif w in ('(U.S.)', 'U.S.'):
184
elif w[0] == '(' and w != '(local':
185
words[i] = '(' + w[1:].capitalize()
186
elif w.find('-') != -1:
188
[s.capitalize() for s in w.split('-')])
190
words[i] = w.capitalize()
192
country = ' '.join(words)
193
print(' "%s": "%s",' % (code, country))
213
opts, args = getopt.getopt(
216
['parse=', 'reverse', 'outputdict', 'help', 'dump'])
217
except getopt.error as msg:
220
for opt, arg in opts:
221
if opt in ('-h', '--help'):
223
elif opt in ('-d', '--dump'):
225
elif opt in ('-p', '--parse'):
227
elif opt in ('-o', '--outputdict'):
229
elif opt in ('-r', '--reverse'):
236
print('Official country coded domains:')
237
codes = sorted(countries)
239
print(' %2s:' % code, countries[code])
241
print('\nOther top-level domains:')
242
codes = sorted(nameorgs)
244
print(' %6s:' % code, nameorgs[code])
246
parse(parsefile, normalize)
249
args = filter(None, map(resolve, args))
250
args = filter(None, map(reverse, args))
252
print('Where in the world is %s?' % arg)
258
# New top level domains as described by ICANN
259
# http://www.icann.org/tlds/
260
"aero": "air-transport industry",
261
"asia": "from Asia/for Asia",
264
"cat": "Catalan community",
266
"coop": "cooperatives",
267
"edu": "educational",
269
"info": "unrestricted `info'",
270
"int": "international",
271
"jobs": "employment-related",
273
"mobi": "mobile specific",
275
"name": "`name' (for registration by individuals)",
277
"org": "non-commercial",
278
"pro": "professionals",
279
"tel": "business telecommunications",
280
"travel": "travel and tourism",
281
# These additional ccTLDs are included here even though they are not part
282
# of ISO 3166. IANA has a decoding table listing all reserved ccTLDs:
284
# http://www.iso.org/iso/iso-3166-1_decoding_table
286
# Note that `uk' is the common practice country code for the United
287
# Kingdom. AFAICT, the official `gb' code is routinely ignored!
289
# <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
290
# was adopted for top-level DNS zone names (although in the reverse order
291
# like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
292
# large-scale renaming process as the UK switched from their old `Coloured
293
# Book' protocols over X.25 to Internet protocols over IP.
295
# See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
297
# Also, `su', while obsolete, is still in limited use.
298
"ac": "Ascension Island",
299
"cp": "Clipperton Island",
300
"dg": "Diego Garcia",
301
"ea": "Ceuta, Melilla",
302
"eu": "European Union",
303
"fx": "Metropolitan France",
304
"ic": "Canary Islands",
305
"ta": "Tristan da Cunha",
306
"uk": "United Kingdom (common practice)",
307
"su": "Soviet Union (still in limited use)",
314
"ax": "Aland Islands",
317
"as": "American Samoa",
322
"ag": "Antigua and Barbuda",
340
"ba": "Bosnia and Herzegovina",
342
"bv": "Bouvet Island",
344
"io": "British Indian Ocean Territory",
345
"bn": "Brunei Darussalam",
347
"bf": "Burkina Faso",
353
"ky": "Cayman Islands",
354
"cf": "Central African Republic",
358
"cx": "Christmas Island",
359
"cc": "Cocos (Keeling) Islands",
363
"cd": "Congo, The Democratic Republic of the",
364
"ck": "Cook Islands",
366
"ci": "Cote D'Ivoire",
370
"cz": "Czech Republic",
374
"do": "Dominican Republic",
378
"gq": "Equatorial Guinea",
382
"fk": "Falkland Islands (Malvinas)",
383
"fo": "Faroe Islands",
387
"gf": "French Guiana",
388
"pf": "French Polynesia",
389
"tf": "French Southern Territories",
404
"gw": "Guinea-Bissau",
407
"hm": "Heard Island and Mcdonald Islands",
408
"va": "Holy See (Vatican City State)",
415
"ir": "Iran (Islamic Republic of)",
428
"kp": "Korea, Democratic People's Republic of",
429
"kr": "Korea, Republic of",
432
"la": "Lao People's Democratic Republic",
437
"ly": "Libyan Arab Jamahiriya",
438
"li": "Liechtenstein",
442
"mk": "Macedonia, The Former Yugoslav Republic of",
449
"mh": "Marshall Islands",
455
"fm": "Micronesia, Federated States of",
456
"md": "Moldova, Republic of",
468
"an": "Netherlands Antilles",
469
"nc": "New Caledonia",
475
"nf": "Norfolk Island",
476
"mp": "Northern Mariana Islands",
481
"ps": "Palestinian Territory, Occupied",
483
"pg": "Papua New Guinea",
494
"ru": "Russian Federation",
496
"sh": "Saint Helena",
497
"kn": "Saint Kitts and Nevis",
499
"pm": "Saint Pierre and Miquelon",
500
"vc": "Saint Vincent and the Grenadines",
503
"st": "Sao Tome and Principe",
504
"sa": "Saudi Arabia",
508
"sl": "Sierra Leone",
512
"sb": "Solomon Islands",
514
"za": "South Africa",
515
"gs": "South Georgia and the South Sandwich Islands",
520
"sj": "Svalbard and Jan Mayen",
522
"pm": "St. Pierre and Miquelon",
526
"sy": "Syrian Arab Republic",
527
"tw": "Taiwan, Province of China",
529
"tz": "Tanzania, United Republic of",
535
"tt": "Trinidad and Tobago",
538
"tm": "Turkmenistan",
539
"tc": "Turks and Caicos Islands",
543
"ae": "United Arab Emirates",
544
"gb": "United Kingdom",
545
"us": "United States",
546
"um": "United States Minor Outlying Islands",
550
"va": "Vatican City State (Holy See)",
553
"vg": "Virgin Islands (British)",
554
"vi": "Virgin Islands (U.S.)",
555
"wf": "Wallis and Futuna",
556
"eh": "Western Sahara",
563
all = nameorgs.copy()
564
all.update(countries)
567
if __name__ == '__main__':