1
"""Guess the MIME type of a file.
3
This module defines two useful functions:
5
guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
7
guess_extension(type, strict=True) -- guess the extension for a given MIME type.
9
It also contains the following, for tuning the behavior:
13
knownfiles -- list of files to parse
14
inited -- flag set when init() has been called
15
suffix_map -- dictionary mapping suffixes to suffixes
16
encodings_map -- dictionary mapping suffixes to encodings
17
types_map -- dictionary mapping suffixes to types
21
init([files]) -- parse a list of files, default knownfiles
22
read_mime_types(file) -- parse one file, return a dictionary or None
30
"guess_type","guess_extension","guess_all_extensions",
31
"add_type","read_mime_types","init"
36
"/etc/httpd/mime.types", # Mac OS X
37
"/etc/httpd/conf/mime.types", # Apache
38
"/etc/apache/mime.types", # Apache 1
39
"/etc/apache2/mime.types", # Apache 2
40
"/usr/local/etc/httpd/conf/mime.types",
41
"/usr/local/lib/netscape/mime.types",
42
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
43
"/usr/local/etc/mime.types", # Apache 1.3
50
"""MIME-types datastore.
52
This datastore can handle information from mime.types-style files
53
and supports basic determination of MIME type from a filename or
54
URL, and can guess a reasonable extension given a MIME type.
57
def __init__(self, filenames=(), strict=True):
60
self.encodings_map = encodings_map.copy()
61
self.suffix_map = suffix_map.copy()
62
self.types_map = ({}, {}) # dict for (non-strict, strict)
63
self.types_map_inv = ({}, {})
64
for (ext, type) in types_map.items():
65
self.add_type(type, ext, True)
66
for (ext, type) in common_types.items():
67
self.add_type(type, ext, False)
68
for name in filenames:
69
self.read(name, strict)
71
def add_type(self, type, ext, strict=True):
72
"""Add a mapping between a type and an extension.
74
When the extension is already known, the new
75
type will replace the old one. When the type
76
is already known the extension will be added
77
to the list of known extensions.
79
If strict is true, information will be added to
80
list of standard types, else to the list of non-standard
83
self.types_map[strict][ext] = type
84
exts = self.types_map_inv[strict].setdefault(type, [])
88
def guess_type(self, url, strict=True):
89
"""Guess the type of a file based on its URL.
91
Return value is a tuple (type, encoding) where type is None if
92
the type can't be guessed (no or unknown suffix) or a string
93
of the form type/subtype, usable for a MIME Content-type
94
header; and encoding is None for no encoding or the name of
95
the program used to encode (e.g. compress or gzip). The
96
mappings are table driven. Encoding suffixes are case
97
sensitive; type suffixes are first tried case sensitive, then
100
The suffixes .tgz, .taz and .tz (case sensitive!) are all
101
mapped to '.tar.gz'. (This is table-driven too, using the
102
dictionary suffix_map.)
104
Optional `strict' argument when False adds a bunch of commonly found,
105
but non-standard types.
107
scheme, url = urllib.parse.splittype(url)
109
# syntax of data URLs:
110
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
111
# mediatype := [ type "/" subtype ] *( ";" parameter )
113
# parameter := attribute "=" value
114
# type/subtype defaults to "text/plain"
115
comma = url.find(',')
119
semi = url.find(';', 0, comma)
124
if '=' in type or '/' not in type:
126
return type, None # never compressed, so encoding is None
127
base, ext = posixpath.splitext(url)
128
while ext in self.suffix_map:
129
base, ext = posixpath.splitext(base + self.suffix_map[ext])
130
if ext in self.encodings_map:
131
encoding = self.encodings_map[ext]
132
base, ext = posixpath.splitext(base)
135
types_map = self.types_map[True]
137
return types_map[ext], encoding
138
elif ext.lower() in types_map:
139
return types_map[ext.lower()], encoding
141
return None, encoding
142
types_map = self.types_map[False]
144
return types_map[ext], encoding
145
elif ext.lower() in types_map:
146
return types_map[ext.lower()], encoding
148
return None, encoding
150
def guess_all_extensions(self, type, strict=True):
151
"""Guess the extensions for a file based on its MIME type.
153
Return value is a list of strings giving the possible filename
154
extensions, including the leading dot ('.'). The extension is not
155
guaranteed to have been associated with any particular data stream,
156
but would be mapped to the MIME type `type' by guess_type().
158
Optional `strict' argument when false adds a bunch of commonly found,
159
but non-standard types.
162
extensions = self.types_map_inv[True].get(type, [])
164
for ext in self.types_map_inv[False].get(type, []):
165
if ext not in extensions:
166
extensions.append(ext)
169
def guess_extension(self, type, strict=True):
170
"""Guess the extension for a file based on its MIME type.
172
Return value is a string giving a filename extension,
173
including the leading dot ('.'). The extension is not
174
guaranteed to have been associated with any particular data
175
stream, but would be mapped to the MIME type `type' by
176
guess_type(). If no extension can be guessed for `type', None
179
Optional `strict' argument when false adds a bunch of commonly found,
180
but non-standard types.
182
extensions = self.guess_all_extensions(type, strict)
187
def read(self, filename, strict=True):
189
Read a single mime.types-format file, specified by pathname.
191
If strict is true, information will be added to
192
list of standard types, else to the list of non-standard
196
self.readfp(fp, strict)
199
def readfp(self, fp, strict=True):
201
Read a single mime.types-format file.
203
If strict is true, information will be added to
204
list of standard types, else to the list of non-standard
212
for i in range(len(words)):
213
if words[i][0] == '#':
218
type, suffixes = words[0], words[1:]
219
for suff in suffixes:
220
self.add_type(type, '.' + suff, strict)
222
def guess_type(url, strict=True):
223
"""Guess the type of a file based on its URL.
225
Return value is a tuple (type, encoding) where type is None if the
226
type can't be guessed (no or unknown suffix) or a string of the
227
form type/subtype, usable for a MIME Content-type header; and
228
encoding is None for no encoding or the name of the program used
229
to encode (e.g. compress or gzip). The mappings are table
230
driven. Encoding suffixes are case sensitive; type suffixes are
231
first tried case sensitive, then case insensitive.
233
The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
234
to ".tar.gz". (This is table-driven too, using the dictionary
237
Optional `strict' argument when false adds a bunch of commonly found, but
242
return guess_type(url, strict)
245
def guess_all_extensions(type, strict=True):
246
"""Guess the extensions for a file based on its MIME type.
248
Return value is a list of strings giving the possible filename
249
extensions, including the leading dot ('.'). The extension is not
250
guaranteed to have been associated with any particular data
251
stream, but would be mapped to the MIME type `type' by
252
guess_type(). If no extension can be guessed for `type', None
255
Optional `strict' argument when false adds a bunch of commonly found,
256
but non-standard types.
260
return guess_all_extensions(type, strict)
262
def guess_extension(type, strict=True):
263
"""Guess the extension for a file based on its MIME type.
265
Return value is a string giving a filename extension, including the
266
leading dot ('.'). The extension is not guaranteed to have been
267
associated with any particular data stream, but would be mapped to the
268
MIME type `type' by guess_type(). If no extension can be guessed for
269
`type', None is returned.
271
Optional `strict' argument when false adds a bunch of commonly found,
272
but non-standard types.
276
return guess_extension(type, strict)
278
def add_type(type, ext, strict=True):
279
"""Add a mapping between a type and an extension.
281
When the extension is already known, the new
282
type will replace the old one. When the type
283
is already known the extension will be added
284
to the list of known extensions.
286
If strict is true, information will be added to
287
list of standard types, else to the list of non-standard
292
return add_type(type, ext, strict)
295
def init(files=None):
296
global guess_all_extensions, guess_extension, guess_type
297
global suffix_map, types_map, encodings_map, common_types
298
global add_type, inited
304
if os.path.isfile(file):
305
db.readfp(open(file))
306
encodings_map = db.encodings_map
307
suffix_map = db.suffix_map
308
types_map = db.types_map[True]
309
guess_all_extensions = db.guess_all_extensions
310
guess_extension = db.guess_extension
311
guess_type = db.guess_type
312
add_type = db.add_type
313
common_types = db.types_map[False]
316
def read_mime_types(file):
323
return db.types_map[True]
326
def _default_mime_types():
345
# Before adding new types, make sure they are either registered with IANA,
346
# at https://www.iana.org/assignments/media-types/
347
# or extensions, i.e. using the x- prefix
349
# If you add to these, please keep them sorted!
351
'.a' : 'application/octet-stream',
352
'.ai' : 'application/postscript',
353
'.aif' : 'audio/x-aiff',
354
'.aifc' : 'audio/x-aiff',
355
'.aiff' : 'audio/x-aiff',
356
'.au' : 'audio/basic',
357
'.avi' : 'video/x-msvideo',
358
'.bat' : 'text/plain',
359
'.bcpio' : 'application/x-bcpio',
360
'.bin' : 'application/octet-stream',
361
'.bmp' : 'image/x-ms-bmp',
364
'.cdf' : 'application/x-cdf',
365
'.cdf' : 'application/x-netcdf',
366
'.cpio' : 'application/x-cpio',
367
'.csh' : 'application/x-csh',
369
'.dll' : 'application/octet-stream',
370
'.doc' : 'application/msword',
371
'.dot' : 'application/msword',
372
'.dvi' : 'application/x-dvi',
373
'.eml' : 'message/rfc822',
374
'.eps' : 'application/postscript',
375
'.etx' : 'text/x-setext',
376
'.exe' : 'application/octet-stream',
377
'.gif' : 'image/gif',
378
'.gtar' : 'application/x-gtar',
380
'.hdf' : 'application/x-hdf',
381
'.htm' : 'text/html',
382
'.html' : 'text/html',
383
'.ief' : 'image/ief',
384
'.jpe' : 'image/jpeg',
385
'.jpeg' : 'image/jpeg',
386
'.jpg' : 'image/jpeg',
387
'.js' : 'application/x-javascript',
388
'.ksh' : 'text/plain',
389
'.latex' : 'application/x-latex',
390
'.m1v' : 'video/mpeg',
391
'.man' : 'application/x-troff-man',
392
'.me' : 'application/x-troff-me',
393
'.mht' : 'message/rfc822',
394
'.mhtml' : 'message/rfc822',
395
'.mif' : 'application/x-mif',
396
'.mov' : 'video/quicktime',
397
'.movie' : 'video/x-sgi-movie',
398
'.mp2' : 'audio/mpeg',
399
'.mp3' : 'audio/mpeg',
400
'.mp4' : 'video/mp4',
401
'.mpa' : 'video/mpeg',
402
'.mpe' : 'video/mpeg',
403
'.mpeg' : 'video/mpeg',
404
'.mpg' : 'video/mpeg',
405
'.ms' : 'application/x-troff-ms',
406
'.nc' : 'application/x-netcdf',
407
'.nws' : 'message/rfc822',
408
'.o' : 'application/octet-stream',
409
'.obj' : 'application/octet-stream',
410
'.oda' : 'application/oda',
411
'.p12' : 'application/x-pkcs12',
412
'.p7c' : 'application/pkcs7-mime',
413
'.pbm' : 'image/x-portable-bitmap',
414
'.pdf' : 'application/pdf',
415
'.pfx' : 'application/x-pkcs12',
416
'.pgm' : 'image/x-portable-graymap',
417
'.pl' : 'text/plain',
418
'.png' : 'image/png',
419
'.pnm' : 'image/x-portable-anymap',
420
'.pot' : 'application/vnd.ms-powerpoint',
421
'.ppa' : 'application/vnd.ms-powerpoint',
422
'.ppm' : 'image/x-portable-pixmap',
423
'.pps' : 'application/vnd.ms-powerpoint',
424
'.ppt' : 'application/vnd.ms-powerpoint',
425
'.ps' : 'application/postscript',
426
'.pwz' : 'application/vnd.ms-powerpoint',
427
'.py' : 'text/x-python',
428
'.pyc' : 'application/x-python-code',
429
'.pyo' : 'application/x-python-code',
430
'.qt' : 'video/quicktime',
431
'.ra' : 'audio/x-pn-realaudio',
432
'.ram' : 'application/x-pn-realaudio',
433
'.ras' : 'image/x-cmu-raster',
434
'.rdf' : 'application/xml',
435
'.rgb' : 'image/x-rgb',
436
'.roff' : 'application/x-troff',
437
'.rtx' : 'text/richtext',
438
'.sgm' : 'text/x-sgml',
439
'.sgml' : 'text/x-sgml',
440
'.sh' : 'application/x-sh',
441
'.shar' : 'application/x-shar',
442
'.snd' : 'audio/basic',
443
'.so' : 'application/octet-stream',
444
'.src' : 'application/x-wais-source',
445
'.sv4cpio': 'application/x-sv4cpio',
446
'.sv4crc' : 'application/x-sv4crc',
447
'.swf' : 'application/x-shockwave-flash',
448
'.t' : 'application/x-troff',
449
'.tar' : 'application/x-tar',
450
'.tcl' : 'application/x-tcl',
451
'.tex' : 'application/x-tex',
452
'.texi' : 'application/x-texinfo',
453
'.texinfo': 'application/x-texinfo',
454
'.tif' : 'image/tiff',
455
'.tiff' : 'image/tiff',
456
'.tr' : 'application/x-troff',
457
'.tsv' : 'text/tab-separated-values',
458
'.txt' : 'text/plain',
459
'.ustar' : 'application/x-ustar',
460
'.vcf' : 'text/x-vcard',
461
'.wav' : 'audio/x-wav',
462
'.wiz' : 'application/msword',
463
'.wsdl' : 'application/xml',
464
'.xbm' : 'image/x-xbitmap',
465
'.xlb' : 'application/vnd.ms-excel',
467
'.xls' : 'application/excel',
468
'.xls' : 'application/vnd.ms-excel',
470
'.xpdl' : 'application/xml',
471
'.xpm' : 'image/x-xpixmap',
472
'.xsl' : 'application/xml',
473
'.xwd' : 'image/x-xwindowdump',
474
'.zip' : 'application/zip',
477
# These are non-standard types, commonly found in the wild. They will
478
# only match if strict=False is passed to the API methods.
480
# Please sort these too
482
'.jpg' : 'image/jpg',
483
'.mid' : 'audio/midi',
484
'.midi': 'audio/midi',
485
'.pct' : 'image/pict',
486
'.pic' : 'image/pict',
487
'.pict': 'image/pict',
488
'.rtf' : 'application/rtf',
493
_default_mime_types()
496
if __name__ == '__main__':
501
Usage: mimetypes.py [options] type
504
--help / -h -- print this message and exit
505
--lenient / -l -- additionally search of some common, but non-standard
507
--extension / -e -- guess extension instead of type
509
More than one type argument may be given.
512
def usage(code, msg=''):
518
opts, args = getopt.getopt(sys.argv[1:], 'hle',
519
['help', 'lenient', 'extension'])
520
except getopt.error as msg:
525
for opt, arg in opts:
526
if opt in ('-h', '--help'):
528
elif opt in ('-l', '--lenient'):
530
elif opt in ('-e', '--extension'):
534
guess = guess_extension(gtype, strict)
535
if not guess: print("I don't know anything about type", gtype)
538
guess, encoding = guess_type(gtype, strict)
539
if not guess: print("I don't know anything about type", gtype)
540
else: print('type:', guess, 'encoding:', encoding)