1
"Read and write ZIP files."
3
import struct, os, time
import binascii

try:
    import zlib # We may need its compression method
except ImportError:
    zlib = None
11
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
12
"ZipInfo", "ZipFile", "PyZipFile"]
14
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or one of its records is bad."""


error = BadZipfile      # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0          # member data is stored without compression
ZIP_DEFLATED = 8        # member data is a raw deflate stream (needs zlib)
# Other ZIP compression methods not supported
23
# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple produced by unpacking structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple produced by unpacking structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
66
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Return True when an "End of Central Directory" record is found in the
    file named by filename.  Return False otherwise, including when the
    file cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            fpin.close()            # never leak the handle, even on error
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
78
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items unpacked from the ZIP "End of
    central dir" record, followed by the archive comment (index 8) and the
    file seek offset of this record (index 9).  None is returned when no
    usable record is found.  fpin must be a seekable file object; seek
    errors (for example on a file shorter than 22 bytes) are not caught
    here.
    """
    fpin.seek(-22, 2)               # Assume no archive comment.
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # Signature sits too close to the end of the file to hold a
            # whole record; treat the file as not being an archive.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            return endrec
    return None     # Error, return None
114
"""Class with attributes describing each file in the ZIP archive."""
116
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    """Set up the description of one archive member.

    filename is the member name; it is kept unchanged in orig_filename
    and stored in normalized form in self.filename.  date_time is a
    (year, month, day, hour, min, sec) sequence.
    """
    self.orig_filename = filename   # Original file name in archive
    # Terminate the file name at the first null byte.  Null bytes in file
    # names are used as tricks by viruses in archives.
    null_byte = filename.find(chr(0))
    if null_byte >= 0:
        # Only truncate when a null byte is present: find() returns -1
        # otherwise, and slicing with -1 would drop the last character.
        filename = filename[0:null_byte]
    # This is used to ensure paths in generated ZIP files always use
    # forward slashes as the directory separator, as required by the
    # ZIP format specification.
    if os.sep != "/":
        filename = filename.replace(os.sep, "/")
    self.filename = filename        # Normalized file name
    self.date_time = date_time      # year, month, day, hour, min, sec
    # Standard values:
    self.compress_type = ZIP_STORED # Type of compression for the file
    self.comment = ""               # Comment for each file
    self.extra = ""                 # ZIP extra data
    self.create_system = 0          # System which created ZIP archive
    self.create_version = 20        # Version which created ZIP archive
    self.extract_version = 20       # Version needed to extract archive
    self.reserved = 0               # Must be zero
    self.flag_bits = 0              # ZIP flag bits
    self.volume = 0                 # Volume number of file header
    self.internal_attr = 0          # Internal attributes
    self.external_attr = 0          # External file attributes
    # Other attributes are set by class ZipFile:
    # header_offset         Byte offset to the file header
    # file_offset           Byte offset to the start of the file data
    # CRC                   CRC-32 of the uncompressed file
    # compress_size         Size of the compressed file
    # file_size             Size of the uncompressed file
149
def FileHeader(self):
    """Return the per-file header as a string.

    The result is the packed local file header record followed by the
    file name and the extra data.  When bit 0x08 of flag_bits is set the
    CRC and both sizes are written as zero, because they are written
    after the file data instead.
    """
    dt = self.date_time
    # Pack the date and time into the two 16-bit DOS fields.
    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    if self.flag_bits & 0x08:
        # Set these to zero because we write them after the file data
        CRC = compress_size = file_size = 0
    else:
        CRC = self.CRC
        compress_size = self.compress_size
        file_size = self.file_size
    header = struct.pack(structFileHeader, stringFileHeader,
                         self.extract_version, self.reserved, self.flag_bits,
                         self.compress_type, dostime, dosdate, CRC,
                         compress_size, file_size,
                         len(self.filename), len(self.extra))
    return header + self.filename + self.extra
170
""" Class with methods to open, read, write, close, list zip files.
172
z = ZipFile(file, mode="r", compression=ZIP_STORED)
174
file: Either the path to the file, or a file-like object.
175
If it is a path, the file will be opened and closed by ZipFile.
176
mode: The mode can be either read "r", write "w" or append "a".
177
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
180
fp = None # Set here since __del__ checks it
182
def __init__(self, file, mode="r", compression=ZIP_STORED):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (the file is then opened here) or a file-like
    object (used as given).  compression is ZIP_STORED or ZIP_DEFLATED;
    RuntimeError is raised for any other value, for ZIP_DEFLATED when
    zlib is missing, and for an unknown mode.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        # Look up the normalized key so that e.g. mode "rb" is accepted
        # for paths exactly as it is for file objects.
        self.fp = open(file, modeDict[key])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        if not self._filePassed:
            # We opened the file ourselves, so release it before failing.
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
226
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    The file is closed only when it was opened by this object (not when
    a file object was passed in); the BadZipfile error is then re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
237
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry, and sets self.comment and self.start_dir.  Raises
    BadZipfile when the end-of-archive record is not found or a record
    signature is wrong, and RuntimeError when the file name in the
    central directory differs from the one in the local file header.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    total = 0       # central directory bytes consumed so far
    while total < size_cd:
        centdir = fp.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
        # file_offset must be computed below...
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total %s" % (total,))
    for data in self.filelist:
        fp.seek(data.header_offset, 0)
        fheader = fp.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        fheader = struct.unpack(structFileHeader, fheader)
        # file_offset is computed here, since the extra field for
        # the central directory and for the local file header
        # refer to different fields, and they can have different
        # lengths
        data.file_offset = (data.header_offset + 30
                            + fheader[_FH_FILENAME_LENGTH]
                            + fheader[_FH_EXTRA_FIELD_LENGTH])
        fname = fp.read(fheader[_FH_FILENAME_LENGTH])
        if fname != data.orig_filename:
            raise RuntimeError(
                  'File name in directory "%s" and header "%s" differ.' % (
                      data.orig_filename, fname))
307
"""Return a list of file names in the archive."""
309
for data in self.filelist:
310
l.append(data.filename)
314
"""Return a list of class ZipInfo instances for files in the
319
"""Print a table of contents for the zip file."""
320
print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
321
for zinfo in self.filelist:
322
date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
323
print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
326
"""Read all the files and check the CRC."""
327
for zinfo in self.filelist:
329
self.read(zinfo.filename) # Check CRC-32
331
return zinfo.filename
333
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError, with a message naming the missing member, when
    self.NameToInfo has no entry for name.
    """
    try:
        return self.NameToInfo[name]
    except KeyError:
        raise KeyError('There is no item named %r in the archive' % (name,))
337
def read(self, name):
    """Return file bytes (as a string) for name.

    The member is located through self.getinfo(name), read from self.fp
    (whose position is restored afterwards), decompressed if needed and
    checked against the stored CRC-32.  Raises RuntimeError when the
    archive is not open for reading or zlib is needed but missing, and
    BadZipfile for an unsupported compression method or a CRC mismatch.
    """
    if self.mode not in ("r", "a"):
        raise RuntimeError('read() requires mode "r" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")
    zinfo = self.getinfo(name)
    filepos = self.fp.tell()
    self.fp.seek(zinfo.file_offset, 0)
    data = self.fp.read(zinfo.compress_size)
    self.fp.seek(filepos, 0)
    if zinfo.compress_type == ZIP_STORED:
        pass
    elif zinfo.compress_type == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "De-compression requires the (missing) zlib module")
        # zlib compress/decompress code by Jeremy Hylton of CNRI
        dc = zlib.decompressobj(-15)
        data = dc.decompress(data)
        # need to feed in unused pad byte so that zlib won't choke
        ex = dc.decompress('Z') + dc.flush()
        if ex:
            data = data + ex
    else:
        raise BadZipfile(
              "Unsupported compression method %d for file %s" %
              (zinfo.compress_type, name))
    crc = binascii.crc32(data)
    if crc != zinfo.CRC:
        raise BadZipfile("Bad CRC-32 for file %s" % name)
    return data
371
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name only produces a warning (when self.debug is
    set).  RuntimeError is raised when the archive is not open for
    writing, when zlib is needed but missing, or when the compression
    method of zinfo is not supported.
    """
    if zinfo.filename in self.NameToInfo:
        if self.debug:      # Warning for duplicate names
            print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
              "That compression method is not supported")
388
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename and compress_type to the archive's
    self.compression.  The local header is first written with zero CRC
    and sizes; once the data has been copied, the real values are
    written back over those header fields.
    """
    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        zinfo = ZipInfo(filename, date_time)
    else:
        zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type
    self._writecheck(zinfo)
    fp = open(filename, "rb")
    try:
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        fp.close()      # release the source file even if copying fails
    if cmpr:
        # Emit whatever the compressor is still holding back.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
448
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and the archive's default compression.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only (year, month, day, hour, min, sec), the same shape
        # write() stores in date_time.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    self._writecheck(zinfo)
    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    zinfo.file_offset = self.fp.tell()      # Start of file bytes
    self.fp.write(bytes)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
480
"""Call the "close()" method in case the user forgot."""
484
"""Close the file, and for mode "w" and "a" write the ending
488
if self.mode in ("w", "a"): # write ending records
490
pos1 = self.fp.tell()
491
for zinfo in self.filelist: # write central directory
494
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
495
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
496
centdir = struct.pack(structCentralDir,
497
stringCentralDir, zinfo.create_version,
498
zinfo.create_system, zinfo.extract_version, zinfo.reserved,
499
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
500
zinfo.CRC, zinfo.compress_size, zinfo.file_size,
501
len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
502
0, zinfo.internal_attr, zinfo.external_attr,
504
self.fp.write(centdir)
505
self.fp.write(zinfo.filename)
506
self.fp.write(zinfo.extra)
507
self.fp.write(zinfo.comment)
508
pos2 = self.fp.tell()
509
# Write end-of-zip-archive record
510
endrec = struct.pack(structEndArchive, stringEndArchive,
511
0, 0, count, count, pos2 - pos1, pos1, 0)
512
self.fp.write(endrec)
514
if not self._filePassed:
519
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when not empty, is the archive directory under which
        the entries are stored.  RuntimeError is raised when pathname is
        a file whose name does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % (arcname,))
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % (pathname,))
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % (arcname,))
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An existing .pyo file is used when it is at least as new as the
        .py source; otherwise the .pyc file is used, after compiling it
        when it is missing or older than the source.  A compile error is
        printed rather than raised.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
           os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Fetch the exception through sys.exc_info() so the
                # handler needs no version-specific "except" syntax.
                import sys
                print(sys.exc_info()[1].msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)