1
"""An extremely asynch approach to unzipping files. This allows you
2
to unzip a little bit of a file at a time, which means it can
3
integrate nicely with a reactor.
7
from __future__ import generators
15
# ZipFile subclass that adds readfile(), which returns a file-like reader
# over a single entry's data so the entry can be consumed piece by piece.
# NOTE(review): this listing carries the original line numbers on their own
# lines and its numbering has gaps, so some statements below appear without
# the condition or header that governed them.
class ChunkingZipFile(zipfile.ZipFile):
16
"""A ZipFile object which, with readfile(), also gives you access
17
to a filelike object for each entry.
19
# Validate the entry's local file header and return a reader over its data:
# ZipFileEntry for ZIP_STORED, DeflatedZipFileEntry for ZIP_DEFLATED.
# Raises RuntimeError when the archive mode is not "r" or "a", and
# zipfile.BadZipfile for a bad header magic, a file name that differs
# between the directory record and the header, or another compression method.
def readfile(self, name):
20
"""Return file-like object for name."""
21
if self.mode not in ("r", "a"):
22
raise RuntimeError, 'read() requires mode "r" or "a"'
25
# NOTE(review): original lines 23-24 are absent here; this string is
# presumably the message of a RuntimeError raised when self.fp is no longer
# open -- confirm against the full file.
"Attempt to read ZIP archive that was already closed"
26
zinfo = self.getinfo(name)
28
# Jump to the start of this entry's header (whence 0 = absolute offset).
self.fp.seek(zinfo.header_offset, 0)
30
# Skip the file header:
31
# These 30 bytes are what gets unpacked with zipfile.structFileHeader
# below; they must begin with the file-header magic.
fheader = self.fp.read(30)
32
if fheader[0:4] != zipfile.stringFileHeader:
33
raise zipfile.BadZipfile, "Bad magic number for file header"
35
fheader = struct.unpack(zipfile.structFileHeader, fheader)
36
# The file name and the optional extra field follow the fixed header;
# consuming them should leave self.fp at the start of the entry's data.
fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])
37
if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
38
self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])
40
# Cross-check the name stored in the header against the directory record.
if fname != zinfo.orig_filename:
41
raise zipfile.BadZipfile, \
42
'File name in directory "%s" and header "%s" differ.' % (
43
zinfo.orig_filename, fname)
45
# Both reader types share self.fp and are given compress_size as length.
if zinfo.compress_type == zipfile.ZIP_STORED:
46
return ZipFileEntry(self.fp, zinfo.compress_size)
47
elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
50
# NOTE(review): original lines 48-49 are absent; this string is presumably
# the message of an error raised when zlib is unavailable -- confirm.
"De-compression requires the (missing) zlib module"
51
return DeflatedZipFileEntry(self.fp, zinfo.compress_size)
53
# NOTE(review): original line 52 is absent (presumably the else branch).
raise zipfile.BadZipfile, \
54
"Unsupported compression method %d for file %s" % \
55
(zinfo.compress_type, name)
58
# NOTE(review): original lines 56-57 are absent, including this method's
# def line; the error text in readfile suggests it is read(self, name) --
# confirm. The visible lines obtain a reader via readfile(), compute
# binascii.crc32 and can raise zipfile.BadZipfile with a Bad CRC-32 message.
"""Return file bytes (as a string) for name."""
59
f = self.readfile(name)
60
zinfo = self.getinfo(name)
62
# NOTE(review): original line 61, which must bind bytes, is absent.
crc = binascii.crc32(bytes)
64
# NOTE(review): the comparison guarding this raise (original line 63) and
# the method's return are absent from this listing.
raise zipfile.BadZipfile, "Bad CRC-32 for file %s" % name
69
"""File-like object used to read an uncompressed entry in a ZipFile"""
71
# Constructor taking the file object to read from and the entry's length.
# NOTE(review): the body (original lines 72-79) is absent from this listing.
# The read() method of this class uses self.fp, self.length, self.readBytes
# and self.finished, so they are presumably initialised here -- confirm.
def __init__(self, fp, length):
80
# Read up to n bytes of the entry from the shared file object, tracking
# progress in self.readBytes.
# NOTE(review): original lines 81, 84-85 and 89 onward are absent, including
# the condition that applies the default size and the return statements.
def read(self, n=None):
82
# Bytes of the entry not yet read (presumably the n is None default --
# confirm against the full file).
n = self.length - self.readBytes
83
# Nothing was requested, or the entry has already been marked finished.
if n == 0 or self.finished:
86
# Cap the request so the read never runs past this entry's data.
data = self.fp.read(min(n, self.length - self.readBytes))
87
self.readBytes += len(data)
88
# Every byte has been consumed, or the file returned fewer than requested.
if self.readBytes == self.length or len(data) < n:
97
# Reader that decompresses a deflated ZIP entry from a file object through a
# zlib decompression object.
# NOTE(review): the numbering of this listing has gaps (101, 103, 105-109,
# 113-115, 120-121, 124-125, 129, 131-132, 134-135, 140+), so some
# assignments, branch conditions, a method header and the returns are absent.
class DeflatedZipFileEntry:
98
"""File-like object used to read a deflated entry in a ZipFile"""
100
def __init__(self, fp, length):
102
# Running total of decompressed bytes handed back by read().
self.returnedBytes = 0
104
# A negative wbits value makes zlib expect a raw deflate stream with no
# zlib header or trailer.
self.decomp = zlib.decompressobj(-15)
110
# NOTE(review): the def line owning this return is absent; it reports the
# count of decompressed bytes returned so far -- presumably tell(), confirm.
return self.returnedBytes
112
# Return decompressed data from the entry. The first group of statements
# consumes all remaining compressed input; the while loop serves a request
# bounded by n.
# NOTE(review): the conditions selecting between these paths and the return
# statements are absent from this listing.
def read(self, n=None):
116
# Start from output that was decompressed earlier but not yet returned.
result = [self.buffer,]
117
# Decompress every compressed byte of the entry that has not been read yet.
result.append(self.decomp.decompress(self.fp.read(self.length - self.readBytes)))
118
# One extra byte is fed before flushing (presumably a pad byte that lets
# the raw-deflate decoder finish -- confirm).
result.append(self.decomp.decompress("Z"))
119
result.append(self.decomp.flush())
122
result = "".join(result)
123
self.returnedBytes += len(result)
126
# Bounded request: keep decompressing until n bytes are buffered.
while len(self.buffer) < n:
127
# Pull at most 1024 compressed bytes per pass, never past the entry's end.
data = self.fp.read(min(n, 1024, self.length - self.readBytes))
128
self.readBytes += len(data)
130
# NOTE(review): the condition for this end-of-input step (original line
# 129) is absent; it drains the buffer plus whatever the decoder still holds.
result = self.buffer + self.decomp.decompress("Z") + self.decomp.flush()
133
self.returnedBytes += len(result)
136
self.buffer += self.decomp.decompress(data)
137
# Hand back the first n bytes and keep the surplus for the next call.
result = self.buffer[:n]
138
self.buffer = self.buffer[n:]
139
self.returnedBytes += len(result)
147
# Extract an archive by iterating the unzipIter() generator with the same
# filename, directory and overwrite arguments.
# NOTE(review): the opening and closing lines of the docstring (original
# lines 148, 151, 155) and the loop body (157) are absent from this listing.
def unzip(filename, directory=".", overwrite=0):
149
@param filename: the name of the zip file
150
@param directory: the directory into which the files will be
152
@param overwrite: if on, overwrite files when they exist. You can
153
still get an error if you try to create a directory over a file
154
with the same name or vice-versa.
156
for i in unzipIter(filename, directory, overwrite):
160
# Generator that extracts the archive entry by entry into directory.
# NOTE(review): the numbering of this listing has gaps (163, 165, 167, 170,
# 174, 177, 182, 186+), so the loop header over the entries, the branch on
# isdir, the creation of fdir and the close/yield lines are absent.
def unzipIter(filename, directory='.', overwrite=0):
161
"""Return a generator for the zipfile. This implementation will
162
yield after every file.
164
The value it yields is the number of files left to unzip.
166
zf=zipfile.ZipFile(filename, 'r')
168
# Make sure the extraction root exists before anything is written into it.
if not os.path.exists(directory): os.makedirs(directory)
169
# countZipFileEntries() opens the archive a second time just to count.
remaining=countZipFileEntries(filename)
171
remaining=remaining - 1
172
# Non-zero when the entry's external attributes carry DIR_BIT (a name
# defined outside this block), i.e. the entry is treated as a directory.
isdir=zf.getinfo(entry).external_attr & DIR_BIT
173
# NOTE(review): entry names are joined unchecked, so a name containing ..
# or an absolute path could resolve outside directory -- worth validating.
f=os.path.join(directory, entry)
175
# overwrite flag only applies to files
176
if not os.path.exists(f): os.makedirs(f)
178
# create the directory the file will be in first,
179
# since we can't guarantee it exists
180
fdir=os.path.split(f)[0]
181
if not os.path.exists(fdir):
183
# An existing file is only replaced when overwrite is set.
if overwrite or not os.path.exists(f):
184
# file() is the Python 2 builtin; zf.read() returns the whole entry at once.
outfile=file(f, 'wb')
185
outfile.write(zf.read(entry))
189
# Add up countFileChunks() over every entry of the archive at filename.
# NOTE(review): original lines 192-193 and 197 are absent, so the
# initialisation of totalchunks and the return are not shown; nothing
# visible here closes zf either -- confirm against the full file.
def countZipFileChunks(filename, chunksize):
190
"""Predict the number of chunks that will be extracted from the
191
entire zipfile, given chunksize blocks.
194
zf=ChunkingZipFile(filename)
195
for info in zf.infolist():
196
totalchunks=totalchunks+countFileChunks(info, chunksize)
199
# Work out how many chunksize-sized pieces one entry occupies, based on the
# file_size recorded in its ZipInfo.
# NOTE(review): original lines 201, 203 and 205 are absent, so the division,
# the adjustment made when there is a remainder, and the return are not shown.
def countFileChunks(zipinfo, chunksize):
200
size=zipinfo.file_size
202
# A non-zero remainder means the last chunk is only partly filled.
if size%chunksize > 0:
204
# each file counts as at least one chunk
207
def countZipFileEntries(filename):
    """Return the number of entries listed in a zip archive.

    @param filename: the name of the zip file to inspect
    @return: the length of the archive's name list
    """
    zf = zipfile.ZipFile(filename)
    try:
        return len(zf.namelist())
    finally:
        # Close explicitly: the archive was previously left open until
        # garbage collection, leaking one file handle per call.
        zf.close()
211
# Generator that extracts the archive in pieces of chunksize bytes, reading
# each entry through ChunkingZipFile.readfile() rather than all at once.
# NOTE(review): the numbering of this listing has gaps (212, 216, 218,
# 222-224, 228, 232-234, 239, 245-250, 253, 255-258), so the rest of the
# signature, the bindings of names, infos, ftell, fread and size, the branch
# lines, the write of each hunk and the yields are absent; the function may
# also continue past the last line shown.
def unzipIterChunky(filename, directory='.', overwrite=0,
213
"""Return a generator for the zipfile. This implementation will
214
yield after every chunksize uncompressed bytes, or at the end of a
215
file, whichever comes first.
217
The value it yields is the number of chunks left to unzip.
219
czf=ChunkingZipFile(filename, 'r')
220
# Make sure the extraction root exists before anything is written into it.
if not os.path.exists(directory): os.makedirs(directory)
221
# Total number of chunks expected; counted down as work is completed.
remaining=countZipFileChunks(filename, chunksize)
225
for entry, info in zip(names, infos):
226
# Non-zero when the entry's external attributes carry DIR_BIT (a name
# defined outside this block), i.e. the entry is treated as a directory.
isdir=info.external_attr & DIR_BIT
227
# NOTE(review): entry names are joined unchecked, so a name containing ..
# or an absolute path could resolve outside directory -- worth validating.
f=os.path.join(directory, entry)
229
# overwrite flag only applies to files
230
if not os.path.exists(f): os.makedirs(f)
231
remaining=remaining-1
235
# create the directory the file will be in first,
236
# since we can't guarantee it exists
237
fdir=os.path.split(f)[0]
238
if not os.path.exists(fdir):
240
# An existing file is only replaced when overwrite is set.
if overwrite or not os.path.exists(f):
241
# file() is the Python 2 builtin.
outfile=file(f, 'wb')
242
fp=czf.readfile(entry)
243
# An empty entry still accounts for one chunk in the countdown.
if info.file_size==0:
244
remaining=remaining-1
251
# Copy the entry one chunksize-sized hunk at a time until size is reached.
while ftell() < size:
252
hunk=fread(chunksize)
254
remaining=remaining-1
259
# Deduct all of this entry's chunks in one step (presumably the branch for
# a file that is not extracted -- confirm against the full file).
remaining=remaining-countFileChunks(info, chunksize)