# -*- test-case-name: twisted.python.test.test_zipstream -*-
# Copyright (c) 2001-2008 Twisted Matrix Laboratories.
# See LICENSE for details.

"""
An incremental approach to unzipping files.  This allows you to unzip a little
bit of a file at a time, which means you can report progress as a file unzips.
"""

import os.path
import struct
import warnings
import zipfile
import zlib

# Packed size, in bytes, of the struct format zipfile.structFileHeader;
# readfile() reads this many bytes to get an entry's file header.
_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)
class ChunkingZipFile(zipfile.ZipFile):
    """
    A ZipFile object which, with readfile(), also gives you access to a
    filelike object for each entry.
    """

    def readfile(self, name):
        """
        Return file-like object for name.

        @param name: the name of an entry in this archive, as accepted by
            C{getinfo}.

        @return: a L{ZipFileEntry} if the entry is stored uncompressed, or a
            L{DeflatedZipFileEntry} if it is deflated.  The returned object
            reads from this archive's own file object, which this method
            leaves positioned just past the entry's file header.

        @raise RuntimeError: if this archive was not opened in mode C{"r"} or
            C{"a"}, or if it has already been closed.
        @raise zipfile.BadZipfile: if the entry's file header has the wrong
            magic number, names a different file than the directory does, or
            uses a compression method other than stored or deflated.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)

        self.fp.seek(zinfo.header_offset, 0)

        fheader = self.fp.read(_fileHeaderSize)
        if fheader[0:4] != zipfile.stringFileHeader:
            raise zipfile.BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(zipfile.structFileHeader, fheader)
        fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])

        # The extra field is skipped only to advance past it; its content is
        # not used here.
        if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
            self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])

        if not isinstance(fname, str):
            # Where reads yield bytes but orig_filename is text, a direct
            # comparison could never succeed.  Decode the header name first:
            # UTF-8 when general purpose flag bit 11 is set, cp437 otherwise.
            if zinfo.flag_bits & 0x800:
                fname = fname.decode("utf-8")
            else:
                fname = fname.decode("cp437")

        if fname != zinfo.orig_filename:
            raise zipfile.BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        if zinfo.compress_type == zipfile.ZIP_STORED:
            return ZipFileEntry(self, zinfo.compress_size)
        elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
            return DeflatedZipFileEntry(self, zinfo.compress_size)
        else:
            raise zipfile.BadZipfile(
                "Unsupported compression method %d for file %s" %
                    (zinfo.compress_type, name))


class _FileEntry(object):
    """
    Abstract superclass of both compressed and uncompressed variants of
    file-like objects within a zip archive.

    Subclasses are expected to provide C{read}; the line-oriented and
    iteration methods here are built on top of it.

    @ivar chunkingZipFile: a chunking zip file.
    @type chunkingZipFile: L{ChunkingZipFile}

    @ivar length: The number of bytes within the zip file that represent this
    file.  (This is the size on disk, not the number of decompressed bytes
    which will result from reading it.)

    @ivar fp: the underlying file object (that contains pkzip data).  Do not
    touch this, please.  It will quite likely move or go away.

    @ivar closed: File-like 'closed' attribute; False before this file has been
    closed, True after.
    @type closed: L{bool}

    @ivar finished: An older, broken synonym for 'closed'.  Do not touch this,
    please.
    @type finished: L{int}
    """

    def __init__(self, chunkingZipFile, length):
        """
        Create a L{_FileEntry} from a L{ChunkingZipFile}.

        @param chunkingZipFile: the archive this entry is read from.
        @param length: the number of bytes this entry occupies in the archive.
        """
        self.chunkingZipFile = chunkingZipFile
        self.fp = self.chunkingZipFile.fp
        self.length = length
        self.finished = 0
        self.closed = False


    def isatty(self):
        """
        Returns false because zip files should not be ttys
        """
        return False


    def close(self):
        """
        Close self (file-like object)

        Calling this more than once is harmless.
        """
        if self.closed:
            return
        self.closed = True
        self.finished = 1
        del self.fp


    def readline(self):
        """
        Read a line.

        @return: the bytes up to and including the next newline, or whatever
            remains if the data ends first (empty at end of file).
        """
        line = b""
        # read() signals end of data with an empty result, which is the
        # sentinel that stops this iterator.
        for byte in iter(lambda: self.read(1), b""):
            line += byte
            if byte == b"\n":
                break
        return line


    def next(self):
        """
        Implement next as file does (like readline, except raises StopIteration
        at EOF)
        """
        nextline = self.readline()
        if nextline:
            return nextline
        raise StopIteration()

    # The iterator protocol looks for __next__ on Python 3 and next on
    # Python 2; provide both spellings.
    __next__ = next


    def readlines(self):
        """
        Returns a list of all the lines
        """
        return list(self)


    def xreadlines(self):
        """
        Returns an iterator (so self)
        """
        return self


    def __iter__(self):
        """
        Returns an iterator (so self)
        """
        return self


class ZipFileEntry(_FileEntry):
    """
    File-like object used to read an uncompressed entry in a ZipFile

    @ivar readBytes: the number of bytes of this entry read so far.
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry is read from; its file
            object must already be positioned at the start of the entry data.
        @param length: the number of bytes the entry occupies in the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.readBytes = 0


    def tell(self):
        """
        Return the number of bytes read from this entry so far.
        """
        return self.readBytes


    def read(self, n=None):
        """
        Read up to C{n} bytes of this entry.

        @param n: the maximum number of bytes to return; C{None} or a negative
            number means everything that remains.

        @return: the bytes read, empty once the entry is exhausted.
        """
        remaining = self.length - self.readBytes
        if n is None or n < 0:
            # A negative count must not reach fp.read(), where it would mean
            # "read to the end of the archive", past this entry.
            n = remaining
        if n == 0 or self.finished:
            return b""
        data = self.chunkingZipFile.fp.read(min(n, remaining))
        self.readBytes += len(data)
        # A short read means the underlying file ran out early; treat that as
        # the end of the entry as well.
        if self.readBytes == self.length or len(data) < n:
            self.finished = 1
        return data


class DeflatedZipFileEntry(_FileEntry):
    """
    File-like object used to read a deflated entry in a ZipFile

    @ivar returnedBytes: the number of decompressed bytes handed to callers.
    @ivar readBytes: the number of compressed bytes consumed by chunked reads.
    @ivar decomp: the zlib decompression object for this entry.
    @ivar buffer: decompressed bytes not yet returned to the caller.
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry is read from; its file
            object must already be positioned at the start of the entry data.
        @param length: the number of compressed bytes the entry occupies.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.returnedBytes = 0
        self.readBytes = 0
        # Negative window bits select a raw deflate stream with no zlib
        # header or trailer.
        self.decomp = zlib.decompressobj(-15)
        self.buffer = b""


    def tell(self):
        """
        Return the number of decompressed bytes returned so far.
        """
        return self.returnedBytes


    def read(self, n=None):
        """
        Read up to C{n} decompressed bytes of this entry.

        @param n: the maximum number of bytes to return; C{None} or a negative
            number means everything that remains.

        @return: the decompressed bytes, empty once the entry is exhausted.
        """
        if self.finished:
            return b""
        if n is None or n < 0:
            compressed = self.chunkingZipFile.fp.read(
                self.length - self.readBytes)
            # NOTE(review): the extra "Z" byte looks like padding fed to zlib
            # so it completes the raw stream; confirm before removing it.
            result = b"".join([
                self.buffer,
                self.decomp.decompress(compressed),
                self.decomp.decompress(b"Z"),
                self.decomp.flush(),
            ])
            self.buffer = b""
            self.finished = 1
            self.returnedBytes += len(result)
            return result

        while len(self.buffer) < n:
            data = self.chunkingZipFile.fp.read(
                min(n, 1024, self.length - self.readBytes))
            self.readBytes += len(data)
            if not data:
                # No compressed input is left: drain the decompressor and
                # return whatever is buffered, even if shorter than n.
                result = (self.buffer
                          + self.decomp.decompress(b"Z")
                          + self.decomp.flush())
                self.finished = 1
                self.buffer = b""
                self.returnedBytes += len(result)
                return result
            self.buffer += self.decomp.decompress(data)
        result = self.buffer[:n]
        self.buffer = self.buffer[n:]
        self.returnedBytes += len(result)
        return result


def unzip(filename, directory=".", overwrite=0):
    """
    Unzip the file

    This drives L{unzipIter} to completion and discards its progress values.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
    extracted
    @param overwrite: if on, overwrite files when they exist.  You can
    still get an error if you try to create a directory over a file
    with the same name or vice-versa.
    """
    for _ in unzipIter(filename, directory, overwrite):
        pass


def unzipIter(filename, directory='.', overwrite=0):
    """
    Return a generator for the zipfile.  This implementation will yield
    after every file.

    The value it yields is the number of files left to unzip.

    @param filename: the name of the zip file
    @param directory: the directory into which the entries are extracted; it
        is created if it does not exist.
    @param overwrite: if true, replace files that already exist; directories
        are never affected by this flag.
    """
    zf = zipfile.ZipFile(filename, 'r')
    try:
        names = zf.namelist()
        if not os.path.exists(directory):
            os.makedirs(directory)
        remaining = len(names)
        for entry in names:
            remaining -= 1
            # DIR_BIT is a module-level mask that is not defined in this
            # function.
            isdir = zf.getinfo(entry).external_attr & DIR_BIT
            # NOTE(review): entry names are joined to the target directory
            # without validation, so a name containing ".." can land outside
            # it.  Only extract archives you trust.
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    try:
                        outfile.write(zf.read(entry))
                    finally:
                        outfile.close()
            yield remaining
    finally:
        # Release the archive even if extraction fails or the generator is
        # abandoned before it is exhausted.
        zf.close()


def countZipFileChunks(filename, chunksize):
    """
    Predict the number of chunks that will be extracted from the entire
    zipfile, given chunksize blocks.

    @param filename: the name of the zip file
    @param chunksize: the chunk size, in bytes, passed to L{countFileChunks}
        for every entry.

    @return: the sum of L{countFileChunks} over every entry in the archive.
    """
    zf = ChunkingZipFile(filename)
    try:
        totalchunks = 0
        for info in zf.infolist():
            totalchunks += countFileChunks(info, chunksize)
        return totalchunks
    finally:
        # The archive is only needed for its directory; do not leak it.
        zf.close()


def countFileChunks(zipinfo, chunksize):
    """
    Count the number of chunks that will result from the given L{ZipInfo}.

    @param zipinfo: a L{zipfile.ZipInfo} instance describing an entry in a zip
    archive to be counted.

    @param chunksize: the size of one chunk, in bytes.

    @return: the number of chunks present in the zip file.  (Even an empty file
    counts as one chunk.)
    @rtype: L{int}
    """
    count, extra = divmod(zipinfo.file_size, chunksize)
    if extra > 0:
        # A trailing partial chunk still counts as a chunk.
        count += 1
    return count or 1


def countZipFileEntries(filename):
    """
    Count the number of entries in a zip archive.  (Don't use this function.)

    Calling this emits a L{DeprecationWarning}.

    @param filename: The filename of a zip archive.
    @type filename: L{str}

    @return: the number of names in the archive.
    """
    warnings.warn("countZipFileEntries is deprecated.",
                  DeprecationWarning, 2)
    zf = zipfile.ZipFile(filename)
    try:
        return len(zf.namelist())
    finally:
        # Close the archive rather than leaving it open until collection.
        zf.close()


332
def unzipIterChunky(filename, directory='.', overwrite=0,
335
Return a generator for the zipfile. This implementation will yield after
336
every chunksize uncompressed bytes, or at the end of a file, whichever
339
The value it yields is the number of chunks left to unzip.
341
czf = ChunkingZipFile(filename, 'r')
342
if not os.path.exists(directory):
343
os.makedirs(directory)
344
remaining = countZipFileChunks(filename, chunksize)
345
names = czf.namelist()
346
infos = czf.infolist()
348
for entry, info in zip(names, infos):
349
isdir = info.external_attr & DIR_BIT
350
f = os.path.join(directory, entry)
352
# overwrite flag only applies to files
353
if not os.path.exists(f):
358
# create the directory the file will be in first,
359
# since we can't guarantee it exists
360
fdir = os.path.split(f)[0]
361
if not os.path.exists(fdir):
363
if overwrite or not os.path.exists(f):
364
outfile = file(f, 'wb')
365
fp = czf.readfile(entry)
366
if info.file_size == 0:
369
while fp.tell() < info.file_size:
370
hunk = fp.read(chunksize)
376
remaining -= countFileChunks(info, chunksize)