1
by bescoto
Initial checkin |
1 |
#!/usr/bin/env python
|
335
by loafman
patch #6675: Add modelines |
2 |
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
|
1
by bescoto
Initial checkin |
3 |
#-------------------------------------------------------------------
|
4 |
# tarfile.py
|
|
5 |
#
|
|
6 |
# Module for reading and writing .tar and tar.gz files.
|
|
7 |
#
|
|
8 |
# Needs at least Python version 2.2.
|
|
9 |
#
|
|
10 |
# Please consult the html documentation in this distribution
|
|
11 |
# for further details on how to use tarfile.
|
|
12 |
#
|
|
13 |
#-------------------------------------------------------------------
|
|
59
by bescoto
Small changes for 0.4.1 and python 2.3 |
14 |
# Copyright (C) 2002 Lars Gustabel <lars@gustaebel.de>
|
1
by bescoto
Initial checkin |
15 |
# All rights reserved.
|
16 |
#
|
|
17 |
# Permission is hereby granted, free of charge, to any person
|
|
18 |
# obtaining a copy of this software and associated documentation
|
|
19 |
# files (the "Software"), to deal in the Software without
|
|
20 |
# restriction, including without limitation the rights to use,
|
|
21 |
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
22 |
# copies of the Software, and to permit persons to whom the
|
|
23 |
# Software is furnished to do so, subject to the following
|
|
24 |
# conditions:
|
|
25 |
#
|
|
26 |
# The above copyright notice and this permission notice shall be
|
|
27 |
# included in all copies or substantial portions of the Software.
|
|
28 |
#
|
|
29 |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
30 |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
31 |
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
32 |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
33 |
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
34 |
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
35 |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
36 |
# OTHER DEALINGS IN THE SOFTWARE.
|
|
37 |
#
|
|
38 |
"""Read from and write to tar format archives.
|
|
39 |
"""
|
|
40 |
||
335
by loafman
patch #6675: Add modelines |
41 |
__version__ = "$Revision: 1.8 $" |
1
by bescoto
Initial checkin |
42 |
# $Source: /sources/duplicity/duplicity/duplicity/tarfile.py,v $
|
43 |
||
44 |
version = "0.4.9" |
|
59
by bescoto
Small changes for 0.4.1 and python 2.3 |
45 |
__author__ = "Lars Gustabel (lars@gustaebel.de)" |
335
by loafman
patch #6675: Add modelines |
46 |
__date__ = "$Date: 2008/11/16 18:48:15 $" |
47 |
__cvsid__ = "$Id: tarfile.py,v 1.8 2008/11/16 18:48:15 loafman Exp $" |
|
1
by bescoto
Initial checkin |
48 |
__credits__ = "Gustavo Niemeyer for his support, " \ |
49 |
"Detlef Lannert for some early contributions"
|
|
50 |
||
51 |
#---------
|
|
52 |
# Imports
|
|
53 |
#---------
|
|
54 |
import sys |
|
55 |
import os |
|
56 |
import __builtin__ |
|
57 |
import shutil |
|
58 |
import stat |
|
59 |
import errno |
|
60 |
import time |
|
61 |
||
62 |
try: |
|
63 |
import grp, pwd |
|
64 |
except ImportError: |
|
65 |
grp = pwd = None |
|
54
by bescoto
Cache pwd and group files |
66 |
# These are used later to cache user and group names and ids
|
67 |
gname_dict = uname_dict = uid_dict = gid_dict = None |
|
1
by bescoto
Initial checkin |
68 |
|
69 |
# We won't need this anymore in Python 2.3
|
|
70 |
#
|
|
71 |
# We import the _tarfile extension, that contains
|
|
72 |
# some useful functions to handle devices and symlinks.
|
|
73 |
# We inject them into os module, as if we were under 2.3.
|
|
74 |
#
|
|
75 |
# Compatibility shim: the optional _tarfile extension supplies device and
# symlink helpers.  Any helper the os module does not already provide is
# grafted onto os so the rest of this file can call os.<helper> directly.
try:
    import _tarfile
except ImportError:
    _tarfile = None
else:
    # An extension whose mknod is None is treated as if it were not installed.
    if _tarfile.mknod is None:
        _tarfile = None

if _tarfile:
    for _helper in ("mknod", "major", "minor", "makedev", "lchown"):
        if not hasattr(os, _helper):
            setattr(os, _helper, getattr(_tarfile, _helper))
    del _helper
|
91 |
||
92 |
# XXX remove for release (2.3)
|
|
93 |
if sys.version_info[:2] < (2,3):
    # On interpreters older than 2.3, bind True/False as module-level
    # integers (1/0) so the boolean names used in this file always resolve.
    True = 1
    False = 0
|
96 |
||
97 |
#---------------------------------------------------------
|
|
98 |
# GNUtar constants
|
|
99 |
#---------------------------------------------------------
|
|
100 |
BLOCKSIZE  = 512                # length of processing blocks (one header block)
RECORDSIZE = BLOCKSIZE * 20     # length of records; close() pads to this size
MAGIC      = "ustar"            # magic tar string written into each header
VERSION    = "00"               # version number written into each header

LENGTH_NAME = 100               # maximal length of a filename
LENGTH_LINK = 100               # maximal length of a linkname

# Values of the one-character type field of a header.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
|
121 |
||
122 |
#---------------------------------------------------------
|
|
123 |
# tarfile constants
|
|
124 |
#---------------------------------------------------------
|
|
125 |
# Type flags this module knows about; extractfile() treats any other
# type like a regular file.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, GNUTYPE_LONGNAME,
                   GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
                   CHRTYPE, BLKTYPE)

# Type flags for which TarInfo.isreg() returns true.
REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
|
133 |
||
134 |
#---------------------------------------------------------
|
|
135 |
# Bits used in the mode field, values in octal.
|
|
136 |
#---------------------------------------------------------
|
|
137 |
# File-type bits; filemode() maps these to the first character of its result.
S_IFLNK = 0120000               # symbolic link
S_IFREG = 0100000               # regular file
S_IFBLK = 0060000               # block device
S_IFDIR = 0040000               # directory
S_IFCHR = 0020000               # character device
S_IFIFO = 0010000               # fifo

# Special bits; filemode() renders them in the execute positions.
TSUID   = 04000                 # set UID on execution
TSGID   = 02000                 # set GID on execution
TSVTX   = 01000                 # reserved

# Permission bits for owner, group and other.
TUREAD  = 00400                 # read by owner
TUWRITE = 00200                 # write by owner
TUEXEC  = 00100                 # execute/search by owner
TGREAD  = 00040                 # read by group
TGWRITE = 00020                 # write by group
TGEXEC  = 00010                 # execute/search by group
TOREAD  = 00004                 # read by other
TOWRITE = 00002                 # write by other
TOEXEC  = 00001                 # execute/search by other
|
157 |
||
158 |
#---------------------------------------------------------
|
|
159 |
# Some useful functions
|
|
160 |
#---------------------------------------------------------
|
|
161 |
def nts(s):
    """Return the part of string buffer s that precedes its first NUL.

       If s contains no NUL character it is returned unchanged.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
|
165 |
||
166 |
def calc_chksum(buf):
    """Return the checksum of the header block buf.

       The checksum is the sum of the ordinals of all characters of buf,
       except that the eight characters of the chksum field (offsets
       148..155) are counted as blanks whatever they contain.
    """
    # 8 * ord(" ") stands in for the chksum field itself.
    total = 8 * ord(" ")
    for ch in buf[:148] + buf[156:]:
        total = total + ord(ch)
    return total
|
176 |
||
177 |
def copyfileobj(src, dst, length=None):
    """Copy data from file object src to file object dst.

       With length None everything src still has to offer is copied;
       otherwise exactly length bytes are copied in 16 KiB pieces and
       IOError is raised if src runs out of data first.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    copied = 0
    while copied < full_chunks:
        data = src.read(chunk_size)
        if len(data) < chunk_size:
            raise IOError("end of file reached")
        dst.write(data)
        copied += 1

    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
|
|
201 |
||
202 |
# Lookup table for filemode().  Each entry describes one character of the
# result as a flat sequence (bits, char, bits, char, ...); the first pair
# whose bits are all set in the mode wins, and "-" is used if none match.
# More specific masks must therefore come before the masks they contain:
# S_IFLNK before S_IFREG, S_IFBLK before S_IFDIR/S_IFCHR, and the combined
# execute+special masks before the single bits.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       mode is the integer mode (file-type bits plus permission bits).
       One character is produced per entry of filemode_table.
    """
    s = ""
    for t in filemode_table:
        # Walk the (bits, char) pairs of this entry in order.
        for i in range(0, len(t), 2):
            if mode & t[i] == t[i]:
                s += t[i + 1]
                break
        else:
            s += "-"
    return s
|
236 |
||
237 |
# normpath(path) normalizes path and always uses "/" as separator.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the platform separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
|
241 |
||
242 |
class TarError(Exception):
    """Exception type used internally by this module."""
|
|
245 |
||
246 |
#--------------------
|
|
247 |
# exported functions
|
|
248 |
#--------------------
|
|
249 |
def open(name, mode="r", fileobj=None):
    """Return a TarFile for the uncompressed archive name.

       mode is "r" (read), "w" (write) or "a" (append); fileobj is an
       optional already opened file object handed on to TarFile.
    """
    archive = TarFile(name, mode, fileobj)
    return archive
|
254 |
||
255 |
def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
    """Return a TarFile that reads from or writes to the gzip
       compressed archive gzname.

       gzmode "a" is rejected with ValueError.  The returned TarFile
       closes its gzip stream when it is closed.
    """
    if gzmode == "a":
        raise ValueError("Appending to gzipped archive is not allowed")
    import gzip

    # Derive the name of the inner tar file: "x.tgz" -> "x.tar",
    # "x.tar.gz" -> "x.tar"; any other extension is kept.
    stem, suffix = os.path.splitext(gzname)
    suffix = {".tgz": ".tar", ".gz": ""}.get(suffix, suffix)
    tarname = os.path.basename(stem) + suffix

    # TarFile gets the mode as passed in, gzip always a binary mode.
    tarmode = gzmode
    if "b" in gzmode:
        binmode = gzmode
    else:
        binmode = gzmode + "b"

    if tarmode[0:1] == "w":
        if not fileobj:
            fileobj = __builtin__.file(gzname, binmode)
        stream = gzip.GzipFile(tarname, binmode, compresslevel, fileobj)
    else:
        # NOTE(review): on this path the archive is opened from gzname;
        # the fileobj argument is not used.
        stream = gzip.open(gzname, binmode, compresslevel)

    t = TarFile(tarname, tarmode, stream)
    t._extfileobj = 0
    return t
|
281 |
||
282 |
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The first block of the file is parsed as a tar header, first
       reading the file as is and then through gzip.  An IOError raised
       while opening or reading the plain file is passed on to the caller.
    """

    buftoinfo = TarFile.__dict__["_buftoinfo"]
    try:
        f = __builtin__.open(name, "rb")
        try:
            buf = f.read(BLOCKSIZE)
        finally:
            # Close the probe handle instead of leaving it to the
            # garbage collector.
            f.close()
        buftoinfo(None, buf)
        return True
    except (ValueError, ImportError):
        pass
    try:
        import gzip
        f = gzip.open(name, "rb")
        try:
            buf = f.read(BLOCKSIZE)
        finally:
            f.close()
        buftoinfo(None, buf)
        return True
    except (IOError, ValueError, ImportError):
        pass
    return False
|
302 |
||
303 |
#------------------
|
|
304 |
# Exported Classes
|
|
305 |
#------------------
|
|
306 |
class TarInfo:
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo instances are returned by TarFile.getmember() and
       TarFile.getmembers() and are usually created internally.
       If you want to create a TarInfo instance from the outside,
       you should use TarFile.gettarinfo() if the file already exists,
       or you can instantiate the class yourself.
    """

    def __init__(self, name=""):
        """Construct a TarInfo instance. name is the optional name
           of the member.
        """

        self.name     = name       # member name (dirnames must end with '/')
        # Regular file, read/write for owner, group and other (octal 100666).
        self.mode     = (S_IFREG | TUREAD | TUWRITE | TGREAD | TGWRITE |
                         TOREAD | TOWRITE)
        self.uid      = 0          # user id
        self.gid      = 0          # group id
        self.size     = 0          # file size
        self.mtime    = 0          # modification time
        self.chksum   = 0          # header checksum
        self.type     = REGTYPE    # member type
        self.linkname = ""         # link name
        self.uname    = "user"     # user name
        self.gname    = "group"    # group name
        self.devmajor = 0          #-
        self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
        self.prefix   = ""         # prefix, holding information
                                   # about sparse files

        self.offset   = 0          # the tar header starts here
        self.offset_data = 0       # the optional file's data starts here

    def init_from_stat(self, statres):
        """Initialize various attributes from statres, a stat result as
           returned by os.stat() and related functions.

           Return 1 on success.  Return None, without setting the stat
           derived attributes, if the file type is none of regular file,
           directory, fifo, symlink, character or block device.
        """
        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            typeflag = REGTYPE
        elif stat.S_ISDIR(stmd):
            typeflag = DIRTYPE
            # Directory member names always end with a slash.
            if self.name[-1:] != "/":
                self.name += "/"
        elif stat.S_ISFIFO(stmd):
            typeflag = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            typeflag = SYMTYPE
        elif stat.S_ISCHR(stmd):
            typeflag = CHRTYPE
        elif stat.S_ISBLK(stmd):
            typeflag = BLKTYPE
        else:
            return None

        # Fill the TarInfo instance with all
        # information we can get.
        self.mode  = stat.S_IMODE(stmd)
        self.uid   = statres.st_uid
        self.gid   = statres.st_gid
        self.size  = statres.st_size
        self.mtime = statres.st_mtime
        self.type  = typeflag
        # Names are looked up only where the pwd/grp modules exist; an
        # unknown id keeps the current uname/gname.
        if pwd:
            try:
                self.uname = uid2uname(self.uid)
            except KeyError:
                pass
        if grp:
            try:
                self.gname = gid2gname(self.gid)
            except KeyError:
                pass

        if typeflag in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                self.devmajor = os.major(statres.st_rdev)
                self.devminor = os.minor(statres.st_rdev)
        return 1

    def set_arcname(self, name):
        """Set the name of the member in the archive. Backward
           slashes are converted to forward slashes, Absolute paths are
           turned to relative paths.
        """
        arcname = normpath(name)
        drv, arcname = os.path.splitdrive(arcname)
        while arcname[0:1] == "/":
            arcname = arcname[1:]
        self.name = arcname

    def getheader(self):
        """Return a tar header block as a 512 byte string.

           A uid or gid outside 0..2097151 is reported on stderr and
           replaced by 60001 on the instance.  The header is also stored
           in self.buf.
        """
        if self.uid > 2097151 or self.uid < 0:
            sys.stderr.write("uid %i of file %s not in range. Setting uid to 60001\n" % (self.uid,self.name))
            self.uid = 60001
        if self.gid > 2097151 or self.gid < 0:
            sys.stderr.write("gid %i of file %s not in range. Setting gid to 60001\n" % (self.gid, self.name))
            self.gid = 60001
        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                ("%07o" % self.mode, 8),
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                ("%011o" % self.size, 12),
                ("%011o" % self.mtime, 12),
                # chksum placeholder: eight blanks, the value calc_chksum()
                # assumes for this field; the last blank survives as the
                # final byte of the field below.
                (" " * 8, 8),
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Pad every field with NULs up to its fixed width.
            parts.append(value + (fieldsize - len(value)) * "\0")

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # Six octal digits plus NUL replace the first seven bytes of the
        # chksum field (offsets 148..154); byte 155 is kept.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (512 - len(buf)) * "\0"
        self.buf = buf
        return buf

    def isreg(self):
        """Return true if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return true if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return true if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return true if the member is a link inside the archive."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return true if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return true if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return true if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return true if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return true for character devices, block devices and fifos."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
|
445 |
# class TarInfo
|
|
446 |
||
447 |
||
448 |
class TarFile: |
|
449 |
"""Class representing a TAR archive file on disk.
|
|
450 |
"""
|
|
451 |
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) |
|
452 |
||
453 |
dereference = False # If true, add content of linked file to the |
|
454 |
# tar file, else the link.
|
|
455 |
||
456 |
ignore_zeros = False # If true, skips empty or invalid blocks and |
|
457 |
# continues processing.
|
|
458 |
||
459 |
errorlevel = 0 # If 0, fatal errors only appear in debug |
|
460 |
# messages (if debug >= 0). If > 0, errors
|
|
461 |
# are passed to the caller as exceptions.
|
|
462 |
||
463 |
def __init__(self, name=None, mode="r", fileobj=None):
    """Open the archive name, or wrap the file object fileobj.

       mode must be "r" (read), "a" (append) or "w" (write); anything
       else raises ValueError.  Without fileobj the file name is opened
       and will be closed by close().  With fileobj, close() leaves it
       open, and its name and mode attributes (if present) are taken
       over; name is only taken over when no name was passed.
    """
    self.name = name

    # Compare against the exact set of modes.  A substring test such as
    # `mode not in "raw"` lets the empty string through, which then
    # fails below with a KeyError instead of the intended ValueError.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be either 'r', 'a' or 'w', "
                         "not '%s'" % (mode,))
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = __builtin__.file(self.name, self.mode)
        self._extfileobj = 0
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = 1
    self.fileobj = fileobj

    self.init_datastructures()

    if self._mode == "a":
        # Scan the existing archive from its start before appending.
        self.fileobj.seek(0)
        self._load()
|
488 |
||
489 |
def init_datastructures(self):
    """Set up the bookkeeping attributes of a freshly opened archive.

       No member list, member name list or chunk cache is created here.
    """
    # Inodes of members already added, used to detect hard links.
    self.inodes = {}
    # Current position in the archive file.
    self.offset = long(0)
    # Offset of the next header, used when reading.
    self.next_chunk = 0
    # Flag: have all members been read?
    self._loaded = 0
|
499 |
||
500 |
def close(self):
    """Finish the archive and release the underlying file object.

       In write and append mode the output is padded with NULs up to a
       multiple of RECORDSIZE.  The file object is only closed if this
       instance opened it.  Calling close() again does nothing.
    """
    if not self.fileobj:
        return

    if self._mode in "aw":
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        remainder = self.offset % RECORDSIZE
        if remainder > 0:
            self.fileobj.write("\0" * (RECORDSIZE - remainder))

    if not self._extfileobj:
        self.fileobj.close()
    self.fileobj = None
|
514 |
||
515 |
def throwaway_until(self, position):
    """Advance to position by reading and discarding data.

       position must not lie before self.offset.  Afterwards
       self.offset equals position.
    """
    remaining = position - self.offset
    assert remaining >= 0

    # Discard in 16 KiB pieces, then whatever is left (possibly nothing).
    chunk = 16 * 1024
    while remaining >= chunk:
        self.fileobj.read(chunk)
        remaining -= chunk
    self.fileobj.read(remaining)
    self.offset = position
|
525 |
||
526 |
def next(self):
    """Return the next member from the archive.
       Return None if the end is reached.
       Can be used in a while statement, is used
       for Iteration (see __iter__()) and internally.

       Raises ValueError if the archive is closed or was opened for
       writing.  None is also returned when a block cannot be parsed
       as a header and ignore_zeros is false.
    """
    if not self.fileobj:
        raise ValueError, "I/O operation on closed file"
    if self._mode not in "ra":
        raise ValueError, "reading from a write-mode file"

    # Read the next block.
    # The file object is not seeked; data up to the next header is
    # read and discarded instead.
    # self.fileobj.seek(self.chunks[-1])
    #self.fileobj.seek(self.next_chunk)
    #self.offset = self.next_chunk
    self.throwaway_until(self.next_chunk)
    while 1:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None
        try:
            tarinfo = self._buftoinfo(buf)
        except ValueError:
            # The block is not a valid header.
            if self.ignore_zeros:
                # Report the block at debug level 2 and try the next one.
                if buf.count("\0") == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                return None
        break

    # If the TarInfo instance contains a GNUTYPE longname or longlink
    # statement, we must process this first.
    if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
        tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)

    if tarinfo.issparse():
        # Sparse members are rejected here; the statements after the
        # assert only run when assertions are disabled.
        assert 0, "Sparse file support turned off"
        # Sparse files need some care,
        # due to the possible extra headers.
        tarinfo.offset = self.offset
        self.offset += BLOCKSIZE
        origsize = self._proc_sparse(tarinfo)
        tarinfo.offset_data = self.offset
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder:
            blocks += 1
        self.offset += blocks * BLOCKSIZE
        tarinfo.size = origsize
    else:
        # The header starts at the current offset, the data (if any)
        # one block later.
        tarinfo.offset = self.offset
        self.offset += BLOCKSIZE
        tarinfo.offset_data = self.offset
        if tarinfo.isreg():
            ## Skip the following data blocks.
            # The next header follows the data, rounded up to whole blocks.
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder:
                blocks += 1
            self.next_chunk = self.offset + (blocks * BLOCKSIZE)
        else: self.next_chunk = self.offset

    #self.members.append(tarinfo)  These use too much memory
    #self.membernames.append(tarinfo.name)
    #self.chunks.append(self.offset)
    return tarinfo
|
595 |
||
596 |
def getmember(self, name):
    """Return the TarInfo instance of the member called name.

       The archive is scanned completely first if the name is not known
       yet.  Raises KeyError if there is no such member.
    """
    # NOTE(review): self.membernames is not created by
    # init_datastructures() (the assignment is commented out there);
    # confirm it is set elsewhere before relying on this method.
    found = name in self.membernames
    if not found and not self._loaded:
        self._load()
        found = name in self.membernames
    if not found:
        raise KeyError("filename `%s' not found in tar archive" % name)
    return self._getmember(name)
|
604 |
||
605 |
def getinfo(self, name):
    """Deprecated spelling of getmember().

       Issues a DeprecationWarning and returns self.getmember(name).
       This method will be deprecated in 0.6.
    """
    # XXX kick this out in 0.6
    import warnings
    message = "use getmember() instead"
    warnings.warn(message, DeprecationWarning)
    member = self.getmember(name)
    return member
|
614 |
||
615 |
def getmembers(self):
    """Return self.members, the list of all members of the archive
       (as TarInfo instances).

       The whole archive is scanned first unless that already happened.
    """
    # NOTE(review): self.members is not created by init_datastructures()
    # (the assignment is commented out there); confirm it is set elsewhere.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.members
|
623 |
||
624 |
def getnames(self):
    """Return self.membernames, the list of the names of all members
       of the archive.

       The whole archive is scanned first unless that already happened.
    """
    # NOTE(review): self.membernames is not created by
    # init_datastructures() (the assignment is commented out there);
    # confirm it is set elsewhere.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.membernames
|
631 |
||
632 |
def gettarinfo(self, name, arcname=None):
    """Create a TarInfo instance from an existing file.
       Optional arcname defines the name under which the file
       shall be stored in the archive.

       Return None if TarInfo.init_from_stat() rejects the file.  A
       regular file whose (inode, device, mtime) was already seen is
       returned as a hard link to the first member, unless
       self.dereference is set.
    """
    # Now, fill the TarInfo instance with
    # information specific for the file.
    tarinfo = TarInfo()

    if arcname is None:
        tarinfo.set_arcname(name)
    else:
        tarinfo.set_arcname(arcname)

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    if not tarinfo.init_from_stat(statres):
        return None

    if tarinfo.type == REGTYPE:
        inode = (statres.st_ino, statres.st_dev, statres.st_mtime)
        # Test membership on the dictionary itself; building the key
        # list for every file makes adding many files quadratic.
        if inode in self.inodes and not self.dereference:
            # Is it a hardlink to an already
            # archived file?
            tarinfo.type = LNKTYPE
            tarinfo.linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = tarinfo.name
    elif tarinfo.type == SYMTYPE:
        tarinfo.linkname = os.readlink(name)
        tarinfo.size = 0

    return tarinfo
|
669 |
||
670 |
def list(self, verbose=1):
    """Print a formatted listing of the archive's
       contents to stdout.

       One line is printed per member.  With verbose true the line
       starts with mode string, owner/group, size (or major,minor for
       character and block devices) and modification time in UTC, and
       ends with the link target of symbolic and hard links.  With
       verbose false only the member name is printed.
    """
    for tarinfo in self:
        if verbose:
            print filemode(tarinfo.mode),
            print tarinfo.uname + "/" + tarinfo.gname,
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Year, month, day, hour, minute, second from gmtime().
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.gmtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line.
        print
|
|
693 |
||
694 |
def add(self, name, arcname=None, recursive=1):
    """Add a file or a directory to the archive.
       Directory addition is recursive by default.

       name is the path of the file to add, arcname the name it gets in
       the archive (default: name).  The archive file itself and files
       of unsupported type are skipped with a debug message.  Raises
       ValueError if the archive is closed or was opened for reading.
    """
    if not self.fileobj:
        raise ValueError("I/O operation on closed file")
    if self._mode == "r":
        raise ValueError("writing to a read-mode file")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    # self.name is None for an archive built around a file object
    # without a name; there is nothing to compare against then.
    if self.name is not None and \
       os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped `%s'\n" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, "%s\n" % name)

    # Create a TarInfo instance from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
        # Nothing can be archived for this file; carrying on would
        # call methods on None.
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = __builtin__.file(name, "rb")
        try:
            self.addfile(tarinfo, fileobj = f)
        finally:
            # Close the input file even if writing the member fails.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
|
745 |
||
746 |
def addfile(self, tarinfo, fileobj=None):
    """Write the header(s) for tarinfo and, if fileobj is given, the
       member's data to the archive.

       tarinfo.size bytes are read from fileobj and padded with NULs to
       a multiple of BLOCKSIZE.  Raises ValueError if the archive is
       closed or was opened for reading.
    """
    if not self.fileobj:
        raise ValueError("I/O operation on closed file")
    if self._mode == "r":
        raise ValueError("writing to a read-mode file")

    header_blocks = self._get_full_headers(tarinfo)
    self.fileobj.write(header_blocks)
    header_len = len(header_blocks)
    assert header_len % BLOCKSIZE == 0
    self.offset += header_len

    # Without a file object there is no data to follow.
    if fileobj is None:
        return

    copyfileobj(fileobj, self.fileobj, tarinfo.size)
    nblocks, tail = divmod(tarinfo.size, BLOCKSIZE)
    if tail > 0:
        # Fill the last, partial block.
        self.fileobj.write("\0" * (BLOCKSIZE - tail))
        nblocks += 1
    self.offset += nblocks * BLOCKSIZE
|
775 |
||
776 |
#self.members.append(tarinfo) #These take up too much memory
|
|
777 |
#self.membernames.append(tarinfo.name)
|
|
778 |
#self.chunks.append(self.offset)
|
|
779 |
||
780 |
def _get_full_headers(self, tarinfo):
    """Return the complete header string for tarinfo: GNU longlink and
       longname headers where needed, followed by the regular header.

       A linkname or name of 99 or more characters gets a GNU extension
       header and is cut to 99 characters on tarinfo itself.  If both
       are too long, the longlink header comes first.
    """
    pieces = []

    max_link = LENGTH_LINK - 1
    if len(tarinfo.linkname) >= max_link:
        pieces.append(self._return_gnulong(tarinfo.linkname,
                                           GNUTYPE_LONGLINK))
        tarinfo.linkname = tarinfo.linkname[:max_link]

    max_name = LENGTH_NAME - 1
    if len(tarinfo.name) >= max_name:
        pieces.append(self._return_gnulong(tarinfo.name,
                                           GNUTYPE_LONGNAME))
        tarinfo.name = tarinfo.name[:max_name]

    pieces.append(tarinfo.getheader())
    return "".join(pieces)
|
797 |
||
798 |
# def untar(self, path):
|
|
799 |
# """Untar the whole archive to path.
|
|
800 |
# """
|
|
801 |
# later = []
|
|
802 |
# for tarinfo in self:
|
|
803 |
# if tarinfo.isdir():
|
|
804 |
# later.append(tarinfo)
|
|
805 |
# self.extract(tarinfo, path)
|
|
806 |
# for tarinfo in later:
|
|
807 |
# self._utime(tarinfo, os.path.join(path, tarinfo.name))
|
|
808 |
||
809 |
def extractfile(self, member):
    """Return a file-like object for reading the data of member, which
       may be a name or a TarInfo instance.

       Links are followed to the member they point to.  None is
       returned for supported types that carry no data.  Raises
       ValueError unless the archive is open in read mode.
    """
    if not self.fileobj:
        raise ValueError("I/O operation on closed file")
    if self._mode != "r":
        raise ValueError("reading from a write-mode file")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files and members of unknown type are read as plain data.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return _FileObject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    return None
|
829 |
||
830 |
def extract(self, member, path=""):
    """Write an archive member to disk under its full pathname.

    member may be a member name or a TarInfo instance.  If path is
    given it is joined in front of the member's name; otherwise the
    member is written relative to the current working directory.

    EnvironmentError is re-raised only when self.errorlevel > 0 and
    TarError only when self.errorlevel > 1; otherwise the error is
    reported through _dbg() at level 1 and swallowed.

    Raises ValueError if the archive is closed or not in read mode.
    """
    if not self.fileobj:
        raise ValueError("I/O operation on closed file")
    if self._mode != "r":
        raise ValueError("reading from a write-mode file")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    self._dbg(1, tarinfo.name)
    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        # sys.exc_info() gives the active exception on every Python version.
        err = sys.exc_info()[1]
        self._dbg(1, "\ntarfile: %s `%s'" % (err.strerror, err.filename))
    except TarError:
        if self.errorlevel > 1:
            raise
        err = sys.exc_info()[1]
        self._dbg(1, "\ntarfile: %s" % err)
    self._dbg(1, "\n")
|
861 |
||
862 |
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo instance tarinfo to a physical
       file called targetpath.

       Missing parent directories are created first, then the member is
       materialised by the _make*() helper matching its type.  Members
       of unknown type are written as regular files.  Ownership and
       mode are applied to everything except symbolic links, and the
       modification time to everything except symlinks and directories.
    """
    # Drop one trailing slash and normalise the path for this platform.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Synthesise a directory entry that borrows ownership and mtime
        # from the member being extracted.
        ti = TarInfo()
        ti.name = ""
        ti.type = DIRTYPE
        ti.mode = 0777
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        # os.path.split() yields only (head, tail); deeper levels are
        # created by the recursive call, which repeats this check for
        # the head's own parent.
        for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
            ti.name = os.path.join(ti.name, d)
            self._extract_member(ti, ti.name)

    if tarinfo.isreg():
        self._makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self._makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self._makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self._makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self._makelink(tarinfo, targetpath)
    else:
        # Anything else is written out as if it were a regular file.
        self._makefile(tarinfo, targetpath)
        if tarinfo.type not in SUPPORTED_TYPES:
            self._dbg(1, "\ntarfile: Unknown file type '%s', " \
                         "extracted as regular file." % tarinfo.type)

    # Metadata is skipped for symbolic links.
    if not tarinfo.issym():
        self._chown(tarinfo, targetpath)
        self._chmod(tarinfo, targetpath)
        # Directories do not get their mtime set here.
        if not tarinfo.isdir():
            self._utime(tarinfo, targetpath)
|
910 |
||
911 |
def _makedir(self, tarinfo, targetpath): |
|
912 |
"""Make a directory called targetpath out of tarinfo.
|
|
913 |
"""
|
|
914 |
try: |
|
915 |
os.mkdir(targetpath) |
|
916 |
except EnvironmentError, e: |
|
917 |
if e.errno != errno.EEXIST: |
|
918 |
raise
|
|
919 |
||
920 |
def _makefile(self, tarinfo, targetpath):
    """Write the data of archive member tarinfo to the file targetpath.

    The member is read through extractfile() and copied into a file
    opened in binary write mode.  Both the source and the target are
    closed even when opening the target or copying raises, so a failed
    extraction does not leave an open file handle behind.
    """
    source = self.extractfile(tarinfo)
    try:
        target = __builtin__.file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
|
928 |
||
929 |
def _makefifo(self, tarinfo, targetpath):
    """Create a FIFO (named pipe) at targetpath.

    Raises TarError when the os module has no mkfifo().  tarinfo is
    not consulted.
    """
    if not hasattr(os, "mkfifo"):
        raise TarError("Fifo not supported by system")
    os.mkfifo(targetpath)
|
936 |
||
937 |
def _makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.

    The node's mode is tarinfo.mode combined with S_IFBLK for block
    devices and S_IFCHR otherwise.  Raises TarError when the os module
    has no mknod().
    """
    if not hasattr(os, "mknod"):
        raise TarError("Special devices not supported by system")

    if tarinfo.isblk():
        node_kind = stat.S_IFBLK
    else:
        node_kind = stat.S_IFCHR
    mode = tarinfo.mode | node_kind

    # This if statement should go away when python-2.3a0-devicemacros
    # patch succeeds.
    if hasattr(os, "makedev"):
        device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
        os.mknod(targetpath, mode, device)
    else:
        os.mknod(targetpath, mode, tarinfo.devmajor, tarinfo.devminor)
|
957 |
||
958 |
def _makelink(self, tarinfo, targetpath):
    """Create the hard or symbolic link described by tarinfo.

    Symbolic links point at tarinfo.linkname verbatim; hard links are
    made to linkname resolved against the directory of targetpath.

    If the os module lacks the needed call (AttributeError), the
    referenced member is extracted to targetpath instead.  If that
    fails with IOError, OSError or KeyError, the referenced path is
    copied from the filesystem with shutil.copy2().  Raises TarError
    when that copy fails as well.
    """
    linkpath = tarinfo.linkname
    self._dbg(1, " -> %s" % linkpath)
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            linkpath = os.path.join(os.path.dirname(targetpath), linkpath)
            os.link(linkpath, targetpath)
    except AttributeError:
        # No link support: locate the link target by its archive name.
        member_name = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                            tarinfo.linkname))
        try:
            self._extract_member(self.getmember(member_name), targetpath)
        except (IOError, OSError, KeyError):
            # Last resort: copy whatever exists on disk under that path.
            try:
                shutil.copy2(os.path.normpath(member_name), targetpath)
            except EnvironmentError:
                raise TarError("Link could not be created")
|
984 |
||
985 |
def _chown(self, tarinfo, targetpath):
    """Set the owner and group of targetpath according to tarinfo.

    Nothing happens unless pwd is available and the effective uid is 0.
    The group is resolved from tarinfo.gname; failing that,
    tarinfo.gid is used if it is a known gid, else the gid of the
    current process.  The user is resolved the same way from
    tarinfo.uname / tarinfo.uid.  A failing chown is only reported
    through _dbg() at level 2.
    """
    if not (pwd and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    try:
        group = gname2gid(tarinfo.gname)
    except KeyError:
        try:
            gid2gname(tarinfo.gid)  # Make sure gid exists
        except KeyError:
            group = os.getgid()
        else:
            group = tarinfo.gid

    try:
        user = uname2uid(tarinfo.uname)
    except KeyError:
        try:
            uid2uname(tarinfo.uid)  # Make sure uid exists
        except KeyError:
            user = os.getuid()
        else:
            user = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user, group)
        else:
            os.chown(targetpath, user, group)
    except EnvironmentError:
        err = sys.exc_info()[1]
        self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
                     % (err.strerror, err.filename))
|
1010 |
||
1011 |
def _chmod(self, tarinfo, targetpath): |
|
1012 |
"""Set file permissions of targetpath according to tarinfo.
|
|
1013 |
"""
|
|
1014 |
try: |
|
1015 |
os.chmod(targetpath, tarinfo.mode) |
|
1016 |
except EnvironmentError, e: |
|
1017 |
self._dbg(2, "\ntarfile: (chmod failed), %s `%s'" |
|
1018 |
% (e.strerror, e.filename)) |
|
1019 |
||
1020 |
def _utime(self, tarinfo, targetpath): |
|
1021 |
"""Set modification time of targetpath according to tarinfo.
|
|
1022 |
"""
|
|
1023 |
try: |
|
1024 |
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) |
|
1025 |
except EnvironmentError, e: |
|
1026 |
self._dbg(2, "\ntarfile: (utime failed), %s `%s'" |
|
1027 |
% (e.strerror, e.filename)) |
|
1028 |
||
1029 |
def _getmember(self, name, tarinfo=None): |
|
1030 |
"""Find an archive member by name from bottom to top.
|
|
1031 |
If tarinfo is given, it is used as the starting point.
|
|
1032 |
"""
|
|
1033 |
if tarinfo is None: |
|
1034 |
end = len(self.members) |
|
1035 |
else: |
|
1036 |
end = self.members.index(tarinfo) |
|
1037 |
||
1038 |
for i in xrange(end - 1, -1, -1): |
|
1039 |
if name == self.membernames[i]: |
|
1040 |
return self.members[i] |
|
1041 |
||
1042 |
def _load(self): |
|
1043 |
"""Read through the entire archive file and look for readable
|
|
1044 |
members.
|
|
1045 |
"""
|
|
1046 |
while 1: |
|
1047 |
tarinfo = self.next() |
|
1048 |
if tarinfo is None: |
|
1049 |
break
|
|
1050 |
self._loaded = 1 |
|
1051 |
return
|
|
1052 |
||
1053 |
def __iter__(self): |
|
1054 |
"""Provide an iterator object.
|
|
1055 |
"""
|
|
1056 |
if self._loaded: |
|
1057 |
return iter(self.members) |
|
1058 |
else: |
|
1059 |
return TarIter(self) |
|
1060 |
||
1061 |
def _buftoinfo(self, buf):
    """Transform a 512 byte header block into a TarInfo instance.

    Numeric fields are parsed as octal from fixed offsets in buf.  If
    either device number cannot be parsed, both devmajor and devminor
    are reset to 0.  A checksum mismatch is only reported through
    _dbg() at level 1; the TarInfo is returned regardless.

    Raises ValueError if mode, uid, gid, size, mtime or chksum is not
    valid octal.
    """
    tarinfo = TarInfo()
    tarinfo.name = nts(buf[0:100])
    tarinfo.mode = int(buf[100:107], 8)
    tarinfo.uid = int(buf[108:115], 8)
    tarinfo.gid = int(buf[116:123], 8)
    tarinfo.size = long(buf[124:135], 8)
    tarinfo.mtime = long(buf[136:147], 8)
    # chksum stored as a six digit octal number with
    # leading zeroes followed by a nul and then a space
    tarinfo.chksum = int(buf[148:154], 8)
    tarinfo.type = buf[156:157]
    tarinfo.linkname = nts(buf[157:257])
    tarinfo.uname = nts(buf[265:297])
    tarinfo.gname = nts(buf[297:329])
    try:
        tarinfo.devmajor = int(buf[329:336], 8)
        tarinfo.devminor = int(buf[337:344], 8)
    except ValueError:
        # Reset both fields.  The fallback used to assign devmajor
        # twice and never touched devminor.
        tarinfo.devmajor = tarinfo.devminor = 0
    tarinfo.prefix = buf[345:500]
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum\n")
    return tarinfo
|
596
by Kenneth Loafman
418170 [PATCH] file names longer then 512 symbols are not supported |
1087 |
|
1
by bescoto
Initial checkin |
1088 |
def _proc_gnulong(self, tarinfo, type):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    tarinfo is the header of the extension member and type its type
    flag.  The long string is read from the following data blocks,
    then the next header is parsed; if that header is itself a
    longname/longlink extension it is processed recursively.  The long
    name and/or link name are stored on the resulting TarInfo, which
    is returned.

    Returns None if the archive ends before the data blocks or the
    following header could be read.  self.offset is advanced by one
    BLOCKSIZE for every block consumed.
    """
    name = None
    linkname = None

    buf = self.fileobj.read(BLOCKSIZE)
    if not buf:
        return None
    self.offset += BLOCKSIZE

    # Names longer than one block continue in further data blocks.
    namesize = tarinfo.size - BLOCKSIZE
    while namesize > 0:
        chunk = self.fileobj.read(BLOCKSIZE)
        # Test the chunk just read: the accumulated buffer is never
        # empty here, so testing it could not detect a truncated archive.
        if not chunk:
            return None
        buf += chunk
        self.offset += BLOCKSIZE
        namesize -= BLOCKSIZE

    if type == GNUTYPE_LONGNAME:
        name = nts(buf)
    if type == GNUTYPE_LONGLINK:
        linkname = nts(buf)

    # The header after the extension data describes the real member,
    # or a second extension (longlink followed by longname).
    buf = self.fileobj.read(BLOCKSIZE)
    if not buf:
        return None
    tarinfo = self._buftoinfo(buf)
    if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
        tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
    self.offset += BLOCKSIZE

    # The recursive call reports a truncated archive as None.
    if tarinfo is None:
        return None
    if name is not None:
        tarinfo.name = name
    if linkname is not None:
        tarinfo.linkname = linkname
    return tarinfo
|
1120 |
||
596
by Kenneth Loafman
418170 [PATCH] file names longer then 512 symbols are not supported |
1121 |
|
1122 |
||
1
by bescoto
Initial checkin |
1123 |
def _return_gnulong(self, name, type):
    """Return the raw bytes of a GNU longname/longlink member.

    The result is a tar header named "././@LongLink" whose type is
    type and whose size is len(name), followed by name itself, padded
    with NUL bytes up to the next BLOCKSIZE boundary.  No padding is
    added when len(name) is already a multiple of BLOCKSIZE.
    """
    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    overhang = header.size % BLOCKSIZE
    if overhang > 0:
        padding = "\0" * (BLOCKSIZE - overhang)
    else:
        padding = ""
    return "%s%s%s" % (header.getheader(), name, padding)
1
by bescoto
Initial checkin |
1138 |
|
1139 |
def _proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus any extension headers.

    Builds a _ringbuffer of _data and _hole sections describing the
    member, stores it in tarinfo.sparse and returns the original
    (expanded) file size.

    Sparse structs are 24 bytes each: a 12 byte octal offset followed
    by a 12 byte octal length.  The first slot that is not valid octal
    ends the list in that header only.  Parsing each slot separately
    means a short list neither leaves origsize unassigned nor causes
    later extension headers to be skipped.
    """
    buf = tarinfo.getheader()
    sp = _ringbuffer()
    lastpos = long(0)
    realpos = long(0)

    # There are 4 possible sparse structs in the first header.
    pos = 386
    for i in range(4):
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            break
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    try:
        origsize = int(buf[483:495], 8)
    except ValueError:
        # Best effort: with an unreadable size field, assume the file
        # ends with its last data section instead of failing later on
        # an unassigned name.
        origsize = lastpos

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        for i in range(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # A trailing hole covers the space after the last data section.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp
    return origsize
|
1186 |
||
1187 |
def _dbg(self, level, msg): |
|
1188 |
if level <= self.debug: |
|
1189 |
sys.stdout.write(msg) |
|
1190 |
# class TarFile
|
|
1191 |
||
1192 |
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile whose members are to be iterated."""
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def next(self):
        """Return the next member as delivered by TarFile.next().

        When that method returns a false value, the TarFile is flagged
        as _loaded and StopIteration is raised.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = 1
        raise StopIteration
# class TarIter
|
1216 |
# class TarIter
|
|
1217 |
||
1218 |
# Helper classes for sparse file support
|
|
1219 |
class _section: |
|
1220 |
"""Base class for _data and _hole.
|
|
1221 |
"""
|
|
1222 |
def __init__(self, offset, size): |
|
1223 |
self.offset = offset |
|
1224 |
self.size = size |
|
1225 |
def __contains__(self, offset): |
|
1226 |
return self.offset <= offset < self.offset + self.size |
|
1227 |
||
1228 |
class _data(_section):
    """A data section of a sparse file.

    realpos is stored next to the offset and size kept by _section.
    """

    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
|
1234 |
||
1235 |
class _hole(_section):
    """A hole section of a sparse file; adds nothing to _section."""
|
|
1239 |
||
1240 |
class _ringbuffer(list): |
|
1241 |
"""Ringbuffer class which increases performance
|
|
1242 |
over a regular list.
|
|
1243 |
"""
|
|
1244 |
def __init__(self): |
|
1245 |
self.idx = 0 |
|
1246 |
def find(self, offset): |
|
1247 |
idx = self.idx |
|
1248 |
while 1: |
|
1249 |
item = self[idx] |
|
1250 |
if offset in item: |
|
1251 |
break
|
|
1252 |
idx += 1 |
|
1253 |
if idx == len(self): |
|
1254 |
idx = 0 |
|
1255 |
if idx == self.idx: |
|
1256 |
# End of File
|
|
1257 |
return None |
|
1258 |
self.idx = idx |
|
1259 |
return item |
|
1260 |
||
1261 |
class _FileObject:
    """File-like object for reading an archive member,
       is returned by TarFile.extractfile().
       Support for sparse files included.

       The read() attribute is bound per instance in __init__ to either
       _readnormal() or _readsparse(), depending on tarinfo.issparse().
    """

    def __init__(self, tarfile, tarinfo):
        self.tarfile = tarfile
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = 0
        # Position of the member's data in the archive file.
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        # Current read position within the member.
        self.pos = 0l
        # Data already read but not yet handed out by readline().
        self.linebuffer = ""
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def readline(self, size=-1):
        """Read a line with approx. size.
           If size is negative, read a whole line.
           readline() and read() must not be mixed up (!).

           Trailing "\\r" characters before the newline are stripped
           and the returned line ends in a single "\\n".  If no newline
           is found, everything buffered so far is returned as is.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            nl = min(nl, size)
        else:
            # No newline buffered yet: pull in data in pieces of at
            # most 100 bytes until one shows up, the size budget is
            # spent or the member is exhausted.
            size -= len(self.linebuffer)
            while nl < 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                if size <= 0:
                    break
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # Still no newline: hand out the whole buffer.
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while 1:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Reads at most size bytes (everything left if size is None)
           from the underlying file at its current position; no seek is
           performed here.  Advances self.pos and self.tarfile.offset
           by the number of bytes requested.
        """
        if self.closed:
            raise ValueError, "I/O operation on closed file"
        #self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        self.tarfile.offset += bytestoread
        return self.fileobj.read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           Collects data section by section until size bytes have been
           gathered or no further section is found.
        """
        if self.closed:
            raise ValueError, "I/O operation on closed file"

        if size is None:
            size = self.size - self.pos

        data = ""
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data += buf
        return data

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.

           Returns "" when no section contains the current position.
           Data sections are read from the underlying file after an
           explicit seek; hole sections yield NUL bytes.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # Never read past the end of the current section.
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Translate the logical position into the position of the
            # stored data.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.fileobj.read(toread)
        else:
            self.pos += toread
            return "\0" * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (absolute), 1 (relative to the current position)
           or 2 (relative to the end).  The result is clamped to the
           range 0..self.size.  Only self.pos is updated and the line
           buffer is discarded; the underlying file is not repositioned
           here.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.

           Only marks the object as closed; the archive's underlying
           file is left open.
        """
        self.closed = 1
|
1399 |
#class _FileObject
|
|
1400 |
||
1401 |
#---------------------------------------------
|
|
1402 |
# zipfile compatible TarFile class
|
|
1403 |
#
|
|
1404 |
# for details consult zipfile's documentation
|
|
1405 |
#---------------------------------------------
|
|
1406 |
import cStringIO |
|
1407 |
||
1408 |
TAR_PLAIN = 0 # zipfile.ZIP_STORED |
|
1409 |
TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED |
|
1410 |
class TarFileCompat:
    """Wrapper giving a tar archive the interface of zipfile.ZipFile."""

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with open() or gzopen(), depending on compression.

        In read mode every member additionally gets the attributes
        filename, file_size and date_time, copied from name, size and
        mtime.  Raises ValueError for an unknown compression constant.
        """
        if compression == TAR_PLAIN:
            opener = open
        elif compression == TAR_GZIPPED:
            opener = gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            import time
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive listing via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete data of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive; compress_type is ignored."""
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo with the data in bytes.

        name, size and mtime are filled in on zinfo from its filename,
        file_size and date_time attributes.
        """
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))

    def close(self):
        """Close the underlying archive."""
        self.tarfile.close()
#class TarFileCompat
|
1452 |
#class TarFileCompat
|
|
1453 |
||
1454 |
if __name__ == "__main__":
    # a "light-weight" implementation of GNUtar ;-)
    usage = """
Usage: %s [options] [files]

-h display this help message
-c create a tarfile
-r append to an existing archive
-x extract archive
-t list archive contents
-f FILENAME
use archive FILENAME, else STDOUT (-c)
-z filter archive through gzip
-C DIRNAME
with opt -x: extract to directory DIRNAME
with opt -c, -r: put files to archive under DIRNAME
-v verbose output
-q quiet

wildcards *, ?, [seq], [!seq] are accepted.
""" % sys.argv[0]

    import getopt, glob
    try:
        opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
    except getopt.GetoptError, e:
        # Report the bad option and show the usage text.
        print
        print "ERROR:", e
        print usage
        sys.exit(0)

    file = None
    mode = None
    dir = None
    comp = 0
    debug = 0
    # Later options override earlier ones of the same kind.
    for o, a in opts:
        if o == "-t": mode = "l"        # list archive
        if o == "-c": mode = "w"        # write to archive
        if o == "-r": mode = "a"        # append to archive
        if o == "-x": mode = "r"        # extract from archive
        if o == "-f": file = a          # specify filename else use stdout
        if o == "-C": dir = a           # change to dir
        if o == "-z": comp = 1          # filter through gzip
        if o == "-v": debug = 2         # verbose mode
        if o == "-q": debug = 0         # quiet mode
        if o == "-h":                   # help message
            print usage
            sys.exit(0)

    # One of -t, -c, -r or -x is mandatory.
    if not mode:
        print usage
        sys.exit(0)

    if comp:
        func = gzopen
    else:
        func = open

    if not file or file == "-":
        # Without an archive name, only creating to stdout is possible.
        if mode != "w":
            print usage
            sys.exit(0)
        # Debug output is turned off when the archive goes to stdout.
        debug = 0
        # If under Win32, set stdout to binary.
        try:
            import msvcrt
            msvcrt.setmode(1, os.O_BINARY)
        except ImportError:
            pass
        tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
    else:
        # Listing reads the archive, so it is opened in mode "r".
        if mode == "l":
            tarfile = func(file, "r")
        else:
            tarfile = func(file, mode)

    tarfile.debug = debug

    if mode == "r":
        if dir is None:
            dir = ""
        for tarinfo in tarfile:
            tarfile.extract(tarinfo, dir)
    elif mode == "l":
        tarfile.list(debug)
    else:
        # Create/append: expand each argument as a glob pattern.
        for arg in args:
            files = glob.glob(arg)
            for f in files:
                tarfile.add(f, dir)
    tarfile.close()
|
1546 |
||
1547 |
||
1548 |
class TarFromIterator(TarFile):
    """Readable tarfile-like object generated from an iterator.

    The archive is produced lazily: read() pulls (TarInfo, fileobj)
    pairs from the iterator and turns them into tar headers and data.
    """
    # These status numbers record what stage of the tarfile is being
    # produced.
    BEGIN = 0           # next step is to read tarinfo, write new header
    MIDDLE_OF_FILE = 1  # in process of writing file data
    END = 2             # end of data

    # Buffer is added to in multiples of following
    BUFFER_ADDLEN = 64 * 1024

    def __init__(self, pair_iter):
        """Construct a TarFromIterator instance.

        pair_iter is an iterator of (TarInfo, fileobj) pairs, where
        fileobj is a file-like object opened for reading, or None.
        Each fileobj is closed before the next pair is fetched.
        """
        self.closed = None
        self.name = None
        self.mode = "rb"
        self.pair_iter = pair_iter

        self.init_datastructures()
        self.status = self.BEGIN
        self.cur_tarinfo, self.cur_fileobj = None, None
        self.cur_pos_in_fileobj = 0
        self.buffer = ""
        # holds current position as seen by reading client.  This is
        # distinct from self.offset.
        self.tar_iter_offset = 0

    def seek(self, offset):
        """Advance to offset by reading and discarding data.

        Raises TarError when offset lies before the current position.
        """
        if offset < self.tar_iter_offset:
            raise TarError("Seeks in TarFromIterator must go forwards,\n"
                           "Instead asking for %s from %s" %
                           (offset, self.tar_iter_offset))
        while offset - self.tar_iter_offset >= self.BUFFER_ADDLEN:
            if not self.read(self.BUFFER_ADDLEN):
                return  # eof
        self.read(offset - self.tar_iter_offset)

    def read(self, length = -1):
        """Return next length bytes, or everything if length < 0"""
        if length < 0:
            while self._addtobuffer():
                pass
            result = self.buffer
            self.buffer = ""
        else:
            while len(self.buffer) < length and self._addtobuffer():
                pass
            # It's possible that length > len(self.buffer)
            result = self.buffer[:length]
            self.buffer = self.buffer[length:]
        self.tar_iter_offset += len(result)
        return result

    def _addtobuffer(self):
        """Write more data into the buffer.  Return None if at end"""
        if self.status == self.BEGIN:
            # Just write headers into buffer
            try:
                self.cur_tarinfo, self.cur_fileobj = self.pair_iter.next()
            except StopIteration:
                self._add_final()
                self.status = self.END
                return None

            # Zero out tarinfo sizes for various file types
            if self.cur_tarinfo.type in (LNKTYPE, SYMTYPE,
                                         FIFOTYPE, CHRTYPE, BLKTYPE):
                self.cur_tarinfo.size = long(0)

            full_headers = self._get_full_headers(self.cur_tarinfo)
            self.buffer += full_headers
            self.offset += len(full_headers)
            assert len(full_headers) % BLOCKSIZE == 0

            if self.cur_fileobj is not None:
                self.status = self.MIDDLE_OF_FILE
                self.cur_pos_in_fileobj = 0
            else:
                # no data with header
                self.status = self.BEGIN
                self._finish_fileobj()
            return 1

        if self.status == self.MIDDLE_OF_FILE:
            # Add next chunk of self.cur_fileobj to self.buffer
            wanted = min(self.BUFFER_ADDLEN,
                         self.cur_tarinfo.size - self.cur_pos_in_fileobj)
            chunk = self.cur_fileobj.read(wanted)
            self.cur_pos_in_fileobj += len(chunk)
            if len(chunk) != 0:
                self.buffer += chunk
                return 1

            # An empty read is only legitimate once all data was delivered.
            if wanted != 0:
                raise IOError("end of file reached")
            blocks, remainder = divmod(self.cur_tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                self.buffer += "\0" * (BLOCKSIZE - remainder)
                blocks += 1
            self.cur_fileobj.close()
            self.offset += blocks * BLOCKSIZE
            self._finish_fileobj()
            self.status = self.BEGIN
            return 1

        if self.status == self.END:
            return None
        assert 0

    def _finish_fileobj(self):
        """Update some variables when done writing fileobj"""
        # Skip saving tarinfo information to save memory.  The
        # statements below are therefore never reached.
        return
        self.members.append(self.cur_tarinfo)
        self.membernames.append(self.cur_tarinfo.name)
        self.chunks.append(self.offset)

    def _add_final(self):
        """Pad the buffer with NULs up to a multiple of RECORDSIZE."""
        remainder = divmod(self.offset, RECORDSIZE)[1]
        if remainder > 0:
            self.buffer += "\0" * (RECORDSIZE - remainder)

    def close(self):
        """Mark the object as closed; it must not be closed already."""
        assert not self.closed
        self.closed = 1
|
54
by bescoto
Cache pwd and group files |
1672 |
|
1673 |
||
1674 |
def uid2uname(uid):
    """Return the user name for uid; raise KeyError if unknown.

    The pwd cache is filled by set_pwd_dict() on first use.
    """
    if uid_dict is None:
        set_pwd_dict()
    return uid_dict[uid]
|
1678 |
||
1679 |
def uname2uid(uname):
    """Return the uid for user name uname; raise KeyError if unknown.

    The pwd cache is filled by set_pwd_dict() on first use.
    """
    if uname_dict is None:
        set_pwd_dict()
    return uname_dict[uname]
|
1683 |
||
1684 |
def set_pwd_dict():
    """Fill the global caches uid_dict and uname_dict from pwd.getpwall().

    Must only be called while both caches are still None and the pwd
    module is available.
    """
    global uid_dict, uname_dict
    assert uid_dict is None and uname_dict is None and pwd
    uid_dict, uname_dict = {}, {}
    for record in pwd.getpwall():
        login, numeric_id = record[0], record[2]
        uid_dict[numeric_id] = login
        uname_dict[login] = numeric_id
|
1693 |
||
1694 |
def gid2gname(gid):
    """Return the group name for gid; raise KeyError if unknown.

    The grp cache is filled by set_grp_dict() on first use.
    """
    if gid_dict is None:
        set_grp_dict()
    return gid_dict[gid]
|
1698 |
||
1699 |
def gname2gid(gname):
    """Return the gid for group name gname; raise KeyError if unknown.

    The grp cache is filled by set_grp_dict() on first use.
    """
    if gname_dict is None:
        set_grp_dict()
    return gname_dict[gname]
|
1703 |
||
1704 |
def set_grp_dict():
    """Fill the global caches gid_dict and gname_dict from grp.getgrall().

    Must only be called while both caches are still None and the grp
    module is available.
    """
    global gid_dict, gname_dict
    assert gid_dict is None and gname_dict is None and grp
    gid_dict, gname_dict = {}, {}
    for record in grp.getgrall():
        group, numeric_id = record[0], record[2]
        gid_dict[numeric_id] = group
        gname_dict[group] = numeric_id
|
1711 |
gname_dict[gname] = gid |