2
#-------------------------------------------------------------------
5
# Module for reading and writing .tar and tar.gz files.
7
# Needs at least Python version 2.2.
9
# Please consult the html documentation in this distribution
10
# for further details on how to use tarfile.
12
#-------------------------------------------------------------------
13
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
14
# All rights reserved.
16
# Permission is hereby granted, free of charge, to any person
17
# obtaining a copy of this software and associated documentation
18
# files (the "Software"), to deal in the Software without
19
# restriction, including without limitation the rights to use,
20
# copy, modify, merge, publish, distribute, sublicense, and/or sell
21
# copies of the Software, and to permit persons to whom the
22
# Software is furnished to do so, subject to the following
25
# The above copyright notice and this permission notice shall be
26
# included in all copies or substantial portions of the Software.
28
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
30
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
32
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
33
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
35
# OTHER DEALINGS IN THE SOFTWARE.
"""Read from and write to tar format archives.
"""

# Module metadata.  The "$...$" strings are version-control keywords and
# are kept verbatim.
__version__ = "$Revision: 1.1 $"
# $Source: /sources/duplicity/duplicity/duplicity/tarfile.py,v $

# The author's name contains an a-umlaut; it is spelled as an escape so
# that the source file itself stays pure ASCII.
__author__  = "Lars Gust\xe4bel (lars@gustaebel.de)"
__date__    = "$Date: 2002/10/29 01:51:36 $"
__cvsid__   = "$Id: tarfile.py,v 1.1 2002/10/29 01:51:36 bescoto Exp $"
__credits__ = "Gustavo Niemeyer for his support, " \
              "Detlef Lannert for some early contributions"
66
# We won't need this anymore in Python 2.3
68
# We import the _tarfile extension, that contains
69
# some useful functions to handle devices and symlinks.
70
# We inject them into os module, as if we were under 2.3.
74
if _tarfile.mknod is None:
78
if _tarfile and not hasattr(os, "mknod"):
79
os.mknod = _tarfile.mknod
80
if _tarfile and not hasattr(os, "major"):
81
os.major = _tarfile.major
82
if _tarfile and not hasattr(os, "minor"):
83
os.minor = _tarfile.minor
84
if _tarfile and not hasattr(os, "makedev"):
85
os.makedev = _tarfile.makedev
86
if _tarfile and not hasattr(os, "lchown"):
87
os.lchown = _tarfile.lchown
89
# XXX remove for release (2.3)
90
if sys.version_info[:2] < (2,3):
94
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
BLOCKSIZE  = 512                # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME = 100               # maximal length of a filename
LENGTH_LINK = 100               # maximal length of a linkname

# Values of the one-character type field of a header block.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
119
#---------------------------------------------------------
121
#---------------------------------------------------------
122
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
123
SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
124
CONTTYPE, GNUTYPE_LONGNAME,
125
GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
128
REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
129
CONTTYPE, GNUTYPE_SPARSE) # represent regular files
131
#---------------------------------------------------------
132
# Bits used in the mode field, values in octal.
133
#---------------------------------------------------------
134
S_IFLNK = 0120000 # symbolic link
135
S_IFREG = 0100000 # regular file
136
S_IFBLK = 0060000 # block device
137
S_IFDIR = 0040000 # directory
138
S_IFCHR = 0020000 # character device
139
S_IFIFO = 0010000 # fifo
141
TSUID = 04000 # set UID on execution
142
TSGID = 02000 # set GID on execution
143
TSVTX = 01000 # reserved
145
TUREAD = 00400 # read by owner
146
TUWRITE = 00200 # write by owner
147
TUEXEC = 00100 # execute/search by owner
148
TGREAD = 00040 # read by group
149
TGWRITE = 00020 # write by group
150
TGEXEC = 00010 # execute/search by group
151
TOREAD = 00004 # read by other
152
TOWRITE = 00002 # write by other
153
TOEXEC = 00001 # execute/search by other
155
#---------------------------------------------------------
156
# Some useful functions
157
#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

       Everything from the first NUL character onwards is dropped; a
       buffer that contains no NUL is returned unchanged.
    """
    return s.split("\0", 1)[0]
163
def calc_chksum(buf):
    """Calculate the checksum for a member's header. It's a simple addition
       of all bytes, treating the chksum field as if filled with spaces.

       buf is a 512 byte long string buffer which holds the header.
       The chksum field occupies offsets 148..155 of that buffer and is
       skipped while summing.  Returns the checksum as an integer.
    """
    chk = 256                       # chksum field is treated as blanks,
                                    # so the initial value is 8 * ord(" ")
    for c in buf[:148]:             # sum up all bytes before chksum
        chk += ord(c)
    for c in buf[156:]:             # sum up all bytes after chksum
        chk += ord(c)
    return chk
174
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       The data is moved in chunks so that large members never have to
       be held in memory as a whole.  Raises IOError if src runs out of
       data before length bytes could be copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    # Chunk size only; it has no influence on what ends up in dst.
    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for b in range(blocks):
        buf = src.read(BUFSIZE)
        if len(buf) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached")
        dst.write(buf)
    return
208
(TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
211
(TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
214
(TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
217
"""Convert a file's mode to a string of the form
219
Used by TarFile.list()
222
for t in filemode_table:
224
if mode & t[0] == t[0]:
235
# Member names inside an archive always use forward slashes, so on
# platforms with a different separator the normalized path is converted.
if os.sep != "/":
    def normpath(path):
        """Normalize path and replace os.sep by forward slashes."""
        return os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
239
class TarError(Exception):
    """Internally used exception"""
    pass
243
#--------------------
245
#--------------------
246
def open(name, mode="r", fileobj=None):
    """Open (uncompressed) tar archive name for reading, writing
       or appending.

       This is a thin convenience wrapper: the three arguments are
       handed unchanged to the TarFile constructor and the new TarFile
       instance is returned.  Note that the function shadows the
       builtin open() inside this module.
    """
    return TarFile(name, mode, fileobj)
252
def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
253
"""Open gzip compressed tar archive name for reading or writing.
254
Appending is not allowed.
257
raise ValueError, "Appending to gzipped archive is not allowed"
259
pre, ext = os.path.splitext(gzname)
260
pre = os.path.basename(pre)
267
if "b" not in gzmode:
271
fileobj = __builtin__.file(gzname, gzmode)
272
t = TarFile(tarname, mode, gzip.GzipFile(tarname, gzmode,
273
compresslevel, fileobj))
275
t = TarFile(tarname, mode, gzip.open(gzname, gzmode, compresslevel))
279
def is_tarfile(name):
280
"""Return True if name points to a tar archive that we
281
are able to handle, else return False.
284
buftoinfo = TarFile.__dict__["_buftoinfo"]
286
buf = __builtin__.open(name, "rb").read(BLOCKSIZE)
289
except (ValueError, ImportError):
293
buf = gzip.open(name, "rb").read(BLOCKSIZE)
296
except (IOError, ValueError, ImportError):
304
"""Informational class which holds the details about an
305
archive member given by a tar header block.
306
TarInfo instances are returned by TarFile.getmember() and
307
TarFile.getmembers() and are usually created internally.
308
If you want to create a TarInfo instance from the outside,
309
you should use TarFile.gettarinfo() if the file already exists,
310
or you can instanciate the class yourself.
313
def __init__(self, name=""):
314
"""Construct a TarInfo instance. name is the optional name
318
self.name = name # member name (dirnames must end with '/')
319
self.mode = 0100666 # file permissions
320
self.uid = 0 # user id
321
self.gid = 0 # group id
322
self.size = 0 # file size
323
self.mtime = 0 # modification time
324
self.chksum = 0 # header checksum
325
self.type = REGTYPE # member type
326
self.linkname = "" # link name
327
self.uname = "user" # user name
328
self.gname = "group" # group name
330
self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
331
self.prefix = "" # prefix, holding information
334
self.offset = 0 # the tar header starts here
335
self.offset_data = 0 # the optional file's data starts here
337
def init_from_stat(self, statres):
338
"""Initialize various attributes from statobj (these are
339
returned by os.stat() and related functions. Return none on error"""
340
stmd = statres.st_mode
341
if stat.S_ISREG(stmd): type = REGTYPE
342
elif stat.S_ISDIR(stmd):
344
if self.name[-1:] != "/": self.name += "/"
345
elif stat.S_ISFIFO(stmd): type = FIFOTYPE
346
elif stat.S_ISLNK(stmd): type = SYMTYPE
347
elif stat.S_ISCHR(stmd): type = CHRTYPE
348
elif stat.S_ISBLK(stmd): type = BLKTYPE
351
# Fill the TarInfo instance with all
352
# information we can get.
353
self.mode = stat.S_IMODE(stmd)
354
self.uid = statres.st_uid
355
self.gid = statres.st_gid
356
self.size = statres.st_size
357
self.mtime = statres.st_mtime
361
self.uname = pwd.getpwuid(self.uid)[0]
366
self.gname = grp.getgrgid(self.gid)[0]
370
if type in (CHRTYPE, BLKTYPE):
371
if hasattr(os, "major") and hasattr(os, "minor"):
372
self.devmajor = os.major(statres.st_rdev)
373
self.devminor = os.minor(statres.st_rdev)
376
def set_arcname(self, name):
    """Set the name of the member in the archive. Backward
       slashes are converted to forward slashes, Absolute paths are
       turned to relative paths.
    """
    arcname = normpath(name)
    # A drive specification has no meaning inside an archive.
    drv, arcname = os.path.splitdrive(arcname)
    # Strip every leading slash so that the stored name is relative.
    while arcname[0:1] == "/":
        arcname = arcname[1:]
    self.name = arcname
388
"""Return a tar header block as a 512 byte string.
390
# The following code was contributed by Detlef Lannert.
392
for value, fieldsize in (
394
("%07o" % self.mode, 8),
395
("%07o" % self.uid, 8),
396
("%07o" % self.gid, 8),
397
("%011o" % self.size, 12),
398
("%011o" % self.mtime, 12),
401
(self.linkname, 100),
406
("%07o" % self.devmajor, 8),
407
("%07o" % self.devminor, 8),
411
parts.append(value + (fieldsize - l) * "\0")
414
chksum = calc_chksum(buf)
415
buf = buf[:148] + "%06o\0" % chksum + buf[155:]
416
buf += (512 - len(buf)) * "\0"
421
return self.type in REGULAR_TYPES
425
return self.type == DIRTYPE
427
return self.type == SYMTYPE
429
return self.type == LNKTYPE
431
return self.type == CHRTYPE
433
return self.type == BLKTYPE
435
return self.type == FIFOTYPE
437
return self.type == GNUTYPE_SPARSE
439
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
444
"""Class representing a TAR archive file on disk.
446
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
448
dereference = False # If true, add content of linked file to the
449
# tar file, else the link.
451
ignore_zeros = False # If true, skips empty or invalid blocks and
452
# continues processing.
454
errorlevel = 0 # If 0, fatal errors only appear in debug
455
# messages (if debug >= 0). If > 0, errors
456
# are passed to the caller as exceptions.
458
def __init__(self, name=None, mode="r", fileobj=None):
461
if len(mode) > 1 or mode not in "raw":
462
raise ValueError, "mode must be either 'r', 'a' or 'w', " \
465
self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
468
fileobj = __builtin__.file(self.name, self.mode)
471
if self.name is None and hasattr(fileobj, "name"):
472
self.name = fileobj.name
473
if hasattr(fileobj, "mode"):
474
self.mode = fileobj.mode
476
self.fileobj = fileobj
478
self.init_datastructures()
480
if self._mode == "a":
484
def init_datastructures(self):
    """Reset the bookkeeping attributes of the archive object.

       Sets the load flag, the current offset, the inode cache and the
       offset of the next header block back to their initial values.
    """
    # Init datastructures
    # The per-member caches below are deliberately left disabled.
    #self.members = []       # list of members as TarInfo instances
    #self.membernames = []   # names of members
    #self.chunks = [0]       # chunk cache
    self._loaded = 0         # flag if all members have been read
    self.offset = 0          # current position in the archive file
                             # (plain ints are promoted to long
                             # automatically once they grow too large)
    self.inodes = {}         # dictionary caching the inodes of
                             # archive members already added
    self.next_chunk = 0      # offset of next tarinfo, used when reading
496
"""Close the TarFile instance and do some cleanup.
499
if self._mode in "aw":
500
# fill up the end with zero-blocks
501
# (like option -b20 for tar does)
502
blocks, remainder = divmod(self.offset, RECORDSIZE)
504
self.fileobj.write("\0" * (RECORDSIZE - remainder))
506
if not self._extfileobj:
510
def throwaway_until(self, position):
    """Read data, throwing it away until we get to position

       The archive is advanced by reading instead of seeking.
       position must not lie before the current offset.  On return
       self.offset equals position.
    """
    # Chunk size only; it keeps a long skip from being read in one piece.
    bufsize = 16 * 1024
    bytes_to_read = position - self.offset
    assert bytes_to_read >= 0
    while bytes_to_read >= bufsize:
        self.fileobj.read(bufsize)
        bytes_to_read -= bufsize
    self.fileobj.read(bytes_to_read)
    self.offset = position
522
"""Return the next member from the archive.
523
Return None if the end is reached.
524
Can be used in a while statement, is used
525
for Iteration (see __iter__()) and internally.
528
raise ValueError, "I/O operation on closed file"
529
if self._mode not in "ra":
530
raise ValueError, "reading from a write-mode file"
532
# Read the next block.
533
# self.fileobj.seek(self.chunks[-1])
534
#self.fileobj.seek(self.next_chunk)
535
#self.offset = self.next_chunk
536
self.throwaway_until(self.next_chunk)
538
buf = self.fileobj.read(BLOCKSIZE)
542
tarinfo = self._buftoinfo(buf)
544
if self.ignore_zeros:
545
if buf.count("\0") == BLOCKSIZE:
549
self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
550
self.offset += BLOCKSIZE
556
# If the TarInfo instance contains a GNUTYPE longname or longlink
557
# statement, we must process this first.
558
if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
559
tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
561
if tarinfo.issparse():
562
assert 0, "Sparse file support turned off"
563
# Sparse files need some care,
564
# due to the possible extra headers.
565
tarinfo.offset = self.offset
566
self.offset += BLOCKSIZE
567
origsize = self._proc_sparse(tarinfo)
568
tarinfo.offset_data = self.offset
569
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
572
self.offset += blocks * BLOCKSIZE
573
tarinfo.size = origsize
575
tarinfo.offset = self.offset
576
self.offset += BLOCKSIZE
577
tarinfo.offset_data = self.offset
579
## Skip the following data blocks.
580
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
583
self.next_chunk = self.offset + (blocks * BLOCKSIZE)
584
else: self.next_chunk = self.offset
586
#self.members.append(tarinfo) These use too much memory
587
#self.membernames.append(tarinfo.name)
588
#self.chunks.append(self.offset)
591
def getmember(self, name):
592
"""Return a TarInfo instance for member name.
594
if name not in self.membernames and not self._loaded:
596
if name not in self.membernames:
597
raise KeyError, "filename `%s' not found in tar archive" % name
598
return self._getmember(name)
600
def getinfo(self, name):
    """Return a TarInfo instance for member name.
       This method will be deprecated in 0.6,
       use getmember() instead.

       Emits a DeprecationWarning and then delegates to getmember().
    """
    # XXX kick this out in 0.6
    import warnings
    warnings.warn("use getmember() instead", DeprecationWarning)
    return self.getmember(name)
610
def getmembers(self):
611
"""Return a list of all members in the archive
612
(as TarInfo instances).
614
if not self._loaded: # if we want to obtain a list of
615
self._load() # all members, we first have to
616
# scan the whole archive.
620
"""Return a list of names of all members in the
625
return self.membernames
627
def gettarinfo(self, name, arcname=None):
628
"""Create a TarInfo instance from an existing file.
629
Optional arcname defines the name under which the file
630
shall be stored in the archive.
632
# Now, fill the TarInfo instance with
633
# information specific for the file.
636
if arcname is None: tarinfo.set_arcname(name)
637
else: tarinfo.set_arcname(arcname)
639
# Use os.stat or os.lstat, depending on platform
640
# and if symlinks shall be resolved.
641
if hasattr(os, "lstat") and not self.dereference:
642
statres = os.lstat(name)
644
statres = os.stat(name)
646
if not tarinfo.init_from_stat(statres): return None
648
if tarinfo.type == REGTYPE:
649
inode = (statres.st_ino, statres.st_dev, statres.st_mtime)
650
if inode in self.inodes.keys() and not self.dereference:
651
# Is it a hardlink to an already
653
tarinfo.type = LNKTYPE
654
tarinfo.linkname = self.inodes[inode]
656
# The inode is added only if it's valid.
657
# For win32 it is always 0.
658
if inode[0]: self.inodes[inode] = tarinfo.name
659
elif tarinfo.type == SYMTYPE:
660
tarinfo.linkname = os.readlink(name)
665
def list(self, verbose=1):
666
"""Print a formatted listing of the archive's
671
print filemode(tarinfo.mode),
672
print tarinfo.uname + "/" + tarinfo.gname,
673
if tarinfo.ischr() or tarinfo.isblk():
674
print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
676
print "%10d" % tarinfo.size,
677
print "%d-%02d-%02d %02d:%02d:%02d" \
678
% time.gmtime(tarinfo.mtime)[:6],
684
print "->", tarinfo.linkname,
686
print "link to", tarinfo.linkname,
689
def add(self, name, arcname=None, recursive=1):
690
"""Add a file or a directory to the archive.
691
Directory addition is recursive by default.
694
raise ValueError, "I/O operation on closed file"
695
if self._mode == "r":
696
raise ValueError, "writing to a read-mode file"
701
# Skip if somebody tries to archive the archive...
702
if os.path.abspath(name) == os.path.abspath(self.name):
703
self._dbg(2, "tarfile: Skipped `%s'\n" % name)
706
# Special case: The user wants to add the current
712
for f in os.listdir("."):
713
self.add(f, os.path.join(arcname, f))
716
self._dbg(1, "%s\n" % name)
718
# Create a TarInfo instance from the file.
719
tarinfo = self.gettarinfo(name, arcname)
722
self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
725
# Append the tar header and data to the archive.
727
f = __builtin__.file(name, "rb")
728
self.addfile(tarinfo, fileobj = f)
731
if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
733
self.addfile(tarinfo)
736
self.addfile(tarinfo)
738
for f in os.listdir(name):
739
self.add(os.path.join(name, f), os.path.join(arcname, f))
741
def addfile(self, tarinfo, fileobj=None):
742
"""Add the content of fileobj to the tarfile.
743
The amount of bytes to read is determined by
744
the size attribute in the tarinfo instance.
747
raise ValueError, "I/O operation on closed file"
748
if self._mode == "r":
749
raise ValueError, "writing to a read-mode file"
751
# XXX What was this good for again?
753
# self.fileobj.seek(self.chunks[-1])
757
full_headers = self._get_full_headers(tarinfo)
758
self.fileobj.write(full_headers)
759
assert len(full_headers) % BLOCKSIZE == 0
760
self.offset += len(full_headers)
762
# If there's data to follow, append it.
763
if fileobj is not None:
764
copyfileobj(fileobj, self.fileobj, tarinfo.size)
765
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
767
self.fileobj.write("\0" * (BLOCKSIZE - remainder))
769
self.offset += blocks * BLOCKSIZE
771
#self.members.append(tarinfo) #These take up too much memory
772
#self.membernames.append(tarinfo.name)
773
#self.chunks.append(self.offset)
775
def _get_full_headers(self, tarinfo):
    """Return string containing headers around tarinfo, including gnulongs

       The result is the regular header block of tarinfo, preceded by a
       GNU longlink and/or longname member when a field is too long.
       Note that an over-long name or linkname of tarinfo is truncated
       in place.
    """
    buf = ""
    # Now we must check if the strings for filename
    # and linkname fit into the posix header.
    # (99 chars + "\0" for each)
    # If not, we must create GNU extension headers.
    # If both filename and linkname are too long,
    # the longlink is first to be written out.
    if len(tarinfo.linkname) >= LENGTH_LINK - 1:
        buf += self._return_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
    if len(tarinfo.name) >= LENGTH_NAME - 1:
        buf += self._return_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
        tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
    return buf + tarinfo.getheader()
793
# def untar(self, path):
794
# """Untar the whole archive to path.
797
# for tarinfo in self:
798
# if tarinfo.isdir():
799
# later.append(tarinfo)
800
# self.extract(tarinfo, path)
801
# for tarinfo in later:
802
# self._utime(tarinfo, os.path.join(path, tarinfo.name))
804
def extractfile(self, member):
805
"""Extract member from the archive and return a file-like
806
object. member may be a name or a TarInfo instance.
809
raise ValueError, "I/O operation on closed file"
810
if self._mode != "r":
811
raise ValueError, "reading from a write-mode file"
813
if isinstance(member, TarInfo):
816
tarinfo = self.getmember(member)
818
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
819
return _FileObject(self, tarinfo)
820
elif tarinfo.islnk() or tarinfo.issym():
821
return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
825
def extract(self, member, path=""):
826
"""Extract member from the archive and write it to
827
current working directory using its full pathname.
828
If optional path is given, it is attached before the
830
member may be a name or a TarInfo instance.
833
raise ValueError, "I/O operation on closed file"
834
if self._mode != "r":
835
raise ValueError, "reading from a write-mode file"
837
if isinstance(member, TarInfo):
840
tarinfo = self.getmember(member)
842
self._dbg(1, tarinfo.name)
844
self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
845
except EnvironmentError, e:
846
if self.errorlevel > 0:
849
self._dbg(1, "\ntarfile: %s `%s'" % (e.strerror, e.filename))
851
if self.errorlevel > 1:
854
self._dbg(1, "\ntarfile: %s" % e)
857
def _extract_member(self, tarinfo, targetpath):
858
"""Extract the TarInfo instance tarinfo to a physical
859
file called targetpath.
861
# Fetch the TarInfo instance for the given name
862
# and build the destination pathname, replacing
863
# forward slashes to platform specific separators.
864
if targetpath[-1:] == "/":
865
targetpath = targetpath[:-1]
866
targetpath = os.path.normpath(targetpath)
868
# Create all upper directories.
869
upperdirs = os.path.dirname(targetpath)
870
if upperdirs and not os.path.exists(upperdirs):
875
ti.mtime = tarinfo.mtime
878
ti.uname = tarinfo.uname
879
ti.gname = tarinfo.gname
880
for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
881
ti.name = os.path.join(ti.name, d)
882
self._extract_member(ti, ti.name)
885
self._makefile(tarinfo, targetpath)
886
elif tarinfo.isdir():
887
self._makedir(tarinfo, targetpath)
888
elif tarinfo.isfifo():
889
self._makefifo(tarinfo, targetpath)
890
elif tarinfo.ischr() or tarinfo.isblk():
891
self._makedev(tarinfo, targetpath)
892
elif tarinfo.islnk() or tarinfo.issym():
893
self._makelink(tarinfo, targetpath)
895
self._makefile(tarinfo, targetpath)
896
if tarinfo.type not in SUPPORTED_TYPES:
897
self._dbg(1, "\ntarfile: Unknown file type '%s', " \
898
"extracted as regular file." % tarinfo.type)
900
if not tarinfo.issym():
901
self._chown(tarinfo, targetpath)
902
self._chmod(tarinfo, targetpath)
903
if not tarinfo.isdir():
904
self._utime(tarinfo, targetpath)
906
def _makedir(self, tarinfo, targetpath):
907
"""Make a directory called targetpath out of tarinfo.
911
except EnvironmentError, e:
912
if e.errno != errno.EEXIST:
915
def _makefile(self, tarinfo, targetpath):
916
"""Make a file called targetpath out of tarinfo.
918
source = self.extractfile(tarinfo)
919
target = __builtin__.file(targetpath, "wb")
920
copyfileobj(source, target)
924
def _makefifo(self, tarinfo, targetpath):
925
"""Make a fifo called targetpath out of tarinfo.
927
if hasattr(os, "mkfifo"):
928
os.mkfifo(targetpath)
930
raise TarError, "Fifo not supported by system"
932
def _makedev(self, tarinfo, targetpath):
933
"""Make a character or block device called targetpath out of tarinfo.
935
if not hasattr(os, "mknod"):
936
raise TarError, "Special devices not supported by system"
944
# This if statement should go away when python-2.3a0-devicemacros
946
if hasattr(os, "makedev"):
947
os.mknod(targetpath, mode,
948
os.makedev(tarinfo.devmajor, tarinfo.devminor))
950
os.mknod(targetpath, mode,
951
tarinfo.devmajor, tarinfo.devminor)
953
def _makelink(self, tarinfo, targetpath):
954
"""Make a (symbolic) link called targetpath out of tarinfo.
955
If it cannot be made (due to platform or failure), we try
956
to make a copy of the referenced file instead of a link.
958
linkpath = tarinfo.linkname
959
self._dbg(1, " -> %s" % linkpath)
962
os.symlink(linkpath, targetpath)
964
linkpath = os.path.join(os.path.dirname(targetpath),
966
os.link(linkpath, targetpath)
967
except AttributeError:
968
linkpath = os.path.join(os.path.dirname(tarinfo.name),
970
linkpath = normpath(linkpath)
972
self._extract_member(self.getmember(linkpath), targetpath)
973
except (IOError, OSError, KeyError), e:
974
linkpath = os.path.normpath(linkpath)
976
shutil.copy2(linkpath, targetpath)
977
except EnvironmentError, e:
978
raise TarError, "Link could not be created"
980
def _chown(self, tarinfo, targetpath):
981
"""Set owner of targetpath according to tarinfo.
983
if pwd and os.geteuid() == 0:
984
# We have to be root to do so.
986
g = grp.getgrnam(tarinfo.gname)[2]
989
g = grp.getgrgid(tarinfo.gid)[2]
993
u = pwd.getpwnam(tarinfo.uname)[2]
996
u = pwd.getpwuid(tarinfo.uid)[2]
1000
if tarinfo.issym() and hasattr(os, "lchown"):
1001
os.lchown(targetpath, u, g)
1003
os.chown(targetpath, u, g)
1004
except EnvironmentError, e:
1005
self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
1006
% (e.strerror, e.filename))
1008
def _chmod(self, tarinfo, targetpath):
1009
"""Set file permissions of targetpath according to tarinfo.
1012
os.chmod(targetpath, tarinfo.mode)
1013
except EnvironmentError, e:
1014
self._dbg(2, "\ntarfile: (chmod failed), %s `%s'"
1015
% (e.strerror, e.filename))
1017
def _utime(self, tarinfo, targetpath):
1018
"""Set modification time of targetpath according to tarinfo.
1021
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1022
except EnvironmentError, e:
1023
self._dbg(2, "\ntarfile: (utime failed), %s `%s'"
1024
% (e.strerror, e.filename))
1026
def _getmember(self, name, tarinfo=None):
1027
"""Find an archive member by name from bottom to top.
1028
If tarinfo is given, it is used as the starting point.
1031
end = len(self.members)
1033
end = self.members.index(tarinfo)
1035
for i in xrange(end - 1, -1, -1):
1036
if name == self.membernames[i]:
1037
return self.members[i]
1040
"""Read through the entire archive file and look for readable
1044
tarinfo = self.next()
1051
"""Provide an iterator object.
1054
return iter(self.members)
1056
return TarIter(self)
1058
def _buftoinfo(self, buf):
1059
"""Transform a 512 byte block to a TarInfo instance.
1062
tarinfo.name = nts(buf[0:100])
1063
tarinfo.mode = int(buf[100:108], 8)
1064
tarinfo.uid = int(buf[108:116],8)
1065
tarinfo.gid = int(buf[116:124],8)
1066
tarinfo.size = long(buf[124:136], 8)
1067
tarinfo.mtime = long(buf[136:148], 8)
1068
tarinfo.chksum = int(buf[148:156], 8)
1069
tarinfo.type = buf[156:157]
1070
tarinfo.linkname = nts(buf[157:257])
1071
tarinfo.uname = nts(buf[265:297])
1072
tarinfo.gname = nts(buf[297:329])
1074
tarinfo.devmajor = int(buf[329:337], 8)
1075
tarinfo.devminor = int(buf[337:345], 8)
1077
tarinfo.devmajor = tarinfo.devmajor = 0
1078
tarinfo.prefix = buf[345:500]
1079
if tarinfo.chksum != calc_chksum(buf):
1080
self._dbg(1, "tarfile: Bad Checksum\n")
1083
def _proc_gnulong(self, tarinfo, type):
1084
"""Evaluate the two blocks that hold a GNU longname
1089
buf = self.fileobj.read(BLOCKSIZE)
1092
self.offset += BLOCKSIZE
1093
if type == GNUTYPE_LONGNAME:
1095
if type == GNUTYPE_LONGLINK:
1098
buf = self.fileobj.read(BLOCKSIZE)
1101
tarinfo = self._buftoinfo(buf)
1102
if name is not None:
1104
if linkname is not None:
1105
tarinfo.linkname = linkname
1106
self.offset += BLOCKSIZE
1109
def _return_gnulong(self, name, type):
1110
"""Insert a GNU longname/longlink member into the archive.
1111
It consists of a common tar header, with the length
1112
of the longname as size, followed by a data block,
1113
which contains the longname as a null terminated string.
1116
tarinfo.name = "././@LongLink"
1119
tarinfo.size = len(name)
1121
return "%s%s%s" % (tarinfo.getheader(), name,
1122
"\0" * (512 - len(name)))
1124
def _proc_sparse(self, tarinfo):
1125
"""Analyze a GNU sparse header plus extra headers.
1127
buf = tarinfo.getheader()
1133
# There are 4 possible sparse structs in the
1136
offset = int(buf[pos:pos + 12], 8)
1137
numbytes = int(buf[pos + 12:pos + 24], 8)
1138
if offset > lastpos:
1139
sp.append(_hole(lastpos, offset - lastpos))
1140
sp.append(_data(offset, numbytes, realpos))
1142
lastpos = offset + numbytes
1145
isextended = ord(buf[482])
1146
origsize = int(buf[483:495], 8)
1148
# If the isextended flag is given,
1149
# there are extra headers to process.
1150
while isextended == 1:
1151
buf = self.fileobj.read(BLOCKSIZE)
1152
self.offset += BLOCKSIZE
1155
offset = int(buf[pos:pos + 12], 8)
1156
numbytes = int(buf[pos + 12:pos + 24], 8)
1157
if offset > lastpos:
1158
sp.append(_hole(lastpos, offset - lastpos))
1159
sp.append(_data(offset, numbytes, realpos))
1161
lastpos = offset + numbytes
1163
isextended = ord(buf[504])
1166
if lastpos < origsize:
1167
sp.append(_hole(lastpos, origsize - lastpos))
1172
def _dbg(self, level, msg):
    """Write msg to stdout if level does not exceed self.debug.

       Nothing is appended to msg; callers supply their own newlines.
    """
    if level <= self.debug:
        sys.stdout.write(msg)
1180
for tarinfo in TarFile(...):
1184
def __init__(self, tarfile):
1185
"""Construct a TarIter instance.
1187
self.tarfile = tarfile
1189
"""Return iterator object.
1193
"""Return the next item using TarFile's next() method.
1194
When all members have been read, set TarFile as _loaded.
1196
tarinfo = self.tarfile.next()
1198
self.tarfile._loaded = 1
1203
# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section covers the size bytes of the expanded file that start
       at offset; "pos in section" tests whether a position falls
       inside that half-open range.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        return self.offset <= offset < self.offset + self.size

class _data(_section):
    """Represent a data section in a sparse file.

       realpos is stored next to the logical offset and size.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos

class _hole(_section):
    """Represent a hole section in a sparse file.
    """
    pass
1225
class _ringbuffer(list):
1226
"""Ringbuffer class which increases performance
1227
over a regular list.
1231
def find(self, offset):
1238
if idx == len(self):
1247
"""File-like object for reading an archive member,
1248
is returned by TarFile.extractfile().
1249
Support for sparse files included.
1252
def __init__(self, tarfile, tarinfo):
1253
self.tarfile = tarfile
1254
self.fileobj = tarfile.fileobj
1255
self.name = tarinfo.name
1258
self.offset = tarinfo.offset_data
1259
self.size = tarinfo.size
1261
self.linebuffer = ""
1262
if tarinfo.issparse():
1263
self.sparse = tarinfo.sparse
1264
self.read = self._readsparse
1266
self.read = self._readnormal
1268
def readline(self, size=-1):
1269
"""Read a line with approx. size.
1270
If size is negative, read a whole line.
1271
readline() and read() must not be mixed up (!).
1276
nl = self.linebuffer.find("\n")
1280
size -= len(self.linebuffer)
1282
buf = self.read(min(size, 100))
1285
self.linebuffer += buf
1289
nl = self.linebuffer.find("\n")
1292
self.linebuffer = ""
1294
buf = self.linebuffer[:nl]
1295
self.linebuffer = self.linebuffer[nl + 1:]
1296
while buf[-1:] == "\r":
1300
def readlines(self):
1301
"""Return a list with all (following) lines.
1305
line = self.readline()
1310
def _readnormal(self, size=None):
    """Read operation for regular files.

       Returns at most size bytes of the member, never reading past
       its end.  If size is None or negative, everything up to the end
       of the member is read.  Raises ValueError on a closed file.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file")
    #self.fileobj.seek(self.offset + self.pos)
    bytesleft = self.size - self.pos
    # A negative size means "read the rest", as for ordinary files.
    # Passing it on would move self.pos backwards and make the
    # underlying read() run past the end of this member.
    if size is None or size < 0:
        bytestoread = bytesleft
    else:
        bytestoread = min(size, bytesleft)
    self.pos += bytestoread
    # Keep the archive's own offset in step with the data consumed here.
    self.tarfile.offset += bytestoread
    return self.fileobj.read(bytestoread)
1325
def _readsparse(self, size=None):
1326
"""Read operation for sparse files.
1329
raise ValueError, "I/O operation on closed file"
1332
size = self.size - self.pos
1336
buf = self._readsparsesection(size)
1343
def _readsparsesection(self, size):
1344
"""Read a single section of a sparse file.
1346
section = self.sparse.find(self.pos)
1351
toread = min(size, section.offset + section.size - self.pos)
1352
if isinstance(section, _data):
1353
realpos = section.realpos + self.pos - section.offset
1355
self.fileobj.seek(self.offset + realpos)
1356
return self.fileobj.read(toread)
1359
return "\0" * toread
1362
"""Return the current file position.
1366
def seek(self, pos, whence=0):
1367
"""Seek to a position in the file.
1369
self.linebuffer = ""
1371
self.pos = min(max(pos, 0), self.size)
1374
self.pos = max(self.pos + pos, 0)
1376
self.pos = min(self.pos + pos, self.size)
1378
self.pos = max(min(self.size + pos, self.size), 0)
1381
"""Close the file object.
1386
#---------------------------------------------
1387
# zipfile compatible TarFile class
1389
# for details consult zipfile's documentation
1390
#---------------------------------------------
1393
# Compression constants accepted by TarFileCompat; the values mirror
# the corresponding zipfile constants.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
1395
class TarFileCompat:
1396
"""TarFile class compatible with standard module zipfile's
1399
def __init__(self, file, mode="r", compression=TAR_PLAIN):
1400
if compression == TAR_PLAIN:
1401
self.tarfile = open(file, mode)
1402
elif compression == TAR_GZIPPED:
1403
self.tarfile = gzopen(file, mode)
1405
raise ValueError, "unknown compression constant"
1406
if mode[0:1] == "r":
1408
members = self.tarfile.getmembers()
1409
for i in range(len(members)):
1412
m.file_size = m.size
1413
m.date_time = time.gmtime(m.mtime)[:6]
1415
return map(lambda m: m.name, self.infolist())
1417
return filter(lambda m: m.type in REGULAR_TYPES,
1418
self.tarfile.getmembers())
1423
def getinfo(self, name):
    """Return the archive member called name.

    The lookup is delegated to getmember() of the wrapped tarfile.
    """
    member = self.tarfile.getmember(name)
    return member
1425
def read(self, name):
    """Return the complete contents of the member called name.

    The member is looked up with getmember(), opened with
    extractfile() and read in one go.
    """
    archive = self.tarfile
    member = archive.getmember(name)
    return archive.extractfile(member).read()
1427
def write(self, filename, arcname=None, compress_type=None):
    """Add the file filename to the archive, stored as arcname.

    compress_type is accepted but not used by this method.
    """
    archive = self.tarfile
    archive.add(filename, arcname)
1429
def writestr(self, zinfo, bytes):
1431
zinfo.name = zinfo.filename
1432
zinfo.size = zinfo.file_size
1433
zinfo.mtime = calendar.timegm(zinfo.date_time)
1434
self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))
1436
self.tarfile.close()
1437
#class TarFileCompat
1439
if __name__ == "__main__":
1440
# a "light-weight" implementation of GNUtar ;-)
1442
Usage: %s [options] [files]
1444
-h display this help message
1446
-r append to an existing archive
1448
-t list archive contents
1450
use archive FILENAME, else STDOUT (-c)
1451
-z filter archive through gzip
1453
with opt -x: extract to directory DIRNAME
1454
with opt -c, -r: put files to archive under DIRNAME
1458
wildcards *, ?, [seq], [!seq] are accepted.
1463
opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
1464
except getopt.GetoptError, e:
1476
if o == "-t": mode = "l" # list archive
1477
if o == "-c": mode = "w" # write to archive
1478
if o == "-r": mode = "a" # append to archive
1479
if o == "-x": mode = "r" # extract from archive
1480
if o == "-f": file = a # specify filename else use stdout
1481
if o == "-C": dir = a # change to dir
1482
if o == "-z": comp = 1 # filter through gzip
1483
if o == "-v": debug = 2 # verbose mode
1484
if o == "-q": debug = 0 # quiet mode
1485
if o == "-h": # help message
1498
if not file or file == "-":
1503
# If under Win32, set stdout to binary.
1506
msvcrt.setmode(1, os.O_BINARY)
1509
tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
1512
tarfile = func(file, "r")
1514
tarfile = func(file, mode)
1516
tarfile.debug = debug
1521
for tarinfo in tarfile:
1522
tarfile.extract(tarinfo, dir)
1527
files = glob.glob(arg)
1533
class TarFromIterator(TarFile):
1534
"""Readable tarfile-like object generated from iterator
1536
# These various status numbers indicate what we are in the process
1537
# of doing in the tarfile.
1538
BEGIN = 0 # next step is to read tarinfo, write new header
1539
MIDDLE_OF_FILE = 1 # in process of writing file data
1540
END = 2 # end of data
1542
# Buffer is added to in multiples of following
1543
BUFFER_ADDLEN = 64 * 1024
1545
def __init__(self, pair_iter):
1546
"""Construct a TarFromIterator instance. pair_iter is an
1547
iterator of (TarInfo, fileobj) objects, which fileobj should
1548
be a file-like object opened for reading, or None. The
1549
fileobjs will be closed before the next element in the
1555
self.pair_iter = pair_iter
1557
self.init_datastructures()
1558
self.status = self.BEGIN
1559
self.cur_tarinfo, self.cur_fileobj = None, None
1560
self.cur_pos_in_fileobj = 0
1562
# holds current position as seen by reading client. This is
1563
# distinct from self.offset.
1564
self.tar_iter_offset = 0
1566
def seek(self, offset):
1567
"""Seek to current position. Just read and discard some amount"""
1568
if offset < self.tar_iter_offset:
1569
raise TarError("Seeks in TarFromIterator must go forwards,\n"
1570
"Instead asking for %s from %s" %
1571
(offset, self.tar_iter_offset))
1572
while offset - self.tar_iter_offset >= self.BUFFER_ADDLEN:
1573
buf = self.read(self.BUFFER_ADDLEN)
1574
if not buf: return # eof
1575
self.read(offset - self.tar_iter_offset)
1577
def read(self, length = -1):
1578
"""Return next length bytes, or everything if length < 0"""
1581
if not self._addtobuffer(): break
1582
result = self.buffer
1585
while len(self.buffer) < length:
1586
if not self._addtobuffer(): break
1587
# It's possible that length > len(self.buffer)
1588
result = self.buffer[:length]
1589
self.buffer = self.buffer[length:]
1590
self.tar_iter_offset += len(result)
1593
def _addtobuffer(self):
1594
"""Write more data into the buffer. Return None if at end"""
1595
if self.status == self.BEGIN:
1596
# Just write headers into buffer
1597
try: self.cur_tarinfo, self.cur_fileobj = self.pair_iter.next()
1598
except StopIteration:
1600
self.status = self.END
1603
# Zero out tarinfo sizes for various file types
1604
if self.cur_tarinfo.type in (LNKTYPE, SYMTYPE,
1605
FIFOTYPE, CHRTYPE, BLKTYPE):
1606
self.cur_tarinfo.size = 0l
1608
full_headers = self._get_full_headers(self.cur_tarinfo)
1609
self.buffer += full_headers
1610
self.offset += len(full_headers)
1611
assert len(full_headers) % BLOCKSIZE == 0
1613
if self.cur_fileobj is None: # no data with header
1614
self.status = self.BEGIN
1615
self._finish_fileobj()
1617
self.status = self.MIDDLE_OF_FILE
1618
self.cur_pos_in_fileobj = 0
1620
elif self.status == self.MIDDLE_OF_FILE:
1621
# Add next chunk of self.cur_fileobj to self.buffer
1622
l = min(self.BUFFER_ADDLEN,
1623
self.cur_tarinfo.size - self.cur_pos_in_fileobj)
1624
s = self.cur_fileobj.read(l)
1625
self.cur_pos_in_fileobj += len(s)
1627
if l != 0: raise IOError, "end of file reached"
1628
blocks, remainder = divmod(self.cur_tarinfo.size, BLOCKSIZE)
1630
self.buffer += "\0" * (BLOCKSIZE - remainder)
1632
self.cur_fileobj.close()
1633
self.offset += blocks * BLOCKSIZE
1634
self._finish_fileobj()
1635
self.status = self.BEGIN
1636
else: self.buffer += s
1638
elif self.status == self.END: return None
1641
def _finish_fileobj(self):
1642
"""Update some variables when done writing fileobj"""
1643
return # Skip saving tarinfo information to save memory
1644
self.members.append(self.cur_tarinfo)
1645
self.membernames.append(self.cur_tarinfo.name)
1646
self.chunks.append(self.offset)
1648
def _add_final(self):
    """Pad the buffer with NUL bytes up to a RECORDSIZE boundary.

    Nothing is appended when self.offset is already a multiple of
    RECORDSIZE.
    """
    # NOTE(review): no separate end-of-archive blocks are written
    # here, only the padding -- confirm that this is intended.
    leftover = self.offset % RECORDSIZE
    if leftover > 0:
        padding = "\0" * (RECORDSIZE - leftover)
        self.buffer += padding
1654
"""Close file obj"""
1655
assert not self.closed