2
#-------------------------------------------------------------------
5
# Module for reading and writing .tar and tar.gz files.
7
# Needs at least Python version 2.2.
9
# Please consult the html documentation in this distribution
10
# for further details on how to use tarfile.
12
#-------------------------------------------------------------------
13
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
14
# All rights reserved.
16
# Permission is hereby granted, free of charge, to any person
17
# obtaining a copy of this software and associated documentation
18
# files (the "Software"), to deal in the Software without
19
# restriction, including without limitation the rights to use,
20
# copy, modify, merge, publish, distribute, sublicense, and/or sell
21
# copies of the Software, and to permit persons to whom the
22
# Software is furnished to do so, subject to the following
# conditions:
25
# The above copyright notice and this permission notice shall be
26
# included in all copies or substantial portions of the Software.
28
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
30
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
32
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
33
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
35
# OTHER DEALINGS IN THE SOFTWARE.
37
"""Read from and write to tar format archives.
40
# Module metadata.  The "$Keyword: ... $" strings look like CVS/RCS keyword
# expansions and are kept verbatim.
__version__ = "$Revision: 1.1 $"
# $Source: /sources/duplicity/duplicity/duplicity/Attic/tarfile.py.old,v $

# "\xe4" is the Latin-1 a-umlaut; it is written as an escape so that the
# source file itself stays pure ASCII.
__author__  = "Lars Gust\xe4bel (lars@gustaebel.de)"
__date__    = "$Date: 2002/10/29 01:51:36 $"
__cvsid__   = "$Id: tarfile.py.old,v 1.1 2002/10/29 01:51:36 bescoto Exp $"
# Adjacent string literals are concatenated; the backslash must be the very
# last character on its line for the continuation to work.
__credits__ = "Gustavo Niemeyer for his support, " \
              "Detlef Lannert for some early contributions"
66
# We won't need this anymore in Python 2.3
68
# We import the _tarfile extension, that contains
69
# some useful functions to handle devices and symlinks.
70
# We inject them into os module, as if we were under 2.3.
74
if _tarfile.mknod is None:
78
if _tarfile and not hasattr(os, "mknod"):
79
os.mknod = _tarfile.mknod
80
if _tarfile and not hasattr(os, "major"):
81
os.major = _tarfile.major
82
if _tarfile and not hasattr(os, "minor"):
83
os.minor = _tarfile.minor
84
if _tarfile and not hasattr(os, "makedev"):
85
os.makedev = _tarfile.makedev
86
if _tarfile and not hasattr(os, "lchown"):
87
os.lchown = _tarfile.lchown
89
# XXX remove for release (2.3)
90
if sys.version_info[:2] < (2,3):
94
#---------------------------------------------------------
96
#---------------------------------------------------------
97
# Sizes and identification strings of the tar format.
BLOCKSIZE  = 512                # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

# Maximum field widths inside a header block.
LENGTH_NAME = 100               # maximal length of a filename
LENGTH_LINK = 100               # maximal length of a linkname

# One-character type flags stored in the header's type field.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# GNU tar extensions to the type flag.
GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
119
#---------------------------------------------------------
121
#---------------------------------------------------------
122
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
123
SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
124
CONTTYPE, GNUTYPE_LONGNAME,
125
GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
128
REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
129
CONTTYPE, GNUTYPE_SPARSE) # represent regular files
131
#---------------------------------------------------------
132
# Bits used in the mode field, values in octal.
133
#---------------------------------------------------------
134
S_IFLNK = 0120000 # symbolic link
135
S_IFREG = 0100000 # regular file
136
S_IFBLK = 0060000 # block device
137
S_IFDIR = 0040000 # directory
138
S_IFCHR = 0020000 # character device
139
S_IFIFO = 0010000 # fifo
141
TSUID = 04000 # set UID on execution
142
TSGID = 02000 # set GID on execution
143
TSVTX = 01000 # reserved
145
TUREAD = 00400 # read by owner
146
TUWRITE = 00200 # write by owner
147
TUEXEC = 00100 # execute/search by owner
148
TGREAD = 00040 # read by group
149
TGWRITE = 00020 # write by group
150
TGEXEC = 00010 # execute/search by group
151
TOREAD = 00004 # read by other
152
TOWRITE = 00002 # write by other
153
TOEXEC = 00001 # execute/search by other
155
#---------------------------------------------------------
156
# Some useful functions
157
#---------------------------------------------------------
159
"""Convert a null-terminated string buffer to a python string.
161
return s.split("\0", 1)[0]
163
def calc_chksum(buf):
164
"""Calculate the checksum for a member's header. It's a simple addition
165
of all bytes, treating the chksum field as if filled with spaces.
166
buf is a 512 byte long string buffer which holds the header.
168
chk = 256 # chksum field is treated as blanks,
169
# so the initial value is 8 * ord(" ")
170
for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
171
for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
174
def copyfileobj(src, dst, length=None):
175
"""Copy length bytes from fileobj src to fileobj dst.
176
If length is None, copy the entire content.
181
shutil.copyfileobj(src, dst)
185
blocks, remainder = divmod(length, BUFSIZE)
186
for b in range(blocks):
187
buf = src.read(BUFSIZE)
188
if len(buf) < BUFSIZE:
189
raise IOError, "end of file reached"
193
buf = src.read(remainder)
194
if len(buf) < remainder:
195
raise IOError, "end of file reached"
208
(TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
211
(TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
214
(TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
217
"""Convert a file's mode to a string of the form
219
Used by TarFile.list()
222
for t in filemode_table:
224
if mode & t[0] == t[0]:
235
normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
237
normpath = os.path.normpath
239
class TarError(Exception):
    """Internally used exception.

    Raised by this module when a member cannot be created on the
    running system, e.g. "Fifo not supported by system" or
    "Link could not be created".
    """
243
#--------------------
245
#--------------------
246
def open(name, mode="r", fileobj=None):
247
"""Open (uncompressed) tar archive name for reading, writing
250
return TarFile(name, mode, fileobj)
252
def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
253
"""Open gzip compressed tar archive name for reading or writing.
254
Appending is not allowed.
257
raise ValueError, "Appending to gzipped archive is not allowed"
259
pre, ext = os.path.splitext(gzname)
260
pre = os.path.basename(pre)
267
if "b" not in gzmode:
271
fileobj = __builtin__.file(gzname, gzmode)
272
t = TarFile(tarname, mode, gzip.GzipFile(tarname, gzmode,
273
compresslevel, fileobj))
275
t = TarFile(tarname, mode, gzip.open(gzname, gzmode, compresslevel))
279
def is_tarfile(name):
280
"""Return True if name points to a tar archive that we
281
are able to handle, else return False.
284
buftoinfo = TarFile.__dict__["_buftoinfo"]
286
buf = __builtin__.open(name, "rb").read(BLOCKSIZE)
289
except (ValueError, ImportError):
293
buf = gzip.open(name, "rb").read(BLOCKSIZE)
296
except (IOError, ValueError, ImportError):
304
"""Informational class which holds the details about an
305
archive member given by a tar header block.
306
TarInfo instances are returned by TarFile.getmember() and
307
TarFile.getmembers() and are usually created internally.
308
If you want to create a TarInfo instance from the outside,
309
you should use TarFile.gettarinfo() if the file already exists,
310
or you can instantiate the class yourself.
313
def __init__(self, name=""):
314
"""Construct a TarInfo instance. name is the optional name
318
self.name = name # member name (dirnames must end with '/')
319
self.mode = 0100666 # file permissions
320
self.uid = 0 # user id
321
self.gid = 0 # group id
322
self.size = 0 # file size
323
self.mtime = 0 # modification time
324
self.chksum = 0 # header checksum
325
self.type = REGTYPE # member type
326
self.linkname = "" # link name
327
self.uname = "user" # user name
328
self.gname = "group" # group name
330
self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
331
self.prefix = "" # prefix, holding information
334
self.offset = 0 # the tar header starts here
335
self.offset_data = 0 # the optional file's data starts here
338
"""Return a tar header block as a 512 byte string.
340
# The following code was contributed by Detlef Lannert.
342
for value, fieldsize in (
344
("%07o" % self.mode, 8),
345
("%07o" % self.uid, 8),
346
("%07o" % self.gid, 8),
347
("%011o" % self.size, 12),
348
("%011o" % self.mtime, 12),
351
(self.linkname, 100),
356
("%07o" % self.devmajor, 8),
357
("%07o" % self.devminor, 8),
361
parts.append(value + (fieldsize - l) * "\0")
364
chksum = calc_chksum(buf)
365
buf = buf[:148] + "%06o\0" % chksum + buf[155:]
366
buf += (512 - len(buf)) * "\0"
371
return self.type in REGULAR_TYPES
375
return self.type == DIRTYPE
377
return self.type == SYMTYPE
379
return self.type == LNKTYPE
381
return self.type == CHRTYPE
383
return self.type == BLKTYPE
385
return self.type == FIFOTYPE
387
return self.type == GNUTYPE_SPARSE
389
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
394
"""Class representing a TAR archive file on disk.
396
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
398
dereference = False # If true, add content of linked file to the
399
# tar file, else the link.
401
ignore_zeros = False # If true, skips empty or invalid blocks and
402
# continues processing.
404
errorlevel = 0 # If 0, fatal errors only appear in debug
405
# messages (if debug >= 0). If > 0, errors
406
# are passed to the caller as exceptions.
408
def __init__(self, name=None, mode="r", fileobj=None):
411
if len(mode) > 1 or mode not in "raw":
412
raise ValueError, "mode must be either 'r', 'a' or 'w', " \
415
self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
418
fileobj = __builtin__.file(self.name, self.mode)
421
if self.name is None and hasattr(fileobj, "name"):
422
self.name = fileobj.name
423
if hasattr(fileobj, "mode"):
424
self.mode = fileobj.mode
426
self.fileobj = fileobj
428
# Init datastructures
429
self.members = [] # list of members as TarInfo instances
430
self.membernames = [] # names of members
431
self.chunks = [0] # chunk cache
432
self._loaded = 0 # flag if all members have been read
433
self.offset = 0l # current position in the archive file
434
self.inodes = {} # dictionary caching the inodes of
435
# archive members already added
436
if self._mode == "a":
441
"""Close the TarFile instance and do some cleanup.
444
if self._mode in "aw":
445
# fill up the end with zero-blocks
446
# (like option -b20 for tar does)
447
blocks, remainder = divmod(self.offset, RECORDSIZE)
449
self.fileobj.write("\0" * (RECORDSIZE - remainder))
451
if not self._extfileobj:
456
"""Return the next member from the archive.
457
Return None if the end is reached.
458
Can be used in a while statement, is used
459
for Iteration (see __iter__()) and internally.
462
raise ValueError, "I/O operation on closed file"
463
if self._mode not in "ra":
464
raise ValueError, "reading from a write-mode file"
466
# Read the next block.
467
self.fileobj.seek(self.chunks[-1])
469
buf = self.fileobj.read(BLOCKSIZE)
473
tarinfo = self._buftoinfo(buf)
475
if self.ignore_zeros:
476
if buf.count("\0") == BLOCKSIZE:
480
self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
481
self.offset += BLOCKSIZE
487
# If the TarInfo instance contains a GNUTYPE longname or longlink
488
# statement, we must process this first.
489
if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
490
tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
492
if tarinfo.issparse():
493
# Sparse files need some care,
494
# due to the possible extra headers.
495
tarinfo.offset = self.offset
496
self.offset += BLOCKSIZE
497
origsize = self._proc_sparse(tarinfo)
498
tarinfo.offset_data = self.offset
499
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
502
self.offset += blocks * BLOCKSIZE
503
tarinfo.size = origsize
505
tarinfo.offset = self.offset
506
self.offset += BLOCKSIZE
507
tarinfo.offset_data = self.offset
509
# Skip the following data blocks.
510
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
513
self.offset += blocks * BLOCKSIZE
515
self.members.append(tarinfo)
516
self.membernames.append(tarinfo.name)
517
self.chunks.append(self.offset)
520
def getmember(self, name):
521
"""Return a TarInfo instance for member name.
523
if name not in self.membernames and not self._loaded:
525
if name not in self.membernames:
526
raise KeyError, "filename `%s' not found in tar archive" % name
527
return self._getmember(name)
529
def getinfo(self, name):
530
"""Return a TarInfo instance for member name.
531
This method will be deprecated in 0.6,
532
use getmember() instead.
534
# XXX kick this out in 0.6
536
warnings.warn("use getmember() instead", DeprecationWarning)
537
return self.getmember(name)
539
def getmembers(self):
540
"""Return a list of all members in the archive
541
(as TarInfo instances).
543
if not self._loaded: # if we want to obtain a list of
544
self._load() # all members, we first have to
545
# scan the whole archive.
549
"""Return a list of names of all members in the
554
return self.membernames
556
def gettarinfo(self, name, arcname=None):
557
"""Create a TarInfo instance from an existing file.
558
Optional arcname defines the name under which the file
559
shall be stored in the archive.
561
# Building the name of the member in the archive.
562
# Backward slashes are converted to forward slashes,
563
# Absolute paths are turned to relative paths.
566
arcname = normpath(arcname)
567
drv, arcname = os.path.splitdrive(arcname)
568
while arcname[0:1] == "/":
569
arcname = arcname[1:]
571
# Now, fill the TarInfo instance with
572
# information specific for the file.
575
# Use os.stat or os.lstat, depending on platform
576
# and if symlinks shall be resolved.
577
if hasattr(os, "lstat") and not self.dereference:
578
statres = os.lstat(name)
580
statres = os.stat(name)
584
stmd = statres.st_mode
585
if stat.S_ISREG(stmd):
586
inode = (statres.st_ino, statres.st_dev,
588
if inode in self.inodes.keys() and not self.dereference:
589
# Is it a hardlink to an already
592
linkname = self.inodes[inode]
594
# The inode is added only if its valid.
595
# For win32 it is always 0.
597
if inode[0]: self.inodes[inode] = arcname
598
elif stat.S_ISDIR(stmd):
600
if arcname[-1:] != "/": arcname += "/"
601
elif stat.S_ISFIFO(stmd):
603
elif stat.S_ISLNK(stmd):
605
linkname = os.readlink(name)
606
elif stat.S_ISCHR(stmd):
608
elif stat.S_ISBLK(stmd):
613
# Fill the TarInfo instance with all
614
# information we can get.
615
tarinfo.name = arcname
617
tarinfo.uid = statres.st_uid
618
tarinfo.gid = statres.st_gid
619
tarinfo.size = statres.st_size
620
tarinfo.mtime = statres.st_mtime
622
tarinfo.linkname = linkname
625
tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
630
tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
634
if type in (CHRTYPE, BLKTYPE):
635
if hasattr(os, "major") and hasattr(os, "minor"):
636
tarinfo.devmajor = os.major(statres.st_rdev)
637
tarinfo.devminor = os.minor(statres.st_rdev)
640
def list(self, verbose=1):
641
"""Print a formatted listing of the archive's
646
print filemode(tarinfo.mode),
647
print tarinfo.uname + "/" + tarinfo.gname,
648
if tarinfo.ischr() or tarinfo.isblk():
649
print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
651
print "%10d" % tarinfo.size,
652
print "%d-%02d-%02d %02d:%02d:%02d" \
653
% time.gmtime(tarinfo.mtime)[:6],
659
print "->", tarinfo.linkname,
661
print "link to", tarinfo.linkname,
664
def add(self, name, arcname=None, recursive=1):
665
"""Add a file or a directory to the archive.
666
Directory addition is recursive by default.
669
raise ValueError, "I/O operation on closed file"
670
if self._mode == "r":
671
raise ValueError, "writing to a read-mode file"
676
# Skip if somebody tries to archive the archive...
677
if os.path.abspath(name) == os.path.abspath(self.name):
678
self._dbg(2, "tarfile: Skipped `%s'\n" % name)
681
# Special case: The user wants to add the current
687
for f in os.listdir("."):
688
self.add(f, os.path.join(arcname, f))
691
self._dbg(1, "%s\n" % name)
693
# Create a TarInfo instance from the file.
694
tarinfo = self.gettarinfo(name, arcname)
697
self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
700
# Append the tar header and data to the archive.
702
f = __builtin__.file(name, "rb")
703
self.addfile(tarinfo, fileobj = f)
706
if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
708
self.addfile(tarinfo)
711
self.addfile(tarinfo)
713
for f in os.listdir(name):
714
self.add(os.path.join(name, f), os.path.join(arcname, f))
716
def addfile(self, tarinfo, fileobj=None):
717
"""Add the content of fileobj to the tarfile.
718
The amount of bytes to read is determined by
719
the size attribute in the tarinfo instance.
722
raise ValueError, "I/O operation on closed file"
723
if self._mode == "r":
724
raise ValueError, "writing to a read-mode file"
726
# XXX What was this good for again?
728
self.fileobj.seek(self.chunks[-1])
732
# Now we must check if the strings for filename
733
# and linkname fit into the posix header.
734
# (99 chars + "\0" for each)
735
# If not, we must create GNU extension headers.
736
# If both filename and linkname are too long,
737
# the longlink is first to be written out.
738
if len(tarinfo.linkname) >= LENGTH_LINK - 1:
739
self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
740
tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
741
if len(tarinfo.name) >= LENGTH_NAME - 1:
742
self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
743
tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
745
header = tarinfo.getheader()
746
self.fileobj.write(header)
747
self.offset += BLOCKSIZE
749
# If there's data to follow, append it.
750
if fileobj is not None:
751
copyfileobj(fileobj, self.fileobj, tarinfo.size)
752
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
754
self.fileobj.write("\0" * (BLOCKSIZE - remainder))
756
self.offset += blocks * BLOCKSIZE
758
self.members.append(tarinfo)
759
self.membernames.append(tarinfo.name)
760
self.chunks.append(self.offset)
762
# def untar(self, path):
763
# """Untar the whole archive to path.
766
# for tarinfo in self:
767
# if tarinfo.isdir():
768
# later.append(tarinfo)
769
# self.extract(tarinfo, path)
770
# for tarinfo in later:
771
# self._utime(tarinfo, os.path.join(path, tarinfo.name))
773
def extractfile(self, member):
774
"""Extract member from the archive and return a file-like
775
object. member may be a name or a TarInfo instance.
778
raise ValueError, "I/O operation on closed file"
779
if self._mode != "r":
780
raise ValueError, "reading from a write-mode file"
782
if isinstance(member, TarInfo):
785
tarinfo = self.getmember(member)
787
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
788
return _FileObject(self, tarinfo)
789
elif tarinfo.islnk() or tarinfo.issym():
790
return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
794
def extract(self, member, path=""):
795
"""Extract member from the archive and write it to
796
current working directory using its full pathname.
797
If optional path is given, it is attached before the
799
member may be a name or a TarInfo instance.
802
raise ValueError, "I/O operation on closed file"
803
if self._mode != "r":
804
raise ValueError, "reading from a write-mode file"
806
if isinstance(member, TarInfo):
809
tarinfo = self.getmember(member)
811
self._dbg(1, tarinfo.name)
813
self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
814
except EnvironmentError, e:
815
if self.errorlevel > 0:
818
self._dbg(1, "\ntarfile: %s `%s'" % (e.strerror, e.filename))
820
if self.errorlevel > 1:
823
self._dbg(1, "\ntarfile: %s" % e)
826
def _extract_member(self, tarinfo, targetpath):
827
"""Extract the TarInfo instance tarinfo to a physical
828
file called targetpath.
830
# Fetch the TarInfo instance for the given name
831
# and build the destination pathname, replacing
832
# forward slashes to platform specific separators.
833
if targetpath[-1:] == "/":
834
targetpath = targetpath[:-1]
835
targetpath = os.path.normpath(targetpath)
837
# Create all upper directories.
838
upperdirs = os.path.dirname(targetpath)
839
if upperdirs and not os.path.exists(upperdirs):
844
ti.mtime = tarinfo.mtime
847
ti.uname = tarinfo.uname
848
ti.gname = tarinfo.gname
849
for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
850
ti.name = os.path.join(ti.name, d)
851
self._extract_member(ti, ti.name)
854
self._makefile(tarinfo, targetpath)
855
elif tarinfo.isdir():
856
self._makedir(tarinfo, targetpath)
857
elif tarinfo.isfifo():
858
self._makefifo(tarinfo, targetpath)
859
elif tarinfo.ischr() or tarinfo.isblk():
860
self._makedev(tarinfo, targetpath)
861
elif tarinfo.islnk() or tarinfo.issym():
862
self._makelink(tarinfo, targetpath)
864
self._makefile(tarinfo, targetpath)
865
if tarinfo.type not in SUPPORTED_TYPES:
866
self._dbg(1, "\ntarfile: Unknown file type '%s', " \
867
"extracted as regular file." % tarinfo.type)
869
if not tarinfo.issym():
870
self._chown(tarinfo, targetpath)
871
self._chmod(tarinfo, targetpath)
872
if not tarinfo.isdir():
873
self._utime(tarinfo, targetpath)
875
def _makedir(self, tarinfo, targetpath):
876
"""Make a directory called targetpath out of tarinfo.
880
except EnvironmentError, e:
881
if e.errno != errno.EEXIST:
884
def _makefile(self, tarinfo, targetpath):
885
"""Make a file called targetpath out of tarinfo.
887
source = self.extractfile(tarinfo)
888
target = __builtin__.file(targetpath, "wb")
889
copyfileobj(source, target)
893
def _makefifo(self, tarinfo, targetpath):
894
"""Make a fifo called targetpath out of tarinfo.
896
if hasattr(os, "mkfifo"):
897
os.mkfifo(targetpath)
899
raise TarError, "Fifo not supported by system"
901
def _makedev(self, tarinfo, targetpath):
902
"""Make a character or block device called targetpath out of tarinfo.
904
if not hasattr(os, "mknod"):
905
raise TarError, "Special devices not supported by system"
913
# This if statement should go away when python-2.3a0-devicemacros
915
if hasattr(os, "makedev"):
916
os.mknod(targetpath, mode,
917
os.makedev(tarinfo.devmajor, tarinfo.devminor))
919
os.mknod(targetpath, mode,
920
tarinfo.devmajor, tarinfo.devminor)
922
def _makelink(self, tarinfo, targetpath):
923
"""Make a (symbolic) link called targetpath out of tarinfo.
924
If it cannot be made (due to platform or failure), we try
925
to make a copy of the referenced file instead of a link.
927
linkpath = tarinfo.linkname
928
self._dbg(1, " -> %s" % linkpath)
931
os.symlink(linkpath, targetpath)
933
linkpath = os.path.join(os.path.dirname(targetpath),
935
os.link(linkpath, targetpath)
936
except AttributeError:
937
linkpath = os.path.join(os.path.dirname(tarinfo.name),
939
linkpath = normpath(linkpath)
941
self._extract_member(self.getmember(linkpath), targetpath)
942
except (IOError, OSError, KeyError), e:
943
linkpath = os.path.normpath(linkpath)
945
shutil.copy2(linkpath, targetpath)
946
except EnvironmentError, e:
947
raise TarError, "Link could not be created"
949
def _chown(self, tarinfo, targetpath):
950
"""Set owner of targetpath according to tarinfo.
952
if pwd and os.geteuid() == 0:
953
# We have to be root to do so.
955
g = grp.getgrnam(tarinfo.gname)[2]
958
g = grp.getgrgid(tarinfo.gid)[2]
962
u = pwd.getpwnam(tarinfo.uname)[2]
965
u = pwd.getpwuid(tarinfo.uid)[2]
969
if tarinfo.issym() and hasattr(os, "lchown"):
970
os.lchown(targetpath, u, g)
972
os.chown(targetpath, u, g)
973
except EnvironmentError, e:
974
self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
975
% (e.strerror, e.filename))
977
def _chmod(self, tarinfo, targetpath):
978
"""Set file permissions of targetpath according to tarinfo.
981
os.chmod(targetpath, tarinfo.mode)
982
except EnvironmentError, e:
983
self._dbg(2, "\ntarfile: (chmod failed), %s `%s'"
984
% (e.strerror, e.filename))
986
def _utime(self, tarinfo, targetpath):
987
"""Set modification time of targetpath according to tarinfo.
990
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
991
except EnvironmentError, e:
992
self._dbg(2, "\ntarfile: (utime failed), %s `%s'"
993
% (e.strerror, e.filename))
995
def _getmember(self, name, tarinfo=None):
996
"""Find an archive member by name from bottom to top.
997
If tarinfo is given, it is used as the starting point.
1000
end = len(self.members)
1002
end = self.members.index(tarinfo)
1004
for i in xrange(end - 1, -1, -1):
1005
if name == self.membernames[i]:
1006
return self.members[i]
1009
"""Read through the entire archive file and look for readable
1013
tarinfo = self.next()
1020
"""Provide an iterator object.
1023
return iter(self.members)
1025
return TarIter(self)
1027
def _buftoinfo(self, buf):
1028
"""Transform a 512 byte block to a TarInfo instance.
1031
tarinfo.name = nts(buf[0:100])
1032
tarinfo.mode = int(buf[100:108], 8)
1033
tarinfo.uid = int(buf[108:116],8)
1034
tarinfo.gid = int(buf[116:124],8)
1035
tarinfo.size = long(buf[124:136], 8)
1036
tarinfo.mtime = long(buf[136:148], 8)
1037
tarinfo.chksum = int(buf[148:156], 8)
1038
tarinfo.type = buf[156:157]
1039
tarinfo.linkname = nts(buf[157:257])
1040
tarinfo.uname = nts(buf[265:297])
1041
tarinfo.gname = nts(buf[297:329])
1043
tarinfo.devmajor = int(buf[329:337], 8)
1044
tarinfo.devminor = int(buf[337:345], 8)
1046
tarinfo.devmajor = tarinfo.devmajor = 0
1047
tarinfo.prefix = buf[345:500]
1048
if tarinfo.chksum != calc_chksum(buf):
1049
self._dbg(1, "tarfile: Bad Checksum\n")
1052
def _proc_gnulong(self, tarinfo, type):
1053
"""Evaluate the two blocks that hold a GNU longname
1058
buf = self.fileobj.read(BLOCKSIZE)
1061
self.offset += BLOCKSIZE
1062
if type == GNUTYPE_LONGNAME:
1064
if type == GNUTYPE_LONGLINK:
1067
buf = self.fileobj.read(BLOCKSIZE)
1070
tarinfo = self._buftoinfo(buf)
1071
if name is not None:
1073
if linkname is not None:
1074
tarinfo.linkname = linkname
1075
self.offset += BLOCKSIZE
1078
def _create_gnulong(self, name, type):
1079
"""Insert a GNU longname/longlink member into the archive.
1080
It consists of a common tar header, with the length
1081
of the longname as size, followed by a data block,
1082
which contains the longname as a null terminated string.
1085
tarinfo.name = "././@LongLink"
1088
tarinfo.size = len(name)
1090
self.fileobj.write(tarinfo.getheader())
1091
self.fileobj.write(name + "\0" * (512 - len(name)))
1092
self.offset += BLOCKSIZE * 2
1094
def _proc_sparse(self, tarinfo):
1095
"""Analyze a GNU sparse header plus extra headers.
1097
buf = tarinfo.getheader()
1103
# There are 4 possible sparse structs in the
1106
offset = int(buf[pos:pos + 12], 8)
1107
numbytes = int(buf[pos + 12:pos + 24], 8)
1108
if offset > lastpos:
1109
sp.append(_hole(lastpos, offset - lastpos))
1110
sp.append(_data(offset, numbytes, realpos))
1112
lastpos = offset + numbytes
1115
isextended = ord(buf[482])
1116
origsize = int(buf[483:495], 8)
1118
# If the isextended flag is given,
1119
# there are extra headers to process.
1120
while isextended == 1:
1121
buf = self.fileobj.read(BLOCKSIZE)
1122
self.offset += BLOCKSIZE
1125
offset = int(buf[pos:pos + 12], 8)
1126
numbytes = int(buf[pos + 12:pos + 24], 8)
1127
if offset > lastpos:
1128
sp.append(_hole(lastpos, offset - lastpos))
1129
sp.append(_data(offset, numbytes, realpos))
1131
lastpos = offset + numbytes
1133
isextended = ord(buf[504])
1136
if lastpos < origsize:
1137
sp.append(_hole(lastpos, origsize - lastpos))
1142
def _dbg(self, level, msg):
    """Write the debug message msg to sys.stdout.

    The message is only emitted when level is not greater than
    self.debug; it is written verbatim, so the caller supplies any
    newline it wants.
    """
    if level <= self.debug:
        sys.stdout.write(msg)
1150
for tarinfo in TarFile(...):
1154
def __init__(self, tarfile):
1155
"""Construct a TarIter instance.
1157
self.tarfile = tarfile
1159
"""Return iterator object.
1163
"""Return the next item using TarFile's next() method.
1164
When all members have been read, set TarFile as _loaded.
1166
tarinfo = self.tarfile.next()
1168
self.tarfile._loaded = 1
1173
# Helper classes for sparse file support
1175
"""Base class for _data and _hole.
1177
def __init__(self, offset, size):
1178
self.offset = offset
1180
def __contains__(self, offset):
    """Support "offset in section".

    Return true when offset lies in the half-open range
    [self.offset, self.offset + self.size).
    """
    return self.offset <= offset < self.offset + self.size
1183
class _data(_section):
1184
"""Represent a data section in a sparse file.
1186
def __init__(self, offset, size, realpos):
1187
_section.__init__(self, offset, size)
1188
self.realpos = realpos
1190
class _hole(_section):
1191
"""Represent a hole section in a sparse file.
1195
class _ringbuffer(list):
1196
"""Ringbuffer class which increases performance
1197
over a regular list.
1201
def find(self, offset):
1208
if idx == len(self):
1217
"""File-like object for reading an archive member,
1218
is returned by TarFile.extractfile().
1219
Support for sparse files included.
1222
def __init__(self, tarfile, tarinfo):
1223
self.fileobj = tarfile.fileobj
1224
self.name = tarinfo.name
1227
self.offset = tarinfo.offset_data
1228
self.size = tarinfo.size
1230
self.linebuffer = ""
1231
if tarinfo.issparse():
1232
self.sparse = tarinfo.sparse
1233
self.read = self._readsparse
1235
self.read = self._readnormal
1237
def readline(self, size=-1):
1238
"""Read a line with approx. size.
1239
If size is negative, read a whole line.
1240
readline() and read() must not be mixed up (!).
1245
nl = self.linebuffer.find("\n")
1249
size -= len(self.linebuffer)
1251
buf = self.read(min(size, 100))
1254
self.linebuffer += buf
1258
nl = self.linebuffer.find("\n")
1261
self.linebuffer = ""
1263
buf = self.linebuffer[:nl]
1264
self.linebuffer = self.linebuffer[nl + 1:]
1265
while buf[-1:] == "\r":
1269
def readlines(self):
1270
"""Return a list with all (following) lines.
1274
line = self.readline()
1279
def _readnormal(self, size=None):
1280
"""Read operation for regular files.
1283
raise ValueError, "I/O operation on closed file"
1284
self.fileobj.seek(self.offset + self.pos)
1285
bytesleft = self.size - self.pos
1287
bytestoread = bytesleft
1289
bytestoread = min(size, bytesleft)
1290
self.pos += bytestoread
1291
return self.fileobj.read(bytestoread)
1293
def _readsparse(self, size=None):
1294
"""Read operation for sparse files.
1297
raise ValueError, "I/O operation on closed file"
1300
size = self.size - self.pos
1304
buf = self._readsparsesection(size)
1311
def _readsparsesection(self, size):
1312
"""Read a single section of a sparse file.
1314
section = self.sparse.find(self.pos)
1319
toread = min(size, section.offset + section.size - self.pos)
1320
if isinstance(section, _data):
1321
realpos = section.realpos + self.pos - section.offset
1323
self.fileobj.seek(self.offset + realpos)
1324
return self.fileobj.read(toread)
1327
return "\0" * toread
1330
"""Return the current file position.
1334
def seek(self, pos, whence=0):
1335
"""Seek to a position in the file.
1337
self.linebuffer = ""
1339
self.pos = min(max(pos, 0), self.size)
1342
self.pos = max(self.pos + pos, 0)
1344
self.pos = min(self.pos + pos, self.size)
1346
self.pos = max(min(self.size + pos, self.size), 0)
1349
"""Close the file object.
1354
#---------------------------------------------
1355
# zipfile compatible TarFile class
1357
# for details consult zipfile's documentation
1358
#---------------------------------------------
1361
# Compression constants accepted by TarFileCompat; the comments name the
# zipfile constants they correspond to.
TAR_PLAIN   = 0         # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
1363
class TarFileCompat:
1364
"""TarFile class compatible with standard module zipfile's
1367
def __init__(self, file, mode="r", compression=TAR_PLAIN):
1368
if compression == TAR_PLAIN:
1369
self.tarfile = open(file, mode)
1370
elif compression == TAR_GZIPPED:
1371
self.tarfile = gzopen(file, mode)
1373
raise ValueError, "unknown compression constant"
1374
if mode[0:1] == "r":
1376
members = self.tarfile.getmembers()
1377
for i in range(len(members)):
1380
m.file_size = m.size
1381
m.date_time = time.gmtime(m.mtime)[:6]
1383
return map(lambda m: m.name, self.infolist())
1385
return filter(lambda m: m.type in REGULAR_TYPES,
1386
self.tarfile.getmembers())
1391
def getinfo(self, name):
    """Return the archive member called name.

    Delegates to self.tarfile.getmember(); whatever that call
    returns or raises is passed through unchanged.
    """
    return self.tarfile.getmember(name)
1393
def read(self, name):
    """Return the complete content of the archive member called name.

    The member is looked up with self.tarfile.getmember(), opened
    with self.tarfile.extractfile() and read in one go.
    """
    member = self.tarfile.getmember(name)
    return self.tarfile.extractfile(member).read()
1395
def write(self, filename, arcname=None, compress_type=None):
    """Add the file filename to the archive.

    arcname is handed to self.tarfile.add() as the name to store the
    file under.  compress_type is accepted but never used by this
    method.
    """
    self.tarfile.add(filename, arcname)
1397
def writestr(self, zinfo, bytes):
1399
zinfo.name = zinfo.filename
1400
zinfo.size = zinfo.file_size
1401
zinfo.mtime = calendar.timegm(zinfo.date_time)
1402
self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))
1404
self.tarfile.close()
1405
#class TarFileCompat
1407
if __name__ == "__main__":
1408
# a "light-weight" implementation of GNUtar ;-)
1410
Usage: %s [options] [files]
1412
-h display this help message
1414
-r append to an existing archive
1416
-t list archive contents
1418
use archive FILENAME, else STDOUT (-c)
1419
-z filter archive through gzip
1421
with opt -x: extract to directory DIRNAME
1422
with opt -c, -r: put files to archive under DIRNAME
1426
wildcards *, ?, [seq], [!seq] are accepted.
1431
opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
1432
except getopt.GetoptError, e:
1444
if o == "-t": mode = "l" # list archive
1445
if o == "-c": mode = "w" # write to archive
1446
if o == "-r": mode = "a" # append to archive
1447
if o == "-x": mode = "r" # extract from archive
1448
if o == "-f": file = a # specify filename else use stdout
1449
if o == "-C": dir = a # change to dir
1450
if o == "-z": comp = 1 # filter through gzip
1451
if o == "-v": debug = 2 # verbose mode
1452
if o == "-q": debug = 0 # quiet mode
1453
if o == "-h": # help message
1466
if not file or file == "-":
1471
# If under Win32, set stdout to binary.
1474
msvcrt.setmode(1, os.O_BINARY)
1477
tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
1480
tarfile = func(file, "r")
1482
tarfile = func(file, mode)
1484
tarfile.debug = debug
1489
for tarinfo in tarfile:
1490
tarfile.extract(tarinfo, dir)
1495
files = glob.glob(arg)