2
# -*- coding: iso-8859-1 -*-
3
#-------------------------------------------------------------------
6
# Module for reading and writing .tar and tar.gz files.
8
# Needs at least Python version 2.2.
10
# Please consult the html documentation in this distribution
11
# for further details on how to use tarfile.
13
#-------------------------------------------------------------------
14
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
15
# All rights reserved.
17
# Permission is hereby granted, free of charge, to any person
18
# obtaining a copy of this software and associated documentation
19
# files (the "Software"), to deal in the Software without
20
# restriction, including without limitation the rights to use,
21
# copy, modify, merge, publish, distribute, sublicense, and/or sell
22
# copies of the Software, and to permit persons to whom the
23
# Software is furnished to do so, subject to the following
26
# The above copyright notice and this permission notice shall be
27
# included in all copies or substantial portions of the Software.
29
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
31
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
33
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
34
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
36
# OTHER DEALINGS IN THE SOFTWARE.
38
"""Read from and write to tar format archives.
41
__version__ = "$Revision: 1.1 $"
42
# $Source: /cvsroot/boa-constructor/boa/ExternalLib/tarfile.py,v $
45
__author__ = "Lars Gust�bel (lars@gustaebel.de)"
46
__date__ = "$Date: 2003/08/11 15:28:18 $"
47
__cvsid__ = "$Id: tarfile.py,v 1.1 2003/08/11 15:28:18 riaan Exp $"
48
__credits__ = "Niels Gust�bel for his invaluable support, " \
49
"Richard Townsend for endless and patient testing, " \
50
"Gustavo Niemeyer for his support and his patches."
69
# We won't need this anymore in Python 2.3
71
# We import the _tarfile extension, that contains
72
# some useful functions to handle devices and symlinks.
73
# We inject them into os module, as if we were under 2.3.
77
if _tarfile.mknod is None:
81
if _tarfile and not hasattr(os, "mknod"):
82
os.mknod = _tarfile.mknod
83
if _tarfile and not hasattr(os, "major"):
84
os.major = _tarfile.major
85
if _tarfile and not hasattr(os, "minor"):
86
os.minor = _tarfile.minor
87
if _tarfile and not hasattr(os, "makedev"):
88
os.makedev = _tarfile.makedev
89
if _tarfile and not hasattr(os, "lchown"):
90
os.lchown = _tarfile.lchown
92
# XXX remove for release (2.3)
100
#---------------------------------------------------------
102
#---------------------------------------------------------
103
# Basic layout constants of the tar format.  (Stray line-number residue
# that had been interleaved as no-op expression statements is removed.)
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
112
MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
114
# Values of the one-character type flag stored in a member's header.
# (Stray line-number residue that had been interleaved as no-op
# expression statements is removed.)
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
128
#---------------------------------------------------------
130
#---------------------------------------------------------
131
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
132
SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
133
CONTTYPE, GNUTYPE_LONGNAME,
134
GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
137
REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
138
CONTTYPE, GNUTYPE_SPARSE) # represent regular files
140
#---------------------------------------------------------
141
# Bits used in the mode field, values in octal.
142
#---------------------------------------------------------
143
S_IFLNK = 0120000 # symbolic link
144
S_IFREG = 0100000 # regular file
145
S_IFBLK = 0060000 # block device
146
S_IFDIR = 0040000 # directory
147
S_IFCHR = 0020000 # character device
148
S_IFIFO = 0010000 # fifo
150
TSUID = 04000 # set UID on execution
151
TSGID = 02000 # set GID on execution
152
TSVTX = 01000 # reserved
154
TUREAD = 0400 # read by owner
155
TUWRITE = 0200 # write by owner
156
TUEXEC = 0100 # execute/search by owner
157
TGREAD = 0040 # read by group
158
TGWRITE = 0020 # write by group
159
TGEXEC = 0010 # execute/search by group
160
TOREAD = 0004 # read by other
161
TOWRITE = 0002 # write by other
162
TOEXEC = 0001 # execute/search by other
164
#---------------------------------------------------------
165
# Some useful functions
166
#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

       Everything from the first NUL character onwards is dropped; a
       buffer that contains no NUL at all is returned unchanged.
    """
    # NOTE(review): the "def" line was missing from this view; the
    # signature is taken from the call nts(buf[0:100]) in _buftoinfo
    # and the use of "s" below -- confirm against the pristine file.
    return s.split(NUL, 1)[0]
172
def calc_chksum(buf):
    """Calculate the checksum for a member's header. It's a simple addition
       of all bytes, treating the chksum field as if filled with spaces.
       buf is a 512 byte long string buffer which holds the header.

       Returns the checksum as an integer.
    """
    chk = 256                       # chksum field is treated as blanks,
                                    # so the initial value is 8 * ord(" ")
    for c in buf[:148]:
        chk += ord(c)               # sum up all bytes before chksum
    for c in buf[156:]:
        chk += ord(c)               # sum up all bytes after chksum
    return chk
183
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been read; the short chunk is not written to dst.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    # NOTE(review): the chunk-size definition is missing from this view;
    # 16 KiB is assumed. Any positive value copies the same bytes.
    BUFSIZE = 16 * 1024

    # Copy whole chunks first, then the trailing partial chunk.
    blocks, remainder = divmod(length, BUFSIZE)
    for b in range(blocks):
        buf = src.read(BUFSIZE)
        if len(buf) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached (%d)" % (remainder - len(buf)))
        dst.write(buf)
217
(TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
220
(TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
223
(TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
226
"""Convert a file's mode to a string of the form
228
Used by TarFile.list()
231
for t in filemode_table:
233
if mode & t[0] == t[0]:
244
normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
246
normpath = os.path.normpath
248
class TarError(Exception):
    """General exception for extract errors"""
251
class ReadError(Exception):
    """Exception for unreadable tar archives"""
254
class CompressionError(Exception):
    """Exception for unavailable compression methods"""
257
class StreamError(Exception):
    """Exception for misuse of stream-like TarFiles"""
261
error = (TarError, ReadError, CompressionError, StreamError)
263
#--------------------
265
#--------------------
266
def open(name=None, mode="r", fileobj=None, bufsize=20*512):
267
"""Open a tar archive for reading, writing or appending. Return
268
an appropriate TarFile class.
271
'r' open for reading with transparent compression
272
'r:' open for reading exclusively uncompressed
273
'r:gz' open for reading with gzip compression
274
'a' open for appending
275
'w' or 'w:' open for writing without compression
276
'w:gz' open for writing with gzip compression
277
'r|' open an uncompressed stream of tar blocks for reading
278
'r|gz' open a gzip compressed stream of tar blocks
279
'w|' open an uncompressed stream for writing
280
'w|gz' open a gzip compressed stream for writing
282
if not name and not fileobj:
283
raise ValueError, "nothing to open"
286
filemode, comptype = mode.split(":")
287
filemode = filemode or "r"
288
comptype = comptype or "tar"
290
if "%sopen" % comptype in globals():
291
func = eval("%sopen" % comptype)
293
raise CompressionError, "unknown compression type %r" % comptype
294
return func(name, filemode, fileobj)
297
filemode, comptype = mode.split("|")
298
filemode = filemode or "r"
299
comptype = comptype or "tar"
301
if filemode not in "rw":
302
raise ValueError, "mode must be 'r' or 'w'"
304
t = TarFile(name, filemode,
305
_Stream(name, filemode, comptype, fileobj, bufsize))
306
t._extfileobj = False
310
findcomp = lambda f: f[-4:] == "open" and f[:-4]
311
comptypes = filter(findcomp, globals().keys())
312
comptypes = map(findcomp, comptypes)
313
for comptype in comptypes:
314
func = eval("%sopen" % comptype)
316
return func(name, "r", fileobj)
319
raise ReadError, "file could not be opened successfully"
322
return taropen(name, mode, fileobj)
324
raise ValueError, "undiscernible mode"
326
def taropen(name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       mode must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. name, mode and fileobj are passed on to TarFile,
       and the resulting instance is returned.
    """
    # Exact membership test: the former substring test
    # (mode not in "raw") let the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return TarFile(name, mode, fileobj)
333
def gzopen(name, mode="r", fileobj=None, compresslevel=9):
334
"""Open gzip compressed tar archive name for reading or writing.
335
Appending is not allowed.
337
if len(mode) > 1 or mode not in "raw":
338
raise ValueError, "mode must be 'r', 'a' or 'w'"
343
raise CompressionError, "gzip module is not available"
345
pre, ext = os.path.splitext(name)
346
pre = os.path.basename(pre)
356
# to emulate a tar.gz file append
357
fileobj = cStringIO.StringIO(__builtin__.file(name, "rb").read())
358
tarsrc = taropen(tarname, "r", gzip.GzipFile(name, "rb",
359
compresslevel, fileobj))
363
fileobj = __builtin__.file(name, mode+"b")
369
t = taropen(tarname, mode, gzip.GzipFile(tarname, mode+"b", compresslevel, fileobj))
371
raise ReadError, "not a gzip file"
373
# copy existing entries when appending
375
for m in tarsrc.getmembers():
376
t.addfile(m, tarsrc.extractfile(m))
378
t._extfileobj = False
382
def gzopena(name, mode="a", fileobj=None, compresslevel=9):
383
"""Open gzip compressed tar archive name for appending.
385
#if len(mode) > 1 or mode not in "a":
386
# raise ValueError, "mode must be 'r', 'a' or 'w'"
391
raise CompressionError, "gzip module is not available"
393
pre, ext = os.path.splitext(name)
394
pre = os.path.basename(pre)
403
# to emulate a tar.gz file append
404
fileobj = __builtin__.file(name, "wb")
416
t = taropen(tarname, mode, gzip.GzipFile(name, mode + "b",
417
compresslevel, fileobj))
419
raise ReadError, "not a gzip file"
420
t._extfileobj = False
422
def bz2open(name, mode="r", fileobj=None, compresslevel=9):
423
"""Open bzip2 compressed tar archive name for reading or writing.
424
Appending is not allowed.
426
if len(mode) > 1 or mode not in "rw":
427
raise ValueError, "mode must be 'r' or 'w'."
432
raise CompressionError, "bz2 module is not available"
434
pre, ext = os.path.splitext(name)
435
pre = os.path.basename(pre)
442
if fileobj is not None:
443
raise ValueError, "no support for external file objects"
446
t = taropen(tarname, mode, bz2.BZ2File(name, mode + "b"))
448
raise ReadError, "not a bzip2 file"
451
def is_tarfile(name):
452
"""Return True if name points to a tar archive that we
453
are able to handle, else return False.
462
#---------------------------
463
# internal stream interface
464
#---------------------------
466
"""Low-level file object. Supports reading and writing.
467
It is used instead of a regular file object for streaming
471
def __init__(self, name, mode):
474
"w": os.O_WRONLY | os.O_CREAT,
476
if hasattr(os, "O_BINARY"):
478
self.fd = os.open(name, mode)
483
def read(self, size):
484
return os.read(self.fd, size)
490
"""Class that serves as an adapter between TarFile and
491
a stream-like object. The stream-like object only
492
needs to have a read() or write() method and is accessed
493
blockwise. Use of gzip compression is possible.
494
A stream-like object could be for example: sys.stdin,
495
sys.stdout, a socket, a tape device etc.
497
_Stream is intended to be used only internally.
500
def __init__(self, name, mode, type, fileobj, bufsize):
501
"""Construct a _Stream object.
503
self._extfileobj = True
505
fileobj = _LowLevelFile(name, mode)
506
self._extfileobj = False
511
self.fileobj = fileobj
512
self.bufsize = bufsize
521
raise CompressionError, "zlib module is not available"
523
self.crc = zlib.crc32("")
527
self._init_write_gz()
533
def _init_write_gz(self):
534
"""Initialize for writing with gzip compression.
537
self.zlib = zlib.compressobj(9, zlib.DEFLATED,
541
timestamp = struct.pack("<L", long(time.time()))
542
self.__write("\037\213\010\010%s\002\377" % timestamp)
543
if self.name.endswith(".gz"):
544
self.name = self.name[:-3]
545
self.__write(self.name + NUL)
548
"""Write string s to the stream.
550
if self.type == "gz":
552
self.crc = self.zlibmod.crc32(s, self.crc)
553
s = self.zlib.compress(s)
556
def __write(self, s):
557
"""Write string s to the stream if a whole new block
558
is ready to be written.
561
while len(self.buf) > self.bufsize:
562
self.fileobj.write(self.buf[:self.bufsize])
563
self.buf = self.buf[self.bufsize:]
566
"""Close the _Stream object. No operation should be
567
done on it afterwards.
572
if self.mode == "w" and self.buf:
573
if self.type == "gz":
574
self.buf += self.zlib.flush()
575
self.fileobj.write(self.buf)
577
if self.type == "gz":
578
self.fileobj.write(struct.pack("<l", self.crc))
579
self.fileobj.write(struct.pack("<l", self.pos))
581
if not self._extfileobj:
586
def _init_read_gz(self):
587
"""Initialize for reading a gzip compressed fileobj.
589
self.zlib = self.zlibmod.decompressobj(-self.zlibmod.MAX_WBITS)
592
# taken from gzip.GzipFile with some alterations
593
if self.__read(2) != "\037\213":
594
raise ReadError, "not a gzip file"
595
if self.__read(1) != "\010":
596
raise CompressionError, "unsupported compression method"
598
flag = ord(self.__read(1))
602
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
607
if not s or s == NUL: break
611
if not s or s == NUL: break
616
"""Return the stream's file pointer position.
620
def seek(self, pos=0):
621
"""Set the stream's file pointer to pos. Negative seeking
624
if pos - self.pos >= 0:
625
self.read(pos - self.pos)
627
raise StreamError, "seeking backwards is not allowed"
630
def read(self, size=None):
631
"""Return the next size number of bytes from the stream.
632
If size is not defined, return all bytes of the stream
638
buf = self._read(self.bufsize)
643
buf = self._read(size)
647
def _read(self, size):
648
"""Return size bytes from the stream. gzip compression is
651
if self.type != "gz":
652
return self.__read(size)
654
while len(self.dbuf) < size:
655
buf = self.__read(1024)
657
self.dbuf += self.zlib.decompress(buf)
658
buf = self.dbuf[:size]
659
self.dbuf = self.dbuf[size:]
662
def __read(self, size):
663
"""Return size bytes from stream. If internal buffer is empty,
664
read another block from the stream.
666
while len(self.buf) < size:
667
buf = self.fileobj.read(self.bufsize)
670
buf = self.buf[:size]
671
self.buf = self.buf[size:]
678
"""Informational class which holds the details about an
679
archive member given by a tar header block.
680
TarInfo instances are returned by TarFile.getmember(),
681
TarFile.getmembers() and TarFile.gettarinfo() and are
682
usually created internally.
685
def __init__(self, name=""):
686
"""Construct a TarInfo instance. name is the optional name
690
self.name = name # member name (dirnames must end with '/')
691
self.mode = 0666 # file permissions
692
self.uid = 0 # user id
693
self.gid = 0 # group id
694
self.size = 0 # file size
695
self.mtime = 0 # modification time
696
self.chksum = 0 # header checksum
697
self.type = REGTYPE # member type
698
self.linkname = "" # link name
699
self.uname = "user" # user name
700
self.gname = "group" # group name
702
self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
703
self.prefix = "" # prefix to filename or holding information
706
self.offset = 0 # the tar header starts here
707
self.offset_data = 0 # the optional file's data starts here
709
# zipfile compatibility
713
"""Return a tar header block as a 512 byte string.
716
if self.isdir() and name[-1:] != "/":
718
# The following code was contributed by Detlef Lannert.
720
for value, fieldsize in (
722
("%07o" % (self.mode & 07777), 8),
723
("%07o" % self.uid, 8),
724
("%07o" % self.gid, 8),
725
("%011o" % self.size, 12),
726
("%011o" % self.mtime, 12),
729
(self.linkname, 100),
734
("%07o" % self.devmajor, 8),
735
("%07o" % self.devminor, 8),
739
parts.append(value + (fieldsize - l) * NUL)
742
chksum = calc_chksum(buf)
743
buf = buf[:148] + "%06o\0" % chksum + buf[155:]
744
buf += (512 - len(buf)) * NUL
749
return self.type in REGULAR_TYPES
753
return self.type == DIRTYPE
755
return self.type == SYMTYPE
757
return self.type == LNKTYPE
759
return self.type == CHRTYPE
761
return self.type == BLKTYPE
763
return self.type == FIFOTYPE
765
return self.type == GNUTYPE_SPARSE
767
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
771
"""The TarFile Class provides an interface to tar archives.
774
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
776
dereference = False # If true, add content of linked file to the
777
# tar file, else the link.
779
ignore_zeros = False # If true, skips empty or invalid blocks and
780
# continues processing.
782
errorlevel = 0 # If 0, fatal errors only appear in debug
783
# messages (if debug >= 0). If > 0, errors
784
# are passed to the caller as exceptions.
786
posix = True # If True, generates POSIX.1-1990-compliant
787
# archives (no GNU extensions!)
789
def __init__(self, name=None, mode="r", fileobj=None):
792
if len(mode) > 1 or mode not in "raw":
793
raise ValueError, "mode must be 'r', 'a' or 'w'"
795
self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
798
fileobj = __builtin__.file(self.name, self.mode)
799
self._extfileobj = False
801
if self.name is None and hasattr(fileobj, "name"):
802
self.name = fileobj.name
803
if hasattr(fileobj, "mode"):
804
self.mode = fileobj.mode
805
self._extfileobj = True
806
self.fileobj = fileobj
808
# Init datastructures
810
self.members = [] # list of members as TarInfo instances
811
self.membernames = [] # names of members
812
self.chunks = [0] # chunk cache
813
self._loaded = False # flag if all members have been read
814
self.offset = 0L # current position in the archive file
815
self.inodes = {} # dictionary caching the inodes of
816
# archive members already added
818
if self._mode == "r":
819
self.firstmember = None
820
self.firstmember = self.next()
822
if self._mode == "a":
823
# Move to the end of the archive,
824
# before the first empty block.
825
self.firstmember = None
828
tarinfo = self.next()
833
self.fileobj.seek(- BLOCKSIZE, 1)
838
"""Close the TarFile instance and do some cleanup.
843
if self._mode in "aw":
844
self.fileobj.write(NUL * (BLOCKSIZE * 2))
845
self.offset += (BLOCKSIZE * 2)
846
# fill up the end with zero-blocks
847
# (like option -b20 for tar does)
848
blocks, remainder = divmod(self.offset, RECORDSIZE)
850
self.fileobj.write(NUL * (RECORDSIZE - remainder))
852
if not self._extfileobj:
857
"""Return the next member from the archive. Return None if the
858
end of the archive is reached. Normally there is no need to
859
use this method directly, because the TarFile class can be
863
if self.firstmember is not None:
865
self.firstmember = None
868
# Read the next block.
869
self.fileobj.seek(self.chunks[-1])
871
buf = self.fileobj.read(BLOCKSIZE)
875
tarinfo = self._buftoinfo(buf)
877
if self.ignore_zeros:
878
if buf.count(NUL) == BLOCKSIZE:
882
self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
883
self.offset += BLOCKSIZE
886
# Block is empty or unreadable.
887
if self.chunks[-1] == 0:
888
# If the first block is invalid, this does not
889
# look like a tar archive we can handle.
890
raise ReadError,"empty, unreadable or compressed file"
894
# If the TarInfo instance contains a GNUTYPE longname or longlink
895
# statement, we must process this first.
896
if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
897
tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
899
if tarinfo.isreg() and tarinfo.name[:-1] == "/":
900
# some old tar programs don't know DIRTYPE
901
tarinfo.type = DIRTYPE
903
if tarinfo.issparse():
904
# Sparse files need some care,
905
# due to the possible extra headers.
906
tarinfo.offset = self.offset
907
self.offset += BLOCKSIZE
908
origsize = self._proc_sparse(tarinfo)
909
tarinfo.offset_data = self.offset
910
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
913
self.offset += blocks * BLOCKSIZE
914
tarinfo.size = origsize
916
tarinfo.offset = self.offset
917
self.offset += BLOCKSIZE
918
tarinfo.offset_data = self.offset
920
# Skip the following data blocks.
921
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
924
self.offset += blocks * BLOCKSIZE
926
self.members.append(tarinfo)
927
self.membernames.append(tarinfo.name)
928
self.chunks.append(self.offset)
931
def getmember(self, name):
932
"""Return a TarInfo instance for member name.
935
if name not in self.membernames and not self._loaded:
937
if name not in self.membernames:
938
raise KeyError, "filename %r not found" % name
939
return self._getmember(name)
941
def getmembers(self):
942
"""Return all members in the archive as a list of TarInfo
946
if not self._loaded: # if we want to obtain a list of
947
self._load() # all members, we first have to
948
# scan the whole archive.
952
"""Return all members in the archive as a list of their names.
957
return self.membernames
959
def gettarinfo(self, name, arcname=None):
960
"""Create and return a TarInfo object that represents the existing
961
physical file name. The TarInfo object and the file's data can
962
be added to the TarFile using addfile(). arcname specifies the
963
pathname under which the member shall be stored in the archive.
967
# Building the name of the member in the archive.
968
# Backward slashes are converted to forward slashes,
969
# Absolute paths are turned to relative paths.
972
arcname = normpath(arcname)
973
drv, arcname = os.path.splitdrive(arcname)
974
while arcname[0:1] == "/":
975
arcname = arcname[1:]
977
# Now, fill the TarInfo instance with
978
# information specific for the file.
981
# Use os.stat or os.lstat, depending on platform
982
# and if symlinks shall be resolved.
983
if hasattr(os, "lstat") and not self.dereference:
984
statres = os.lstat(name)
986
statres = os.stat(name)
989
stmd = statres.st_mode
990
if stat.S_ISREG(stmd):
991
inode = (statres.st_ino, statres.st_dev)
992
if inode in self.inodes.keys() and not self.dereference:
993
# Is it a hardlink to an already
996
linkname = self.inodes[inode]
998
# The inode is added only if it is valid.
999
# For win32 it is always 0.
1002
self.inodes[inode] = arcname
1003
elif stat.S_ISDIR(stmd):
1005
if arcname[-1:] != "/":
1007
elif stat.S_ISFIFO(stmd):
1009
elif stat.S_ISLNK(stmd):
1011
linkname = os.readlink(name)
1012
elif stat.S_ISCHR(stmd):
1014
elif stat.S_ISBLK(stmd):
1019
# Fill the TarInfo instance with all
1020
# information we can get.
1021
tarinfo.name = arcname
1023
tarinfo.uid = statres.st_uid
1024
tarinfo.gid = statres.st_gid
1025
tarinfo.size = statres.st_size
1026
tarinfo.mtime = statres.st_mtime
1028
tarinfo.linkname = linkname
1031
tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1036
tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1040
if type in (CHRTYPE, BLKTYPE):
1041
if hasattr(os, "major") and hasattr(os, "minor"):
1042
tarinfo.devmajor = os.major(statres.st_rdev)
1043
tarinfo.devminor = os.minor(statres.st_rdev)
1046
def list(self, verbose=1):
1047
"""Print a formatted listing of TarFile's contents
1052
for tarinfo in self:
1054
print filemode(tarinfo.mode),
1055
print tarinfo.uname + "/" + tarinfo.gname,
1056
if tarinfo.ischr() or tarinfo.isblk():
1057
print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
1059
print "%10d" % tarinfo.size,
1060
print "%d-%02d-%02d %02d:%02d:%02d" \
1061
% time.localtime(tarinfo.mtime)[:6],
1067
print "->", tarinfo.linkname,
1069
print "link to", tarinfo.linkname,
1072
def add(self, name, arcname=None, recursive=True):
1073
"""Add a file to the TarFile. Directories are added
1074
recursively by default.
1081
# Skip if somebody tries to archive the archive...
1082
if self.name is not None \
1083
and os.path.abspath(name) == os.path.abspath(self.name):
1084
self._dbg(2, "tarfile: Skipped %r\n" % name)
1087
# Special case: The user wants to add the current
1088
# working directory.
1093
for f in os.listdir("."):
1094
self.add(f, os.path.join(arcname, f))
1097
self._dbg(1, "%s\n" % name)
1099
# Create a TarInfo instance from the file.
1100
tarinfo = self.gettarinfo(name, arcname)
1103
self._dbg(1, "tarfile: Unsupported type %r\n" % name)
1106
# Append the tar header and data to the archive.
1108
f = __builtin__.file(name, "rb")
1109
self.addfile(tarinfo, f)
1112
if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1114
self.addfile(tarinfo)
1117
self.addfile(tarinfo)
1119
for f in os.listdir(name):
1120
self.add(os.path.join(name, f), os.path.join(arcname, f))
1122
def addfile(self, tarinfo, fileobj=None):
1123
"""Read from fileobj and add the data to the TarFile.
1124
File information and the number of bytes to read is
1129
tarinfo.name = normpath(tarinfo.name)
1130
if tarinfo.linkname:
1131
tarinfo.linkname = normpath(tarinfo.linkname)
1133
if tarinfo.size > MAXSIZE_MEMBER:
1134
raise ValueError, "file is too large (>8GB)"
1136
if len(tarinfo.linkname) > LENGTH_LINK:
1138
raise ValueError, "linkname is too long (>%d)" \
1141
self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
1142
tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
1143
self._dbg(2, "tarfile: Created GNU tar extension LONGLINK\n")
1145
if len(tarinfo.name) > LENGTH_NAME:
1147
prefix = tarinfo.name[:LENGTH_PREFIX + 1]
1148
while prefix and prefix[-1] != "/":
1149
prefix = prefix[:-1]
1151
name = tarinfo.name[len(prefix):]
1152
prefix = prefix[:-1]
1154
if not prefix or len(name) > LENGTH_NAME:
1155
raise ValueError, "name is too long (>%d)" \
1159
tarinfo.prefix = prefix
1161
self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
1162
tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
1163
self._dbg(2, "tarfile: Created GNU tar extension LONGNAME\n")
1165
header = tarinfo.getheader()
1166
self.fileobj.write(header)
1167
self.offset += BLOCKSIZE
1169
# If there's data to follow, append it.
1170
if fileobj is not None:
1171
copyfileobj(fileobj, self.fileobj, tarinfo.size)
1172
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1174
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1176
self.offset += blocks * BLOCKSIZE
1178
def extractfile(self, member):
1179
"""Extract member from the TarFile and return a file-like
1180
object. member may be a name or a TarInfo object.
1184
if isinstance(member, TarInfo):
1187
tarinfo = self.getmember(member)
1190
return _FileObject(self, tarinfo)
1192
elif tarinfo.type not in SUPPORTED_TYPES:
1193
# If a member's type is unknown, it is treated as a
1195
return _FileObject(self, tarinfo)
1197
elif tarinfo.islnk() or tarinfo.issym():
1198
if isinstance(self.fileobj, _Stream):
1199
# A small but ugly workaround for the case that someone tries
1200
# to extract a (sym)link as a file-object from a non-seekable
1201
# stream of tar blocks.
1202
raise StreamError, "cannot extract (sym)link as file object"
1204
# A (sym)link's file object is its target's file object.
1205
return self.extractfile(self._getmember(tarinfo.linkname,
1208
# If there's no data associated with the member (directory, chrdev,
1209
# blkdev, etc.), return None instead of a file object.
1212
def extract(self, member, path=""):
1213
"""Extract member from the TarFile and write it to current
1214
working directory using its full pathname. If path is
1215
given, it is prepended to the pathname. member may be a
1216
name or a TarInfo object.
1220
if isinstance(member, TarInfo):
1223
tarinfo = self.getmember(member)
1225
self._dbg(1, tarinfo.name)
1227
self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1228
except EnvironmentError, e:
1229
if self.errorlevel > 0:
1232
if e.filename is None:
1233
self._dbg(1, "\ntarfile: %s" % e.strerror)
1235
self._dbg(1, "\ntarfile: %s %r" % (e.strerror, e.filename))
1237
if self.errorlevel > 1:
1240
self._dbg(1, "\ntarfile: %s" % e)
1243
def _extract_member(self, tarinfo, targetpath):
1244
"""Extract the TarInfo object tarinfo to a physical
1245
file called targetpath.
1247
# Fetch the TarInfo instance for the given name
1248
# and build the destination pathname, replacing
1249
# forward slashes with platform-specific separators.
1250
if targetpath[-1:] == "/":
1251
targetpath = targetpath[:-1]
1252
targetpath = os.path.normpath(targetpath)
1254
# Create all upper directories.
1255
upperdirs = os.path.dirname(targetpath)
1256
if upperdirs and not os.path.exists(upperdirs):
1261
ti.mtime = tarinfo.mtime
1262
ti.uid = tarinfo.uid
1263
ti.gid = tarinfo.gid
1264
ti.uname = tarinfo.uname
1265
ti.gname = tarinfo.gname
1267
self._extract_member(ti, ti.name)
1272
self._makefile(tarinfo, targetpath)
1273
elif tarinfo.isdir():
1274
self._makedir(tarinfo, targetpath)
1275
elif tarinfo.isfifo():
1276
self._makefifo(tarinfo, targetpath)
1277
elif tarinfo.ischr() or tarinfo.isblk():
1278
self._makedev(tarinfo, targetpath)
1279
elif tarinfo.islnk() or tarinfo.issym():
1280
self._makelink(tarinfo, targetpath)
1282
self._makefile(tarinfo, targetpath)
1283
if tarinfo.type not in SUPPORTED_TYPES:
1284
self._dbg(1, "\ntarfile: Unknown file type %r, " \
1285
"extracted as regular file." % tarinfo.type)
1287
if not tarinfo.issym():
1288
self._chown(tarinfo, targetpath)
1289
self._chmod(tarinfo, targetpath)
1290
self._utime(tarinfo, targetpath)
1292
def _makedir(self, tarinfo, targetpath):
1293
"""Make a directory called targetpath from tarinfo.
1296
os.mkdir(targetpath)
1297
except EnvironmentError, e:
1298
if e.errno != errno.EEXIST:
1301
def _makefile(self, tarinfo, targetpath):
1302
"""Make a file called targetpath from tarinfo.
1304
source = self.extractfile(tarinfo)
1305
target = __builtin__.file(targetpath, "wb")
1306
copyfileobj(source, target)
1310
def _makefifo(self, tarinfo, targetpath):
1311
"""Make a fifo called targetpath from tarinfo.
1313
if hasattr(os, "mkfifo"):
1314
os.mkfifo(targetpath)
1316
raise TarError, "fifo not supported by system"
1318
def _makedev(self, tarinfo, targetpath):
1319
"""Make a character or block device called targetpath
1322
if not hasattr(os, "mknod"):
1323
raise TarError, "special devices not supported by system"
1327
mode |= stat.S_IFBLK
1329
mode |= stat.S_IFCHR
1331
# XXX This if statement should go away when
1332
# python-2.3a0-devicemacros patch succeeds.
1333
if hasattr(os, "makedev"):
1334
os.mknod(targetpath, mode,
1335
os.makedev(tarinfo.devmajor, tarinfo.devminor))
1337
os.mknod(targetpath, mode,
1338
tarinfo.devmajor, tarinfo.devminor)
1340
def _makelink(self, tarinfo, targetpath):
1341
"""Make a (symbolic) link called targetpath from tarinfo.
1342
If it cannot be created (platform limitation), we try
1343
to make a copy of the referenced file instead of a link.
1345
linkpath = tarinfo.linkname
1346
self._dbg(1, " -> %s" % linkpath)
1349
os.symlink(linkpath, targetpath)
1351
os.link(linkpath, targetpath)
1352
except AttributeError:
1354
linkpath = os.path.join(os.path.dirname(tarinfo.name),
1356
linkpath = normpath(linkpath)
1359
self._extract_member(self.getmember(linkpath), targetpath)
1360
except (EnvironmentError, KeyError), e:
1361
linkpath = os.path.normpath(linkpath)
1363
shutil.copy2(linkpath, targetpath)
1364
except EnvironmentError, e:
1365
raise IOError, "link could not be created"
1367
def _chown(self, tarinfo, targetpath):
1368
"""Set owner of targetpath according to tarinfo.
1370
if pwd and os.geteuid() == 0:
1371
# We have to be root to do so.
1373
g = grp.getgrnam(tarinfo.gname)[2]
1376
g = grp.getgrgid(tarinfo.gid)[2]
1380
u = pwd.getpwnam(tarinfo.uname)[2]
1383
u = pwd.getpwuid(tarinfo.uid)[2]
1387
if tarinfo.issym() and hasattr(os, "lchown"):
1388
os.lchown(targetpath, u, g)
1390
os.chown(targetpath, u, g)
1391
except EnvironmentError, e:
1392
raise TarError, "could not change owner"
1394
def _chmod(self, tarinfo, targetpath):
1395
"""Set file permissions of targetpath according to tarinfo.
1398
os.chmod(targetpath, tarinfo.mode)
1399
except EnvironmentError, e:
1400
raise TarError, "could not change mode"
1402
def _utime(self, tarinfo, targetpath):
1403
"""Set modification time of targetpath according to tarinfo.
1406
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1407
except EnvironmentError, e:
1408
raise TarError, "could not change modification time"
1410
def _getmember(self, name, tarinfo=None):
1411
"""Find an archive member by name from bottom to top.
1412
If tarinfo is given, it is used as the starting point.
1415
end = len(self.members)
1417
end = self.members.index(tarinfo)
1419
for i in xrange(end - 1, -1, -1):
1420
if name == self.membernames[i]:
1421
return self.members[i]
1424
"""Read through the entire archive file and look for readable
1428
tarinfo = self.next()
1433
def _check(self, mode):
1434
"""Check if TarFile is still open, and if the operation's mode
1435
corresponds to TarFile's mode.
1437
if self.fileobj is None:
1438
raise IOError, "%s is closed" % self.__class__.__name__
1439
if self._mode not in mode:
1440
raise IOError, "bad operation for mode %r" % self._mode
1443
"""Provide an iterator object.
1446
return iter(self.members)
1448
return TarIter(self)
1450
def _buftoinfo(self, buf):
    """Transform a 512 byte header block into a TarInfo object.

       The numeric fields are stored as octal strings and decoded
       with base 8.  If the device number fields cannot be decoded,
       both devmajor and devminor are set to 0.  A checksum mismatch
       is only reported through self._dbg(); the TarInfo object is
       returned regardless.
    """
    tarinfo = TarInfo()
    tarinfo.name = nts(buf[0:100])
    tarinfo.mode = int(buf[100:108], 8)
    tarinfo.uid = int(buf[108:116], 8)
    tarinfo.gid = int(buf[116:124], 8)
    tarinfo.size = long(buf[124:136], 8)
    tarinfo.mtime = long(buf[136:148], 8)
    tarinfo.chksum = int(buf[148:156], 8)
    tarinfo.type = buf[156:157]
    tarinfo.linkname = nts(buf[157:257])
    tarinfo.uname = nts(buf[265:297])
    tarinfo.gname = nts(buf[297:329])
    try:
        tarinfo.devmajor = int(buf[329:337], 8)
        tarinfo.devminor = int(buf[337:345], 8)
    except ValueError:
        # Reset BOTH numbers; the former code assigned devmajor twice
        # and never touched devminor.
        tarinfo.devmajor = tarinfo.devminor = 0

    # Strip the NUL padding from the prefix field.  A prefix without
    # embedded NULs is joined in front of the name; anything else is
    # kept verbatim and left out of the name.
    prefix = buf[345:500]
    while prefix and prefix[-1] == NUL:
        prefix = prefix[:-1]
    if len(prefix.split(NUL)) == 1:
        tarinfo.prefix = prefix
        tarinfo.name = normpath(os.path.join(tarinfo.prefix, tarinfo.name))
    else:
        tarinfo.prefix = buf[345:500]

    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum\n")
    return tarinfo
1485
def _proc_gnulong(self, tarinfo, type):
    """Evaluate the blocks that hold a GNU longname or longlink.

       tarinfo.size bytes of data are read in whole blocks, then the
       header that follows them is read and converted.  The long
       string replaces the name (GNUTYPE_LONGNAME) or the linkname
       (GNUTYPE_LONGLINK) of that following member, which is returned.
    """
    # Collect the data blocks that carry the long string.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    data = "".join(chunks)

    name = None
    linkname = None
    if type == GNUTYPE_LONGNAME:
        name = nts(data)
    if type == GNUTYPE_LONGLINK:
        linkname = nts(data)

    # The header of the member the long string belongs to comes next.
    header = self.fileobj.read(BLOCKSIZE)
    tarinfo = self._buftoinfo(header)
    if name is not None:
        tarinfo.name = name
    if linkname is not None:
        tarinfo.linkname = linkname
    self.offset += BLOCKSIZE
    return tarinfo
1513
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       It consists of an extended tar header, with the length
       of the longname (including its terminating NUL) as size,
       followed by data blocks, which contain the longname as a
       null terminated string.  self.offset is advanced by the
       header block and by every data block written.
    """
    # Store the terminator explicitly.  Otherwise a name whose length
    # is an exact multiple of BLOCKSIZE ends up without any NUL.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header
    header = tarinfo.getheader()
    self.fileobj.write(header)
    # The header is part of the archive as well and must be counted.
    self.offset += len(header)

    # write name blocks
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        # pad the last block with NULs
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1535
def _proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       Every sparse struct is 24 bytes long: a 12 byte octal offset
       followed by a 12 byte octal length.  The structs are turned
       into a _ringbuffer of _data and _hole sections, which is
       stored as tarinfo.sparse.  The original file size read from
       the header is returned.
    """
    buf = tarinfo.getheader()
    sections = _ringbuffer()
    lastpos = long(0)
    realpos = long(0)

    # There are 4 possible sparse structs in the
    # first header; they end right in front of byte 482.
    pos = 386
    for unused in range(4):
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            break
        if offset > lastpos:
            sections.append(_hole(lastpos, offset - lastpos))
        sections.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        # An extra header holds 21 structs in front of byte 504.
        pos = 0
        for unused in range(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sections.append(_hole(lastpos, offset - lastpos))
            sections.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # A trailing hole fills the file up to its original size.
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sections
    return origsize
1587
def _dbg(self, level, msg):
1588
if level <= self.debug:
1589
sys.stdout.write(msg)
1595
for tarinfo in TarFile(...):
1599
def __init__(self, tarfile):
1600
"""Construct a TarIter instance.
1602
self.tarfile = tarfile
1604
"""Return iterator object.
1608
"""Return the next item using TarFile's next() method.
1609
When all members have been read, set TarFile as _loaded.
1611
tarinfo = self.tarfile.next()
1613
self.tarfile._loaded = True
1618
# Helper classes for sparse file support
1620
"""Base class for _data and _hole.
1622
def __init__(self, offset, size):
1623
self.offset = offset
1625
def __contains__(self, offset):
1626
return self.offset <= offset < self.offset + self.size
1628
class _data(_section):
    """A section of a sparse file that holds real data.

       realpos is kept next to the logical offset and size of the
       section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1635
class _hole(_section):
    """A section of a sparse file for which no data is stored.
    """
    # Only the type matters: offset and size come from _section.
    pass
1640
class _ringbuffer(list):
1641
"""Ringbuffer class which increases performance
1642
over a regular list.
1646
def find(self, offset):
1653
if idx == len(self):
1662
"""File-like object for reading an archive member.
1663
Is returned by TarFile.extractfile(). Support for
1664
sparse files included.
1667
def __init__(self, tarfile, tarinfo):
    """Set up a read-only file object for the member tarinfo, which
       reads through the file object of tarfile.
    """
    self.fileobj = tarfile.fileobj
    self.name = tarinfo.name
    self.mode = "r"
    self.closed = False
    self.offset = tarinfo.offset_data
    self.size = tarinfo.size
    self.pos = long(0)
    self.linebuffer = ""
    # Choose the read() implementation that fits the member.
    if not tarinfo.issparse():
        self.read = self._readnormal
    else:
        self.sparse = tarinfo.sparse
        self.read = self._readsparse
1682
def readline(self, size=-1):
    """Read one line of roughly size bytes; a negative size means a
       whole line.  Data is fetched with self.read() in pieces of at
       most 100 bytes and what follows the line stays in
       self.linebuffer, so readline() and read() must not be mixed
       up (!).  Trailing carriage returns are dropped from a line
       that ended in a newline.
    """
    if size < 0:
        size = sys.maxint

    nl = self.linebuffer.find("\n")
    if nl >= 0:
        nl = min(nl, size)
    else:
        size -= len(self.linebuffer)
        while nl < 0:
            chunk = self.read(min(size, 100))
            if not chunk:
                break
            self.linebuffer += chunk
            size -= len(chunk)
            if size <= 0:
                break
            nl = self.linebuffer.find("\n")
        if nl == -1:
            # No newline available: hand out what has been buffered.
            rest = self.linebuffer
            self.linebuffer = ""
            return rest

    line = self.linebuffer[:nl]
    self.linebuffer = self.linebuffer[nl + 1:]
    while line[-1:] == "\r":
        line = line[:-1]
    return line + "\n"
1714
def readlines(self):
    """Return the remaining lines as a list, fetched one at a time
       with readline() until it returns an empty string.
    """
    lines = []
    line = self.readline()
    while line:
        lines.append(line)
        line = self.readline()
    return lines
1724
def _readnormal(self, size=None):
1725
"""Read operation for regular files.
1728
raise ValueError, "file is closed"
1729
self.fileobj.seek(self.offset + self.pos)
1730
bytesleft = self.size - self.pos
1732
bytestoread = bytesleft
1734
bytestoread = min(size, bytesleft)
1735
self.pos += bytestoread
1736
return self.fileobj.read(bytestoread)
1738
def _readsparse(self, size=None):
1739
"""Read operation for sparse files.
1742
raise ValueError, "file is closed"
1745
size = self.size - self.pos
1749
buf = self._readsparsesection(size)
1756
def _readsparsesection(self, size):
    """Read from the single sparse section at the current position.

       At most size bytes are returned and never more than the
       section has left.  A _data section is read from the underlying
       file, any other section yields NUL bytes.  An empty string is
       returned when no section contains the current position.
    """
    section = self.sparse.find(self.pos)
    if section is None:
        return ""

    start = self.pos
    toread = min(size, section.offset + section.size - start)
    self.pos = start + toread
    if not isinstance(section, _data):
        return NUL * toread

    # Translate the logical position into the position of the data.
    realpos = section.realpos + start - section.offset
    self.fileobj.seek(self.offset + realpos)
    return self.fileobj.read(toread)
1775
"""Return the current file position.
1779
def seek(self, pos, whence=0):
    """Move the file position and discard the readline() buffer.

       whence 0 counts from the start, 1 from the current position
       and 2 from the end of the file.  The result is always limited
       to the range 0..self.size.  Any other whence leaves the
       position untouched.
    """
    self.linebuffer = ""
    if whence == 0:
        target = pos
    elif whence == 1:
        target = self.pos + pos
    elif whence == 2:
        target = self.size + pos
    else:
        return
    self.pos = max(min(target, self.size), 0)
1794
"""Close the file object.
1799
#---------------------------------------------
1800
# zipfile compatible TarFile class
1802
# for details consult zipfile's documentation
1803
#---------------------------------------------
1806
# Compression constants; each carries the value of its zipfile
# counterpart named in the trailing comment.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
1808
class TarFileCompat:
1809
"""TarFile class compatible with standard module zipfile's
1812
def __init__(self, file, mode="r", compression=TAR_PLAIN):
1813
if not isinstance(file, (type(''), type(u''))) and hasattr(file, 'read'):
1819
self.compression = compression
1821
if compression == TAR_PLAIN:
1822
self.tarfile = open(file, mode, fileobj=fileobj)
1823
elif compression == TAR_GZIPPED:
1825
self.tarfile = gzopen(file, mode, fileobj=fileobj)
1827
self.tarfile = gzopen(file, mode, fileobj=fileobj)
1829
raise ValueError, "unknown compression constant"
1830
if mode[0:1] == "r":
1832
members = self.tarfile.getmembers()
1833
for i in range(len(members)):
1836
m.file_size = m.size
1837
m.date_time = time.gmtime(m.mtime)[:6]
1840
return map(lambda m: m.name, self.infolist())
1842
return filter(lambda m: m.type in REGULAR_TYPES,
1843
self.tarfile.getmembers())
1848
def getinfo(self, name):
    """Return the archive member called name, as delivered by the
       getmember() method of the wrapped TarFile.
    """
    archive = self.tarfile
    return archive.getmember(name)
1850
def read(self, name):
    """Return the complete contents of the archive member called name.
    """
    archive = self.tarfile
    member = archive.getmember(name)
    return archive.extractfile(member).read()
1852
def write(self, filename, arcname=None, compress_type=None):
    """Add the file filename to the archive under the name arcname.

       compress_type is accepted to match the signature of
       zipfile.ZipFile.write() and is ignored.
    """
    archive = self.tarfile
    archive.add(filename, arcname)
1854
def writestr(self, zinfo, bytes):
    """Add the string bytes to the archive as the member described
       by zinfo.

       The zipfile-style attributes filename, file_size and date_time
       of zinfo are copied to name, size and mtime, the attribute
       names used by the rest of this module.
    """
    import calendar
    import cStringIO
    # date_time is a time tuple in UTC; mtime is seconds since the epoch.
    zinfo.mtime = calendar.timegm(zinfo.date_time)
    zinfo.size = zinfo.file_size
    zinfo.name = zinfo.filename
    payload = cStringIO.StringIO(bytes)
    self.tarfile.addfile(zinfo, payload)
1861
self.tarfile.close()
1862
#class TarFileCompat
1864
if __name__ == "__main__":
1865
# a "light-weight" implementation of GNUtar ;-)
1867
Usage: %s [options] [files]
1869
-h display this help message
1873
-r append to an existing archive
1877
-t list archive contents
1879
-f FILENAME use archive FILENAME, else STDOUT (-c)
1881
-z filter archive through gzip
1883
-C DIRNAME with opt -x: extract to directory DIRNAME
1884
with opt -c, -r: put files to archive under DIRNAME
1890
--posix create a POSIX 1003.1-1990 compliant archive
1892
wildcards *, ?, [seq], [!seq] are accepted.
1897
opts, args = getopt.getopt(sys.argv[1:], "htcxrzjf:C:vq", ("posix",))
1898
except getopt.GetoptError, e:
1911
if o == "-t": mode = "l" # list archive
1912
if o == "-c": mode = "w" # write to archive
1913
if o == "-r": mode = "a" # append to archive
1914
if o == "-x": mode = "r" # extract from archive
1915
if o == "-f": file = a # specify filename else use stdout
1916
if o == "-C": dir = a # change to dir
1917
if o == "-z": comp = "gz" # filter through gzip
1918
if o == "-j": comp = "bz2" # filter through bzip2
1919
if o == "-v": debug = 2 # verbose mode
1920
if o == "-q": debug = 0 # quiet mode
1921
if o == "--posix": posix = True # create posix compatible archive
1922
if o == "-h": # help message
1930
mode = "%s:%s" % (mode, comp)
1932
if not file or file == "-":
1937
# If under Win32, set stdout to binary.
1940
msvcrt.setmode(1, os.O_BINARY)
1943
tarfile = open("sys.stdout.tar", "%s:%s" % (mode, comp), sys.stdout)
1946
tarfile = open(file, "r" + mode[1:])
1948
tarfile = open(file, mode)
1950
tarfile.debug = debug
1951
tarfile.posix = posix
1956
for tarinfo in tarfile:
1957
tarfile.extract(tarinfo, dir)
1958
elif mode[0] == "l":
1962
files = glob.glob(arg)
1966
except Exception, e: