#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives."""

# Version-control keyword strings and authorship metadata for the module.
__version__ = "$Revision: 70525 $"

__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
# The month abbreviation is written with an escape, like the names in
# __author__ and __credits__, so the literal does not depend on the
# encoding of the source file.
__date__ = "$Date: 2009-03-22 21:34:29 +0100 (So, 22. M\u00e4r 2009) $"
__cvsid__ = "$Id: tarfile.py 70525 2009-03-22 20:34:29Z lars.gustaebel $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
53
# Refuse to be imported on classic MacOS, where the pathname handling of
# this module is known to be wrong.
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")
65
# from tarfile import *
66
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
from builtins import open as _open # Since 'open' is TarFile.open
70
#---------------------------------------------------------
72
#---------------------------------------------------------
73
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic values fill the 8 byte magic + version area of a header, so
# they must be exactly 8 bytes long. The GNU variant is "ustar" followed
# by TWO spaces and a NUL; it is spelled as a concatenation so that the
# significant double space cannot be lost to whitespace normalisation.
GNU_MAGIC = b"ustar" + b" " * 2 + b"\0"     # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
106
#---------------------------------------------------------
108
#---------------------------------------------------------
109
# File types that tarfile supports:
110
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
111
SYMTYPE, DIRTYPE, FIFOTYPE,
112
CONTTYPE, CHRTYPE, BLKTYPE,
113
GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
116
# File types that will be treated as a regular file.
117
# Member types whose payload is handled like the data of a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
120
# File types that are part of the GNU tar format.
121
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
124
# Fields from a pax header that override a TarInfo attribute.
125
# Pax header keywords that override the TarInfo attribute of the same
# meaning; any other keyword is not applied to the TarInfo object.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")
128
# Fields in a pax header that are numbers, all other fields
129
# are treated as strings.
130
PAX_NUMBER_FIELDS = {
139
#---------------------------------------------------------
140
# Bits used in the mode field, values in octal.
141
#---------------------------------------------------------
142
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved

# Owner / group / other permission bits.
TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other
163
#---------------------------------------------------------
165
#---------------------------------------------------------
166
ENCODING = sys.getfilesystemencoding()
170
#---------------------------------------------------------
171
# Some useful functions
172
#---------------------------------------------------------
174
def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

    s is encoded with the given encoding and error handler. The result is
    always exactly length bytes long: a longer value is cut off after
    length bytes (and then carries no terminating NUL), a shorter value is
    padded with NUL bytes.
    """
    s = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    return s[:length] + (length - len(s)) * NUL
180
def nts(s, encoding, errors):
181
"""Convert a null-terminated bytes object to a string.
186
return s.decode(encoding, errors)
189
"""Convert a number field to a python number.
191
# There are two possible encodings for a number field, see
193
if s[0] != chr(0o200):
195
n = int(nts(s, "ascii", "strict") or "0", 8)
197
raise HeaderError("invalid header")
200
for i in range(len(s) - 1):
205
def itn(n, digits=8, format=DEFAULT_FORMAT):
206
"""Convert a python number to a number field.
208
# POSIX 1003.1-1988 requires numbers to be encoded as a string of
209
# octal digits followed by a null-byte, this allows values up to
210
# (8**(digits-1))-1. GNU tar allows storing numbers greater than
211
# that if necessary. A leading 0o200 byte indicates this particular
212
# encoding, the following digits-1 bytes are a big-endian
213
# representation. This allows values up to (256**(digits-1))-1.
214
if 0 <= n < 8 ** (digits - 1):
215
s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
217
if format != GNU_FORMAT or n >= 256 ** (digits - 1):
218
raise ValueError("overflow in number field")
221
# XXX We mimic GNU tar's behaviour with negative numbers,
222
# this could raise OverflowError.
223
n = struct.unpack("L", struct.pack("l", n))[0]
226
for i in range(digits - 1):
227
s.insert(0, n & 0o377)
232
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       buf must hold at least 512 bytes, otherwise struct.error is
       raised. Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # Bytes 0..147 and 156..511 are summed; "8x" skips the 8 byte chksum
    # field, which is accounted for by the constant 256 (8 spaces * 32).
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
245
def copyfileobj(src, dst, length=None):
246
"""Copy length bytes from fileobj src to fileobj dst.
247
If length is None, copy the entire content.
252
shutil.copyfileobj(src, dst)
256
blocks, remainder = divmod(length, BUFSIZE)
257
for b in range(blocks):
258
buf = src.read(BUFSIZE)
259
if len(buf) < BUFSIZE:
260
raise IOError("end of file reached")
264
buf = src.read(remainder)
265
if len(buf) < remainder:
266
raise IOError("end of file reached")
280
((TUEXEC|TSUID, "s"),
286
((TGEXEC|TSGID, "s"),
292
((TOEXEC|TSVTX, "t"),
298
"""Convert a file's mode to a string of the form
300
Used by TarFile.list()
303
for table in filemode_table:
304
for bit, char in table:
305
if mode & bit == bit:
313
normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
315
normpath = os.path.normpath
317
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
336
#---------------------------
337
# internal stream interface
338
#---------------------------
340
"""Low-level file object. Supports reading and writing.
341
It is used instead of a regular file object for streaming
345
def __init__(self, name, mode):
348
"w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
350
if hasattr(os, "O_BINARY"):
352
self.fd = os.open(name, mode)
357
def read(self, size):
358
return os.read(self.fd, size)
364
"""Class that serves as an adapter between TarFile and
365
a stream-like object. The stream-like object only
366
needs to have a read() or write() method and is accessed
367
blockwise. Use of gzip or bzip2 compression is possible.
368
A stream-like object could be for example: sys.stdin,
369
sys.stdout, a socket, a tape device etc.
371
_Stream is intended to be used only internally.
374
def __init__(self, name, mode, comptype, fileobj, bufsize):
375
"""Construct a _Stream object.
377
self._extfileobj = True
379
fileobj = _LowLevelFile(name, mode)
380
self._extfileobj = False
383
# Enable transparent compression detection for the
385
fileobj = _StreamProxy(fileobj)
386
comptype = fileobj.getcomptype()
388
self.name = name or ""
390
self.comptype = comptype
391
self.fileobj = fileobj
392
self.bufsize = bufsize
401
raise CompressionError("zlib module is not available")
403
self.crc = zlib.crc32("")
407
self._init_write_gz()
409
if comptype == "bz2":
413
raise CompressionError("bz2 module is not available")
416
self.cmp = bz2.BZ2Decompressor()
418
self.cmp = bz2.BZ2Compressor()
421
if hasattr(self, "closed") and not self.closed:
424
def _init_write_gz(self):
425
"""Initialize for writing with gzip compression.
427
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
428
-self.zlib.MAX_WBITS,
429
self.zlib.DEF_MEM_LEVEL,
431
timestamp = struct.pack("<L", int(time.time()))
432
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
433
if self.name.endswith(".gz"):
434
self.name = self.name[:-3]
435
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
436
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
439
"""Write string s to the stream.
441
if self.comptype == "gz":
442
self.crc = self.zlib.crc32(s, self.crc)
444
if self.comptype != "tar":
445
s = self.cmp.compress(s)
448
def __write(self, s):
449
"""Write string s to the stream if a whole new block
450
is ready to be written.
453
while len(self.buf) > self.bufsize:
454
self.fileobj.write(self.buf[:self.bufsize])
455
self.buf = self.buf[self.bufsize:]
458
"""Close the _Stream object. No operation should be
459
done on it afterwards.
464
if self.mode == "w" and self.comptype != "tar":
465
self.buf += self.cmp.flush()
467
if self.mode == "w" and self.buf:
468
self.fileobj.write(self.buf)
470
if self.comptype == "gz":
471
# The native zlib crc is an unsigned 32-bit integer, but
472
# the Python wrapper implicitly casts that to a signed C
473
# long. So, on a 32-bit box self.crc may "look negative",
474
# while the same crc on a 64-bit box may "look positive".
475
# To avoid irksome warnings from the `struct` module, force
476
# it to look positive on all boxes.
477
self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
478
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
480
if not self._extfileobj:
485
def _init_read_gz(self):
486
"""Initialize for reading a gzip compressed fileobj.
488
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
491
# taken from gzip.GzipFile with some alterations
492
if self.__read(2) != b"\037\213":
493
raise ReadError("not a gzip file")
494
if self.__read(1) != b"\010":
495
raise CompressionError("unsupported compression method")
497
flag = ord(self.__read(1))
501
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
506
if not s or s == NUL:
511
if not s or s == NUL:
517
"""Return the stream's file pointer position.
521
def seek(self, pos=0):
522
"""Set the stream's file pointer to pos. Negative seeking
525
if pos - self.pos >= 0:
526
blocks, remainder = divmod(pos - self.pos, self.bufsize)
527
for i in range(blocks):
528
self.read(self.bufsize)
531
raise StreamError("seeking backwards is not allowed")
534
def read(self, size=None):
535
"""Return the next size number of bytes from the stream.
536
If size is not defined, return all bytes of the stream
542
buf = self._read(self.bufsize)
548
buf = self._read(size)
552
def _read(self, size):
553
"""Return size bytes from the stream.
555
if self.comptype == "tar":
556
return self.__read(size)
560
buf = self.__read(self.bufsize)
564
buf = self.cmp.decompress(buf)
566
raise ReadError("invalid compressed data")
569
buf = self.dbuf[:size]
570
self.dbuf = self.dbuf[size:]
573
def __read(self, size):
574
"""Return size bytes from stream. If internal buffer is empty,
575
read another block from the stream.
579
buf = self.fileobj.read(self.bufsize)
584
buf = self.buf[:size]
585
self.buf = self.buf[size:]
589
class _StreamProxy(object):
590
"""Small proxy class that enables transparent compression
591
detection for the Stream interface (mode 'r|*').
594
def __init__(self, fileobj):
595
self.fileobj = fileobj
596
self.buf = self.fileobj.read(BLOCKSIZE)
598
def read(self, size):
599
self.read = self.fileobj.read
602
def getcomptype(self):
603
if self.buf.startswith(b"\037\213\010"):
605
if self.buf.startswith(b"BZh91"):
613
class _BZ2Proxy(object):
614
"""Small proxy class that enables external file object
615
support for "r:bz2" and "w:bz2" modes. This is actually
616
a workaround for a limitation in bz2 module's BZ2File
617
class which (unlike gzip.GzipFile) has no support for
618
a file object argument.
621
blocksize = 16 * 1024
623
def __init__(self, fileobj, mode):
624
self.fileobj = fileobj
626
self.name = getattr(self.fileobj, "name", None)
633
self.bz2obj = bz2.BZ2Decompressor()
637
self.bz2obj = bz2.BZ2Compressor()
639
def read(self, size):
642
raw = self.fileobj.read(self.blocksize)
645
data = self.bz2obj.decompress(raw)
649
buf = self.buf[:size]
650
self.buf = self.buf[size:]
657
self.read(pos - self.pos)
662
def write(self, data):
663
self.pos += len(data)
664
raw = self.bz2obj.compress(data)
665
self.fileobj.write(raw)
669
raw = self.bz2obj.flush()
670
self.fileobj.write(raw)
673
#------------------------
674
# Extraction file object
675
#------------------------
676
class _FileInFile(object):
677
"""A thin wrapper around an existing file object that
678
provides a part of its data as an individual file
682
def __init__(self, fileobj, offset, size, sparse=None):
683
self.fileobj = fileobj
690
if not hasattr(self.fileobj, "seekable"):
691
# XXX gzip.GzipFile and bz2.BZ2File
693
return self.fileobj.seekable()
696
"""Return the current file position.
700
def seek(self, position):
701
"""Seek to a position in the file.
703
self.position = position
705
def read(self, size=None):
706
"""Read data from the file.
709
size = self.size - self.position
711
size = min(size, self.size - self.position)
713
if self.sparse is None:
714
return self.readnormal(size)
716
return self.readsparse(size)
718
def readnormal(self, size):
719
"""Read operation for regular files.
721
self.fileobj.seek(self.offset + self.position)
722
self.position += size
723
return self.fileobj.read(size)
725
def readsparse(self, size):
726
"""Read operation for sparse files.
730
buf = self.readsparsesection(size)
737
def readsparsesection(self, size):
738
"""Read a single section of a sparse file.
740
section = self.sparse.find(self.position)
745
size = min(size, section.offset + section.size - self.position)
747
if isinstance(section, _data):
748
realpos = section.realpos + self.position - section.offset
749
self.fileobj.seek(self.offset + realpos)
750
self.position += size
751
return self.fileobj.read(size)
753
self.position += size
758
class ExFileObject(object):
759
"""File-like object for reading an archive member.
760
Is returned by TarFile.extractfile().
764
def __init__(self, tarfile, tarinfo):
765
self.fileobj = _FileInFile(tarfile.fileobj,
769
self.name = tarinfo.name
772
self.size = tarinfo.size
784
return self.fileobj.seekable()
786
def read(self, size=None):
787
"""Read at most size bytes from the file. If size is not
788
present or None, read all data until EOF is reached.
791
raise ValueError("I/O operation on closed file")
799
buf = self.buffer[:size]
800
self.buffer = self.buffer[size:]
803
buf += self.fileobj.read()
805
buf += self.fileobj.read(size - len(buf))
807
self.position += len(buf)
810
# XXX TextIOWrapper uses the read1() method.
813
def readline(self, size=-1):
814
"""Read one entire line from the file. If size is present
815
and non-negative, return a string with at most that
816
size, which may be an incomplete line.
819
raise ValueError("I/O operation on closed file")
821
pos = self.buffer.find(b"\n") + 1
825
buf = self.fileobj.read(self.blocksize)
827
if not buf or b"\n" in buf:
828
pos = self.buffer.find(b"\n") + 1
831
pos = len(self.buffer)
837
buf = self.buffer[:pos]
838
self.buffer = self.buffer[pos:]
839
self.position += len(buf)
843
"""Return a list with all remaining lines.
847
line = self.readline()
853
"""Return the current file position.
856
raise ValueError("I/O operation on closed file")
860
def seek(self, pos, whence=os.SEEK_SET):
861
"""Seek to a position in the file.
864
raise ValueError("I/O operation on closed file")
866
if whence == os.SEEK_SET:
867
self.position = min(max(pos, 0), self.size)
868
elif whence == os.SEEK_CUR:
870
self.position = max(self.position + pos, 0)
872
self.position = min(self.position + pos, self.size)
873
elif whence == os.SEEK_END:
874
self.position = max(min(self.size + pos, self.size), 0)
876
raise ValueError("Invalid argument")
879
self.fileobj.seek(self.position)
882
"""Close the file object.
887
"""Get an iterator over the file's lines.
890
line = self.readline()
899
class TarInfo(object):
900
"""Informational class which holds the details about an
901
archive member given by a tar header block.
902
TarInfo objects are returned by TarFile.getmember(),
903
TarFile.getmembers() and TarFile.gettarinfo() and are
904
usually created internally.
907
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
908
"chksum", "type", "linkname", "uname", "gname",
909
"devmajor", "devminor",
910
"offset", "offset_data", "pax_headers", "sparse",
911
"tarfile", "_sparse_structs", "_link_target")
913
def __init__(self, name=""):
    """Construct a TarInfo object. name is the optional name
       of the member.

       All other attributes start out with the defaults assigned below.
    """
    self.name = name        # member name
    self.mode = 0o644       # file permissions
    self.uid = 0            # user id
    self.gid = 0            # group id
    self.size = 0           # file size
    self.mtime = 0          # modification time
    self.chksum = 0         # header checksum
    self.type = REGTYPE     # member type
    self.linkname = ""      # link name
    self.uname = "root"     # user name
    self.gname = "root"     # group name
    self.devmajor = 0       # device major number
    self.devminor = 0       # device minor number

    self.offset = 0         # the tar header starts here
    self.offset_data = 0    # the file's data starts here

    self.sparse = None      # sparse member information
    self.pax_headers = {}   # pax header information
937
# In pax headers the "name" and "linkname" field are called
938
# "path" and "linkpath".
941
def _setpath(self, name):
943
path = property(_getpath, _setpath)
945
def _getlinkpath(self):
947
def _setlinkpath(self, linkname):
948
self.linkname = linkname
949
linkpath = property(_getlinkpath, _setlinkpath)
952
return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
955
"""Return the TarInfo's attributes as a dictionary.
958
"name": normpath(self.name),
959
"mode": self.mode & 0o7777,
964
"chksum": self.chksum,
966
"linkname": normpath(self.linkname) if self.linkname else "",
969
"devmajor": self.devmajor,
970
"devminor": self.devminor
973
if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Return a tar header as a bytes object made of 512 byte blocks.

       format selects the header flavour and must be one of USTAR_FORMAT,
       GNU_FORMAT or PAX_FORMAT; any other value raises ValueError.
       encoding and errors are passed on to the ustar and GNU header
       builders; the pax builder does not take them.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info)
    raise ValueError("invalid format")
992
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block.

       info is the member's attribute dictionary; it is modified in place
       (magic, and prefix/name for long names). Raises ValueError if the
       link name exceeds LENGTH_LINK or the name cannot be split.
    """
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    if len(info["name"]) > LENGTH_NAME:
        # A name that does not fit the name field is spread over the
        # prefix and name fields.
        info["prefix"], info["name"] = self._posix_split_name(info["name"])

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
1005
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence.

       Over-long link names and names are emitted first as GNU long
       header sequences, followed by the regular header block for info.
       info is modified in place (magic).
    """
    info["magic"] = GNU_MAGIC

    buf = b""
    if len(info["linkname"]) > LENGTH_LINK:
        buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

    if len(info["name"]) > LENGTH_NAME:
        buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

    return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1019
def create_pax_header(self, info):
1020
"""Return the object as a ustar header block. If it cannot be
1021
represented this way, prepend a pax extended header sequence
1022
with supplement information.
1024
info["magic"] = POSIX_MAGIC
1025
pax_headers = self.pax_headers.copy()
1027
# Test string fields for values that exceed the field length or cannot
1028
# be represented in ASCII encoding.
1029
for name, hname, length in (
1030
("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1031
("uname", "uname", 32), ("gname", "gname", 32)):
1033
if hname in pax_headers:
1034
# The pax header has priority.
1037
# Try to encode the string as ASCII.
1039
info[name].encode("ascii", "strict")
1040
except UnicodeEncodeError:
1041
pax_headers[hname] = info[name]
1044
if len(info[name]) > length:
1045
pax_headers[hname] = info[name]
1047
# Test number fields for values that exceed the field limit or values
1048
# that like to be stored as float.
1049
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1050
if name in pax_headers:
1051
# The pax header has priority. Avoid overflow.
1056
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1057
pax_headers[name] = str(val)
1060
# Create a pax extended header if necessary.
1062
buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
1066
return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1069
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return the object as a pax global header block sequence.

       pax_headers is the mapping of keyword/value pairs to store; the
       work is delegated to _create_pax_generic_header() with XGLTYPE.
    """
    return cls._create_pax_generic_header(pax_headers, XGLTYPE)
1074
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

       The split happens at the last "/" found within the first
       LENGTH_PREFIX + 1 characters; that slash belongs to neither part.
       Returns the tuple (prefix, name). Raises ValueError if there is no
       usable slash or the remaining name part exceeds LENGTH_NAME.
    """
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1)
    # cut is -1 without any slash and 0 for a leading slash only; both
    # leave the prefix empty, which is rejected below.
    prefix = name[:max(cut, 0)]
    name = name[cut + 1:]

    if not prefix or len(name) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, name
1090
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       String fields are encoded with encoding/errors. Keys missing from
       info fall back to the defaults given below. The result is one
       BLOCKSIZE bytes long bytes object with the checksum filled in.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # The checksum field is 8 bytes wide and has to hold spaces
        # while the checksum is being calculated.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", "root"), 32, encoding, errors),
        stn(info.get("gname", "root"), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # "%ds" pads the joined fields with NUL bytes up to a full block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field (offset 148 of
    # the block) with six octal digits and a NUL; the eighth byte keeps
    # its space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
1118
def _create_payload(payload):
1119
"""Return the string payload filled with zero bytes
1120
up to the next 512 byte border.
1122
blocks, remainder = divmod(len(payload), BLOCKSIZE)
1124
payload += (BLOCKSIZE - remainder) * NUL
1128
def _create_gnu_long_header(cls, name, type, encoding, errors):
1129
"""Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1132
name = name.encode(encoding, errors) + NUL
1135
info["name"] = "././@LongLink"
1137
info["size"] = len(name)
1138
info["magic"] = GNU_MAGIC
1140
# create extended header + name blocks.
1141
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1142
cls._create_payload(name)
1145
def _create_pax_generic_header(cls, pax_headers, type):
1146
"""Return a POSIX.1-2001 extended or global header sequence
1147
that contains a list of keyword, value pairs. The values
1151
for keyword, value in pax_headers.items():
1152
keyword = keyword.encode("utf8")
1153
value = value.encode("utf8")
1154
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1161
records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1163
# We use a hardcoded "././@PaxHeader" name like star does
1164
# instead of the one that POSIX recommends.
1166
info["name"] = "././@PaxHeader"
1168
info["size"] = len(records)
1169
info["magic"] = POSIX_MAGIC
1171
# Create pax header + record blocks.
1172
return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1173
cls._create_payload(records)
1176
def frombuf(cls, buf, encoding, errors):
1177
"""Construct a TarInfo object from a 512 byte bytes object.
1179
if len(buf) != BLOCKSIZE:
1180
raise HeaderError("truncated header")
1181
if buf.count(NUL) == BLOCKSIZE:
1182
raise HeaderError("empty header")
1184
chksum = nti(buf[148:156])
1185
if chksum not in calc_chksums(buf):
1186
raise HeaderError("bad checksum")
1189
obj.name = nts(buf[0:100], encoding, errors)
1190
obj.mode = nti(buf[100:108])
1191
obj.uid = nti(buf[108:116])
1192
obj.gid = nti(buf[116:124])
1193
obj.size = nti(buf[124:136])
1194
obj.mtime = nti(buf[136:148])
1196
obj.type = buf[156:157]
1197
obj.linkname = nts(buf[157:257], encoding, errors)
1198
obj.uname = nts(buf[265:297], encoding, errors)
1199
obj.gname = nts(buf[297:329], encoding, errors)
1200
obj.devmajor = nti(buf[329:337])
1201
obj.devminor = nti(buf[337:345])
1202
prefix = nts(buf[345:500], encoding, errors)
1204
# Old V7 tar format represents a directory as a regular
1205
# file with a trailing slash.
1206
if obj.type == AREGTYPE and obj.name.endswith("/"):
1209
# The old GNU sparse format occupies some of the unused
1210
# space in the buffer for up to 4 sparse structures.
1211
# Save the them for later processing in _proc_sparse().
1212
if obj.type == GNUTYPE_SPARSE:
1217
offset = nti(buf[pos:pos + 12])
1218
numbytes = nti(buf[pos + 12:pos + 24])
1221
structs.append((offset, numbytes))
1223
isextended = bool(buf[482])
1224
origsize = nti(buf[483:495])
1225
obj._sparse_structs = (structs, isextended, origsize)
1227
# Remove redundant slashes from directories.
1229
obj.name = obj.name.rstrip("/")
1231
# Reconstruct a ustar longname.
1232
if prefix and obj.type not in GNU_TYPES:
1233
obj.name = prefix + "/" + obj.name
1237
def fromtarfile(cls, tarfile):
1238
"""Return the next TarInfo object from TarFile object
1241
buf = tarfile.fileobj.read(BLOCKSIZE)
1244
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1245
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1246
return obj._proc_member(tarfile)
1248
#--------------------------------------------------------------------------
# The following are methods that are called depending on the type of a
# member. The entry point is _proc_member() which can be overridden in a
# subclass to add custom _proc_*() methods. A _proc_*() method MUST
# implement the following operations:
#
# 1. Set self.offset_data to the position where the data blocks begin,
#    if there is data that follows.
# 2. Set tarfile.offset to the position where the next member's header will
#    begin.
# 3. Return self or another valid TarInfo object.
1259
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.

       Returns whatever the selected _proc_*() method returns. Types
       without a dedicated method are handled by _proc_builtin().
    """
    if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if self.type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    return self._proc_builtin(tarfile)
1272
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.

       Records where the member's data starts, sets tarfile.offset to
       the position of the next header, applies the pax headers saved on
       tarfile and returns self.
    """
    self.offset_data = tarfile.fileobj.tell()
    offset = self.offset_data
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        offset += self._block(self.size)
    tarfile.offset = offset

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1289
def _proc_gnulong(self, tarfile):
1290
"""Process the blocks that hold a GNU longname
1293
buf = tarfile.fileobj.read(self._block(self.size))
1295
# Fetch the next header and process it.
1296
next = self.fromtarfile(tarfile)
1298
raise HeaderError("missing subsequent header")
1300
# Patch the TarInfo object from the next header with
1301
# the longname information.
1302
next.offset = self.offset
1303
if self.type == GNUTYPE_LONGNAME:
1304
next.name = nts(buf, tarfile.encoding, tarfile.errors)
1305
elif self.type == GNUTYPE_LONGLINK:
1306
next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1310
def _proc_sparse(self, tarfile):
1311
"""Process a GNU sparse header plus extra headers.
1313
# We already collected some sparse structures in frombuf().
1314
structs, isextended, origsize = self._sparse_structs
1315
del self._sparse_structs
1317
# Collect sparse structures from extended header blocks.
1319
buf = tarfile.fileobj.read(BLOCKSIZE)
1323
offset = nti(buf[pos:pos + 12])
1324
numbytes = nti(buf[pos + 12:pos + 24])
1327
structs.append((offset, numbytes))
1329
isextended = bool(buf[504])
1331
# Transform the sparse structures to something we can use
1333
self.sparse = _ringbuffer()
1336
for offset, numbytes in structs:
1337
if offset > lastpos:
1338
self.sparse.append(_hole(lastpos, offset - lastpos))
1339
self.sparse.append(_data(offset, numbytes, realpos))
1341
lastpos = offset + numbytes
1342
if lastpos < origsize:
1343
self.sparse.append(_hole(lastpos, origsize - lastpos))
1345
self.offset_data = tarfile.fileobj.tell()
1346
tarfile.offset = self.offset_data + self._block(self.size)
1347
self.size = origsize
1351
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
    POSIX.1-2001.

    The records of the header are parsed into a dictionary.  For a
    global header (XGLTYPE) they are merged into tarfile.pax_headers;
    otherwise they are applied to the following member only.  The
    TarInfo of the following header is returned (it may be None for
    a global header at the end of the archive).

    Raises HeaderError if an extended header is not followed by
    another header, or if a record declares a length of zero.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would never advance pos and loop forever.
            raise HeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    next = self.fromtarfile(tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        if next is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1409
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Replace fields with supplemental information from a previous
    pax extended or global header.

    Only keywords listed in PAX_FIELDS are copied onto the object.
    A copy of the complete dictionary is stored in self.pax_headers.
    `encoding' and `errors' are accepted for interface compatibility
    and are not used here.
    """
    for keyword, value in pax_headers.items():
        if keyword not in PAX_FIELDS:
            continue

        if keyword == "path":
            value = value.rstrip("/")

        if keyword in PAX_NUMBER_FIELDS:
            try:
                value = PAX_NUMBER_FIELDS[keyword](value)
            except ValueError:
                # Unparsable numbers fall back to zero.
                value = 0

        setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
1430
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
    e.g. _block(834) => 1024.
    """
    blocks, remainder = divmod(count, BLOCKSIZE)
    if remainder:
        # A partially filled block still occupies a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
1440
return self.type in REGULAR_TYPES
1444
return self.type == DIRTYPE
1446
return self.type == SYMTYPE
1448
return self.type == LNKTYPE
1450
return self.type == CHRTYPE
1452
return self.type == BLKTYPE
1454
return self.type == FIFOTYPE
1456
return self.type == GNUTYPE_SPARSE
1458
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1461
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, errors during extraction only appear
                                # in debug messages (if debug >= 1). If > 0,
                                # errors are passed to the caller as
                                # exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1487
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
    read from an existing archive, 'a' to append data to an existing
    file or 'w' to create a new file overwriting an existing one. `mode'
    defaults to 'r'.
    If `fileobj' is given, it is used for reading or writing data. If it
    can be determined, `mode' is overridden by `fileobj's mode.
    `fileobj' is not closed, when TarFile is closed.

    The remaining keyword arguments override the class-level defaults
    of the same name when they are not None.

    Raises ValueError for an invalid `mode'.
    """
    # Tuple membership, so that "" and multi-character strings such
    # as "ra" are rejected as well.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "replace"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # Read the first header right away, so that a file which is
            # not a tar archive is detected on open.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
                    break

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except BaseException:
        # Do not leak a file that was opened here when the archive
        # turns out to be unreadable.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1579
#--------------------------------------------------------------------------
1580
# Below are the classmethods which act as alternate constructors to the
1581
# TarFile class. The open() method is the only one that is needed for
1582
# public use; it is the "super"-constructor and is able to select an
1583
# adequate "sub"-constructor for a particular compression using the mapping
1586
# This concept allows one to subclass TarFile without losing the comfort of
1587
# the super-constructor. A sub-constructor is registered and made available
1588
# by adding it to the mapping in OPEN_METH.
1591
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
    an appropriate TarFile class.

    mode:
    'r' or 'r:*' open for reading with transparent compression
    'r:'         open for reading exclusively uncompressed
    'r:gz'       open for reading with gzip compression
    'r:bz2'      open for reading with bzip2 compression
    'a' or 'a:'  open for appending, creating the file if necessary
    'w' or 'w:'  open for writing without compression
    'w:gz'       open for writing with gzip compression
    'w:bz2'      open for writing with bzip2 compression

    'r|*'        open a stream of tar blocks with transparent compression
    'r|'         open an uncompressed stream of tar blocks for reading
    'r|gz'       open a gzip compressed stream of tar blocks
    'r|bz2'      open a bzip2 compressed stream of tar blocks
    'w|'         open an uncompressed stream for writing
    'w|gz'       open a gzip compressed stream for writing
    'w|bz2'      open a bzip2 compressed stream for writing

    Raises ValueError if neither `name' nor `fileobj' is given or the
    mode cannot be interpreted, CompressionError for an unknown
    compression type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file so that the next
                # opener starts from the same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Tuple membership: a substring test would let "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The _Stream wrapper belongs to the TarFile and is closed with it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
1664
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

    Raises ValueError if `mode' is not one of 'r', 'a' or 'w'.
    """
    # Tuple membership also rejects the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
1672
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for an invalid mode, CompressionError if the
    gzip module is unavailable and ReadError if the file is not a
    gzip file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file is ours, so that it can be closed
    # again if opening the archive fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj),
            **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except BaseException:
        if opened_here:
            fileobj.close()
        raise
    # The GzipFile wrapper belongs to the TarFile and is closed with it.
    t._extfileobj = False
    return t
1698
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for an invalid mode, CompressionError if the
    bz2 module is unavailable and ReadError if the file is not a
    bzip2 file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # The bz2 module reports truncated streams with EOFError.
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except BaseException:
        if opened_here:
            fileobj.close()
        raise
    # The decompressing wrapper belongs to the TarFile and is closed with it.
    t._extfileobj = False
    return t
1722
# All *open() methods are registered here.
1724
"tar": "taropen", # uncompressed tar
1725
"gz": "gzopen", # gzip compressed tar
1726
"bz2": "bz2open" # bzip2 compressed tar
1729
#--------------------------------------------------------------------------
1730
# The public methods which TarFile provides:
1733
"""Close the TarFile. In write-mode, two finishing zero blocks are
1734
appended to the archive.
1739
if self.mode in "aw":
1740
self.fileobj.write(NUL * (BLOCKSIZE * 2))
1741
self.offset += (BLOCKSIZE * 2)
1742
# fill up the end with zero-blocks
1743
# (like option -b20 for tar does)
1744
blocks, remainder = divmod(self.offset, RECORDSIZE)
1746
self.fileobj.write(NUL * (RECORDSIZE - remainder))
1748
if not self._extfileobj:
1749
self.fileobj.close()
1752
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
    found in the archive, KeyError is raised. If a member occurs more
    than once in the archive, its last occurrence is assumed to be the
    most up-to-date version.
    """
    tarinfo = self._getmember(name)
    if tarinfo is None:
        raise KeyError("filename %r not found" % name)
    return tarinfo
1763
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
    list has the same order as the members in the archive.
    """
    self._check()
    if not self._loaded:    # if we want to obtain a list of
        self._load()        # all members, we first have to
                            # scan the whole archive.
    return self.members
1774
"""Return the members of the archive as a list of their names. It has
1775
the same order as the list returned by getmembers().
1777
return [tarinfo.name for tarinfo in self.getmembers()]
1779
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
    object `fileobj' (using os.fstat on its file descriptor). You can
    modify some of the TarInfo's attributes before you add it using
    addfile(). If given, `arcname' specifies an alternative name for the
    file in the archive.

    Returns None if the file is of a type that cannot be archived.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    _, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and statres.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only members stored as regular files carry data; a hard link to
    # an already archived file has no data blocks of its own.
    if member_type == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            # No passwd entry for this uid; leave uname unset.
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            # No group entry for this gid; leave gname unset.
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1878
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
    the names of the members are printed. If it is True, an `ls -l'-like
    output is produced.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            print(filemode(tarinfo.mode), end=' ')
            print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid), end=' ')
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                print("%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
            else:
                print("%10d" % tarinfo.size, end=' ')
            print("%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6], end=' ')

        print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')

        if verbose:
            if tarinfo.issym():
                print("->", tarinfo.linkname, end=' ')
            if tarinfo.islnk():
                print("link to", tarinfo.linkname, end=' ')
        # Terminate the line for this member.
        print()
1907
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
    (directory, fifo, symbolic link, etc.). If given, `arcname'
    specifies an alternative name for the file in the archive.
    Directories are added recursively by default. This can be avoided by
    setting `recursive' to False. `exclude' is a function that should
    return True for each filename to be excluded.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The context manager closes the source even if addfile() fails.
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)

    else:
        self.addfile(tarinfo)
1964
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
    given, tarinfo.size bytes are read from it and added to the archive.
    You can create TarInfo objects using gettarinfo().
    On Windows platforms, `fileobj' should always be opened with mode
    'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, so later changes by the caller do not affect
    # the object kept in self.members.
    tarinfo = copy.copy(tarinfo)

    buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(buf)
    self.offset += len(buf)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last data block with NULs to a full block.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
1990
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
    directory and set owner, modification time and permissions on
    directories afterwards. `path' specifies a different directory
    to extract to. `members' is optional and must be a subset of the
    list returned by getmembers().

    NOTE(review): member names are handed to extract() unchanged, so
    an archive from an untrusted source may contain names that point
    outside of `path' -- inspect the members before extracting.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Reverse sort directories, so that nested directories are
    # handled before their parents.
    directories.sort(key=lambda a: a.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2027
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a TarInfo object. You can
    specify a different directory using `path'.

    EnvironmentError is re-raised if self.errorlevel > 0 and
    ExtractError if self.errorlevel > 1; otherwise the error is only
    written as a debug message.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2060
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
    a filename or a TarInfo object. If `member' is a regular file, a
    file-like object is returned. If `member' is a link, a file-like
    object is constructed from the link's target. If `member' is none of
    the above, None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()

    Raises StreamError when a link is requested from a stream archive.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        else:
            # A (sym)link's file object is its target's file object.
            # The target is searched among the members before the link.
            return self.extractfile(self._getmember(tarinfo.linkname,
                                                    tarinfo))
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2099
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
    file called targetpath.
    """
    # Fetch the TarInfo object for the given name
    # and build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Create directories that are not part of the archive with
        # default permissions.
        os.makedirs(upperdirs)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch to the make*() method for the member's type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2142
#--------------------------------------------------------------------------
2143
# Below are the different file methods. They are called via
2144
# _extract_member() when extract() is called. They can be replaced in a
2145
# subclass to implement other functionality.
2147
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

    An already existing directory is not an error; any other
    EnvironmentError is propagated.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as e:
        if e.errno != errno.EEXIST:
            raise
2158
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Close the handles even if copying fails half way.
            target.close()
    finally:
        source.close()
2167
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
    at targetpath.
    """
    self.makefile(tarinfo, targetpath)
    self._dbg(1, "tarfile: Unknown file type %r, " \
                 "extracted as regular file." % tarinfo.type)
2175
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

    Raises ExtractError if the platform has no os.mkfifo().
    """
    if hasattr(os, "mkfifo"):
        os.mkfifo(targetpath)
    else:
        raise ExtractError("fifo not supported by system")
2183
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

    Raises ExtractError if the platform lacks os.mknod() or
    os.makedev().
    """
    if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    if tarinfo.isblk():
        mode |= stat.S_IFBLK
    else:
        mode |= stat.S_IFCHR

    os.mknod(targetpath, mode,
             os.makedev(tarinfo.devmajor, tarinfo.devminor))
2198
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link.

    Raises IOError if neither a link nor a copy could be created.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # os.symlink()/os.link() do not exist on this platform.
        if tarinfo.issym():
            # A symlink target is relative to the link's own directory.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the archive member the link refers to.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the referenced file from the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
2225
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

    Nothing is done unless the pwd module is available and the
    process runs with an effective uid of 0.  Raises ExtractError if
    the ownership cannot be changed.
    """
    if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        # Resolve the group: by name first, then by numeric id, and
        # finally fall back to the group of the current process.
        try:
            g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            try:
                g = grp.getgrgid(tarinfo.gid)[2]
            except KeyError:
                g = os.getgid()
        # Same lookup order for the user.
        try:
            u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            try:
                u = pwd.getpwuid(tarinfo.uid)[2]
            except KeyError:
                u = os.getuid()
        try:
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, u, g)
            else:
                if sys.platform != "os2emx":
                    os.chown(targetpath, u, g)
        except EnvironmentError:
            raise ExtractError("could not change owner")
2253
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Raises ExtractError if the mode cannot be changed.
    """
    if hasattr(os, 'chmod'):
        try:
            os.chmod(targetpath, tarinfo.mode)
        except EnvironmentError:
            raise ExtractError("could not change mode")
2262
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    Access and modification time are both set to tarinfo.mtime.
    Raises ExtractError if the times cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
2272
#--------------------------------------------------------------------------
2274
"""Return the next member of the archive as a TarInfo object, when
2275
TarFile is opened for reading. Return None if there is no more
2279
if self.firstmember is not None:
2280
m = self.firstmember
2281
self.firstmember = None
2284
# Read the next block.
2285
self.fileobj.seek(self.offset)
2288
tarinfo = self.tarinfo.fromtarfile(self)
2291
self.members.append(tarinfo)
2293
except HeaderError as e:
2294
if self.ignore_zeros:
2295
self._dbg(2, "0x%X: %s" % (self.offset, e))
2296
self.offset += BLOCKSIZE
2299
if self.offset == 0:
2300
raise ReadError(str(e))
2306
#--------------------------------------------------------------------------
2307
# Little helper methods:
2309
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
    If tarinfo is given, it is used as the starting point.

    Returns the matching TarInfo object or None if there is none.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        end = len(members)
    else:
        # Only members in front of `tarinfo' are considered.
        end = members.index(tarinfo)

    for i in range(end - 1, -1, -1):
        if name == members[i].name:
            return members[i]
    return None
2326
"""Read through the entire archive file and look for readable
2330
tarinfo = self.next()
2335
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
    corresponds to TarFile's mode.

    `mode' is a string of the mode characters the operation allows.
    Raises IOError if the TarFile is closed or opened in another mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is not None and self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
2345
"""Provide an iterator object.
2348
return iter(self.members)
2350
return TarIter(self)
2352
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

    The message is printed only if `level' does not exceed self.debug.
    """
    if level <= self.debug:
        print(msg, file=sys.stderr)
2362
for tarinfo in TarFile(...):
2366
def __init__(self, tarfile):
    """Construct a TarIter object.
    """
    self.tarfile = tarfile
    # Position in tarfile.members of the next member to return.
    self.index = 0
2372
"""Return iterator object.
2376
"""Return the next item using TarFile's next() method.
2377
When all members have been read, set TarFile as _loaded.
2379
# Fix for SF #1100429: Under rare circumstances it can
2380
# happen that getmembers() is called during iteration,
2381
# which will cause TarIter to stop prematurely.
2382
if not self.tarfile._loaded:
2383
tarinfo = self.tarfile.next()
2385
self.tarfile._loaded = True
2389
tarinfo = self.tarfile.members[self.index]
2395
# Helper classes for sparse file support
2397
"""Base class for _data and _hole.
2399
def __init__(self, offset, size):
2400
self.offset = offset
2402
def __contains__(self, offset):
2403
return self.offset <= offset < self.offset + self.size
2405
class _data(_section):
    """Represent a data section in a sparse file.
    """

    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        # Position of this section's bytes within the stored data.
        self.realpos = realpos
2412
class _hole(_section):
    """Represent a hole section in a sparse file.
    """
    pass
2417
class _ringbuffer(list):
    """Ringbuffer class which increases performance
    over a regular list.

    The search in find() starts at the item that matched last, which
    is cheap for mostly sequential access.
    """

    def __init__(self):
        # Index of the item that matched in the previous find() call.
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', or None if no
        item does (this includes an empty buffer).
        """
        if not self:
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the start of the buffer.
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2438
#--------------------
2439
# exported functions
2440
#--------------------
2441
def is_tarfile(name):
2442
"""Return True if name points to a tar archive that we
2443
are able to handle, else return False.