~mterry/duplicity/list-old-chains-0.6

1 by bescoto
Initial checkin
1
#!/usr/bin/env python
335 by loafman
patch #6675: Add modelines
2
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
1 by bescoto
Initial checkin
3
#-------------------------------------------------------------------
4
# tarfile.py
5
#
6
# Module for reading and writing .tar and tar.gz files.
7
#
8
# Needs at least Python version 2.2.
9
#
10
# Please consult the html documentation in this distribution
11
# for further details on how to use tarfile.
12
#
13
#-------------------------------------------------------------------
59 by bescoto
Small changes for 0.4.1 and python 2.3
14
# Copyright (C) 2002 Lars Gustabel <lars@gustaebel.de>
1 by bescoto
Initial checkin
15
# All rights reserved.
16
#
17
# Permission  is  hereby granted,  free  of charge,  to  any person
18
# obtaining a  copy of  this software  and associated documentation
19
# files  (the  "Software"),  to   deal  in  the  Software   without
20
# restriction,  including  without limitation  the  rights to  use,
21
# copy, modify, merge, publish, distribute, sublicense, and/or sell
22
# copies  of  the  Software,  and to  permit  persons  to  whom the
23
# Software  is  furnished  to  do  so,  subject  to  the  following
24
# conditions:
25
#
26
# The above copyright  notice and this  permission notice shall  be
27
# included in all copies or substantial portions of the Software.
28
#
29
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
30
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
31
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
32
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
33
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
34
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
35
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
36
# OTHER DEALINGS IN THE SOFTWARE.
37
#
38
"""Read from and write to tar format archives.
39
"""
40
335 by loafman
patch #6675: Add modelines
41
__version__ = "$Revision: 1.8 $"
1 by bescoto
Initial checkin
42
# $Source: /sources/duplicity/duplicity/duplicity/tarfile.py,v $
43
44
version     = "0.4.9"
59 by bescoto
Small changes for 0.4.1 and python 2.3
45
__author__  = "Lars Gustabel (lars@gustaebel.de)"
335 by loafman
patch #6675: Add modelines
46
__date__    = "$Date: 2008/11/16 18:48:15 $"
47
__cvsid__   = "$Id: tarfile.py,v 1.8 2008/11/16 18:48:15 loafman Exp $"
1 by bescoto
Initial checkin
48
__credits__ = "Gustavo Niemeyer for his support, " \
49
              "Detlef Lannert for some early contributions"
50
51
#---------
52
# Imports
53
#---------
54
import sys
55
import os
56
import __builtin__
57
import shutil
58
import stat
59
import errno
60
import time
61
62
try:
63
    import grp, pwd
64
except ImportError:
65
    grp = pwd = None
54 by bescoto
Cache pwd and group files
66
# These are used later to cache user and group names and ids
67
gname_dict = uname_dict = uid_dict = gid_dict = None
1 by bescoto
Initial checkin
68
69
# We won't need this anymore in Python 2.3
70
#
71
# We import the _tarfile extension, that contains
72
# some useful functions to handle devices and symlinks.
73
# We inject them into os module, as if we were under 2.3.
74
#
75
try:
    import _tarfile
except ImportError:
    _tarfile = None
else:
    # An extension module whose mknod is None is treated as unavailable.
    if _tarfile.mknod is None:
        _tarfile = None

if _tarfile:
    # Copy into the os module only those helpers it does not already have.
    for _name in ("mknod", "major", "minor", "makedev", "lchown"):
        if not hasattr(os, _name):
            setattr(os, _name, getattr(_tarfile, _name))
    del _name
91
92
# XXX remove for release (2.3)
93
if sys.version_info[:2] < (2,3):
94
    True  = 1
95
    False = 0
96
97
#---------------------------------------------------------
98
# GNUtar constants
99
#---------------------------------------------------------
100
BLOCKSIZE  = 512                # length in bytes of one processing block
RECORDSIZE = 20 * BLOCKSIZE     # length in bytes of one record (20 blocks)
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME = 100               # maximal length of a filename
LENGTH_LINK = 100               # maximal length of a linkname

# One-character member type flags.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# GNU tar extensions to the type flags.
GNUTYPE_LONGNAME = "L"          # longnames
GNUTYPE_LONGLINK = "K"          # longlink
GNUTYPE_SPARSE   = "S"          # sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, GNUTYPE_LONGNAME,
    GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
    CHRTYPE, BLKTYPE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)
133
134
#---------------------------------------------------------
135
# Bits used in the mode field, values in octal.
136
#---------------------------------------------------------
137
# File-type bits.  The single-bit values come first because S_IFBLK and
# S_IFLNK are combinations of them.  Octal values are given in the comments.
S_IFIFO = 1 << 12               # 0010000  fifo
S_IFCHR = 1 << 13               # 0020000  character device
S_IFDIR = 1 << 14               # 0040000  directory
S_IFREG = 1 << 15               # 0100000  regular file
S_IFBLK = S_IFCHR | S_IFDIR     # 0060000  block device
S_IFLNK = S_IFCHR | S_IFREG     # 0120000  symbolic link

TSUID   = 1 << 11               # 04000  set UID on execution
TSGID   = 1 << 10               # 02000  set GID on execution
TSVTX   = 1 << 9                # 01000  reserved

TUREAD  = 1 << 8                # 00400  read by owner
TUWRITE = 1 << 7                # 00200  write by owner
TUEXEC  = 1 << 6                # 00100  execute/search by owner
TGREAD  = 1 << 5                # 00040  read by group
TGWRITE = 1 << 4                # 00020  write by group
TGEXEC  = 1 << 3                # 00010  execute/search by group
TOREAD  = 1 << 2                # 00004  read by other
TOWRITE = 1 << 1                # 00002  write by other
TOEXEC  = 1 << 0                # 00001  execute/search by other
157
158
#---------------------------------------------------------
159
# Some useful functions
160
#---------------------------------------------------------
161
def nts(s):
    """Return the part of string buffer s that precedes its first NUL.

       A buffer that contains no NUL character is returned whole.
    """
    end = s.find("\0")
    if end < 0:
        return s
    return s[:end]
165
166
def calc_chksum(buf):
    """Return the checksum of a member's header.

       The checksum is the sum of the byte values of buf, except that the
       eight bytes of the chksum field (offsets 148 to 155) are counted as
       if they were spaces.
       buf is a 512 byte long string buffer which holds the header.
    """
    total = 8 * ord(" ")                # the chksum field counted as blanks
    for ch in buf[:148] + buf[156:]:    # everything around the chksum field
        total += ord(ch)
    return total
176
177
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       If length is None, the entire remaining content of src is copied
       with shutil.copyfileobj().  If length is 0 nothing is read or
       written.  Otherwise the data is moved in chunks of at most 16 KiB.

       Raises IOError("end of file reached") when src runs out of data
       before length bytes were copied; the short chunk is not written.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    # Count the remaining bytes down instead of iterating over
    # range(blocks): on Python 2 range() builds a list with one entry per
    # chunk, which is a lot of memory for multi-gigabyte members.
    remaining = length
    while remaining > 0:
        want = min(BUFSIZE, remaining)
        buf = src.read(want)
        if len(buf) < want:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
    return
201
202
# One row per output character of filemode().  A row is a flat sequence of
# (bits, character) pairs that are tried from left to right; the first pair
# whose bits are all set in the mode wins.  Combined bit patterns must
# therefore come before the patterns they contain, otherwise "s" and "t"
# could never be selected.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD,  "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD,  "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD,  "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Each row of filemode_table contributes exactly one character: the
       character of the first pair whose bits are all set in mode, or "-"
       when no pair of the row matches.
    """
    chars = []
    for row in filemode_table:
        char = "-"
        for i in range(0, len(row), 2):
            bits = row[i]
            if mode & bits == bits:
                char = row[i + 1]
                break
        chars.append(char)
    return "".join(chars)
236
237
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the platform separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
241
242
class TarError(Exception):
    """Exception type reserved for internal use by this module."""
245
246
#--------------------
247
# exported functions
248
#--------------------
249
def open(name, mode="r", fileobj=None):
    """Return a TarFile for the (uncompressed) tar archive name.

       name, mode and fileobj are handed to the TarFile constructor
       unchanged; mode selects reading, writing or appending.
    """
    archive = TarFile(name, mode, fileobj)
    return archive
254
255
def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       gzname is the path of the compressed archive.  The name given to
       the TarFile (and to GzipFile) is the basename of gzname with a
       ".gz" suffix removed or a ".tgz" suffix replaced by ".tar".
       If fileobj is given, the compressed data is read from or written
       to it instead of opening gzname.

       Raises ValueError if gzmode is "a".
    """
    if gzmode == "a":
        raise ValueError("Appending to gzipped archive is not allowed")
    import gzip

    pre, ext = os.path.splitext(gzname)
    pre = os.path.basename(pre)
    if ext == ".tgz":
        ext = ".tar"
    if ext == ".gz":
        ext = ""
    tarname = pre + ext

    # TarFile gets the mode as passed in; gzip always gets a binary mode.
    mode = gzmode
    if "b" not in gzmode:
        gzmode += "b"

    if mode[0:1] == "w":
        if not fileobj:
            fileobj = __builtin__.file(gzname, gzmode)
        gzfile = gzip.GzipFile(tarname, gzmode, compresslevel, fileobj)
    elif fileobj:
        # A caller-supplied file object used to be ignored when reading;
        # wrap it the same way as in write mode.
        gzfile = gzip.GzipFile(tarname, gzmode, compresslevel, fileobj)
    else:
        gzfile = gzip.open(gzname, gzmode, compresslevel)

    t = TarFile(tarname, mode, gzfile)
    # The GzipFile was created here, so closing the TarFile must close it.
    t._extfileobj = 0
    return t
281
282
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The first block of the file is parsed as a tar header, first
       reading the file as it is and then reading it through gzip.
       An IOError raised while opening or reading the plain file is not
       caught here.
    """

    # The plain function is taken from the class dictionary so that it can
    # be called with None in place of a TarFile instance.
    buftoinfo = TarFile.__dict__["_buftoinfo"]
    try:
        f = __builtin__.open(name, "rb")
        try:
            buf = f.read(BLOCKSIZE)
        finally:
            f.close()       # do not leak the handle of the probed file
        buftoinfo(None, buf)
        return True
    except (ValueError, ImportError):
        pass
    try:
        import gzip
        f = gzip.open(name, "rb")
        try:
            buf = f.read(BLOCKSIZE)
        finally:
            f.close()
        buftoinfo(None, buf)
        return True
    except (IOError, ValueError, ImportError):
        pass
    return False
302
303
#------------------
304
# Exported Classes
305
#------------------
306
class TarInfo:
307
    """Informational class which holds the details about an
308
       archive member given by a tar header block.
309
       TarInfo instances are returned by TarFile.getmember() and
310
       TarFile.getmembers() and are usually created internally.
311
       If you want to create a TarInfo instance from the outside,
312
       you should use TarFile.gettarinfo() if the file already exists,
313
       or you can instanciate the class yourself.
314
    """
315
316
    def __init__(self, name=""):
317
        """Construct a TarInfo instance. name is the optional name
318
           of the member.
319
        """
320
321
        self.name     = name       # member name (dirnames must end with '/')
322
        self.mode     = 0100666    # file permissions
323
        self.uid      = 0          # user id
324
        self.gid      = 0          # group id
325
        self.size     = 0          # file size
326
        self.mtime    = 0          # modification time
327
        self.chksum   = 0          # header checksum
328
        self.type     = REGTYPE    # member type
329
        self.linkname = ""         # link name
330
        self.uname    = "user"     # user name
331
        self.gname    = "group"    # group name
332
        self.devmajor = 0          #-
333
        self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
334
        self.prefix   = ""         # prefix, holding information
335
                                   # about sparse files
336
337
        self.offset   = 0          # the tar header starts here
338
        self.offset_data = 0       # the optional file's data starts here
339
340
    def init_from_stat(self, statres):
341
        """Initialize various attributes from statobj (these are
342
        returned by os.stat() and related functions.  Return none on error"""
343
        stmd = statres.st_mode
344
        if stat.S_ISREG(stmd): type = REGTYPE
345
        elif stat.S_ISDIR(stmd):
346
            type = DIRTYPE
347
            if self.name[-1:] != "/": self.name += "/"
348
        elif stat.S_ISFIFO(stmd): type = FIFOTYPE
349
        elif stat.S_ISLNK(stmd): type = SYMTYPE
350
        elif stat.S_ISCHR(stmd): type = CHRTYPE
351
        elif stat.S_ISBLK(stmd): type = BLKTYPE
352
        else: return None
353
354
        # Fill the TarInfo instance with all
355
        # information we can get.
356
        self.mode  = stat.S_IMODE(stmd)
357
        self.uid   = statres.st_uid
358
        self.gid   = statres.st_gid
359
        self.size  = statres.st_size
360
        self.mtime = statres.st_mtime
361
        self.type  = type
362
        if pwd:
54 by bescoto
Cache pwd and group files
363
            try: self.uname = uid2uname(self.uid)
364
            except KeyError: pass
1 by bescoto
Initial checkin
365
        if grp:
54 by bescoto
Cache pwd and group files
366
            try: self.gname = gid2gname(self.gid)
367
            except KeyError: pass
1 by bescoto
Initial checkin
368
369
        if type in (CHRTYPE, BLKTYPE):
370
            if hasattr(os, "major") and hasattr(os, "minor"):
371
                self.devmajor = os.major(statres.st_rdev)
372
                self.devminor = os.minor(statres.st_rdev)
373
        return 1
374
375
    def set_arcname(self, name):
376
        """Set the name of the member in the archive.  Backward
377
        slashes are converted to forward slashes, Absolute paths are
378
        turned to relative paths.
379
        """
380
        arcname = normpath(name)
381
        drv, arcname = os.path.splitdrive(arcname)
382
        while arcname[0:1] == "/":
383
            arcname = arcname[1:]
384
        self.name = arcname
385
386
    def getheader(self):
387
        """Return a tar header block as a 512 byte string.
388
        """
69 by bescoto
asdf's tarfile large uid/gid patch
389
        if self.uid > 2097151 or self.uid < 0:
390
            sys.stderr.write("uid %i of file %s not in range. Setting uid to 60001\n" % (self.uid,self.name))
391
            self.uid = 60001
392
        if self.gid > 2097151 or self.gid < 0:
393
            sys.stderr.write("gid %i of file %s not in range. Setting gid to 60001\n" % (self.gid, self.name))
394
            self.gid = 60001
1 by bescoto
Initial checkin
395
        # The following code was contributed by Detlef Lannert.
396
        parts = []
397
        for value, fieldsize in (
398
                (self.name, 100),
399
                ("%07o" % self.mode, 8),
400
                ("%07o" % self.uid, 8),
401
                ("%07o" % self.gid, 8),
402
                ("%011o" % self.size, 12),
403
                ("%011o" % self.mtime, 12),
404
                ("        ", 8),
405
                (self.type, 1),
406
                (self.linkname, 100),
407
                (MAGIC, 6),
408
                (VERSION, 2),
409
                (self.uname, 32),
410
                (self.gname, 32),
411
                ("%07o" % self.devmajor, 8),
412
                ("%07o" % self.devminor, 8),
413
                (self.prefix, 155)
414
                ):
415
            l = len(value)
416
            parts.append(value + (fieldsize - l) * "\0")
417
418
        buf = "".join(parts)
419
        chksum = calc_chksum(buf)
420
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
421
        buf += (512 - len(buf)) * "\0"
422
        self.buf = buf
423
        return buf
424
425
    def isreg(self):
426
        return self.type in REGULAR_TYPES
427
    def isfile(self):
428
        return self.isreg()
429
    def isdir(self):
430
        return self.type == DIRTYPE
431
    def issym(self):
432
        return self.type == SYMTYPE
433
    def islnk(self):
434
        return self.type == LNKTYPE
435
    def ischr(self):
436
        return self.type == CHRTYPE
437
    def isblk(self):
438
        return self.type == BLKTYPE
439
    def isfifo(self):
440
        return self.type == FIFOTYPE
441
    def issparse(self):
442
        return self.type == GNUTYPE_SPARSE
443
    def isdev(self):
444
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
445
# class TarInfo
446
447
448
class TarFile:
    """Class representing a TAR archive file on disk.

    The class attributes below are defaults that may be overridden on
    the class or on an instance.
    """
    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.
                                # (gettarinfo() uses os.stat() instead of
                                # os.lstat() when this is true.)

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.
                                # (If false, next() returns None at the
                                # first block that fails to parse.)

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.
462
463
    def __init__(self, name=None, mode="r", fileobj=None):
464
        self.name = name
465
466
        if len(mode) > 1 or mode not in "raw":
467
            raise ValueError, "mode must be either 'r', 'a' or 'w', " \
468
                                "not '%s'" % mode
469
        self._mode = mode
470
        self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
471
472
        if not fileobj:
473
            fileobj = __builtin__.file(self.name, self.mode)
474
            self._extfileobj = 0
475
        else:
476
            if self.name is None and hasattr(fileobj, "name"):
477
                self.name = fileobj.name
478
            if hasattr(fileobj, "mode"):
479
                self.mode = fileobj.mode
480
            self._extfileobj = 1
481
        self.fileobj = fileobj
482
483
        self.init_datastructures()
484
485
        if self._mode == "a":
486
            self.fileobj.seek(0)
487
            self._load()
488
489
    def init_datastructures(self):
490
        # Init datastructures
491
        #self.members     = []       # list of members as TarInfo instances
492
        #self.membernames = []       # names of members
493
        #self.chunks      = [0]      # chunk cache
494
        self._loaded     = 0        # flag if all members have been read
495
        self.offset      = 0l       # current position in the archive file
496
        self.inodes      = {}       # dictionary caching the inodes of
497
                                    # archive members already added
498
        self.next_chunk = 0 # offset of next tarinfo, used when reading
499
500
    def close(self):
501
        """Close the TarFile instance and do some cleanup.
502
        """
503
        if self.fileobj:
504
            if self._mode in "aw":
505
                # fill up the end with zero-blocks
506
                # (like option -b20 for tar does)
507
                blocks, remainder = divmod(self.offset, RECORDSIZE)
508
                if remainder > 0:
509
                    self.fileobj.write("\0" * (RECORDSIZE - remainder))
510
511
            if not self._extfileobj:
512
                self.fileobj.close()
513
            self.fileobj = None
514
515
    def throwaway_until(self, position):
516
        """Read data, throwing it away until we get to position"""
517
        bufsize = 16 * 1024
518
        bytes_to_read = position - self.offset
519
        assert bytes_to_read >= 0
520
        while bytes_to_read >= bufsize:
521
            self.fileobj.read(bufsize)
522
            bytes_to_read -= bufsize
523
        self.fileobj.read(bytes_to_read)
524
        self.offset = position
525
526
    def next(self):
527
        """Return the next member from the archive.
528
           Return None if the end is reached.
529
           Can be used in a while statement, is used
530
           for Iteration (see __iter__()) and internally.
531
        """
532
        if not self.fileobj:
533
            raise ValueError, "I/O operation on closed file"
534
        if self._mode not in "ra":
535
            raise ValueError, "reading from a write-mode file"
536
537
        # Read the next block.
538
        # self.fileobj.seek(self.chunks[-1])
539
        #self.fileobj.seek(self.next_chunk)
540
        #self.offset = self.next_chunk
541
        self.throwaway_until(self.next_chunk)
542
        while 1:
543
            buf = self.fileobj.read(BLOCKSIZE)
544
            if not buf:
545
                return None
546
            try:
547
                tarinfo = self._buftoinfo(buf)
548
            except ValueError:
549
                if self.ignore_zeros:
550
                    if buf.count("\0") == BLOCKSIZE:
551
                        adj = "empty"
552
                    else:
553
                        adj = "invalid"
554
                    self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
555
                    self.offset += BLOCKSIZE
556
                    continue
557
                else:
558
                    return None
559
            break
560
561
        # If the TarInfo instance contains a GNUTYPE longname or longlink
562
        # statement, we must process this first.
563
        if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
564
            tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
565
566
        if tarinfo.issparse():
567
            assert 0, "Sparse file support turned off"
568
            # Sparse files need some care,
569
            # due to the possible extra headers.
570
            tarinfo.offset = self.offset
571
            self.offset += BLOCKSIZE
572
            origsize = self._proc_sparse(tarinfo)
573
            tarinfo.offset_data = self.offset
574
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
575
            if remainder:
576
                blocks += 1
577
            self.offset += blocks * BLOCKSIZE
578
            tarinfo.size = origsize
579
        else:
580
            tarinfo.offset = self.offset
581
            self.offset += BLOCKSIZE
582
            tarinfo.offset_data = self.offset
583
            if tarinfo.isreg():
584
                ## Skip the following data blocks.
585
                blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
586
                if remainder:
587
                    blocks += 1
588
                self.next_chunk = self.offset + (blocks * BLOCKSIZE)
589
            else: self.next_chunk = self.offset
590
591
        #self.members.append(tarinfo)  These use too much memory
592
        #self.membernames.append(tarinfo.name)
593
        #self.chunks.append(self.offset)
594
        return tarinfo
595
596
    def getmember(self, name):
597
        """Return a TarInfo instance for member name.
598
        """
599
        if name not in self.membernames and not self._loaded:
600
            self._load()
601
        if name not in self.membernames:
602
            raise KeyError, "filename `%s' not found in tar archive" % name
603
        return self._getmember(name)
604
605
    def getinfo(self, name):
606
        """Return a TarInfo instance for member name.
607
           This method will be deprecated in 0.6,
608
           use getmember() instead.
609
        """
610
        # XXX kick this out in 0.6
611
        import warnings
612
        warnings.warn("use getmember() instead", DeprecationWarning)
613
        return self.getmember(name)
614
615
    def getmembers(self):
616
        """Return a list of all members in the archive
617
           (as TarInfo instances).
618
        """
619
        if not self._loaded:    # if we want to obtain a list of
620
            self._load()        # all members, we first have to
621
                                # scan the whole archive.
622
        return self.members
623
624
    def getnames(self):
625
        """Return a list of names of all members in the
626
           archive.
627
        """
628
        if not self._loaded:
629
            self._load()
630
        return self.membernames
631
632
    def gettarinfo(self, name, arcname=None):
        """Create a TarInfo instance from an existing file.
           Optional arcname defines the name under which the file
           shall be stored in the archive.

           Returns None if TarInfo.init_from_stat() rejects the file.
           A regular file whose (inode, device, mtime) triple was seen
           before is returned as a hard link (LNKTYPE) to the name it was
           first stored under, unless self.dereference is set.
        """
        # Now, fill the TarInfo instance with
        # information specific for the file.
        tarinfo = TarInfo()

        if arcname is None:
            tarinfo.set_arcname(name)
        else:
            tarinfo.set_arcname(arcname)

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)

        if not tarinfo.init_from_stat(statres):
            return None

        if tarinfo.type == REGTYPE:
            inode = (statres.st_ino, statres.st_dev, statres.st_mtime)
            # Test membership on the dictionary itself; going through
            # .keys() builds and scans a list for every file added.
            if inode in self.inodes and not self.dereference:
                # Is it a hardlink to an already
                # archived file?
                tarinfo.type = LNKTYPE
                tarinfo.linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                if inode[0]:
                    self.inodes[inode] = tarinfo.name
        elif tarinfo.type == SYMTYPE:
            tarinfo.linkname = os.readlink(name)
            tarinfo.size = 0

        return tarinfo
669
670
    def list(self, verbose=1):
671
        """Print a formatted listing of the archive's
672
           contents to stdout.
673
        """
674
        for tarinfo in self:
675
            if verbose:
676
                print filemode(tarinfo.mode),
677
                print tarinfo.uname + "/" + tarinfo.gname,
678
                if tarinfo.ischr() or tarinfo.isblk():
679
                    print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
680
                else:
681
                    print "%10d" % tarinfo.size,
682
                print "%d-%02d-%02d %02d:%02d:%02d" \
683
                      % time.gmtime(tarinfo.mtime)[:6],
684
685
            print tarinfo.name,
686
687
            if verbose:
688
                if tarinfo.issym():
689
                    print "->", tarinfo.linkname,
690
                if tarinfo.islnk():
691
                    print "link to", tarinfo.linkname,
692
            print
693
694
    def add(self, name, arcname=None, recursive=1):
695
        """Add a file or a directory to the archive.
696
           Directory addition is recursive by default.
697
        """
698
        if not self.fileobj:
699
            raise ValueError, "I/O operation on closed file"
700
        if self._mode == "r":
701
            raise ValueError, "writing to a read-mode file"
702
703
        if arcname is None:
704
            arcname = name
705
706
        # Skip if somebody tries to archive the archive...
707
        if os.path.abspath(name) == os.path.abspath(self.name):
708
            self._dbg(2, "tarfile: Skipped `%s'\n" % name)
709
            return
710
711
        # Special case: The user wants to add the current
712
        # working directory.
713
        if name == ".":
714
            if recursive:
715
                if arcname == ".":
716
                    arcname = ""
717
                for f in os.listdir("."):
718
                    self.add(f, os.path.join(arcname, f))
719
            return
720
721
        self._dbg(1, "%s\n" % name)
722
723
        # Create a TarInfo instance from the file.
724
        tarinfo = self.gettarinfo(name, arcname)
725
726
        if tarinfo is None:
727
            self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
728
729
730
        # Append the tar header and data to the archive.
731
        if tarinfo.isreg():
732
            f = __builtin__.file(name, "rb")
733
            self.addfile(tarinfo, fileobj = f)
734
            f.close()
735
736
        if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
737
            tarinfo.size = 0l
738
            self.addfile(tarinfo)
739
740
        if tarinfo.isdir():
741
            self.addfile(tarinfo)
742
            if recursive:
743
                for f in os.listdir(name):
744
                    self.add(os.path.join(name, f), os.path.join(arcname, f))
745
746
    def addfile(self, tarinfo, fileobj=None):
747
        """Add the content of fileobj to the tarfile.
748
           The amount of bytes to read is determined by
749
           the size attribute in the tarinfo instance.
750
        """
751
        if not self.fileobj:
752
            raise ValueError, "I/O operation on closed file"
753
        if self._mode == "r":
754
            raise ValueError, "writing to a read-mode file"
755
756
        # XXX What was this good for again?
757
        #try:
758
        #    self.fileobj.seek(self.chunks[-1])
759
        #except IOError:
760
        #    pass
761
762
        full_headers = self._get_full_headers(tarinfo)
763
        self.fileobj.write(full_headers)
764
        assert len(full_headers) % BLOCKSIZE == 0
765
        self.offset += len(full_headers)
766
767
        # If there's data to follow, append it.
768
        if fileobj is not None:
769
            copyfileobj(fileobj, self.fileobj, tarinfo.size)
770
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
771
            if remainder > 0:
772
                self.fileobj.write("\0" * (BLOCKSIZE - remainder))
773
                blocks += 1
774
            self.offset += blocks * BLOCKSIZE
775
776
        #self.members.append(tarinfo)  #These take up too much memory
777
        #self.membernames.append(tarinfo.name)
778
        #self.chunks.append(self.offset)
779
780
    def _get_full_headers(self, tarinfo):
        """Return the header string for tarinfo, preceded by GNU longlink
        and longname headers where they are needed.

        A linkname or name of LENGTH_LINK - 1 / LENGTH_NAME - 1 characters
        or more gets a GNU extension header and is then shortened on the
        tarinfo instance itself to that many characters.  If both are too
        long, the longlink is written out first.
        """
        pieces = []

        link_limit = LENGTH_LINK - 1
        if len(tarinfo.linkname) >= link_limit:
            pieces.append(self._return_gnulong(tarinfo.linkname,
                                               GNUTYPE_LONGLINK))
            tarinfo.linkname = tarinfo.linkname[:link_limit]

        name_limit = LENGTH_NAME - 1
        if len(tarinfo.name) >= name_limit:
            pieces.append(self._return_gnulong(tarinfo.name,
                                               GNUTYPE_LONGNAME))
            tarinfo.name = tarinfo.name[:name_limit]

        pieces.append(tarinfo.getheader())
        return "".join(pieces)
797
798
#    def untar(self, path):
799
#        """Untar the whole archive to path.
800
#        """
801
#        later = []
802
#        for tarinfo in self:
803
#            if tarinfo.isdir():
804
#                later.append(tarinfo)
805
#            self.extract(tarinfo, path)
806
#        for tarinfo in later:
807
#            self._utime(tarinfo, os.path.join(path, tarinfo.name))
808
809
    def extractfile(self, member):
        """Return a file-like object for reading member's data.

           member may be a name or a TarInfo instance.  Regular
           files and members of unsupported type yield a _FileObject;
           hard and symbolic links are resolved through _getmember()
           and the lookup is repeated on the target; every other
           type yields None.

           Raises ValueError if the archive is closed or was not
           opened for reading.
        """
        if not self.fileobj:
            raise ValueError("I/O operation on closed file")
        if self._mode != "r":
            raise ValueError("reading from a write-mode file")

        tarinfo = member
        if not isinstance(tarinfo, TarInfo):
            tarinfo = self.getmember(tarinfo)

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            return _FileObject(self, tarinfo)
        if tarinfo.islnk() or tarinfo.issym():
            # Follow the link to an earlier member of the archive.
            target = self._getmember(tarinfo.linkname, tarinfo)
            return self.extractfile(target)
        return None
829
830
    def extract(self, member, path=""):
        """Extract member to the file system.

           member may be a name or a TarInfo instance.  It is written
           below path (the current working directory by default)
           under its full pathname from the archive.

           An EnvironmentError is re-raised only if self.errorlevel
           is greater than 0, a TarError only if it is greater than
           1; otherwise the error is reported through _dbg().
           Raises ValueError if the archive is closed or was not
           opened for reading.
        """
        if not self.fileobj:
            raise ValueError("I/O operation on closed file")
        if self._mode != "r":
            raise ValueError("reading from a write-mode file")

        tarinfo = member
        if not isinstance(tarinfo, TarInfo):
            tarinfo = self.getmember(tarinfo)

        self._dbg(1, tarinfo.name)
        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
        except EnvironmentError:
            if self.errorlevel > 0:
                raise
            err = sys.exc_info()[1]
            self._dbg(1, "\ntarfile: %s `%s'" % (err.strerror, err.filename))
        except TarError:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "\ntarfile: %s" % sys.exc_info()[1])
        self._dbg(1, "\n")
861
862
    def _extract_member(self, tarinfo, targetpath):
863
        """Extract the TarInfo instance tarinfo to a physical
864
           file called targetpath.
865
        """
866
        # Fetch the TarInfo instance for the given name
867
        # and build the destination pathname, replacing
868
        # forward slashes to platform specific separators.
869
        if targetpath[-1:] == "/":
870
            targetpath = targetpath[:-1]
871
        targetpath = os.path.normpath(targetpath)
872
873
        # Create all upper directories.
874
        upperdirs = os.path.dirname(targetpath)
875
        if upperdirs and not os.path.exists(upperdirs):
876
            ti = TarInfo()
877
            ti.name  = ""
878
            ti.type  = DIRTYPE
879
            ti.mode  = 0777
880
            ti.mtime = tarinfo.mtime
881
            ti.uid   = tarinfo.uid
882
            ti.gid   = tarinfo.gid
883
            ti.uname = tarinfo.uname
884
            ti.gname = tarinfo.gname
885
            for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
886
                ti.name = os.path.join(ti.name, d)
887
                self._extract_member(ti, ti.name)
888
889
        if tarinfo.isreg():
890
            self._makefile(tarinfo, targetpath)
891
        elif tarinfo.isdir():
892
            self._makedir(tarinfo, targetpath)
893
        elif tarinfo.isfifo():
894
            self._makefifo(tarinfo, targetpath)
895
        elif tarinfo.ischr() or tarinfo.isblk():
896
            self._makedev(tarinfo, targetpath)
897
        elif tarinfo.islnk() or tarinfo.issym():
898
            self._makelink(tarinfo, targetpath)
899
        else:
900
            self._makefile(tarinfo, targetpath)
901
            if tarinfo.type not in SUPPORTED_TYPES:
902
                self._dbg(1, "\ntarfile: Unknown file type '%s', " \
903
                             "extracted as regular file." % tarinfo.type)
904
905
        if not tarinfo.issym():
906
            self._chown(tarinfo, targetpath)
907
            self._chmod(tarinfo, targetpath)
908
            if not tarinfo.isdir():
909
                self._utime(tarinfo, targetpath)
910
911
    def _makedir(self, tarinfo, targetpath):
912
        """Make a directory called targetpath out of tarinfo.
913
        """
914
        try:
915
            os.mkdir(targetpath)
916
        except EnvironmentError, e:
917
            if e.errno != errno.EEXIST:
918
                raise
919
920
    def _makefile(self, tarinfo, targetpath):
        """Write the data of the member tarinfo to the file targetpath.

           targetpath is opened in binary write mode and filled from
           the object returned by extractfile().  Both objects are
           closed even if opening the target or copying fails, so a
           failed extraction does not leave an open file handle
           behind.
        """
        source = self.extractfile(tarinfo)
        try:
            target = __builtin__.file(targetpath, "wb")
            try:
                copyfileobj(source, target)
            finally:
                target.close()
        finally:
            source.close()
928
929
    def _makefifo(self, tarinfo, targetpath):
930
        """Make a fifo called targetpath out of tarinfo.
931
        """
932
        if hasattr(os, "mkfifo"):
933
            os.mkfifo(targetpath)
934
        else:
935
            raise TarError, "Fifo not supported by system"
936
937
    def _makedev(self, tarinfo, targetpath):
        """Create the character or block device node targetpath.

           The node is a block device if tarinfo.isblk() is true and
           a character device otherwise; tarinfo.mode supplies the
           permission bits.  Raises TarError if the os module offers
           no mknod().
        """
        if not hasattr(os, "mknod"):
            raise TarError("Special devices not supported by system")

        if tarinfo.isblk():
            devtype = stat.S_IFBLK
        else:
            devtype = stat.S_IFCHR
        mode = tarinfo.mode | devtype

        # Without os.makedev() the major and minor numbers are handed
        # to os.mknod() as two separate arguments.
        if hasattr(os, "makedev"):
            devargs = (os.makedev(tarinfo.devmajor, tarinfo.devminor),)
        else:
            devargs = (tarinfo.devmajor, tarinfo.devminor)
        os.mknod(targetpath, mode, *devargs)
957
958
    def _makelink(self, tarinfo, targetpath):
        """Create the symbolic or hard link targetpath.

           If the link call raises AttributeError (os.symlink or
           os.link is not available), the member the link points to
           is extracted to targetpath instead.  If that fails too,
           the referenced file is copied from the file system.
           Raises TarError if the copy fails as well.
        """
        source = tarinfo.linkname
        self._dbg(1, " -> %s" % source)
        try:
            if tarinfo.issym():
                os.symlink(source, targetpath)
            else:
                # A hard link is resolved relative to the directory
                # of the target.
                source = os.path.join(os.path.dirname(targetpath), source)
                os.link(source, targetpath)
        except AttributeError:
            # No link support: locate the linked member by its
            # archive name.
            member_name = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                                tarinfo.linkname))
            try:
                self._extract_member(self.getmember(member_name), targetpath)
            except (IOError, OSError, KeyError):
                try:
                    shutil.copy2(os.path.normpath(member_name), targetpath)
                except EnvironmentError:
                    raise TarError("Link could not be created")
984
985
    def _chown(self, tarinfo, targetpath):
        """Set the owner of targetpath according to tarinfo.

           Nothing is done unless pwd is available and the effective
           uid is 0.  For group and user, the id looked up from the
           name in tarinfo is preferred; if the name is unknown, the
           numeric id from tarinfo is used provided it exists on this
           system, else the id of the current process.  A failing
           chown is only reported through _dbg().
        """
        # We have to be root to change ownership.
        if not (pwd and os.geteuid() == 0):
            return

        try:
            group = gname2gid(tarinfo.gname)
        except KeyError:
            try:
                gid2gname(tarinfo.gid) # Make sure gid exists
            except KeyError:
                group = os.getgid()
            else:
                group = tarinfo.gid

        try:
            user = uname2uid(tarinfo.uname)
        except KeyError:
            try:
                uid2uname(tarinfo.uid) # Make sure uid exists
            except KeyError:
                user = os.getuid()
            else:
                user = tarinfo.uid

        try:
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, user, group)
            else:
                os.chown(targetpath, user, group)
        except EnvironmentError:
            err = sys.exc_info()[1]
            self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
                         % (err.strerror, err.filename))
1010
1011
    def _chmod(self, tarinfo, targetpath):
1012
        """Set file permissions of targetpath according to tarinfo.
1013
        """
1014
        try:
1015
            os.chmod(targetpath, tarinfo.mode)
1016
        except EnvironmentError, e:
1017
            self._dbg(2, "\ntarfile: (chmod failed), %s `%s'"
1018
                         % (e.strerror, e.filename))
1019
1020
    def _utime(self, tarinfo, targetpath):
1021
        """Set modification time of targetpath according to tarinfo.
1022
        """
1023
        try:
1024
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1025
        except EnvironmentError, e:
1026
            self._dbg(2, "\ntarfile: (utime failed), %s `%s'"
1027
                         % (e.strerror, e.filename))
1028
1029
    def _getmember(self, name, tarinfo=None):
1030
        """Find an archive member by name from bottom to top.
1031
           If tarinfo is given, it is used as the starting point.
1032
        """
1033
        if tarinfo is None:
1034
            end = len(self.members)
1035
        else:
1036
            end = self.members.index(tarinfo)
1037
1038
        for i in xrange(end - 1, -1, -1):
1039
            if name == self.membernames[i]:
1040
                return self.members[i]
1041
1042
    def _load(self):
1043
        """Read through the entire archive file and look for readable
1044
           members.
1045
        """
1046
        while 1:
1047
            tarinfo = self.next()
1048
            if tarinfo is None:
1049
                break
1050
        self._loaded = 1
1051
        return
1052
1053
    def __iter__(self):
1054
        """Provide an iterator object.
1055
        """
1056
        if self._loaded:
1057
            return iter(self.members)
1058
        else:
1059
            return TarIter(self)
1060
1061
    def _buftoinfo(self, buf):
        """Transform a 512 byte header block into a TarInfo instance.

           The numeric fields are parsed as octal numbers; a
           malformed mode, uid, gid, size, mtime or chksum field
           raises ValueError.  Unparsable device numbers are not an
           error: devmajor and devminor are both set to 0 in that
           case.  A checksum mismatch is only reported through
           _dbg(); the TarInfo is returned anyway.
        """
        tarinfo = TarInfo()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:107], 8)
        tarinfo.uid = int(buf[108:115],8)
        tarinfo.gid = int(buf[116:123],8)
        tarinfo.size = long(buf[124:135], 8)
        tarinfo.mtime = long(buf[136:147], 8)
        # chksum stored as a six digit octal number with
        # leading zeroes followed by a nul and then a space
        tarinfo.chksum = int(buf[148:154], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        # Bytes 257-264 (magic and version in a ustar header) are
        # not evaluated.
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:336], 8)
            tarinfo.devminor = int(buf[337:344], 8)
        except ValueError:
            # Reset both numbers, so that devminor is always defined
            # and never paired with a half-parsed devmajor.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]
        if tarinfo.chksum != calc_chksum(buf):
            self._dbg(1, "tarfile: Bad Checksum\n")
        return tarinfo
596 by Kenneth Loafman
418170 [PATCH] file names longer then 512 symbols are not supported
1087
        
1 by bescoto
Initial checkin
1088
    def _proc_gnulong(self, tarinfo, type):
        """Evaluate the blocks that hold a GNU longname or longlink
           member and return the TarInfo of the member they belong to.

           tarinfo is the header of the GNU extension member; its
           size gives the length of the long name, which is read in
           BLOCKSIZE steps.  type is GNUTYPE_LONGNAME or
           GNUTYPE_LONGLINK and selects whether the name or the
           linkname of the following member is replaced.  If the
           following header is another GNU extension member (long
           link plus long name), it is processed recursively.

           Returns None if the archive ends before the real member
           header has been read.
        """
        name = None
        linkname = None
        #may be some sanity checking should be done here
        #assert tarinfo.size < 1000 * BLOCKSIZE, "Filename appears to be too long!"
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf: return None
        self.offset += BLOCKSIZE

        # Read the remaining name blocks.  The freshly read chunk has
        # to be tested for EOF; the accumulated buffer is never empty
        # at this point.
        remaining = tarinfo.size - BLOCKSIZE
        while remaining > 0:
            chunk = self.fileobj.read(BLOCKSIZE)
            if not chunk: return None
            buf += chunk
            self.offset += BLOCKSIZE
            remaining -= BLOCKSIZE

        if type == GNUTYPE_LONGNAME: name = nts(buf)
        if type == GNUTYPE_LONGLINK: linkname = nts(buf)

        # The next block is the header the long name belongs to.
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf: return None
        tarinfo = self._buftoinfo(buf)
        if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
            tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
            # The nested call hit the end of the archive.
            if tarinfo is None: return None
        if name is not None:
            tarinfo.name = name
        if linkname is not None:
            tarinfo.linkname = linkname
        self.offset += BLOCKSIZE
        return tarinfo
1120
596 by Kenneth Loafman
418170 [PATCH] file names longer then 512 symbols are not supported
1121
1122
1 by bescoto
Initial checkin
1123
    def _return_gnulong(self, name, type):
        """Return the string for a GNU longname/longlink member.

           The member consists of a tar header called
           "././@LongLink" with the length of name as its size,
           followed by name itself, padded with NULs to a multiple
           of BLOCKSIZE.  type is stored as the member type.
        """
        header = TarInfo()
        header.name = "././@LongLink"
        header.type = type
        header.mode = 0
        header.size = len(name)

        # NOTE(review): when len(name) is an exact multiple of
        # BLOCKSIZE no padding and hence no terminating NUL is
        # written -- confirm that all readers cope with that.
        leftover = header.size % BLOCKSIZE
        if leftover > 0:
            padding = "\0" * (BLOCKSIZE - leftover)
        else:
            padding = ""
        return "%s%s%s" % (header.getheader(), name, padding)
1 by bescoto
Initial checkin
1138
1139
    def _proc_sparse(self, tarinfo):
        """Analyze a GNU sparse header plus extra headers.

           The sparse structs (12 octal digits offset, 12 octal
           digits size each) are turned into a _ringbuffer of _data
           and _hole sections, which is stored in tarinfo.sparse.
           Returns the original size of the sparse file.

           A struct that cannot be parsed ends the list of structs
           of the current header only.  The isextended flag and the
           original size are still evaluated, so a header with
           unused slots does not break the remaining processing.
        """
        buf = tarinfo.getheader()
        sp = _ringbuffer()
        lastpos = 0
        realpos = 0

        # There are 4 possible sparse structs in the
        # first header.
        pos = 386
        for i in range(4):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                # Unused slot, no more structs in this header.
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        try:
            origsize = int(buf[483:495], 8)
        except ValueError:
            # NOTE(review): best effort -- without a readable size
            # field the file is assumed to end with its last data
            # section.
            origsize = lastpos

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = self.fileobj.read(BLOCKSIZE)
            self.offset += BLOCKSIZE
            if len(buf) < BLOCKSIZE:
                # Truncated archive, there is no flag byte to look at.
                break
            pos = 0
            for i in range(21):
                try:
                    offset = int(buf[pos:pos + 12], 8)
                    numbytes = int(buf[pos + 12:pos + 24], 8)
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        # A trailing hole reaches up to the original file size.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        tarinfo.sparse = sp
        return origsize
1186
1187
    def _dbg(self, level, msg):
1188
        if level <= self.debug:
1189
            sys.stdout.write(msg)
1190
# class TarFile
1191
1192
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile instance to iterate over.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Hand out the result of the TarFile's next() method.
           When it comes back false, flag the TarFile as _loaded
           and stop the iteration.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = 1
        raise StopIteration
1216
# class TarIter
1217
1218
# Helper classes for sparse file support
1219
class _section:
1220
    """Base class for _data and _hole.
1221
    """
1222
    def __init__(self, offset, size):
1223
        self.offset = offset
1224
        self.size = size
1225
    def __contains__(self, offset):
1226
        return self.offset <= offset < self.offset + self.size
1227
1228
class _data(_section):
    """A section of a sparse file that holds data.  realpos is
       stored alongside offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1234
1235
class _hole(_section):
    """A section of a sparse file that is a hole.  It adds nothing
       to _section and only serves to tell holes from data.
    """
1239
1240
class _ringbuffer(list):
1241
    """Ringbuffer class which increases performance
1242
       over a regular list.
1243
    """
1244
    def __init__(self):
1245
        self.idx = 0
1246
    def find(self, offset):
1247
        idx = self.idx
1248
        while 1:
1249
            item = self[idx]
1250
            if offset in item:
1251
                break
1252
            idx += 1
1253
            if idx == len(self):
1254
                idx = 0
1255
            if idx == self.idx:
1256
                # End of File
1257
                return None
1258
        self.idx = idx
1259
        return item
1260
1261
class _FileObject:
1262
    """File-like object for reading an archive member,
1263
       is returned by TarFile.extractfile().
1264
       Support for sparse files included.
1265
    """
1266
1267
    def __init__(self, tarfile, tarinfo):
1268
        self.tarfile = tarfile
1269
        self.fileobj = tarfile.fileobj
1270
        self.name    = tarinfo.name
1271
        self.mode    = "r"
1272
        self.closed  = 0
1273
        self.offset  = tarinfo.offset_data
1274
        self.size    = tarinfo.size
1275
        self.pos     = 0l
1276
        self.linebuffer = ""
1277
        if tarinfo.issparse():
1278
            self.sparse = tarinfo.sparse
1279
            self.read = self._readsparse
1280
        else:
1281
            self.read = self._readnormal
1282
1283
    def readline(self, size=-1):
1284
        """Read a line with approx. size.
1285
           If size is negative, read a whole line.
1286
           readline() and read() must not be mixed up (!).
1287
        """
1288
        if size < 0:
1289
            size = sys.maxint
1290
1291
        nl = self.linebuffer.find("\n")
1292
        if nl >= 0:
1293
            nl = min(nl, size)
1294
        else:
1295
            size -= len(self.linebuffer)
1296
            while nl < 0:
1297
                buf = self.read(min(size, 100))
1298
                if not buf:
1299
                    break
1300
                self.linebuffer += buf
1301
                size -= len(buf)
1302
                if size <= 0:
1303
                    break
1304
                nl = self.linebuffer.find("\n")
1305
            if nl == -1:
1306
                s = self.linebuffer
1307
                self.linebuffer = ""
1308
                return s
1309
        buf = self.linebuffer[:nl]
1310
        self.linebuffer = self.linebuffer[nl + 1:]
1311
        while buf[-1:] == "\r":
1312
            buf = buf[:-1]
1313
        return buf + "\n"
1314
1315
    def readlines(self):
1316
        """Return a list with all (following) lines.
1317
        """
1318
        result = []
1319
        while 1:
1320
            line = self.readline()
1321
            if not line: break
1322
            result.append(line)
1323
        return result
1324
1325
    def _readnormal(self, size=None):
1326
        """Read operation for regular files.
1327
        """
1328
        if self.closed:
1329
            raise ValueError, "I/O operation on closed file"
1330
        #self.fileobj.seek(self.offset + self.pos)
1331
        bytesleft = self.size - self.pos
1332
        if size is None:
1333
            bytestoread = bytesleft
1334
        else:
1335
            bytestoread = min(size, bytesleft)
1336
        self.pos += bytestoread
1337
        self.tarfile.offset += bytestoread
1338
        return self.fileobj.read(bytestoread)
1339
1340
    def _readsparse(self, size=None):
1341
        """Read operation for sparse files.
1342
        """
1343
        if self.closed:
1344
            raise ValueError, "I/O operation on closed file"
1345
1346
        if size is None:
1347
            size = self.size - self.pos
1348
1349
        data = ""
1350
        while size > 0:
1351
            buf = self._readsparsesection(size)
1352
            if not buf:
1353
                break
1354
            size -= len(buf)
1355
            data += buf
1356
        return data
1357
1358
    def _readsparsesection(self, size):
1359
        """Read a single section of a sparse file.
1360
        """
1361
        section = self.sparse.find(self.pos)
1362
1363
        if section is None:
1364
            return ""
1365
1366
        toread = min(size, section.offset + section.size - self.pos)
1367
        if isinstance(section, _data):
1368
            realpos = section.realpos + self.pos - section.offset
1369
            self.pos += toread
1370
            self.fileobj.seek(self.offset + realpos)
1371
            return self.fileobj.read(toread)
1372
        else:
1373
            self.pos += toread
1374
            return "\0" * toread
1375
1376
    def tell(self):
1377
        """Return the current file position.
1378
        """
1379
        return self.pos
1380
1381
    def seek(self, pos, whence=0):
1382
        """Seek to a position in the file.
1383
        """
1384
        self.linebuffer = ""
1385
        if whence == 0:
1386
            self.pos = min(max(pos, 0), self.size)
1387
        if whence == 1:
1388
            if pos < 0:
1389
                self.pos = max(self.pos + pos, 0)
1390
            else:
1391
                self.pos = min(self.pos + pos, self.size)
1392
        if whence == 2:
1393
            self.pos = max(min(self.size + pos, self.size), 0)
1394
1395
    def close(self):
1396
        """Close the file object.
1397
        """
1398
        self.closed = 1
1399
#class _FileObject
1400
1401
#---------------------------------------------
1402
# zipfile compatible TarFile class
1403
#
1404
# for details consult zipfile's documentation
1405
#---------------------------------------------
1406
import cStringIO
1407
1408
TAR_PLAIN = 0           # zipfile.ZIP_STORED
1409
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
1410
class TarFileCompat:
    """Wrapper that gives a tar archive the interface of the
       ZipFile class from the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = open
        elif compression == TAR_GZIPPED:
            opener = gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give every member the attribute names of a ZipInfo.
            import time
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [info.name for info in self.infolist()]

    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        # Translate the ZipInfo attributes back to TarInfo names.
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
1452
#class TarFileCompat
1453
1454
if __name__ == "__main__":
    # a "light-weight" implementation of GNUtar ;-)
    usage = """
Usage: %s [options] [files]

-h      display this help message
-c      create a tarfile
-r      append to an existing archive
-x      extract archive
-t      list archive contents
-f FILENAME
        use archive FILENAME, else STDOUT (-c)
-z      filter archive through gzip
-C DIRNAME
        with opt -x:     extract to directory DIRNAME
        with opt -c, -r: put files to archive under DIRNAME
-v      verbose output
-q      quiet

wildcards *, ?, [seq], [!seq] are accepted.
    """ % sys.argv[0]

    import getopt, glob
    try:
        opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
    except getopt.GetoptError:
        # Invalid options are an error: report on stderr and leave
        # with a non-zero status, so that calling scripts notice.
        sys.stderr.write("\nERROR: %s\n" % sys.exc_info()[1])
        sys.stderr.write(usage + "\n")
        sys.exit(2)

    file = None
    mode = None
    dir = None
    comp = 0
    debug = 0
    for o, a in opts:
        if o == "-t": mode = "l"        # list archive
        if o == "-c": mode = "w"        # write to archive
        if o == "-r": mode = "a"        # append to archive
        if o == "-x": mode = "r"        # extract from archive
        if o == "-f": file = a          # specify filename else use stdout
        if o == "-C": dir = a           # change to dir
        if o == "-z": comp = 1          # filter through gzip
        if o == "-v": debug = 2         # verbose mode
        if o == "-q": debug = 0         # quiet mode
        if o == "-h":                   # help message
            sys.stdout.write(usage + "\n")
            sys.exit(0)

    if not mode:
        sys.stdout.write(usage + "\n")
        sys.exit(0)

    if comp:
        func = gzopen
    else:
        func = open

    if not file or file == "-":
        # Only a newly created archive can go to stdout.
        if mode != "w":
            sys.stdout.write(usage + "\n")
            sys.exit(0)
        # Debug output would end up inside the archive data.
        debug = 0
        # If under Win32, set stdout to binary.
        try:
            import msvcrt
            msvcrt.setmode(1, os.O_BINARY)
        except ImportError:
            pass
        tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
    else:
        if mode == "l":
            tarfile = func(file, "r")
        else:
            tarfile = func(file, mode)

    tarfile.debug = debug

    if mode == "r":
        if dir is None:
            dir = ""
        for tarinfo in tarfile:
            tarfile.extract(tarinfo, dir)
    elif mode == "l":
        tarfile.list(debug)
    else:
        for arg in args:
            files = glob.glob(arg)
            for f in files:
                tarfile.add(f, dir)
    tarfile.close()
1546
1547
1548
class TarFromIterator(TarFile):
    """Forward-only, readable tar stream generated lazily from an iterator.

    Archive bytes are produced on demand: read() asks _addtobuffer()
    for more data until the request can be satisfied, and
    _addtobuffer() in turn pulls (TarInfo, fileobj) pairs from the
    iterator given to the constructor.
    """
    # Values of self.status: which part of the archive is generated next.
    BEGIN = 0           # fetch the next pair and buffer its header
    MIDDLE_OF_FILE = 1  # copy data of the current fileobj
    END = 2             # iterator exhausted, nothing more to generate

    # Largest amount of file data moved into the buffer in one step
    BUFFER_ADDLEN = 64 * 1024

    def __init__(self, pair_iter):
        """Set up a TarFromIterator reading from pair_iter.

        pair_iter is an iterator yielding (TarInfo, fileobj) pairs,
        where fileobj is either a file-like object opened for reading
        or None when the member carries no data.  A fileobj is closed
        before the next pair is requested from the iterator.
        """
        self.pair_iter = pair_iter
        self.name = None
        self.mode = "rb"
        self.closed = None

        # Helper defined outside this class; presumably it creates the
        # bookkeeping attributes (self.offset among them) -- verify.
        self.init_datastructures()

        self.status = self.BEGIN
        self.cur_tarinfo = None
        self.cur_fileobj = None
        self.cur_pos_in_fileobj = 0
        self.buffer = ""
        # Position as seen by the reading client; kept separately
        # from self.offset.
        self.tar_iter_offset = 0

    def seek(self, offset):
        """Move forward to absolute position offset.

        Seeking is emulated by reading and throwing away data, so only
        forward seeks are possible; a backward seek raises TarError.
        Stops silently if the end of the stream is hit first.
        """
        if offset < self.tar_iter_offset:
            raise TarError("Seeks in TarFromIterator must go forwards,\n"
                           "Instead asking for %s from %s" %
                           (offset, self.tar_iter_offset))
        step = self.BUFFER_ADDLEN
        # Discard in bounded pieces so a long seek never buffers it all
        while offset - self.tar_iter_offset >= step:
            if not self.read(step):
                return # eof
        self.read(offset - self.tar_iter_offset)

    def read(self, length = -1):
        """Return the next length bytes; a negative length means read
        everything that is left.  Fewer bytes than requested are
        returned when the stream ends early.
        """
        if length < 0:
            # Drain the generator completely
            while self._addtobuffer():
                pass
            result, self.buffer = self.buffer, ""
        else:
            # Fill until enough is buffered or nothing more can be added
            while len(self.buffer) < length and self._addtobuffer():
                pass
            # The buffer may still hold fewer than length bytes here
            result = self.buffer[:length]
            self.buffer = self.buffer[length:]
        self.tar_iter_offset += len(result)
        return result

    def _addtobuffer(self):
        """Generate the next piece of the archive into self.buffer.

        Return 1 after a step that may be followed by more data and
        None once the end of the archive has been reached.
        """
        if self.status == self.END:
            return None

        if self.status == self.BEGIN:
            # Fetch the next member; when there is none, finish the archive
            try:
                self.cur_tarinfo, self.cur_fileobj = self.pair_iter.next()
            except StopIteration:
                self._add_final()
                self.status = self.END
                return None

            tarinfo = self.cur_tarinfo
            # These member types carry no data, so force their size to zero
            if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE,
                                CHRTYPE, BLKTYPE):
                tarinfo.size = long(0)

            header = self._get_full_headers(tarinfo)
            self.buffer += header
            self.offset += len(header)
            assert len(header) % BLOCKSIZE == 0

            if self.cur_fileobj is not None:
                self.status = self.MIDDLE_OF_FILE
                self.cur_pos_in_fileobj = 0
            else:
                # Header-only member: stay in BEGIN for the next pair
                self.status = self.BEGIN
                self._finish_fileobj()
            return 1

        if self.status == self.MIDDLE_OF_FILE:
            # Copy at most BUFFER_ADDLEN bytes, never past tarinfo.size
            unread = self.cur_tarinfo.size - self.cur_pos_in_fileobj
            wanted = min(self.BUFFER_ADDLEN, unread)
            data = self.cur_fileobj.read(wanted)
            self.cur_pos_in_fileobj += len(data)
            if len(data) != 0:
                self.buffer += data
                return 1

            # Nothing was read: fine only if nothing was expected any more
            if wanted != 0:
                raise IOError("end of file reached")
            blocks, remainder = divmod(self.cur_tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                # Pad the data up to a full block with NUL bytes
                self.buffer += "\0" * (BLOCKSIZE - remainder)
                blocks += 1
            self.cur_fileobj.close()
            self.offset += blocks * BLOCKSIZE
            self._finish_fileobj()
            self.status = self.BEGIN
            return 1

        assert 0

    def _finish_fileobj(self):
        """Hook invoked once the current member is completely buffered."""
        # Deliberately a no-op: recording the member in self.members,
        # self.membernames and self.chunks is skipped to save memory.
        return

    def _add_final(self):
        """Append the closing NUL padding to the buffer."""
        # NOTE(review): nothing is appended when self.offset is already
        # a multiple of RECORDSIZE -- confirm that readers of the
        # stream do not rely on explicit end-of-archive blocks.
        remainder = self.offset % RECORDSIZE
        if remainder > 0:
            self.buffer += "\0" * (RECORDSIZE - remainder)

    def close(self):
        """Mark the object as closed; closing twice trips an assertion."""
        assert not self.closed
        self.closed = 1
54 by bescoto
Cache pwd and group files
1672
1673
1674
def uid2uname(uid):
    """Look up the user name for numeric user id uid.

    The lookup goes through the module-level cache uid_dict, which is
    filled by set_pwd_dict() on first use.  KeyError is raised when
    the cache has no entry for uid.
    """
    if uid_dict is None:
        # Cache not built yet
        set_pwd_dict()
    return uid_dict[uid]
1678
1679
def uname2uid(uname):
    """Look up the numeric user id for user name uname.

    The lookup goes through the module-level cache uname_dict, which
    is filled by set_pwd_dict() on first use.  KeyError is raised when
    the cache has no entry for uname.
    """
    if uname_dict is None:
        # Cache not built yet
        set_pwd_dict()
    return uname_dict[uname]
1683
1684
def set_pwd_dict():
    """Build the global pwd caches uid_dict and uname_dict.

    uid_dict maps uid -> user name and uname_dict maps user name ->
    uid, both taken from pwd.getpwall().  May only be called while
    both caches are still None and the pwd module is available.
    """
    global uid_dict, uname_dict
    assert uid_dict is None and uname_dict is None and pwd
    uid_dict = {}
    uname_dict = {}
    for record in pwd.getpwall():
        # Field 0 is the login name, field 2 the numeric uid
        name, number = record[0], record[2]
        uid_dict[number] = name
        uname_dict[name] = number
1693
1694
def gid2gname(gid):
    """Look up the group name for numeric group id gid.

    The lookup goes through the module-level cache gid_dict, which is
    filled by set_grp_dict() on first use.  KeyError is raised when
    the cache has no entry for gid.
    """
    if gid_dict is None:
        # Cache not built yet
        set_grp_dict()
    return gid_dict[gid]
1698
1699
def gname2gid(gname):
    """Look up the numeric group id for group name gname.

    The lookup goes through the module-level cache gname_dict, which
    is filled by set_grp_dict() on first use.  KeyError is raised when
    the cache has no entry for gname.
    """
    if gname_dict is None:
        # Cache not built yet
        set_grp_dict()
    return gname_dict[gname]
1703
1704
def set_grp_dict():
    """Build the global grp caches gid_dict and gname_dict.

    gid_dict maps gid -> group name and gname_dict maps group name ->
    gid, both taken from grp.getgrall().  May only be called while
    both caches are still None and the grp module is available.
    """
    global gid_dict, gname_dict
    assert gid_dict is None and gname_dict is None and grp
    gid_dict = {}
    gname_dict = {}
    for record in grp.getgrall():
        # Field 0 is the group name, field 2 the numeric gid
        name, number = record[0], record[2]
        gid_dict[number] = name
        gname_dict[name] = number