# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import os
import re #@UnusedImport
import types

from duplicity import tarfile #@UnusedImport
from duplicity import librsync #@UnusedImport
from duplicity import log #@UnusedImport
from duplicity import diffdir
from duplicity import globals
from duplicity import misc
from duplicity import selection
from duplicity import util #@UnusedImport
from duplicity.path import * #@UnusedWildImport
from duplicity.lazy import * #@UnusedWildImport
"""Functions for patching of directories"""

class PatchDirException( Exception ):
    """Raised when a diff tar contains an entry we cannot interpret safely."""
    pass
def Patch( base_path, difftar_fileobj ):
    """Patch given base_path and file object containing delta

    difftar_fileobj must be a file object holding a single difftar
    volume; it is closed when patching completes.

    """
    diff_tarfile = tarfile.TarFile( "arbitrary", "r", difftar_fileobj )
    patch_diff_tarfile( base_path, diff_tarfile )
    # close() returning a true value would indicate an error
    assert not difftar_fileobj.close()
def Patch_from_iter( base_path, fileobj_iter, restrict_index=() ):
    """Patch given base_path and iterator of delta file objects

    Each file object in fileobj_iter is one difftar volume; they are
    stitched together by TarFile_FromFileobjs.

    """
    diff_tarfile = TarFile_FromFileobjs( fileobj_iter )
    patch_diff_tarfile( base_path, diff_tarfile, restrict_index )
def patch_diff_tarfile( base_path, diff_tarfile, restrict_index=() ):
    """Patch given Path object using delta tarfile (as in tarfile.TarFile)

    If restrict_index is set, ignore any deltas in diff_tarfile that
    don't start with restrict_index.

    """
    if base_path.exists():
        path_iter = selection.Select( base_path ).set_iter()
    else:
        path_iter = empty_iter() # probably untarring full backup

    diff_path_iter = difftar2path_iter( diff_tarfile )
    if restrict_index:
        # Discard deltas outside the restricted index before collating
        diff_path_iter = filter_path_iter( diff_path_iter, restrict_index )
    collated = diffdir.collate2iters( path_iter, diff_path_iter )

    ITR = IterTreeReducer( PathPatcher, [base_path] )
    for basis_path, diff_ropath in collated:
        if basis_path:
            log.Info( _( "Patching %s" ) % ( basis_path.get_relative_path(), ),
                      log.InfoCode.patch_file_patching,
                      util.escape( basis_path.get_relative_path() ) )
            ITR( basis_path.index, basis_path, diff_ropath )
        else:
            log.Info( _( "Patching %s" ) % ( diff_ropath.get_relative_path(), ),
                      log.InfoCode.patch_file_patching,
                      util.escape( diff_ropath.get_relative_path() ) )
            ITR( diff_ropath.index, basis_path, diff_ropath )
    ITR.Finish()
    # refresh cached stat data of the (possibly just created) base dir
    base_path.setdata()
def empty_iter():
    """Return a generator that yields no elements."""
    if 0:
        yield 1 # this never happens, but fools into generator treatment
def filter_path_iter( path_iter, index ):
    """Rewrite path elements of path_iter so they start with index

    Discard any that doesn't start with index, and remove the index
    prefix from the rest.

    """
    assert isinstance( index, tuple ) and index, index
    l = len( index )
    for path in path_iter:
        if path.index[:l] == index:
            path.index = path.index[l:]
            yield path
def difftar2path_iter( diff_tarfile ):
    """Turn file-like difftarobj into iterator of ROPaths"""
    tar_iter = iter( diff_tarfile )
    multivol_fileobj = None

    # The next tar_info is stored in this one element list so
    # Multivol_Filelike below can update it.  Any StopIterations will
    # be passed upwards.
    tarinfo_list = [tar_iter.next()]

    while 1:
        # This section relevant when a multivol diff is last in tar
        if not tarinfo_list[0]:
            raise StopIteration
        if multivol_fileobj and not multivol_fileobj.at_end:
            multivol_fileobj.close() # aborting in middle of multivol
            continue # close() advanced tarinfo_list to the next entry

        index, difftype, multivol = get_index_from_tarinfo( tarinfo_list[0] )
        ropath = ROPath( index )
        ropath.init_from_tarinfo( tarinfo_list[0] )
        ropath.difftype = difftype
        if difftype == "deleted":
            # deleted entries carry no data; mark as non-existent
            ropath.type = None
        elif ropath.isreg():
            if multivol:
                multivol_fileobj = Multivol_Filelike( diff_tarfile, tar_iter,
                                                      tarinfo_list, index )
                ropath.setfileobj( multivol_fileobj )
                yield ropath
                continue # Multivol_Filelike will reset tarinfo_list
            else:
                ropath.setfileobj( diff_tarfile.extractfile( tarinfo_list[0] ) )
        yield ropath
        tarinfo_list[0] = tar_iter.next()
def get_index_from_tarinfo( tarinfo ):
    """Return (index, difftype, multivol) pair from tarinfo object"""
    for prefix in ["snapshot/", "diff/", "deleted/",
                   "multivol_diff/", "multivol_snapshot/"]:
        tiname = util.get_tarinfo_name( tarinfo )
        if tiname.startswith( prefix ):
            name = tiname[len( prefix ):] # strip prefix
            if prefix.startswith( "multivol" ):
                if prefix == "multivol_diff/":
                    difftype = "diff"
                else:
                    difftype = "snapshot"
                multivol = 1
                # multivol entries end in "/<volume number>"; strip it
                name, num_subs = \
                    re.subn( "(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
                             "\\2", tiname )
                if num_subs != 1:
                    raise PatchDirException( "Unrecognized diff entry %s" %
                                             ( tiname, ) )
            else:
                difftype = prefix[:-1] # strip trailing /
                name = tiname[len( prefix ):]
                if name.endswith( "/" ):
                    name = name[:-1] # strip trailing /'s
                multivol = 0
            break
    else:
        raise PatchDirException( "Unrecognized diff entry %s" %
                                 ( tiname, ) )
    if name == "." or name == "":
        index = ()
    else:
        index = tuple( name.split( "/" ) )
        if '..' in index:
            # refuse path traversal out of the restore target
            raise PatchDirException( "Tar entry %s contains '..'.  Security "
                                     "violation" % ( tiname, ) )
    return ( index, difftype, multivol )
class Multivol_Filelike:
    """Emulate a file like object from multivols

    Maintains a buffer about the size of a volume.  When it is read()
    to the end, pull in more volumes as desired.

    """
    def __init__( self, tf, tar_iter, tarinfo_list, index ):
        """Initializer.  tf is TarFile obj, tarinfo is first tarinfo"""
        self.tf, self.tar_iter = tf, tar_iter
        self.tarinfo_list = tarinfo_list # must store as list for write access
        self.index = index
        self.buffer = ""
        self.at_end = 0

    def read( self, length= -1 ):
        """Read length bytes from file"""
        if length < 0:
            # read everything remaining
            while self.addtobuffer():
                pass
            real_len = len( self.buffer )
        else:
            while len( self.buffer ) < length:
                if not self.addtobuffer():
                    break
            real_len = min( len( self.buffer ), length )

        result = self.buffer[:real_len]
        self.buffer = self.buffer[real_len:]
        return result

    def addtobuffer( self ):
        """Add next chunk to buffer"""
        if self.at_end:
            return None
        index, difftype, multivol = get_index_from_tarinfo( #@UnusedVariable
            self.tarinfo_list[0] )
        if not multivol or index != self.index:
            # We've moved on to the next diff entry.
            # the following communicates next tarinfo to difftar2path_iter
            self.at_end = 1
            return None

        fp = self.tf.extractfile( self.tarinfo_list[0] )
        self.buffer += fp.read()
        fp.close()

        try:
            self.tarinfo_list[0] = self.tar_iter.next()
        except StopIteration:
            self.tarinfo_list[0] = None
            self.at_end = 1
            return None
        return 1

    def close( self ):
        """If not at end, read remaining data"""
        if not self.at_end:
            while 1:
                self.buffer = ""
                if not self.addtobuffer():
                    break
        self.at_end = 1
class PathPatcher( ITRBranch ):
    """Used by DirPatch, process the given basis and diff"""
    def __init__( self, base_path ):
        """Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None

    def start_process( self, index, basis_path, diff_ropath ):
        """Start processing when diff_ropath is a directory"""
        if not ( diff_ropath and diff_ropath.isdir() ):
            assert index == (), str( index ) # should only happen for first elem
            self.fast_process( index, basis_path, diff_ropath )
            return

        if not basis_path:
            basis_path = self.base_path.new_index( index )
            assert not basis_path.exists()
            basis_path.mkdir() # Need place for later files to go into
        elif not basis_path.isdir():
            # replace non-directory with a directory
            basis_path.delete()
            basis_path.mkdir()
        self.dir_basis_path = basis_path
        self.dir_diff_ropath = diff_ropath

    def end_process( self ):
        """Copy directory permissions when leaving tree"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs( self.dir_basis_path )

    def can_fast_process( self, index, basis_path, diff_ropath ):
        """No need to recurse if diff_ropath isn't a directory"""
        return not ( diff_ropath and diff_ropath.isdir() )

    def fast_process( self, index, basis_path, diff_ropath ):
        """For use when neither is a directory"""
        if not diff_ropath:
            return # no change
        elif not basis_path:
            if diff_ropath.difftype == "deleted":
                pass # already deleted
            else:
                # just copy snapshot over
                diff_ropath.copy( self.base_path.new_index( index ) )
        elif diff_ropath.difftype == "deleted":
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
        elif not basis_path.isreg():
            # basis isn't a regular file; can't rdiff-patch it, replace it
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
            diff_ropath.copy( basis_path )
        else:
            assert diff_ropath.difftype == "diff", diff_ropath.difftype
            basis_path.patch_with_attribs( diff_ropath )
class TarFile_FromFileobjs:
304
"""Like a tarfile.TarFile iterator, but read from multiple fileobjs"""
305
def __init__( self, fileobj_iter ):
306
"""Make new tarinfo iterator
308
fileobj_iter should be an iterator of file objects opened for
309
reading. They will be closed at end of reading.
312
self.fileobj_iter = fileobj_iter
313
self.tarfile, self.tar_iter = None, None
314
self.current_fp = None
316
def __iter__( self ):
319
def set_tarfile( self ):
320
"""Set tarfile from next file object, or raise StopIteration"""
322
assert not self.current_fp.close()
323
self.current_fp = self.fileobj_iter.next()
324
self.tarfile = util.make_tarfile("r", self.current_fp)
325
self.tar_iter = iter( self.tarfile )
331
return self.tar_iter.next()
332
except StopIteration:
333
assert not self.tarfile.close()
335
return self.tar_iter.next()
337
def extractfile( self, tarinfo ):
338
"""Return data associated with given tarinfo"""
339
return self.tarfile.extractfile( tarinfo )
342
def collate_iters( iter_list ):
    """Collate iterators by index

    Input is a list of n iterators each of which must iterate elements
    with an index attribute.  The elements must come out in increasing
    order, and the index should be a tuple itself.

    The output is an iterator which yields tuples where all elements
    in the tuple have the same index, and the tuple has n elements in
    it.  If any iterator lacks an element with that index, the tuple
    will have None in that spot.

    """
    # overflow[i] means that iter_list[i] has been exhausted
    # elems[i] is None means that it is time to replenish it.
    iter_num = len( iter_list )
    if iter_num == 2:
        # the common two-iterator case has an optimized helper
        return diffdir.collate2iters( iter_list[0], iter_list[1] )
    overflow = [None] * iter_num
    elems = overflow[:]

    def setrorps( overflow, elems ):
        """Set the overflow and rorps list"""
        for i in range( iter_num ):
            if not overflow[i] and elems[i] is None:
                try:
                    elems[i] = iter_list[i].next()
                except StopIteration:
                    overflow[i] = 1
                    elems[i] = None

    def getleastindex( elems ):
        """Return the first index in elems, assuming elems isn't empty"""
        return min( map( lambda elem: elem.index, filter( lambda x: x, elems ) ) )

    def yield_tuples( iter_num, overflow, elems ):
        while 1:
            setrorps( overflow, elems )
            if not None in overflow:
                # every input iterator is exhausted
                break

            index = getleastindex( elems )
            yieldval = []
            for i in range( iter_num ):
                if elems[i] and elems[i].index == index:
                    yieldval.append( elems[i] )
                    elems[i] = None
                else:
                    yieldval.append( None )
            yield tuple( yieldval )
    return yield_tuples( iter_num, overflow, elems )
class IndexedTuple:
    """Like a tuple, but has .index (used previously by collate_iters)"""
    def __init__( self, index, sequence ):
        self.index = index
        self.data = tuple( sequence )

    def __len__( self ):
        return len( self.data )

    def __getitem__( self, key ):
        """This only works for numerical keys (easier this way)"""
        return self.data[key]

    # rich comparisons are defined in terms of __cmp__ on the index
    def __lt__( self, other ):
        return self.__cmp__( other ) == -1
    def __le__( self, other ):
        return self.__cmp__( other ) != 1
    def __ne__( self, other ):
        return not self.__eq__( other )
    def __gt__( self, other ):
        return self.__cmp__( other ) == 1
    def __ge__( self, other ):
        return self.__cmp__( other ) != -1

    def __cmp__( self, other ):
        assert isinstance( other, IndexedTuple )
        if self.index < other.index:
            return -1
        elif self.index == other.index:
            return 0
        else:
            return 1

    def __eq__( self, other ):
        if isinstance( other, IndexedTuple ):
            return self.index == other.index and self.data == other.data
        elif type( other ) is types.TupleType:
            return self.data == other
        else:
            return None

    def __str__( self ):
        return "(%s).%s" % ( ", ".join( map( str, self.data ) ), self.index )
def normalize_ps( patch_sequence ):
    """Given an sequence of ROPath deltas, remove blank and unnecessary

    The sequence is assumed to be in patch order (later patches apply
    to earlier ones).  A patch is unnecessary if a later one doesn't
    require it (for instance, any patches before a "delete" are
    unnecessary).

    """
    result_list = []
    i = len( patch_sequence ) - 1
    while i >= 0:
        delta = patch_sequence[i]
        if delta is not None:
            # skip blank entries
            result_list.insert( 0, delta )
            if delta.difftype != "diff":
                # snapshot/delete doesn't need anything earlier
                break
        i -= 1
    return result_list
def patch_seq2ropath( patch_seq ):
    """Apply the patches in patch_seq, return single ropath"""
    first = patch_seq[0]
    assert first.difftype != "diff", patch_seq
    if not first.isreg():
        # No need to bother with data if not regular file
        assert len( patch_seq ) == 1, len( patch_seq )
        return first.get_ropath()

    current_file = first.open( "rb" )

    for delta_ropath in patch_seq[1:]:
        assert delta_ropath.difftype == "diff", delta_ropath.difftype
        if not isinstance( current_file, file ):
            # librsync needs true file
            tempfp = os.tmpfile()
            misc.copyfileobj( current_file, tempfp )
            assert not current_file.close()
            tempfp.seek( 0 )
            current_file = tempfp
        current_file = librsync.PatchedFile( current_file,
                                             delta_ropath.open( "rb" ) )
    result = patch_seq[-1].get_ropath()
    result.setfileobj( current_file )
    return result
def integrate_patch_iters( iter_list ):
    """Combine a list of iterators of ropath patches

    The iter_list should be sorted in patch order, and the elements in
    each iter_list need to be orderd by index.  The output will be an
    iterator of the final ROPaths in index order.

    """
    collated = collate_iters( iter_list )
    for patch_seq in collated:
        final_ropath = patch_seq2ropath( normalize_ps( patch_seq ) )
        if final_ropath.exists():
            # otherwise final patch was delete
            yield final_ropath
def tarfiles2rop_iter( tarfile_list, restrict_index=() ):
    """Integrate tarfiles of diffs into single ROPath iter

    Then filter out all the diffs in that index which don't start with
    the restrict_index.

    """
    diff_iters = map( difftar2path_iter, tarfile_list )
    if restrict_index:
        # Apply filter before integration
        diff_iters = map( lambda i: filter_path_iter( i, restrict_index ),
                          diff_iters )
    return integrate_patch_iters( diff_iters )
def Write_ROPaths( base_path, rop_iter ):
    """Write out ropaths in rop_iter starting at base_path

    Returns 1 if something was actually written, 0 otherwise.

    """
    ITR = IterTreeReducer( ROPath_IterWriter, [base_path] )
    return_val = 0
    for ropath in rop_iter:
        return_val = 1
        ITR( ropath.index, ropath )
    ITR.Finish()
    base_path.setdata()
    return return_val
class ROPath_IterWriter( ITRBranch ):
    """Used in Write_ROPaths above

    We need to use an ITR because we have to update the
    permissions/times of directories after we write the files in them.

    """
    def __init__( self, base_path ):
        """Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None
        self.dir_new_path = None

    def start_process( self, index, ropath ):
        """Write ropath.  Only handles the directory case"""
        if not ropath.isdir():
            # Base may not be a directory, but rest should
            assert ropath.index == (), ropath.index
            new_path = self.base_path.new_index( index )
            if new_path.exists():
                new_path.delete()
            ropath.copy( new_path )
        else:
            self.dir_new_path = self.base_path.new_index( index )
            if self.dir_new_path.exists() and not globals.force:
                # base may exist, but nothing else
                assert index == (), index
            else:
                self.dir_new_path.mkdir()
            self.dir_diff_ropath = ropath

    def end_process( self ):
        """Update information of a directory when leaving it"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs( self.dir_new_path )

    def can_fast_process( self, index, ropath ):
        """Can fast process (no recursion) if ropath isn't a directory"""
        log.Info( _( "Writing %s of type %s" ) %
                  ( ropath.get_relative_path(), ropath.type ),
                  log.InfoCode.patch_file_writing,
                  "%s %s" % ( util.escape( ropath.get_relative_path() ), ropath.type ) )
        return not ropath.isdir()

    def fast_process( self, index, ropath ):
        """Write non-directory ropath to destination"""
        if ropath.exists():
            ropath.copy( self.base_path.new_index( index ) )