# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from future_builtins import filter, map
import re # @UnusedImport
import types
import os
import sys
import tempfile
from duplicity import tarfile # @UnusedImport
from duplicity import librsync # @UnusedImport
from duplicity import log # @UnusedImport
from duplicity import diffdir
from duplicity import selection
from duplicity import tempdir
from duplicity import util # @UnusedImport
from duplicity.path import * # @UnusedWildImport
from duplicity.lazy import * # @UnusedWildImport
"""Functions for patching of directories"""
class PatchDirException(Exception):
    """Raised when a delta tarfile entry cannot be interpreted."""
    pass
def Patch(base_path, difftar_fileobj):
    """Patch given base_path and file object containing delta

    @param base_path: Path object for the root of the tree to patch
    @param difftar_fileobj: open file object holding the difftar data;
        it is always closed before returning.
    """
    try:
        diff_tarfile = tarfile.TarFile("arbitrary", "r", difftar_fileobj)
        patch_diff_tarfile(base_path, diff_tarfile)
    finally:
        # Previously this was "assert not difftar_fileobj.close()", which
        # python -O strips entirely (never closing the file) and which
        # leaked the file object whenever patching raised.
        difftar_fileobj.close()
def Patch_from_iter(base_path, fileobj_iter, restrict_index=()):
    """Patch given base_path and iterator of delta file objects"""
    patch_diff_tarfile(base_path,
                       TarFile_FromFileobjs(fileobj_iter),
                       restrict_index)
def patch_diff_tarfile(base_path, diff_tarfile, restrict_index=()):
    """Patch given Path object using delta tarfile (as in tarfile.TarFile)

    If restrict_index is set, ignore any deltas in diff_tarfile that
    don't start with restrict_index.
    """
    if base_path.exists():
        path_iter = selection.Select(base_path).set_iter()
    else:
        path_iter = empty_iter()  # probably untarring full backup

    diff_path_iter = difftar2path_iter(diff_tarfile)
    if restrict_index:
        diff_path_iter = filter_path_iter(diff_path_iter, restrict_index)

    ITR = IterTreeReducer(PathPatcher, [base_path])
    for basis_path, diff_ropath in diffdir.collate2iters(path_iter,
                                                         diff_path_iter):
        # Report and index against whichever side is present; the basis
        # path wins when both exist.
        primary = basis_path if basis_path else diff_ropath
        log.Info(_("Patching %s") % (util.ufn(primary.get_relative_path())),
                 log.InfoCode.patch_file_patching,
                 util.escape(primary.get_relative_path()))
        ITR(primary.index, basis_path, diff_ropath)
    ITR.Finish()
    base_path.setdata()
def empty_iter():
    """Return an iterator that yields nothing."""
    return iter(())
def filter_path_iter(path_iter, index):
    """Rewrite path elements of path_iter so they start with index

    Discard any that doesn't start with index, and remove the index
    prefix from the rest.
    """
    assert isinstance(index, tuple) and index, index
    prefix_len = len(index)
    for path in path_iter:
        if path.index[:prefix_len] != index:
            continue  # outside the restricted subtree
        path.index = path.index[prefix_len:]
        yield path
def difftar2path_iter(diff_tarfile):
    """Turn file-like difftarobj into iterator of ROPaths

    Yields one ROPath per logical entry; multivolume entries are
    collapsed into a single ROPath whose fileobj spans the volumes.
    """
    tar_iter = iter(diff_tarfile)
    multivol_fileobj = None

    # The next tar_info is stored in this one element list so
    # Multivol_Filelike below can update it.  An exhausted tar ends the
    # generator with "return": raising StopIteration inside a generator
    # (the old behavior) is a RuntimeError under PEP 479 / Python 3.7+.
    try:
        tarinfo_list = [next(tar_iter)]
    except StopIteration:
        return
    while 1:
        # This section relevant when a multivol diff is last in tar
        if not tarinfo_list[0]:
            return
        if multivol_fileobj and not multivol_fileobj.at_end:
            multivol_fileobj.close()  # aborting in middle of multivol
            continue

        index, difftype, multivol = get_index_from_tarinfo(tarinfo_list[0])
        ropath = ROPath(index)
        ropath.init_from_tarinfo(tarinfo_list[0])
        ropath.difftype = difftype
        if difftype == "deleted":
            ropath.type = None
        elif ropath.isreg():
            if multivol:
                multivol_fileobj = Multivol_Filelike(diff_tarfile, tar_iter,
                                                     tarinfo_list, index)
                ropath.setfileobj(multivol_fileobj)
                yield ropath
                continue  # Multivol_Filelike will reset tarinfo_list
            else:
                ropath.setfileobj(diff_tarfile.extractfile(tarinfo_list[0]))
        yield ropath
        try:
            tarinfo_list[0] = next(tar_iter)
        except StopIteration:
            return
def get_index_from_tarinfo(tarinfo):
    """Return (index, difftype, multivol) pair from tarinfo object

    The tarinfo name must start with one of the recognized prefixes
    (snapshot/, diff/, deleted/, multivol_diff/, multivol_snapshot/);
    otherwise PatchDirException is raised.  index is the path split on
    "/" as a tuple (empty for the root entry), difftype is one of
    "snapshot", "diff" or "deleted", and multivol is 1 for multivolume
    entries (whose trailing volume number is stripped from the name).

    Raises PatchDirException for unrecognized entries and for names
    containing "..".
    """
    # Hoisted out of the loop: the name does not depend on the prefix.
    tiname = util.get_tarinfo_name(tarinfo)
    for prefix in ["snapshot/", "diff/", "deleted/",
                   "multivol_diff/", "multivol_snapshot/"]:
        if tiname.startswith(prefix):
            name = tiname[len(prefix):]  # strip prefix
            if prefix.startswith("multivol"):
                if prefix == "multivol_diff/":
                    difftype = "diff"
                else:
                    difftype = "snapshot"
                multivol = 1
                # Strip both the prefix and the trailing volume number.
                name, num_subs = \
                    re.subn("(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
                            "\\2", tiname)
                if num_subs != 1:
                    raise PatchDirException(u"Unrecognized diff entry %s" %
                                            util.ufn(tiname))
            else:
                difftype = prefix[:-1]  # strip trailing /
                name = tiname[len(prefix):]
                if name.endswith("/"):
                    name = name[:-1]  # strip trailing /'s
                multivol = 0
            break
    else:
        raise PatchDirException(u"Unrecognized diff entry %s" %
                                util.ufn(tiname))
    if name == "." or name == "":
        index = ()
    else:
        index = tuple(name.split("/"))
        if '..' in index:
            # Reject path traversal attempts in the archive.
            raise PatchDirException(u"Tar entry %s contains '..'. Security "
                                    "violation" % util.ufn(tiname))
    return (index, difftype, multivol)
class Multivol_Filelike:
    """Emulate a file like object from multivols

    Maintains a buffer about the size of a volume.  When it is read()
    to the end, pull in more volumes as desired.
    """
    def __init__(self, tf, tar_iter, tarinfo_list, index):
        """Initializer.  tf is TarFile obj, tarinfo is first tarinfo"""
        self.tf, self.tar_iter = tf, tar_iter
        self.tarinfo_list = tarinfo_list  # must store as list for write access
        self.index = index
        self.buffer = ""
        self.at_end = 0

    def read(self, length=-1):
        """Read length bytes from file"""
        if length < 0:
            # drain every remaining chunk belonging to this member
            while self.addtobuffer():
                pass
            wanted = len(self.buffer)
        else:
            while len(self.buffer) < length and self.addtobuffer():
                pass
            wanted = min(len(self.buffer), length)
        result, self.buffer = self.buffer[:wanted], self.buffer[wanted:]
        return result

    def addtobuffer(self):
        """Add next chunk to buffer"""
        if self.at_end:
            return None
        index, difftype, multivol = get_index_from_tarinfo(  # @UnusedVariable
            self.tarinfo_list[0])
        if not multivol or index != self.index:
            # we've moved on
            # the following communicates next tarinfo to difftar2path_iter
            self.at_end = 1
            return None

        fp = self.tf.extractfile(self.tarinfo_list[0])
        self.buffer += fp.read()
        fp.close()

        try:
            self.tarinfo_list[0] = next(self.tar_iter)
        except StopIteration:
            self.tarinfo_list[0] = None
            self.at_end = 1
            return None
        return 1

    def close(self):
        """If not at end, read remaining data"""
        while not self.at_end:
            self.buffer = ""
            if not self.addtobuffer():
                break
        self.at_end = 1
class PathPatcher(ITRBranch):
    """Used by DirPatch, process the given basis and diff"""
    def __init__(self, base_path):
        """Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None

    def start_process(self, index, basis_path, diff_ropath):
        """Start processing when diff_ropath is a directory"""
        if not (diff_ropath and diff_ropath.isdir()):
            # should only happen for first elem
            assert index == (), util.uindex(index)
            self.fast_process(index, basis_path, diff_ropath)
            return
        if basis_path:
            if not basis_path.isdir():
                # something non-directory is in the way; replace it
                basis_path.delete()
                basis_path.mkdir()
        else:
            basis_path = self.base_path.new_index(index)
            assert not basis_path.exists()
            basis_path.mkdir()  # Need place for later files to go into
        self.dir_basis_path = basis_path
        self.dir_diff_ropath = diff_ropath

    def end_process(self):
        """Copy directory permissions when leaving tree"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs(self.dir_basis_path)

    def can_fast_process(self, index, basis_path, diff_ropath):
        """No need to recurse if diff_ropath isn't a directory"""
        return not (diff_ropath and diff_ropath.isdir())

    def fast_process(self, index, basis_path, diff_ropath):
        """For use when neither is a directory"""
        if not diff_ropath:
            return  # no change
        if not basis_path:
            # nothing to patch against: a delete marker is a no-op,
            # anything else is a snapshot copied in whole
            if diff_ropath.difftype != "deleted":
                diff_ropath.copy(self.base_path.new_index(index))
        elif diff_ropath.difftype == "deleted":
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
        elif not basis_path.isreg() or diff_ropath.difftype == "snapshot":
            # replace whatever is there with the snapshot
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
            diff_ropath.copy(basis_path)
        else:
            assert diff_ropath.difftype == "diff", diff_ropath.difftype
            basis_path.patch_with_attribs(diff_ropath)
class TarFile_FromFileobjs:
    """Like a tarfile.TarFile iterator, but read from multiple fileobjs"""
    def __init__(self, fileobj_iter):
        """Make new tarinfo iterator

        fileobj_iter should be an iterator of file objects opened for
        reading.  They will be closed at end of reading.
        """
        self.fileobj_iter = fileobj_iter
        self.tarfile, self.tar_iter = None, None
        self.current_fp = None

    def __iter__(self):
        return self

    def set_tarfile(self):
        """Set tarfile from next file object, or raise StopIteration"""
        if self.current_fp:
            # Previously "assert not self.current_fp.close()": python -O
            # strips asserts, silently leaking the file object.
            self.current_fp.close()
        self.current_fp = next(self.fileobj_iter)
        self.tarfile = util.make_tarfile("r", self.current_fp)
        self.tar_iter = iter(self.tarfile)

    def next(self):
        """Return next tarinfo, advancing to the next fileobj as needed"""
        if not self.tarfile:
            self.set_tarfile()
        try:
            return next(self.tar_iter)
        except StopIteration:
            self.tarfile.close()  # see note in set_tarfile about assert
            self.set_tarfile()
            return next(self.tar_iter)

    # Python 3 iterator protocol; harmless extra attribute on Python 2.
    __next__ = next
def collate_iters(iter_list):
    """Collate iterators by index

    Input is a list of n iterators each of which must iterate elements
    with an index attribute.  The elements must come out in increasing
    order, and the index should be a tuple itself.

    The output is an iterator which yields tuples where all elements
    in the tuple have the same index, and the tuple has n elements in
    it.  If any iterator lacks an element with that index, the tuple
    will have None in that spot.
    """
    iter_num = len(iter_list)
    if iter_num == 2:
        return diffdir.collate2iters(iter_list[0], iter_list[1])

    # overflow[i] is truthy once iter_list[i] has been exhausted;
    # elems[i] is None when slot i needs replenishing.
    overflow = [None] * iter_num
    elems = [None] * iter_num

    def refill():
        """Pull the next element into every empty, unexhausted slot"""
        for i in range(iter_num):
            if elems[i] is None and not overflow[i]:
                try:
                    elems[i] = next(iter_list[i])
                except StopIteration:
                    overflow[i] = 1

    def least_index():
        """Return the smallest index among the pending elements"""
        return min(elem.index for elem in elems if elem)

    def produce():
        while 1:
            refill()
            if None not in overflow:
                break  # every input iterator is spent
            index = least_index()
            batch = []
            for i in range(iter_num):
                if elems[i] and elems[i].index == index:
                    batch.append(elems[i])
                    elems[i] = None
                else:
                    batch.append(None)
            yield tuple(batch)

    return produce()
class IndexedTuple:
    """Like a tuple, but has .index (used previously by collate_iters)

    Ordering comparisons consider only .index; equality also requires
    the wrapped data to match.  Comparing against a plain tuple
    compares the data only.
    """
    def __init__(self, index, sequence):
        self.index = index
        self.data = tuple(sequence)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        """This only works for numerical keys (easier this way)"""
        return self.data[key]

    def __lt__(self, other):
        return self.__cmp__(other) == -1

    def __le__(self, other):
        return self.__cmp__(other) != 1

    def __ne__(self, other):
        # Delegate to __eq__, propagating NotImplemented instead of
        # negating it (bool(NotImplemented) is an error on Python 3.12+).
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __gt__(self, other):
        return self.__cmp__(other) == 1

    def __ge__(self, other):
        return self.__cmp__(other) != -1

    def __cmp__(self, other):
        """Three-way comparison on .index only; requires IndexedTuple"""
        assert isinstance(other, IndexedTuple)
        if self.index < other.index:
            return -1
        elif self.index == other.index:
            return 0
        else:
            return 1

    def __eq__(self, other):
        if isinstance(other, IndexedTuple):
            return self.index == other.index and self.data == other.data
        elif isinstance(other, tuple):
            return self.data == other
        else:
            # Was "return None"; NotImplemented lets Python try the
            # reflected operation or fall back to identity comparison.
            return NotImplemented

    def __str__(self):
        return "(%s).%s" % (", ".join(map(str, self.data)), self.index)
def normalize_ps(patch_sequence):
    """Given an sequence of ROPath deltas, remove blank and unnecessary

    The sequence is assumed to be in patch order (later patches apply
    to earlier ones).  A patch is unnecessary if a later one doesn't
    require it (for instance, any patches before a "delete" are
    unnecessary).
    """
    result_list = []
    # Walk backwards from the newest patch; stop as soon as a
    # non-incremental delta (snapshot/delete) makes the rest irrelevant.
    for delta in reversed(patch_sequence):
        if delta is None:
            continue  # skip blank entries
        result_list.insert(0, delta)
        if delta.difftype != "diff":
            break
    return result_list
def patch_seq2ropath(patch_seq):
    """Apply the patches in patch_seq, return single ropath

    patch_seq is a normalized sequence (see normalize_ps): the first
    element must not be a "diff"; every later element must be a "diff"
    applied on top via librsync.  For non-regular files the sequence
    must contain exactly one element.
    """
    first = patch_seq[0]
    assert first.difftype != "diff", "First patch in sequence " \
        "%s was a diff" % patch_seq
    if not first.isreg():
        # No need to bother with data if not regular file
        assert len(patch_seq) == 1, "Patch sequence isn't regular, but " \
            "has %d entries" % len(patch_seq)
        return first.get_ropath()

    current_file = first.open("rb")

    for delta_ropath in patch_seq[1:]:
        assert delta_ropath.difftype == "diff", delta_ropath.difftype
        # NOTE: `file` is the Python 2 builtin file type; this check and
        # os.tmpfile() below are Python-2-only constructs.
        if not isinstance(current_file, file):
            """
            librsync insists on a real file object, which we create manually
            by using the duplicity.tempdir to tell us where.

            See https://bugs.launchpad.net/duplicity/+bug/670891 for discussion
            of os.tmpfile() vs tempfile.TemporaryFile() w.r.t. Windows / Linux.
            """
            # NOTE(review): sys.platform is "win32" on native Windows, so
            # the "windows" prefix never matches here; only cygwin takes
            # the os.tmpfile() branch -- confirm whether that is intended.
            if sys.platform.startswith(('cygwin', 'windows')):
                tempfp = os.tmpfile()
            else:
                tempfp = tempfile.TemporaryFile(dir=tempdir.default().dir())
            # materialize the chained PatchedFile into a real temp file
            util.copyfileobj(current_file, tempfp)
            assert not current_file.close()
            tempfp.seek(0)
            current_file = tempfp
        # chain another delta on top; evaluated lazily on read()
        current_file = librsync.PatchedFile(current_file,
                                            delta_ropath.open("rb"))
    result = patch_seq[-1].get_ropath()
    result.setfileobj(current_file)
    return result
def integrate_patch_iters(iter_list):
    """Combine a list of iterators of ropath patches

    The iter_list should be sorted in patch order, and the elements in
    each iter_list need to be orderd by index.  The output will be an
    iterator of the final ROPaths in index order.
    """
    for patch_seq in collate_iters(iter_list):
        patches = normalize_ps(patch_seq)
        try:
            ropath = patch_seq2ropath(patches)
            # a non-existent result means the final patch was a delete
            if ropath.exists():
                yield ropath
        except Exception as e:
            filename = patches[-1].get_ropath().get_relative_path()
            log.Warn(_("Error '%s' patching %s") %
                     (util.uexc(e), util.ufn(filename)),
                     log.WarningCode.cannot_process,
                     util.escape(filename))
def tarfiles2rop_iter(tarfile_list, restrict_index=()):
    """Integrate tarfiles of diffs into single ROPath iter

    Then filter out all the diffs in that index which don't start with
    the restrict_index.
    """
    diff_iters = []
    for tf in tarfile_list:
        path_iter = difftar2path_iter(tf)
        if restrict_index:
            # Apply filter before integration
            path_iter = filter_path_iter(path_iter, restrict_index)
        diff_iters.append(path_iter)
    return integrate_patch_iters(diff_iters)
def Write_ROPaths(base_path, rop_iter):
    """Write out ropaths in rop_iter starting at base_path

    Returns 1 if something was actually written, 0 otherwise.
    """
    ITR = IterTreeReducer(ROPath_IterWriter, [base_path])
    wrote_something = 0
    for ropath in rop_iter:
        wrote_something = 1
        ITR(ropath.index, ropath)
    ITR.Finish()
    base_path.setdata()
    return wrote_something
class ROPath_IterWriter(ITRBranch):
    """Used in Write_ROPaths above

    We need to use an ITR because we have to update the
    permissions/times of directories after we write the files in them.
    """
    def __init__(self, base_path):
        """Set base_path, Path of root of tree"""
        self.base_path = base_path
        # directory ropath whose attribs are applied in end_process()
        self.dir_diff_ropath = None
        # destination Path of the directory currently being processed
        self.dir_new_path = None

    def start_process(self, index, ropath):
        """Write ropath.  Only handles the directory case"""
        if not ropath.isdir():
            # Base may not be a directory, but rest should
            assert ropath.index == (), ropath.index
            new_path = self.base_path.new_index(index)
            if ropath.exists():
                if new_path.exists():
                    new_path.deltree()
                ropath.copy(new_path)

        self.dir_new_path = self.base_path.new_index(index)
        # NOTE(review): when the destination exists and force is set,
        # control falls through to mkdir() on an existing path --
        # presumably Path.mkdir tolerates/replaces it; confirm in
        # duplicity.path before relying on this.
        if self.dir_new_path.exists() and not globals.force:
            # base may exist, but nothing else
            assert index == (), index
        else:
            self.dir_new_path.mkdir()
        self.dir_diff_ropath = ropath

    def end_process(self):
        """Update information of a directory when leaving it"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs(self.dir_new_path)

    def can_fast_process(self, index, ropath):
        """Can fast process (no recursion) if ropath isn't a directory"""
        log.Info(_("Writing %s of type %s") %
                 (util.ufn(ropath.get_relative_path()), ropath.type),
                 log.InfoCode.patch_file_writing,
                 "%s %s" % (util.escape(ropath.get_relative_path()), ropath.type))
        return not ropath.isdir()

    def fast_process(self, index, ropath):
        """Write non-directory ropath to destination"""
        if ropath.exists():
            ropath.copy(self.base_path.new_index(index))