1
# Copyright 2002 Ben Escoto
3
# This file is part of duplicity.
5
# duplicity is free software; you can redistribute it and/or modify it
6
# under the terms of the GNU General Public License as published by
7
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA
8
# 02139, USA; either version 2 of the License, or (at your option) any
9
# later version; incorporated herein by reference.
11
"""Classes and functions on collections of backup volumes"""
14
import log, file_naming, path, dup_time, globals, manifest
16
class CollectionsError(Exception): pass
19
"""Backup set - the backup information produced by one session"""
20
def __init__(self, backend):
21
"""Initialize new backup set, only backend is required at first"""
22
self.backend = backend
24
self.volume_name_dict = {} # dict from volume number to filename
25
self.remote_manifest_name = None
26
self.local_manifest_path = None
27
self.time = None # will be set if is full backup set
28
self.start_time, self.end_time = None, None # will be set if inc
30
def is_complete(self):
31
"""Assume complete if found manifest file"""
32
return self.remote_manifest_name
34
def add_filename(self, filename):
35
"""Add a filename to given set. Return true if it fits.
37
The filename will match the given set if it has the right
38
times and is of the right type. The information will be set
39
from the first filename given.
42
pr = file_naming.parse(filename)
43
if not pr or not (pr.type == "full" or pr.type == "inc"): return None
45
if not self.info_set: self.set_info(pr)
47
if pr.type != self.type: return None
48
if pr.time != self.time: return None
49
if (pr.start_time != self.start_time or
50
pr.end_time != self.end_time): return None
52
if pr.manifest: self.set_manifest(filename)
54
assert pr.volume_number is not None
55
assert not self.volume_name_dict.has_key(pr.volume_number), \
56
(self.volume_name_dict, filename)
57
self.volume_name_dict[pr.volume_number] = filename
60
def set_info(self, pr):
61
"""Set BackupSet information from ParseResults object"""
62
assert not self.info_set
65
self.start_time, self.end_time = pr.start_time, pr.end_time
69
def set_manifest(self, remote_filename):
70
"""Add local and remote manifest filenames to backup set"""
71
assert not self.remote_manifest_name, (self.remote_manifest_name,
73
self.remote_manifest_name = remote_filename
75
if not globals.archive_dir: return
76
for local_filename in globals.archive_dir.listdir():
77
pr = file_naming.parse(local_filename)
78
if (pr and pr.manifest and pr.type == self.type and
79
pr.time == self.time and pr.start_time == self.start_time
80
and pr.end_time == self.end_time):
81
self.local_manifest_path = \
82
globals.archive_dir.append(local_filename)
86
"""Remove all files in set"""
87
self.backend.delete(self.remote_manfest_name)
88
for filename in self.volume_name_dict: self.backend.delete(filename)
91
"""For now just list files in set"""
93
if self.remote_manifest_name:
94
filelist.append(self.remote_manifest_name)
95
filelist.extend(self.volume_name_dict.values())
96
return "\n".join(filelist)
98
def get_timestr(self):
99
"""Return time string suitable for log statements"""
100
return dup_time.timetopretty(self.time or self.end_time)
102
def check_manifests(self):
103
"""Make sure remote manifest is equal to local one"""
104
if not self.remote_manifest_name and not self.local_manifest_path:
105
log.FatalError("Fatal Error: "
106
"No manifests found for most recent backup")
107
assert self.remote_manifest_name, "if only one, should be remote"
108
remote_manifest = self.get_remote_manifest()
109
if self.local_manifest_path:
110
local_manifest = self.get_local_manifest()
111
if remote_manifest != local_manifest:
113
"""Fatal Error: Remote manifest does not match local one. Either the
114
remote backup set or the local archive directory has been corrupted.""")
116
remote_manifest.check_dirinfo()
118
def get_local_manifest(self):
119
"""Return manifest object by reading local manifest file"""
120
assert self.local_manifest_path
121
manifest_buffer = self.local_manifest_path.get_data()
122
return manifest.Manifest().from_string(manifest_buffer)
124
def get_remote_manifest(self):
125
"""Return manifest by reading remote manifest on backend"""
126
assert self.remote_manifest_name
127
manifest_buffer = self.backend.get_data(self.remote_manifest_name)
128
return manifest.Manifest().from_string(manifest_buffer)
130
def get_manifest(self):
131
"""Return manifest object, showing preference for local copy"""
132
if self.local_manifest_path: return self.get_local_manifest()
133
else: return self.get_remote_manifest()
137
"""BackupChain - a number of linked BackupSets
139
A BackupChain always starts with a full backup set and continues
140
with incremental ones.
143
def __init__(self, backend):
144
"""Initialize new chain, only backend is required at first"""
145
self.backend = backend
147
self.incset_list = [] # sorted list of BackupSets
148
self.start_time, self.end_time = None, None
150
def set_full(self, fullset):
151
"""Add full backup set"""
152
assert not self.fullset and isinstance(fullset, BackupSet)
153
self.fullset = fullset
155
self.start_time, self.end_time = fullset.time, fullset.time
157
def add_inc(self, incset):
158
"""Add incset to self. Return None if incset does not match"""
159
if self.end_time == incset.start_time:
160
self.incset_list.append(incset)
161
self.end_time = incset.end_time
167
"""Delete all sets in chain, in reverse order"""
168
for i in range(len(self.incset_list)-1, -1, -1):
169
self.incset_list[i].delete()
171
def get_sets_at_time(self, time):
172
"""Return a list of sets in chain earlier or equal to time"""
173
older_incsets = filter(lambda s: s.end_time <= time, self.incset_list)
174
return [self.fullset] + older_incsets
177
"""Return last BackupSet in chain"""
178
if self.incset_list: return self.incset_list[-1]
179
else: return self.fullset
182
class SignatureChain:
183
"""A number of linked signatures
185
Analog to BackupChain - start with a full-sig, and continue with
189
def __init__(self, local, location):
190
"""Return new SignatureChain.
192
local should be true iff the signature chain resides in
193
globals.archive_dir and false if the chain is in
197
if local: self.archive_dir, self.backend = location, None
198
else: self.archive_dir, self.backend = None, location
199
self.fullsig = None # filename of full signature
200
self.inclist = [] # list of filenames of incremental signatures
201
self.start_time, self.end_time = None, None
204
"""Return true if represents a signature chain in archive_dir"""
205
return self.archive_dir
207
def add_filename(self, filename, pr = None):
208
"""Add new sig filename to current chain. Return true if fits"""
209
if not pr: pr = file_naming.parse(filename)
210
if not pr: return None
213
if pr.type != "new-sig": return None
214
if pr.start_time != self.end_time: return None
215
self.inclist.append(filename)
216
self.end_time = pr.end_time
219
if pr.type != "full-sig": return None
220
self.fullsig = filename
221
self.start_time, self.end_time = pr.time, pr.time
224
def get_fileobjs(self):
225
"""Return ordered list of signature fileobjs opened for reading"""
227
if self.archive_dir: # local
228
def filename_to_fileobj(filename):
229
"""Open filename in archive_dir, return filtered fileobj"""
230
sig_dp = path.DupPath(self.archive_dir.name, (filename,))
231
return sig_dp.filtered_open("rb")
232
else: filename_to_fileobj = self.backend.get_fileobj_read
233
return map(filename_to_fileobj, [self.fullsig] + self.inclist)
236
"""Remove all files in signature set"""
237
# Try to delete in opposite order, so something useful even if aborted
239
for i in range(len(self.inclist)-1, -1, -1):
240
self.inclist[i].delete()
241
self.fullsig.delete()
244
inclist_copy = self.inclist[:]
245
inclist_copy.reverse()
246
inclist_copy.append(self.fullsig)
247
self.backend.delete(inclist_copy)
250
class CollectionsStatus:
251
"""Hold information about available chains and sets"""
252
def __init__(self, backend, archive_dir = None):
253
"""Make new object. Does not set values"""
254
self.backend, self.archive_dir = backend, archive_dir
256
# Will hold (signature chain, backup chain) pair of active
257
# (most recent) chains
258
self.matched_chain_pair = None
260
# These should be sorted by end_time
261
self.all_backup_chains = None
262
self.other_backup_chains = None
263
self.other_sig_chains = None
265
# Other misc paths and sets which shouldn't be there
266
self.orphaned_sig_names = None
267
self.orphaned_backup_sets = None
268
self.incomplete_backup_sets = None
270
# True if set_values() below has run
271
self.values_set = None
274
"""Return string version, for testing purposes"""
275
l = ["Backend: %s" % (self.backend,),
276
"Archive dir: %s" % (self.archive_dir,),
277
"Matched pair: %s" % (self.matched_chain_pair,),
278
"All backup chains: %s" % (self.all_backup_chains,),
279
"Other backup chains: %s" % (self.other_backup_chains,),
280
"Other sig chains: %s" % (self.other_sig_chains,),
281
"Orphaned sig names: %s" % (self.orphaned_sig_names,),
282
"Orphaned backup sets: %s" % (self.orphaned_backup_sets,),
283
"Incomplete backup sets: %s" % (self.incomplete_backup_sets,)]
286
def set_values(self, sig_chain_warning = 1):
287
"""Set values from archive_dir and backend.
289
if archive_dir is None, omit any local chains. Returs self
290
for convenience. If sig_chain_warning is set to None, do not
291
warn about unnecessary sig chains. This is because there may
292
naturally be some unecessary ones after a full backup.
296
backend_filename_list = self.backend.list()
298
backup_chains, self.orphaned_backup_sets, self.incomplete_backup_set=\
299
self.get_backup_chains(backend_filename_list)
300
backup_chains = self.get_sorted_chains(backup_chains)
301
self.all_backup_chains = backup_chains
304
local_sig_chains, local_orphaned_sig_names = \
305
self.get_signature_chains(local = 1)
306
else: local_sig_chains, local_orphaned_sig_names = [], []
307
remote_sig_chains, remote_orphaned_sig_names = \
308
self.get_signature_chains(0, backend_filename_list)
309
self.orphaned_sig_names = (local_orphaned_sig_names +
310
remote_orphaned_sig_names)
311
self.set_matched_chain_pair(local_sig_chains + remote_sig_chains,
313
self.warn(sig_chain_warning)
316
def set_matched_chain_pair(self, sig_chains, backup_chains):
317
"""Set self.matched_chain_pair and self.other_sig/backup_chains
319
The latest matched_chain_pair will be set. If there are both
320
remote and local signature chains capable of matching the
321
latest backup chain, use the local sig chain (it does not need
325
if sig_chains and backup_chains:
326
latest_backup_chain = backup_chains[-1]
327
sig_chains = self.get_sorted_chains(sig_chains)
328
for i in range(len(sig_chains)-1, -1, -1):
329
if sig_chains[i].end_time == latest_backup_chain.end_time:
330
self.matched_chain_pair = (sig_chains[i],
335
self.other_sig_chains = sig_chains
336
self.other_backup_chains = backup_chains
338
def warn(self, sig_chain_warning):
339
"""Log various error messages if find incomplete/orphaned files"""
340
assert self.values_set
341
if self.orphaned_sig_names:
342
log.Log("Warning, found the following orphaned signature files:\n"
343
+ "\n".join(self.orphaned_sig_names), 2)
344
if self.other_sig_chains and sig_chain_warning:
345
if self.matched_chain_pair:
346
log.Log("Warning, found unnecessary signature chain(s)", 2)
347
else: log.FatalError("Found signatures but no corresponding "
350
if self.incomplete_backup_sets:
351
log.Log("Warning, found incomplete backup sets, probably left "
352
"from aborted session", 2)
353
if self.orphaned_backup_sets:
354
log.Log("Warning, found the following orphaned backup files:\n"
355
+ "\n".join(map(lambda x: str(x), orphaned_sets)), 2)
357
def get_backup_chains(self, filename_list):
358
"""Split given filename_list into chains
360
Return value will be pair (list of chains, list of sets, list
361
of incomplete sets), where the list of sets will comprise sets
362
not fitting into any chain, and the incomplete sets are sets
366
# First put filenames in set form
368
def add_to_sets(filename):
369
"""Try adding filename to existing sets, or make new one"""
371
if set.add_filename(filename): break
373
new_set = BackupSet(self.backend)
374
if new_set.add_filename(filename): sets.append(new_set)
375
else: log.Log("Ignoring file '%s'" % filename, 9)
376
map(add_to_sets, filename_list)
377
sets, incomplete_sets = self.get_sorted_sets(sets)
379
chains, orphaned_sets = [], []
380
def add_to_chains(set):
381
"""Try adding set to existing chains, or make new one"""
382
if set.type == "full":
383
new_chain = BackupChain(self.backend)
384
new_chain.set_full(set)
385
chains.append(new_chain)
387
assert set.type == "inc"
389
if chain.add_inc(set): break
390
else: orphaned_sets.append(set)
391
map(add_to_chains, sets)
392
return (chains, orphaned_sets, incomplete_sets)
394
def get_sorted_sets(self, set_list):
395
"""Sort set list by end time, return (sorted list, incomplete)"""
396
time_set_pairs, incomplete_sets = [], []
398
if not set.is_complete: incomplete_sets.append(set)
399
elif set.type == "full": time_set_pairs.append((set.time, set))
400
else: time_set_pairs.append((set.end_time, set))
401
time_set_pairs.sort()
402
return (map(lambda p: p[1], time_set_pairs), incomplete_sets)
404
def get_signature_chains(self, local, filelist = None):
405
"""Find chains in archive_dir (if local is true) or backend
407
Use filelist if given, otherwise regenerate. Return value is
408
pair (list of chains, list of signature paths not in any
413
if filelist is not None: return filelist
414
elif local: return self.archive_dir.listdir()
415
else: return self.backend.list()
417
def get_new_sigchain():
418
"""Return new empty signature chain"""
419
if local: return SignatureChain(1, self.archive_dir)
420
else: return SignatureChain(None, self.backend)
422
# Build initial chains from full sig filenames
423
chains, new_sig_filenames = [], []
424
for filename in get_filelist():
425
pr = file_naming.parse(filename)
427
if pr.type == "full-sig":
428
new_chain = get_new_sigchain()
429
assert new_chain.add_filename(filename, pr)
430
chains.append(new_chain)
431
elif pr.type == "new-sig": new_sig_filenames.append(filename)
433
# Try adding new signatures to existing chains
434
orphaned_filenames = []
435
new_sig_filenames.sort()
436
for sig_filename in new_sig_filenames:
438
if chain.add_filename(sig_filename): break
439
else: orphaned_filenames.append(sig_filename)
440
return (chains, orphaned_filenames)
442
def get_sorted_chains(self, chain_list):
443
"""Return chains sorted by end_time. If tie, local goes last"""
444
# Build dictionary from end_times to lists of corresponding chains
445
endtime_chain_dict = {}
446
for chain in chain_list:
447
if endtime_chain_dict.has_key(chain.end_time):
448
endtime_chain_dict[chain.end_time].append(chain)
449
else: endtime_chain_dict[chain.end_time] = [chain]
451
# Use dictionary to build final sorted list
452
sorted_end_times = endtime_chain_dict.keys()
453
sorted_end_times.sort()
454
sorted_chain_list = []
455
for end_time in sorted_end_times:
456
chain_list = endtime_chain_dict[end_time]
457
if len(chain_list) == 1: sorted_chain_list.append(chain_list[0])
459
assert len(chain_list) == 2
460
if chain_list[0].backend: # is remote, goes first
461
assert chain_list[1].archive_dir # other is local
462
sorted_chain_list.append(chain_list[0])
463
sorted_chain_list.append(chain_list[1])
464
else: # is local, goes second
465
assert chain_list[1].backend # other is remote
466
sorted_chain_list.append(chain_list[1])
467
sorted_chain_list.append(chain_list[0])
469
return sorted_chain_list
471
def get_backup_chain_at_time(self, time):
472
"""Return backup chain covering specified time
474
Tries to find the backup chain covering the given time. If
475
there is none, return the earliest chain before, and failing
476
that, the earliest chain.
479
if not self.all_backup_chains:
480
raise CollectionsError("No backup chains found")
482
covering_chains = filter(lambda c: c.start_time <= time <= c.end_time,
483
self.all_backup_chains)
484
if len(covering_chains) > 1:
485
raise CollectionsError("Two chains cover the given time")
486
elif len(covering_chains) == 1: return covering_chains[0]
488
old_chains = filter(lambda c: c.end_time < time,
489
self.all_backup_chains)
490
if old_chains: return old_chains[-1]
491
else: return self.all_backup_chains[0] # no chains are old enough
493
def cleanup_signatures(self):
494
"""Delete unnecessary older signatures"""
495
map(SignatureChain.delete, self.other_sig_chains)