~mterry/duplicity/gdrive


Viewing changes to duplicity/collections.py

  • Committer: bescoto
  • Date: 2002-10-29 01:49:46 UTC
  • Revision ID: vcs-imports@canonical.com-20021029014946-3m4rmm5plom7pl6q
Initial checkin

# Copyright 2002 Ben Escoto
#
# This file is part of duplicity.
#
# duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA
# 02139, USA; either version 2 of the License, or (at your option) any
# later version; incorporated herein by reference.

"""Classes and functions on collections of backup volumes"""

import gzip
import log, file_naming, path, dup_time, globals, manifest

class CollectionsError(Exception): pass

class BackupSet:
        """Backup set - the backup information produced by one session"""
        def __init__(self, backend):
                """Initialize new backup set, only backend is required at first"""
                self.backend = backend
                self.info_set = None
                self.volume_name_dict = {} # dict from volume number to filename
                self.remote_manifest_name = None
                self.local_manifest_path = None
                self.time = None # will be set if is full backup set
                self.start_time, self.end_time = None, None # will be set if inc

        def is_complete(self):
                """Assume complete if found manifest file"""
                return self.remote_manifest_name

        def add_filename(self, filename):
                """Add a filename to given set.  Return true if it fits.

                The filename will match the given set if it has the right
                times and is of the right type.  The information will be set
                from the first filename given.

                """
                pr = file_naming.parse(filename)
                if not pr or not (pr.type == "full" or pr.type == "inc"): return None

                if not self.info_set: self.set_info(pr)
                else:
                        if pr.type != self.type: return None
                        if pr.time != self.time: return None
                        if (pr.start_time != self.start_time or
                                pr.end_time != self.end_time): return None

                if pr.manifest: self.set_manifest(filename)
                else:
                        assert pr.volume_number is not None
                        assert not self.volume_name_dict.has_key(pr.volume_number), \
                                   (self.volume_name_dict, filename)
                        self.volume_name_dict[pr.volume_number] = filename
                return 1

        def set_info(self, pr):
                """Set BackupSet information from ParseResults object"""
                assert not self.info_set
                self.type = pr.type
                self.time = pr.time
                self.start_time, self.end_time = pr.start_time, pr.end_time
                self.info_set = 1

        def set_manifest(self, remote_filename):
                """Add local and remote manifest filenames to backup set"""
                assert not self.remote_manifest_name, (self.remote_manifest_name,
                                                       remote_filename)
                self.remote_manifest_name = remote_filename

                if not globals.archive_dir: return
                for local_filename in globals.archive_dir.listdir():
                        pr = file_naming.parse(local_filename)
                        if (pr and pr.manifest and pr.type == self.type and
                                pr.time == self.time and pr.start_time == self.start_time
                                and pr.end_time == self.end_time):
                                self.local_manifest_path = \
                                        globals.archive_dir.append(local_filename)
                                break

        def delete(self):
                """Remove all files in set"""
                self.backend.delete(self.remote_manifest_name)
                for filename in self.volume_name_dict.values(): self.backend.delete(filename)

        def __str__(self):
                """For now just list files in set"""
                filelist = []
                if self.remote_manifest_name:
                        filelist.append(self.remote_manifest_name)
                filelist.extend(self.volume_name_dict.values())
                return "\n".join(filelist)

        def get_timestr(self):
                """Return time string suitable for log statements"""
                return dup_time.timetopretty(self.time or self.end_time)

        def check_manifests(self):
                """Make sure remote manifest is equal to local one"""
                if not self.remote_manifest_name and not self.local_manifest_path:
                        log.FatalError("Fatal Error: "
                                       "No manifests found for most recent backup")
                assert self.remote_manifest_name, "if only one, should be remote"
                remote_manifest = self.get_remote_manifest()
                if self.local_manifest_path:
                        local_manifest = self.get_local_manifest()
                        if remote_manifest != local_manifest:
                                log.FatalError(
"""Fatal Error: Remote manifest does not match local one.  Either the
remote backup set or the local archive directory has been corrupted.""")

                remote_manifest.check_dirinfo()

        def get_local_manifest(self):
                """Return manifest object by reading local manifest file"""
                assert self.local_manifest_path
                manifest_buffer = self.local_manifest_path.get_data()
                return manifest.Manifest().from_string(manifest_buffer)

        def get_remote_manifest(self):
                """Return manifest by reading remote manifest on backend"""
                assert self.remote_manifest_name
                manifest_buffer = self.backend.get_data(self.remote_manifest_name)
                return manifest.Manifest().from_string(manifest_buffer)

        def get_manifest(self):
                """Return manifest object, showing preference for local copy"""
                if self.local_manifest_path: return self.get_local_manifest()
                else: return self.get_remote_manifest()

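A BackupSet is normally populated by feeding it every filename a backend reports; add_filename() takes its type and times from the first filename and rejects anything that does not match. A minimal sketch of that flow (illustrative only, not part of this revision; it assumes globals.backend has already been set up and that its list() returns duplicity-style filenames):

# Illustrative sketch only: group a backend's files into one backup set.
example_set = BackupSet(globals.backend)
for example_filename in globals.backend.list():
        example_set.add_filename(example_filename) # non-matching names are rejected
if example_set.is_complete(): # a manifest was found for the set
        example_manifest = example_set.get_manifest()
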
class BackupChain:
        """BackupChain - a number of linked BackupSets

        A BackupChain always starts with a full backup set and continues
        with incremental ones.

        """
        def __init__(self, backend):
                """Initialize new chain, only backend is required at first"""
                self.backend = backend
                self.fullset = None
                self.incset_list = [] # sorted list of BackupSets
                self.start_time, self.end_time = None, None

        def set_full(self, fullset):
                """Add full backup set"""
                assert not self.fullset and isinstance(fullset, BackupSet)
                self.fullset = fullset
                assert fullset.time
                self.start_time, self.end_time = fullset.time, fullset.time

        def add_inc(self, incset):
                """Add incset to self.  Return None if incset does not match"""
                if self.end_time == incset.start_time:
                        self.incset_list.append(incset)
                        self.end_time = incset.end_time
                        assert self.end_time
                        return 1
                else: return None

        def delete(self):
                """Delete all sets in chain, in reverse order"""
                for i in range(len(self.incset_list)-1, -1, -1):
                        self.incset_list[i].delete()
                self.fullset.delete()

        def get_sets_at_time(self, time):
                """Return a list of sets in chain earlier or equal to time"""
                older_incsets = filter(lambda s: s.end_time <= time, self.incset_list)
                return [self.fullset] + older_incsets

        def get_last(self):
                """Return last BackupSet in chain"""
                if self.incset_list: return self.incset_list[-1]
                else: return self.fullset

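A chain is extended in the same incremental fashion: set_full() anchors the chain at the full set's time, and add_inc() accepts an incremental set only when its start_time equals the chain's current end_time. A hedged sketch (illustrative only; full_set and inc_set stand for BackupSet objects already populated via add_filename()):

# Illustrative sketch only: full_set and inc_set are assumed BackupSets.
example_chain = BackupChain(globals.backend)
example_chain.set_full(full_set)
if not example_chain.add_inc(inc_set): # rejected unless it starts where the chain ends
        log.Log("Set does not continue the chain", 9)
restorable_sets = example_chain.get_sets_at_time(example_chain.end_time)
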
class SignatureChain:
        """A number of linked signatures

        Analog to BackupChain - start with a full-sig, and continue with
        new-sigs.

        """
        def __init__(self, local, location):
                """Return new SignatureChain.

                local should be true if the signature chain resides in
                globals.archive_dir, and false if the chain is in
                globals.backend.

                """
                if local: self.archive_dir, self.backend = location, None
                else: self.archive_dir, self.backend = None, location
                self.fullsig = None # filename of full signature
                self.inclist = [] # list of filenames of incremental signatures
                self.start_time, self.end_time = None, None

        def islocal(self):
                """Return true if represents a signature chain in archive_dir"""
                return self.archive_dir

        def add_filename(self, filename, pr = None):
                """Add new sig filename to current chain.  Return true if fits"""
                if not pr: pr = file_naming.parse(filename)
                if not pr: return None

                if self.fullsig:
                        if pr.type != "new-sig": return None
                        if pr.start_time != self.end_time: return None
                        self.inclist.append(filename)
                        self.end_time = pr.end_time
                        return 1
                else:
                        if pr.type != "full-sig": return None
                        self.fullsig = filename
                        self.start_time, self.end_time = pr.time, pr.time
                        return 1

        def get_fileobjs(self):
                """Return ordered list of signature fileobjs opened for reading"""
                assert self.fullsig
                if self.archive_dir: # local
                        def filename_to_fileobj(filename):
                                """Open filename in archive_dir, return filtered fileobj"""
                                sig_dp = path.DupPath(self.archive_dir.name, (filename,))
                                return sig_dp.filtered_open("rb")
                else: filename_to_fileobj = self.backend.get_fileobj_read
                return map(filename_to_fileobj, [self.fullsig] + self.inclist)

        def delete(self):
                """Remove all files in signature set"""
                # Try to delete in opposite order, so something useful remains
                # even if the operation is aborted
                if self.archive_dir:
                        # inclist and fullsig hold filenames, so build paths in
                        # archive_dir before deleting
                        for i in range(len(self.inclist)-1, -1, -1):
                                self.archive_dir.append(self.inclist[i]).delete()
                        self.archive_dir.append(self.fullsig).delete()
                else:
                        assert self.backend
                        inclist_copy = self.inclist[:]
                        inclist_copy.reverse()
                        inclist_copy.append(self.fullsig)
                        self.backend.delete(inclist_copy)

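The same pattern, sketched for signatures (illustrative only; assumes globals.archive_dir is set): the chain seeds itself from a full-sig file, accepts new-sig files that continue it, and get_fileobjs() then returns the signature files opened for reading in order.

# Illustrative sketch only: build a local signature chain from the archive dir.
example_sigchain = SignatureChain(1, globals.archive_dir)
for example_filename in globals.archive_dir.listdir():
        example_sigchain.add_filename(example_filename) # non-matching names ignored
if example_sigchain.fullsig:
        example_sig_fileobjs = example_sigchain.get_fileobjs()
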
class CollectionsStatus:
        """Hold information about available chains and sets"""
        def __init__(self, backend, archive_dir = None):
                """Make new object.  Does not set values"""
                self.backend, self.archive_dir = backend, archive_dir

                # Will hold (signature chain, backup chain) pair of active
                # (most recent) chains
                self.matched_chain_pair = None

                # These should be sorted by end_time
                self.all_backup_chains = None
                self.other_backup_chains = None
                self.other_sig_chains = None

                # Other misc paths and sets which shouldn't be there
                self.orphaned_sig_names = None
                self.orphaned_backup_sets = None
                self.incomplete_backup_sets = None

                # True if set_values() below has run
                self.values_set = None

        def __str__(self):
                """Return string version, for testing purposes"""
                l = ["Backend: %s" % (self.backend,),
                     "Archive dir: %s" % (self.archive_dir,),
                     "Matched pair: %s" % (self.matched_chain_pair,),
                     "All backup chains: %s" % (self.all_backup_chains,),
                     "Other backup chains: %s" % (self.other_backup_chains,),
                     "Other sig chains: %s" % (self.other_sig_chains,),
                     "Orphaned sig names: %s" % (self.orphaned_sig_names,),
                     "Orphaned backup sets: %s" % (self.orphaned_backup_sets,),
                     "Incomplete backup sets: %s" % (self.incomplete_backup_sets,)]
                return "\n".join(l)

        def set_values(self, sig_chain_warning = 1):
                """Set values from archive_dir and backend.

                If archive_dir is None, omit any local chains.  Returns self
                for convenience.  If sig_chain_warning is set to None, do not
                warn about unnecessary sig chains.  This is because there may
                naturally be some unnecessary ones after a full backup.

                """
                self.values_set = 1
                backend_filename_list = self.backend.list()

                backup_chains, self.orphaned_backup_sets, self.incomplete_backup_sets = \
                        self.get_backup_chains(backend_filename_list)
                backup_chains = self.get_sorted_chains(backup_chains)
                self.all_backup_chains = backup_chains

                if self.archive_dir:
                        local_sig_chains, local_orphaned_sig_names = \
                                self.get_signature_chains(local = 1)
                else: local_sig_chains, local_orphaned_sig_names = [], []
                remote_sig_chains, remote_orphaned_sig_names = \
                        self.get_signature_chains(0, backend_filename_list)
                self.orphaned_sig_names = (local_orphaned_sig_names +
                                           remote_orphaned_sig_names)
                self.set_matched_chain_pair(local_sig_chains + remote_sig_chains,
                                            backup_chains)
                self.warn(sig_chain_warning)
                return self

        def set_matched_chain_pair(self, sig_chains, backup_chains):
                """Set self.matched_chain_pair and self.other_sig/backup_chains

                The latest matched_chain_pair will be set.  If there are both
                remote and local signature chains capable of matching the
                latest backup chain, use the local sig chain (it does not need
                to be downloaded).

                """
                if sig_chains and backup_chains:
                        latest_backup_chain = backup_chains[-1]
                        sig_chains = self.get_sorted_chains(sig_chains)
                        for i in range(len(sig_chains)-1, -1, -1):
                                if sig_chains[i].end_time == latest_backup_chain.end_time:
                                        self.matched_chain_pair = (sig_chains[i],
                                                                   backup_chains[-1])
                                        del sig_chains[i]
                                        break

                self.other_sig_chains = sig_chains
                self.other_backup_chains = backup_chains

        def warn(self, sig_chain_warning):
                """Log various error messages if incomplete/orphaned files are found"""
                assert self.values_set
                if self.orphaned_sig_names:
                        log.Log("Warning, found the following orphaned signature files:\n"
                                + "\n".join(self.orphaned_sig_names), 2)
                if self.other_sig_chains and sig_chain_warning:
                        if self.matched_chain_pair:
                                log.Log("Warning, found unnecessary signature chain(s)", 2)
                        else: log.FatalError("Found signatures but no corresponding "
                                             "backup files")

                if self.incomplete_backup_sets:
                        log.Log("Warning, found incomplete backup sets, probably left "
                                "from aborted session", 2)
                if self.orphaned_backup_sets:
                        log.Log("Warning, found the following orphaned backup files:\n"
                                + "\n".join(map(lambda x: str(x), self.orphaned_backup_sets)), 2)

        def get_backup_chains(self, filename_list):
                """Split given filename_list into chains

                Return value will be triple (list of chains, list of sets, list
                of incomplete sets), where the list of sets will comprise sets
                not fitting into any chain, and the incomplete sets are sets
                missing files.

                """
                # First put filenames in set form
                sets = []
                def add_to_sets(filename):
                        """Try adding filename to existing sets, or make new one"""
                        for set in sets:
                                if set.add_filename(filename): break
                        else:
                                new_set = BackupSet(self.backend)
                                if new_set.add_filename(filename): sets.append(new_set)
                                else: log.Log("Ignoring file '%s'" % filename, 9)
                map(add_to_sets, filename_list)
                sets, incomplete_sets = self.get_sorted_sets(sets)

                chains, orphaned_sets = [], []
                def add_to_chains(set):
                        """Try adding set to existing chains, or make new one"""
                        if set.type == "full":
                                new_chain = BackupChain(self.backend)
                                new_chain.set_full(set)
                                chains.append(new_chain)
                        else:
                                assert set.type == "inc"
                                for chain in chains:
                                        if chain.add_inc(set): break
                                else: orphaned_sets.append(set)
                map(add_to_chains, sets)
                return (chains, orphaned_sets, incomplete_sets)

        def get_sorted_sets(self, set_list):
                """Sort set list by end time, return (sorted list, incomplete)"""
                time_set_pairs, incomplete_sets = [], []
                for set in set_list:
                        if not set.is_complete(): incomplete_sets.append(set)
                        elif set.type == "full": time_set_pairs.append((set.time, set))
                        else: time_set_pairs.append((set.end_time, set))
                time_set_pairs.sort()
                return (map(lambda p: p[1], time_set_pairs), incomplete_sets)

        def get_signature_chains(self, local, filelist = None):
                """Find chains in archive_dir (if local is true) or backend

                Use filelist if given, otherwise regenerate.  Return value is
                pair (list of chains, list of signature paths not in any
                chains).

                """
                def get_filelist():
                        if filelist is not None: return filelist
                        elif local: return self.archive_dir.listdir()
                        else: return self.backend.list()

                def get_new_sigchain():
                        """Return new empty signature chain"""
                        if local: return SignatureChain(1, self.archive_dir)
                        else: return SignatureChain(None, self.backend)

                # Build initial chains from full sig filenames
                chains, new_sig_filenames = [], []
                for filename in get_filelist():
                        pr = file_naming.parse(filename)
                        if pr:
                                if pr.type == "full-sig":
                                        new_chain = get_new_sigchain()
                                        assert new_chain.add_filename(filename, pr)
                                        chains.append(new_chain)
                                elif pr.type == "new-sig": new_sig_filenames.append(filename)

                # Try adding new signatures to existing chains
                orphaned_filenames = []
                new_sig_filenames.sort()
                for sig_filename in new_sig_filenames:
                        for chain in chains:
                                if chain.add_filename(sig_filename): break
                        else: orphaned_filenames.append(sig_filename)
                return (chains, orphaned_filenames)

        def get_sorted_chains(self, chain_list):
                """Return chains sorted by end_time.  If tie, local goes last"""
                # Build dictionary from end_times to lists of corresponding chains
                endtime_chain_dict = {}
                for chain in chain_list:
                        if endtime_chain_dict.has_key(chain.end_time):
                                endtime_chain_dict[chain.end_time].append(chain)
                        else: endtime_chain_dict[chain.end_time] = [chain]

                # Use dictionary to build final sorted list
                sorted_end_times = endtime_chain_dict.keys()
                sorted_end_times.sort()
                sorted_chain_list = []
                for end_time in sorted_end_times:
                        chain_list = endtime_chain_dict[end_time]
                        if len(chain_list) == 1: sorted_chain_list.append(chain_list[0])
                        else:
                                assert len(chain_list) == 2
                                if chain_list[0].backend: # is remote, goes first
                                        assert chain_list[1].archive_dir # other is local
                                        sorted_chain_list.append(chain_list[0])
                                        sorted_chain_list.append(chain_list[1])
                                else: # is local, goes second
                                        assert chain_list[1].backend # other is remote
                                        sorted_chain_list.append(chain_list[1])
                                        sorted_chain_list.append(chain_list[0])

                return sorted_chain_list

        def get_backup_chain_at_time(self, time):
                """Return backup chain covering specified time

                Tries to find the backup chain covering the given time.  If
                there is none, return the last chain before the given time,
                and failing that, the earliest chain.

                """
                if not self.all_backup_chains:
                        raise CollectionsError("No backup chains found")

                covering_chains = filter(lambda c: c.start_time <= time <= c.end_time,
                                         self.all_backup_chains)
                if len(covering_chains) > 1:
                        raise CollectionsError("Two chains cover the given time")
                elif len(covering_chains) == 1: return covering_chains[0]

                old_chains = filter(lambda c: c.end_time < time,
                                    self.all_backup_chains)
                if old_chains: return old_chains[-1]
                else: return self.all_backup_chains[0] # no chains are old enough

        def cleanup_signatures(self):
                """Delete unnecessary older signatures"""
                map(SignatureChain.delete, self.other_sig_chains)
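
Finally, CollectionsStatus ties the pieces together: set_values() lists the backend (and the archive directory, if one is given), groups the filenames into sets and chains, records the matched signature/backup chain pair, and warns about orphaned or incomplete files. A sketch of typical driver usage (illustrative only, not part of this revision; restore_time is an assumed restore target in seconds since the epoch):

# Illustrative sketch only: typical driver usage of CollectionsStatus.
restore_time = 1035856186  # assumed restore target, seconds since the epoch
col_stats = CollectionsStatus(globals.backend, globals.archive_dir).set_values()
log.Log(str(col_stats), 9)
restore_chain = col_stats.get_backup_chain_at_time(restore_time)
needed_sets = restore_chain.get_sets_at_time(restore_time)
col_stats.cleanup_signatures() # delete signature chains other than the matched one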