# Copyright 2002 Ben Escoto
#
# This file is part of duplicity.
#
# duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA
# 02139, USA; either version 2 of the License, or (at your option) any
# later version; incorporated herein by reference.
"""Iterate exactly the requested files in a directory

Parses includes and excludes to yield correct files. More
documentation on what this code does can be found on the man page.

"""
from __future__ import generators

import re

import robust, log, globals
24
class SelectError(Exception):
    """Some error dealing with the Select class

    Base class of the selection errors defined in this module.

    """
    pass
28
class FilePrefixError(SelectError):
    """Signals that a specified file doesn't start with correct prefix"""
    pass
32
class GlobbingError(SelectError):
    """Something has gone wrong when parsing a glob string"""
    pass
"""Iterate appropriate Paths in given directory

This class acts as an iterator on account of its next() method.
Basically, it just goes through all the files in a directory in
order (depth-first) and subjects each file to a bunch of tests
(selection functions) in order. The first test that includes or
excludes the file means that the file gets included (iterated) or
excluded. The default is include, so with no tests we would just
iterate all the files in the directory in order.

The one complication to this is that sometimes we don't know
whether or not to include a directory until we examine its
contents. For instance, if we want to include all the **.py
files. If /home/ben/foo.py exists, we should also include /home
and /home/ben, but if these directories contain no **.py files,
they shouldn't be included. For this reason, a test may not
include or exclude a directory, but merely "scan" it. If later a
file in the directory gets included, so does the directory.

As mentioned above, each test takes the form of a selection
function. The selection function takes a path, and returns:

None - means the test has nothing to say about the related file
0 - the file is excluded by the test
1 - the file is included
2 - the test says the file (must be directory) should be scanned

Also, a selection function f has a variable f.exclude which should
be true iff f could potentially exclude some file. This is used
to signal an error if the last function only includes, which would
be redundant and presumably isn't what the user intends.

"""
71
# This re should not match normal filenames, but usually just globs
72
# Applied with .match(): succeeds when the string contains any of the
# characters *, ? or [ anywhere in it, or when it begins with "ignorecase:"
# (compared case-insensitively because of re.I).
glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
74
def __init__(self, path):
    """Initializer, called with Path of root directory

    path must be a Path instance.  It is stored as self.rootpath and
    its name becomes self.prefix.  The list of selection functions
    starts out empty.

    """
    assert isinstance(path, Path), str(path)
    self.selection_functions = []
    # rootpath has to be assigned before prefix is derived from it
    self.rootpath = path
    self.prefix = self.rootpath.name
82
"""Initialize generator, prepare to iterate."""
83
self.rootpath.setdata() # this may have changed since Select init
84
self.iter = self.Iterate(self.rootpath)
85
self.next = self.iter.next
86
self.__iter__ = lambda: self
89
def Iterate(self, path):
    """Return iterator yielding paths in path

    This function looks a bit more complicated than it needs to be
    because it avoids extra recursion (and no extra function calls
    for non-directory files) while still doing the directory
    scanning: a directory for which self.Select returns 2 is held
    back on delayed_path_stack and only yielded once something
    beneath it is selected.

    If path.type is false (base doesn't exist) a warning is logged
    and nothing is yielded.

    """
    def error_handler(exc, path, filename):
        log.Log("Error initializing file %s/%s" % (path.name, filename), 2)
        return None

    def diryield(path):
        """Generate relevant files in directory path

        Yields (path, num) where num == 0 means path should be
        generated normally, num == 1 means the path is a directory
        and should be included iff something inside is included.

        """
        for filename in robust.listpath(path):
            new_path = robust.check_common_error(
                error_handler, Path.append, (path, filename))
            # NOTE(review): presumably check_common_error hands back the
            # error handler's None when Path.append fails -- confirm
            if new_path:
                s = self.Select(new_path)
                if s == 1:
                    yield (new_path, 0)
                elif s == 2 and new_path.isdir():
                    yield (new_path, 1)

    if not path.type: # base doesn't exist
        log.Log("Warning: base %s doesn't exist, continuing" %
                path.name, 2)
        return
    log.Log("Selecting %s" % path.name, 7)
    yield path
    if not path.isdir():
        return
    # One generator per directory currently being walked, innermost last
    diryield_stack = [diryield(path)]
    # Scanned directories waiting to see whether anything inside is selected
    delayed_path_stack = []

    while diryield_stack:
        try:
            subpath, val = diryield_stack[-1].next()
        except StopIteration:
            # Innermost directory is exhausted; step back out of it
            diryield_stack.pop()
            if delayed_path_stack:
                delayed_path_stack.pop()
            continue
        if val == 0:
            # Something was selected, so every pending ancestor is too
            if delayed_path_stack:
                for delayed_path in delayed_path_stack:
                    log.Log("Selecting %s" % delayed_path.name, 7)
                    yield delayed_path
                del delayed_path_stack[:]
            log.Log("Selecting %s" % subpath.name, 7)
            yield subpath
            if subpath.isdir():
                diryield_stack.append(diryield(subpath))
        elif val == 1:
            delayed_path_stack.append(subpath)
            diryield_stack.append(diryield(subpath))
147
def Select(self, path):
    """Run through the selection functions and return dominant val 0/1/2

    The selection functions are tried in order.  The first one that
    returns something other than None decides the result (0 exclude,
    1 include, 2 scan).  When none of them has anything to say the
    path is included, since the default is include.

    """
    for sf in self.selection_functions:
        result = sf(path)
        if result is not None:
            return result
    return 1
154
def ParseArgs(self, argtuples):
    """Create selection functions based on list of tuples

    The tuples are created when the initial commandline arguments
    are read.  They have the form (option string, additional
    argument) except for the filelist tuples, which should be
    (option-string, (additional argument, filelist_fp)).

    A SelectError raised while the selection functions are being
    built is passed to parse_catch_error.  Afterwards
    parse_last_excludes checks the last selection function.

    """
    try:
        for opt, arg in argtuples:
            if opt == "--exclude":
                self.add_selection_func(self.glob_get_sf(arg, 0))
            elif opt == "--exclude-device-files":
                self.add_selection_func(self.devfiles_get_sf())
            elif opt == "--exclude-filelist":
                # arg is (additional argument, filelist_fp); the
                # additional argument serves as the filelist's name
                self.add_selection_func(self.filelist_get_sf(
                    arg[1], 0, arg[0]))
            elif opt == "--exclude-other-filesystems":
                self.add_selection_func(self.other_filesystems_get_sf(0))
            elif opt == "--exclude-regexp":
                self.add_selection_func(self.regexp_get_sf(arg, 0))
            elif opt == "--include":
                self.add_selection_func(self.glob_get_sf(arg, 1))
            elif opt == "--include-filelist":
                self.add_selection_func(self.filelist_get_sf(
                    arg[1], 1, arg[0]))
            elif opt == "--include-regexp":
                self.add_selection_func(self.regexp_get_sf(arg, 1))
            else:
                assert 0, "Bad selection option %s" % opt
    except SelectError as e:
        self.parse_catch_error(e)
    self.parse_last_excludes()
187
def parse_catch_error(self, exc):
    """Deal with selection error exc

    A FilePrefixError or a GlobbingError is reported through
    log.FatalError.  Any other exception is raised again.

    """
    if isinstance(exc, FilePrefixError):
        log.FatalError(
"""Fatal Error: The file specification
    %s
cannot match any files in the base directory
    %s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""" %
            (exc, self.prefix))
    elif isinstance(exc, GlobbingError):
        # Must test exc (the parameter); no other exception name is in scope
        log.FatalError("Fatal Error while processing expression\n"
                       "%s" % exc)
    else:
        raise exc
203
def parse_last_excludes(self):
    """Exit with error if last selection function isn't an exclude

    Does nothing when there are no selection functions, or when the
    last one has a true .exclude attribute.  Otherwise the problem is
    reported through log.FatalError, naming the function by its
    .name attribute.

    """
    if (self.selection_functions and
        not self.selection_functions[-1].exclude):
        log.FatalError(
"""Last selection expression:
    %s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""" %
            (self.selection_functions[-1].name,))
215
def add_selection_func(self, sel_func, add_to_start = None):
    """Add another selection function at the end or beginning

    sel_func is appended to self.selection_functions, unless
    add_to_start is true, in which case it is inserted at the front.

    """
    if add_to_start:
        self.selection_functions.insert(0, sel_func)
    else:
        self.selection_functions.append(sel_func)
220
def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
    """Return selection function by reading list of files

    The format of the filelist is documented in the man page.
    filelist_fp should be an (open) file object.
    inc_default should be true if this is an include list,
    false for an exclude list.
    filelist_name is just a string used for logging.

    The returned function keeps a position in the sorted list of
    (index, include) pairs and only ever moves forward through it.
    NOTE(review): this presumably relies on paths being presented in
    the same sorted order -- confirm against the caller.

    """
    log.Log("Reading filelist %s" % filelist_name, 4)
    tuple_list, something_excluded = \
        self.filelist_read(filelist_fp, inc_default, filelist_name)
    log.Log("Sorting filelist %s" % filelist_name, 4)
    tuple_list.sort()
    i = [0] # We have to put index in list because of stupid scoping rules

    def selection_function(path):
        while 1:
            if i[0] >= len(tuple_list):
                return None
            include, move_on = \
                self.filelist_pair_match(path, tuple_list[i[0]])
            if move_on:
                i[0] += 1
                if include is None:
                    continue # later line may match
            return include

    selection_function.exclude = something_excluded or inc_default == 0
    selection_function.name = "Filelist: " + filelist_name
    return selection_function
251
def filelist_read(self, filelist_fp, include, filelist_name):
    """Read filelist from fp, return (tuplelist, something_excluded)

    The whole of filelist_fp is read and split on "\\0" when
    globals.null_separator is true, otherwise on newlines.  Empty
    entries are skipped.  Entries rejected by filelist_parse_line
    with FilePrefixError are ignored after a warning.  tuplelist
    holds the parsed (index, include) pairs, and something_excluded
    is 1 if any pair has a false include value, else None.
    filelist_fp is closed before returning.

    """
    prefix_warnings = [0]
    def incr_warnings(exc):
        """Warn if prefix is incorrect"""
        prefix_warnings[0] += 1
        if prefix_warnings[0] < 6:
            log.Log("Warning: file specification '%s' in filelist %s\n"
                    "doesn't start with correct prefix %s. Ignoring." %
                    (exc, filelist_name, self.prefix), 2)
            if prefix_warnings[0] == 5:
                log.Log("Future prefix errors will not be logged.", 2)

    something_excluded, tuple_list = None, []
    if globals.null_separator:
        separator = "\0"
    else:
        separator = "\n"
    for line in filelist_fp.read().split(separator):
        if not line:
            continue # skip blanks
        try:
            pair = self.filelist_parse_line(line, include)
        except FilePrefixError as exc:
            # Nothing was parsed for this line, so warn and go on
            incr_warnings(exc)
            continue
        tuple_list.append(pair)
        if not pair[1]:
            something_excluded = 1
    if filelist_fp.close():
        log.Log("Error closing filelist %s" % filelist_name, 2)
    return (tuple_list, something_excluded)
278
def filelist_parse_line(self, line, include):
    """Parse a single line of a filelist, returning a pair

    pair will be of form (index, include), where index is another
    tuple, and include is 1 if the line specifies that we are
    including a file.  The default is given as an argument.
    self.prefix is the string that the index is relative to.

    A leading "+ " or "- " overrides the default with 1 or 0.
    Raises FilePrefixError if the rest of the line does not start
    with self.prefix.

    """
    if line[:2] == "+ ": # Check for "+ "/"- " syntax
        include = 1
        line = line[2:]
    elif line[:2] == "- ":
        include = 0
        line = line[2:]

    if not line.startswith(self.prefix):
        raise FilePrefixError(line)
    line = line[len(self.prefix):] # Discard prefix
    # Empty components come from leading, trailing or doubled slashes
    index = tuple([part for part in line.split("/") if part])
    return (index, include)
300
def filelist_pair_match(self, path, pair):
    """Matches a filelist tuple against a path

    Returns a pair (include, move_on).  include is None if the
    tuple doesn't match either way, and 0/1 if the tuple excludes
    or includes the path.

    move_on is true if the tuple cannot match a later index, and
    so we should move on to the next tuple in the index.

    pair is (index, include) as produced by filelist_parse_line;
    index is compared with path.index.

    """
    index, include = pair
    if include == 1:
        if index < path.index:
            return (None, 1)
        if index == path.index:
            return (1, 1)
        elif index[:len(path.index)] == path.index:
            return (1, None) # /foo/bar implicitly includes /foo
        else:
            # index sorts after path and path is not an initial
            # sequence of it
            return (None, None)
    elif include == 0:
        if path.index[:len(index)] == index:
            return (0, None) # /foo implicitly excludes /foo/bar
        elif index < path.index:
            return (None, 1)
        else:
            return (None, None) # index sorts after path
    else:
        assert 0, "Include is %s, should be 0 or 1" % (include,)
325
def other_filesystems_get_sf(self, include):
    """Return selection function matching files on other filesystems

    The returned function gives include (0 or 1) for a path that
    exists and whose getdevloc() differs from that of self.rootpath,
    and None for every other path.

    """
    assert include == 0 or include == 1
    root_devloc = self.rootpath.getdevloc()

    def sel_func(path):
        if path.exists() and path.getdevloc() != root_devloc:
            return include
        else:
            return None

    sel_func.exclude = not include
    sel_func.name = "Match other filesystems"
    return sel_func
337
def regexp_get_sf(self, regexp_string, include):
    """Return selection function given by regexp_string

    The returned function gives include (0 or 1) for a path whose
    name is matched anywhere by the regular expression (search, not
    match), and None otherwise.  If regexp_string does not compile,
    the error is logged and re.error is raised again.

    """
    assert include == 0 or include == 1
    try:
        regexp = re.compile(regexp_string)
    except re.error:
        log.Log("Error compiling regular expression %s" % regexp_string, 1)
        raise

    def sel_func(path):
        if regexp.search(path.name):
            return include
        else:
            return None

    sel_func.exclude = not include
    sel_func.name = "Regular expression: %s" % regexp_string
    return sel_func
353
def devfiles_get_sf(self):
    """Return a selection function to exclude all dev files

    A warning is logged if other selection functions are already
    present.  The returned function gives 0 for a path whose
    isdev() is true and None otherwise.

    """
    if self.selection_functions:
        log.Log("Warning: exclude-device-files is not the first "
                "selector.\nThis may not be what you intended", 3)

    def sel_func(path):
        if path.isdev():
            return 0
        else:
            return None

    # This function can exclude files, so flag it as an exclude
    sel_func.exclude = 1
    sel_func.name = "Exclude device files"
    return sel_func
365
def glob_get_sf(self, glob_str, include):
    """Return selection function given by glob string

    "**" yields a function that returns include for every path.  A
    string that self.glob_re does not match is treated as a normal
    filename and handled by glob_get_filename_sf.  Anything else goes
    to glob_get_normal_sf.  The function's .exclude and .name
    attributes are set here before it is returned.

    """
    assert include == 0 or include == 1
    if glob_str == "**":
        sel_func = lambda path: include
    elif not self.glob_re.match(glob_str): # normal file
        sel_func = self.glob_get_filename_sf(glob_str, include)
    else:
        sel_func = self.glob_get_normal_sf(glob_str, include)

    sel_func.exclude = not include
    sel_func.name = "Command-line %s glob: %s" % \
        (include and "include" or "exclude", glob_str)
    return sel_func
378
def glob_get_filename_sf(self, filename, include):
    """Get a selection function given a normal filename

    Some of the parsing is better explained in
    filelist_parse_line.  The reason this is split from normal
    globbing is things are a lot less complicated if no special
    globbing characters are used.

    Raises FilePrefixError if filename does not start with
    self.prefix.  Otherwise the remainder is split on "/" into an
    index tuple (empty components dropped) and passed to
    glob_get_tuple_sf.

    """
    if not filename.startswith(self.prefix):
        raise FilePrefixError(filename)
    relative = filename[len(self.prefix):]
    index = tuple([part for part in relative.split("/") if part])
    return self.glob_get_tuple_sf(index, include)
393
def glob_get_tuple_sf(self, tuple, include):
    """Return selection function based on tuple

    tuple is an index tuple that is compared with path.index.  With
    include == 1 the returned function gives 1 when either index is
    an initial sequence of the other, else None.  With include == 0
    it gives 0 only when tuple is an initial sequence of path.index,
    else None.

    """
    assert include == 0 or include == 1

    def include_sel_func(path):
        if (path.index == tuple[:len(path.index)] or
            path.index[:len(tuple)] == tuple):
            return 1 # /foo/bar implicitly matches /foo, vice-versa
        else:
            return None

    def exclude_sel_func(path):
        if path.index[:len(tuple)] == tuple:
            return 0 # /foo excludes /foo/bar, not vice-versa
        else:
            return None

    if include == 1:
        sel_func = include_sel_func
    elif include == 0:
        sel_func = exclude_sel_func
    sel_func.exclude = not include
    sel_func.name = "Tuple select %s" % (tuple,)
    return sel_func
412
def glob_get_normal_sf(self, glob_str, include):
    """Return selection function based on glob_str

    The basic idea is to turn glob_str into a regular expression,
    and just use the normal regular expression.  There is a
    complication because the selection function should return '2'
    (scan) for directories which may contain a file which matches
    the glob_str.  So we break up the glob string into parts, and
    any path which matches an initial sequence of glob parts gets
    the value 2.

    A leading "ignorecase:" (in any case) is stripped and makes the
    matching case-insensitive.  Raises FilePrefixError if the include
    form of the function returns nothing true for self.rootpath.

    Thanks to Donovan Baarda who provided some code which did some
    things similar to this.

    """
    flags = re.S
    if glob_str.lower().startswith("ignorecase:"):
        flags = re.I | re.S
        glob_str = glob_str[len("ignorecase:"):]

    # matches what glob matches and any files in directory
    glob_comp_re = re.compile("^%s($|/)" % self.glob_to_re(glob_str), flags)

    star_pos = glob_str.find("**")
    if star_pos != -1:
        glob_str = glob_str[:star_pos + 2] # truncate after **

    # matches any initial sequence of the (possibly truncated) glob
    scan_comp_re = re.compile(
        "^(%s)$" % "|".join(self.glob_get_prefix_res(glob_str)), flags)

    def include_sel_func(path):
        if glob_comp_re.match(path.name):
            return 1
        elif scan_comp_re.match(path.name):
            return 2
        else:
            return None

    def exclude_sel_func(path):
        if glob_comp_re.match(path.name):
            return 0
        else:
            return None

    # Check to make sure prefix is ok
    if not include_sel_func(self.rootpath):
        raise FilePrefixError(glob_str)

    if include:
        return include_sel_func
    else:
        return exclude_sel_func
456
def glob_get_prefix_res(self, glob_str):
    """Return list of regexps equivalent to prefixes of glob_str

    glob_str is split on "/", and each initial sequence of the parts
    is joined back together and converted with glob_to_re.  Raises
    GlobbingError if an empty part appears anywhere but first or
    last, i.e. if glob_str contains consecutive slashes.

    """
    glob_parts = glob_str.split("/")
    if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
        raise GlobbingError("Consecutive '/'s found in globbing string "
                            + glob_str)

    # A real list is needed because the first element may be replaced
    prefixes = ["/".join(glob_parts[:i+1]) for i in range(len(glob_parts))]
    # we must make exception for root "/", only dir to end in slash
    if prefixes[0] == "":
        prefixes[0] = "/"
    return [self.glob_to_re(prefix) for prefix in prefixes]
469
def glob_to_re(self, pat):
    """Returned regular expression equivalent to shell glob pat

    Currently only the ?, *, [], and ** expressions are supported.
    There is no way to quote these special characters.

    ** becomes '.*', a single * becomes '[^/]*' and ? becomes
    '[^/]', so only ** can match across a slash.  The text inside
    [...] is copied into the regular expression with backslashes
    doubled and a leading ! turned into ^.  A [ with no closing ]
    is taken literally.  Every other character is passed through
    re.escape.

    This function taken with minor modifications from efnmatch.py

    """
    i, n, res = 0, len(pat), ''
    while i < n:
        c, s = pat[i], pat[i:i+2]
        i = i+1
        if s == '**':
            res = res + '.*'
            i = i + 1 # the second * has been consumed as well
        elif c == '*':
            res = res + '[^/]*'
        elif c == '?':
            res = res + '[^/]'
        elif c == '[':
            # Look for the matching ]; a ] directly after the opening
            # bracket (or after a leading ! or ^) is part of the set
            j = i
            if j < n and pat[j] in '!^':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                res = res + '\\[' # interpret the [ literally
            else: # Deal with inside of [..]
                stuff = pat[i:j].replace('\\','\\\\')
                i = j+1
                if stuff[0] in '!^':
                    stuff = '^' + stuff[1:]
                res = res + '[' + stuff + ']'
        else:
            res = res + re.escape(c)
    return res