1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import re
class GlobbingError(Exception):
"""Something has gone wrong when parsing a glob string"""
pass
class FilePrefixError(GlobbingError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
def _glob_get_prefix_regexs(glob_str):
"""Return list of regexps equivalent to prefixes of glob_str"""
# Internal. Used by glob_get_normal_sf.
glob_parts = glob_str.split("/")
if "" in glob_parts[1:-1]:
# "" OK if comes first or last, as in /foo/
raise GlobbingError("Consecutive '/'s found in globbing string " +
glob_str)
prefixes = ["/".join(glob_parts[:i + 1]) for i in range(len(glob_parts))]
# we must make exception for root "/", only dir to end in slash
if prefixes[0] == "":
prefixes[0] = "/"
return list(map(glob_to_regex, prefixes))
def path_matches_glob_fn(glob_str, include, ignore_case=False):
"""Return a function test_fn(path) which
tests whether path matches glob, as per the Unix shell rules, taking as
arguments a path, a glob string and include (0 indicating that the glob
string is an exclude glob and 1 indicating that it is an include glob,
returning:
0 - if the file should be excluded
1 - if the file should be included
2 - if the folder should be scanned for any included/excluded files
None - if the selection function has nothing to say about the file
"""
glob_ends_w_slash = False
if glob_str != "/" and glob_str[-1] == "/":
glob_ends_w_slash = True
# Remove trailing / from directory name (unless that is the entire
# string)
glob_str = glob_str[:-1]
flags = 0
if ignore_case:
flags = re.IGNORECASE
re_comp = lambda r: re.compile(r, re.S | flags)
# matches what glob matches and any files in directory
# Resulting regular expression is:
# ^ string must be at the beginning of path
# string translated into regex
# ($|/) nothing must follow except for the end of the string, newline or /
# Note that the "/" at the end of the regex means that it will match
# if the glob matches a parent folders of path
glob_comp_re = re_comp("^%s($|/)" % glob_to_regex(glob_str))
if glob_ends_w_slash:
# Creates a version of glob_comp_re that does not match folder contents
# This can be used later to check that an exact match is actually a
# folder, rather than a file.
glob_comp_re_exact = re_comp("^%s($)" % glob_to_regex(glob_str))
if glob_str.find("**") != -1:
# glob_str has a ** in it
glob_str = glob_str[:glob_str.find("**") + 2] # truncate after **
# Below regex is translates to:
# ^ string must be at the beginning of path
# the regexs corresponding to the parent directories of glob_str
# $ nothing must follow except for the end of the string or newline
scan_comp_re = re_comp("^(%s)$" %
"|".join(_glob_get_prefix_regexs(glob_str)))
def test_fn(path):
if glob_comp_re.match(path.name):
# Path matches glob, or is contained within a matching folder
if not glob_ends_w_slash:
return include
else:
# Glob ended with a /, so we need to check any exact match was
# a folder
if glob_comp_re_exact.match(path.name):
# Not an included file/folder, so must be a folder to match
if path.isdir():
# Is a directory, so all is well
return include
else:
# Exact match and not a folder
return None
else:
# An included file/folder, so normal approach is fine
return include
elif include == 1 and scan_comp_re.match(path.name):
return 2
else:
return None
return test_fn
def glob_to_regex(pat):
"""Returned regular expression equivalent to shell glob pat
Currently only the ?, *, [], and ** expressions are supported.
Ranges like [a-z] are also currently unsupported. There is no
way to quote these special characters.
This function taken with minor modifications from efnmatch.py
by Donovan Baarda.
"""
# Internal. Used by glob_get_normal_sf, glob_get_prefix_res and unit tests.
i, n, res = 0, len(pat), ''
while i < n:
c, s = pat[i], pat[i:i + 2]
i = i + 1
if s == '**':
res = res + '.*'
i = i + 1
elif c == '*':
res = res + '[^/]*'
elif c == '?':
res = res + '[^/]'
elif c == '[':
j = i
if j < n and pat[j] in '!^':
j = j + 1
if j < n and pat[j] == ']':
j = j + 1
while j < n and pat[j] != ']':
j = j + 1
if j >= n:
res = res + '\\[' # interpret the [ literally
else:
# Deal with inside of [..]
stuff = pat[i:j].replace('\\', '\\\\')
i = j + 1
if stuff[0] in '!^':
stuff = '^' + stuff[1:]
res = res + '[' + stuff + ']'
else:
res = res + re.escape(c)
return res
|