Source code for duplicity.globmatch

# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import re


class GlobbingError(Exception):
    """Base error raised when a glob string cannot be parsed."""
class FilePrefixError(GlobbingError):
    """Globbing error signalling that a file lacks the required prefix."""
def _glob_get_prefix_regexs(glob_str):
    """Return one regex string per leading path prefix of glob_str.

    Internal helper for select_fn_from_glob.  The glob is split on "/" and
    every cumulative prefix ("a", "a/b", "a/b/c", ...) is translated with
    glob_to_regex.  A glob that starts with "/" yields "/" as its first
    prefix.

    Raises GlobbingError if the glob contains consecutive slashes.
    """
    components = glob_str.split("/")

    # An empty component is fine at either end (as in "/foo/"), but one in
    # the middle can only come from two adjacent slashes.
    if any(part == "" for part in components[1:-1]):
        raise GlobbingError("Consecutive '/'s found in globbing string " +
                            glob_str)

    # Build the cumulative prefixes by extending the previous one.
    prefixes = []
    running = None
    for part in components:
        running = part if running is None else running + "/" + part
        prefixes.append(running)

    # Root is the only directory whose name ends in a slash, so an empty
    # first prefix really stands for "/".
    if prefixes[0] == "":
        prefixes[0] = "/"

    return [glob_to_regex(prefix) for prefix in prefixes]
def select_fn_from_glob(glob_str, include, ignore_case=False):
    """Build a selection function test_fn(path) for a shell-style glob.

    glob_str is the glob, include is 1 for an include glob and 0 for an
    exclude glob, and ignore_case makes the match case-insensitive.

    The returned function reads path.name (and may call path.isdir()) and
    returns:

        include - the path matches the glob, or lies inside a matching
                  folder (0 means exclude it, 1 means include it)
        2       - include globs only: the path is a parent directory of the
                  glob, so it should be scanned for matching files
        None    - the glob has nothing to say about this path

    Note: matching a folder implicitly matches everything within it.  A glob
    ending in "/" only matches an exactly-named path if it is a directory.
    """
    # A trailing slash restricts exact matches to directories.  The bare
    # root glob "/" is instead treated as "everything".
    dir_only = glob_str != "/" and glob_str[-1] == "/"
    if glob_str == "/":
        glob_str = "/**"
    elif dir_only:
        # Drop the trailing slash from the directory name
        glob_str = glob_str[:-1]

    re_flags = re.S | (re.IGNORECASE if ignore_case else 0)

    # Matches what the glob matches plus anything beneath it:
    #   ^      anchor at the start of the path
    #   ...    the glob translated into a regex
    #   ($|/)  followed only by the end of the string, a newline or a "/"
    # The "/" alternative is what makes a matching parent folder pull in
    # its whole contents.
    glob_comp_re = re.compile("^%s($|/)" % glob_to_regex(glob_str), re_flags)

    # For directory-only globs, a stricter pattern that matches the glob
    # itself but not its contents, so an exact hit can be checked for
    # actually being a folder.
    glob_comp_re_exact = None
    if dir_only:
        glob_comp_re_exact = re.compile("^%s($)" % glob_to_regex(glob_str),
                                        re_flags)

    # Anything after the first "**" is irrelevant for directory scanning.
    double_star = glob_str.find("**")
    if double_star != -1:
        glob_str = glob_str[:double_star + 2]

    # Matches exactly the parent directories of the (truncated) glob:
    #   ^(p1|p2|...)$  where each p is the regex of one path prefix
    scan_comp_re = re.compile(
        "^(%s)$" % "|".join(_glob_get_prefix_regexs(glob_str)), re_flags)

    def test_fn(path):
        name = path.name
        assert not name[-1] == "/" or name == "/", \
            "path.name should never end in '/' during normal operation for " \
            "normal paths (except '/' alone)\n" \
            "path.name here is " + name + " and glob is " + glob_str

        if glob_comp_re.match(name):
            # The path matches the glob or sits inside a matching folder.
            # Only an exact hit on a directory-only glob needs the extra
            # check that the path really is a folder.
            if (dir_only and glob_comp_re_exact.match(name)
                    and not path.isdir()):
                return None
            return include

        if include == 1 and scan_comp_re.match(name):
            # Parent directory of an include glob: scan it
            return 2

        return None

    return test_fn
def glob_to_regex(pat):
    """Return a regular expression string equivalent to the shell glob pat.

    Translation rules:

        **     ->  .*      (matches anything, including "/")
        *      ->  [^/]*   (matches within one path component)
        ?      ->  [^/]    (one character other than "/")
        [...]  ->  [...]   (contents copied into a regex character class
                            with backslashes doubled; a leading "!" or "^"
                            negates the class)

    A "[" with no closing "]" is taken literally, and every other character
    is escaped with re.escape.  There is no way to quote the special
    characters.

    This function was taken with minor modifications from efnmatch.py by
    Donovan Baarda.
    """
    pieces = []
    length = len(pat)
    pos = 0
    while pos < length:
        # "**" has to be recognised before the single "*" case.
        if pat.startswith('**', pos):
            pieces.append('.*')
            pos += 2
            continue

        char = pat[pos]
        pos += 1
        if char == '*':
            pieces.append('[^/]*')
        elif char == '?':
            pieces.append('[^/]')
        elif char == '[':
            # Find the closing bracket.  A negation marker and a "]" that
            # comes first inside the brackets belong to the set itself, so
            # the search starts after them.
            start = pos
            if start < length and pat[start] in '!^':
                start += 1
            if start < length and pat[start] == ']':
                start += 1
            close = pat.find(']', start)

            if close == -1:
                # Unterminated set: interpret the [ literally
                pieces.append('\\[')
            else:
                members = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if members[0] in '!^':
                    members = '^' + members[1:]
                pieces.append('[' + members + ']')
        else:
            pieces.append(re.escape(char))
    return ''.join(pieces)