~ubuntu-branches/debian/jessie/bzr-fastimport/jessie

« back to all changes in this revision

Viewing changes to processors/filter_processor.py

  • Committer: Bazaar Package Importer
  • Author(s): Jelmer Vernooij
  • Date: 2010-11-06 18:40:27 UTC
  • mfrom: (1.1.6 upstream)
  • Revision ID: james.westby@ubuntu.com-20101106184027-iclo8iim9equ6i8b
Tags: 0.9.0+bzr279-1
* New upstream snapshot.
* Bump standards version to 3.9.1 (no changes).
* Run testsuite during package build.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2009 Canonical Ltd
2
 
#
3
 
# This program is free software; you can redistribute it and/or modify
4
 
# it under the terms of the GNU General Public License as published by
5
 
# the Free Software Foundation; either version 2 of the License, or
6
 
# (at your option) any later version.
7
 
#
8
 
# This program is distributed in the hope that it will be useful,
9
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 
# GNU General Public License for more details.
12
 
#
13
 
# You should have received a copy of the GNU General Public License
14
 
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
 
 
17
 
"""Import processor that filters the input (and doesn't import)."""
18
 
 
19
 
 
20
 
from bzrlib import osutils
21
 
from bzrlib.trace import (
22
 
    warning,
23
 
    )
24
 
from bzrlib.plugins.fastimport import (
25
 
    commands,
26
 
    helpers,
27
 
    processor,
28
 
    )
29
 
 
30
 
 
31
 
class FilterProcessor(processor.ImportProcessor):
32
 
    """An import processor that filters the input to include/exclude objects.
33
 
 
34
 
    No changes to the current repository are made.
35
 
 
36
 
    Here are the supported parameters:
37
 
 
38
 
    * include_paths - a list of paths that commits must change in order to
39
 
      be kept in the output stream
40
 
 
41
 
    * exclude_paths - a list of paths that should not appear in the output
42
 
      stream
43
 
    """
44
 
 
45
 
    known_params = [
46
 
        'include_paths',
47
 
        'exclude_paths',
48
 
        ]
49
 
 
50
 
    def pre_process(self):
51
 
        self.includes = self.params.get('include_paths')
52
 
        self.excludes = self.params.get('exclude_paths')
53
 
        # What's the new root, if any
54
 
        self.new_root = helpers.common_directory(self.includes)
55
 
        # Buffer of blobs until we know we need them: mark -> cmd
56
 
        self.blobs = {}
57
 
        # These are the commits we've output so far
58
 
        self.interesting_commits = set()
59
 
        # Map of commit-id to list of parents
60
 
        self.parents = {}
61
 
 
62
 
    def pre_handler(self, cmd):
63
 
        self.command = cmd
64
 
        # Should this command be included in the output or not?
65
 
        self.keep = False
66
 
        # Blobs to dump into the output before dumping the command itself
67
 
        self.referenced_blobs = []
68
 
 
69
 
    def post_handler(self, cmd):
70
 
        if not self.keep:
71
 
            return
72
 
        # print referenced blobs and the command
73
 
        for blob_id in self.referenced_blobs:
74
 
            self._print_command(self.blobs[blob_id])
75
 
        self._print_command(self.command)
76
 
 
77
 
    def progress_handler(self, cmd):
78
 
        """Process a ProgressCommand."""
79
 
        # These always pass through
80
 
        self.keep = True
81
 
 
82
 
    def blob_handler(self, cmd):
83
 
        """Process a BlobCommand."""
84
 
        # These never pass through directly. We buffer them and only
85
 
        # output them if referenced by an interesting command.
86
 
        self.blobs[cmd.id] = cmd
87
 
        self.keep = False
88
 
 
89
 
    def checkpoint_handler(self, cmd):
90
 
        """Process a CheckpointCommand."""
91
 
        # These always pass through
92
 
        self.keep = True
93
 
 
94
 
    def commit_handler(self, cmd):
95
 
        """Process a CommitCommand."""
96
 
        # These pass through if they meet the filtering conditions
97
 
        interesting_filecmds = self._filter_filecommands(cmd.file_iter)
98
 
        if interesting_filecmds:
99
 
            # If all we have is a single deleteall, skip this commit
100
 
            if len(interesting_filecmds) == 1 and isinstance(
101
 
                interesting_filecmds[0], commands.FileDeleteAllCommand):
102
 
                pass
103
 
            else:
104
 
                # Remember just the interesting file commands
105
 
                self.keep = True
106
 
                cmd.file_iter = iter(interesting_filecmds)
107
 
 
108
 
                # Record the referenced blobs
109
 
                for fc in interesting_filecmds:
110
 
                    if isinstance(fc, commands.FileModifyCommand):
111
 
                        if (fc.dataref is not None and
112
 
                            fc.kind != 'directory'):
113
 
                            self.referenced_blobs.append(fc.dataref)
114
 
 
115
 
                # Update from and merges to refer to commits in the output
116
 
                cmd.from_ = self._find_interesting_from(cmd.from_)
117
 
                cmd.merges = self._find_interesting_merges(cmd.merges)
118
 
                self.interesting_commits.add(cmd.id)
119
 
 
120
 
        # Keep track of the parents
121
 
        if cmd.from_ and cmd.merges:
122
 
            parents = [cmd.from_] + cmd.merges
123
 
        elif cmd.from_:
124
 
            parents = [cmd.from_]
125
 
        else:
126
 
            parents = None
127
 
        self.parents[":" + cmd.mark] = parents
128
 
 
129
 
    def reset_handler(self, cmd):
130
 
        """Process a ResetCommand."""
131
 
        if cmd.from_ is None:
132
 
            # We pass through resets that init a branch because we have to
133
 
            # assume the branch might be interesting.
134
 
            self.keep = True
135
 
        else:
136
 
            # Keep resets if they indirectly reference something we kept
137
 
            cmd.from_ = self._find_interesting_from(cmd.from_)
138
 
            self.keep = cmd.from_ is not None
139
 
 
140
 
    def tag_handler(self, cmd):
141
 
        """Process a TagCommand."""
142
 
        # Keep tags if they indirectly reference something we kept
143
 
        cmd.from_ = self._find_interesting_from(cmd.from_)
144
 
        self.keep = cmd.from_ is not None
145
 
 
146
 
    def feature_handler(self, cmd):
147
 
        """Process a FeatureCommand."""
148
 
        feature = cmd.feature_name
149
 
        if feature not in commands.FEATURE_NAMES:
150
 
            self.warning("feature %s is not supported - parsing may fail"
151
 
                % (feature,))
152
 
        # These always pass through
153
 
        self.keep = True
154
 
 
155
 
    def _print_command(self, cmd):
156
 
        """Wrapper to avoid adding unnecessary blank lines."""
157
 
        text = repr(cmd)
158
 
        self.outf.write(text)
159
 
        if not text.endswith("\n"):
160
 
            self.outf.write("\n")
161
 
 
162
 
    def _filter_filecommands(self, filecmd_iter):
163
 
        """Return the filecommands filtered by includes & excludes.
164
 
        
165
 
        :return: a list of FileCommand objects
166
 
        """
167
 
        if self.includes is None and self.excludes is None:
168
 
            return list(filecmd_iter())
169
 
 
170
 
        # Do the filtering, adjusting for the new_root
171
 
        result = []
172
 
        for fc in filecmd_iter():
173
 
            if (isinstance(fc, commands.FileModifyCommand) or
174
 
                isinstance(fc, commands.FileDeleteCommand)):
175
 
                if self._path_to_be_kept(fc.path):
176
 
                    fc.path = self._adjust_for_new_root(fc.path)
177
 
                else:
178
 
                    continue
179
 
            elif isinstance(fc, commands.FileDeleteAllCommand):
180
 
                pass
181
 
            elif isinstance(fc, commands.FileRenameCommand):
182
 
                fc = self._convert_rename(fc)
183
 
            elif isinstance(fc, commands.FileCopyCommand):
184
 
                fc = self._convert_copy(fc)
185
 
            else:
186
 
                warning("cannot handle FileCommands of class %s - ignoring",
187
 
                        fc.__class__)
188
 
                continue
189
 
            if fc is not None:
190
 
                result.append(fc)
191
 
        return result
192
 
 
193
 
    def _path_to_be_kept(self, path):
194
 
        """Does the given path pass the filtering criteria?"""
195
 
        if self.excludes and (path in self.excludes
196
 
                or osutils.is_inside_any(self.excludes, path)):
197
 
            return False
198
 
        if self.includes:
199
 
            return (path in self.includes
200
 
                or osutils.is_inside_any(self.includes, path))
201
 
        return True
202
 
 
203
 
    def _adjust_for_new_root(self, path):
204
 
        """Adjust a path given the new root directory of the output."""
205
 
        if self.new_root is None:
206
 
            return path
207
 
        elif path.startswith(self.new_root):
208
 
            return path[len(self.new_root):]
209
 
        else:
210
 
            return path
211
 
 
212
 
    def _find_interesting_parent(self, commit_ref):
213
 
        while True:
214
 
            if commit_ref in self.interesting_commits:
215
 
                return commit_ref
216
 
            parents = self.parents.get(commit_ref)
217
 
            if not parents:
218
 
                return None
219
 
            commit_ref = parents[0]
220
 
 
221
 
    def _find_interesting_from(self, commit_ref):
222
 
        if commit_ref is None:
223
 
            return None
224
 
        return self._find_interesting_parent(commit_ref)
225
 
 
226
 
    def _find_interesting_merges(self, commit_refs):
227
 
        if commit_refs is None:
228
 
            return None
229
 
        merges = []
230
 
        for commit_ref in commit_refs:
231
 
            parent = self._find_interesting_parent(commit_ref)
232
 
            if parent is not None:
233
 
                merges.append(parent)
234
 
        if merges:
235
 
            return merges
236
 
        else:
237
 
            return None
238
 
 
239
 
    def _convert_rename(self, fc):
240
 
        """Convert a FileRenameCommand into a new FileCommand.
241
 
        
242
 
        :return: None if the rename is being ignored, otherwise a
243
 
          new FileCommand based on the whether the old and new paths
244
 
          are inside or outside of the interesting locations.
245
 
          """
246
 
        old = fc.old_path
247
 
        new = fc.new_path
248
 
        keep_old = self._path_to_be_kept(old)
249
 
        keep_new = self._path_to_be_kept(new)
250
 
        if keep_old and keep_new:
251
 
            fc.old_path = self._adjust_for_new_root(old)
252
 
            fc.new_path = self._adjust_for_new_root(new)
253
 
            return fc
254
 
        elif keep_old:
255
 
            # The file has been renamed to a non-interesting location.
256
 
            # Delete it!
257
 
            old = self._adjust_for_new_root(old)
258
 
            return commands.FileDeleteCommand(old)
259
 
        elif keep_new:
260
 
            # The file has been renamed into an interesting location
261
 
            # We really ought to add it but we don't currently buffer
262
 
            # the contents of all previous files and probably never want
263
 
            # to. Maybe fast-import-info needs to be extended to
264
 
            # remember all renames and a config file can be passed
265
 
            # into here ala fast-import?
266
 
            warning("cannot turn rename of %s into an add of %s yet" %
267
 
                (old, new))
268
 
        return None
269
 
 
270
 
    def _convert_copy(self, fc):
271
 
        """Convert a FileCopyCommand into a new FileCommand.
272
 
        
273
 
        :return: None if the copy is being ignored, otherwise a
274
 
          new FileCommand based on the whether the source and destination
275
 
          paths are inside or outside of the interesting locations.
276
 
          """
277
 
        src = fc.src_path
278
 
        dest = fc.dest_path
279
 
        keep_src = self._path_to_be_kept(src)
280
 
        keep_dest = self._path_to_be_kept(dest)
281
 
        if keep_src and keep_dest:
282
 
            fc.src_path = self._adjust_for_new_root(src)
283
 
            fc.dest_path = self._adjust_for_new_root(dest)
284
 
            return fc
285
 
        elif keep_src:
286
 
            # The file has been copied to a non-interesting location.
287
 
            # Ignore it!
288
 
            return None
289
 
        elif keep_dest:
290
 
            # The file has been copied into an interesting location
291
 
            # We really ought to add it but we don't currently buffer
292
 
            # the contents of all previous files and probably never want
293
 
            # to. Maybe fast-import-info needs to be extended to
294
 
            # remember all copies and a config file can be passed
295
 
            # into here ala fast-import?
296
 
            warning("cannot turn copy of %s into an add of %s yet" %
297
 
                (src, dest))
298
 
        return None