~ubuntu-branches/ubuntu/utopic/bzr-fastimport/utopic-proposed

« back to all changes in this revision

Viewing changes to processors/filter_processor.py

  • Committer: Bazaar Package Importer
  • Author(s): Jelmer Vernooij
  • Date: 2009-05-05 20:23:22 UTC
  • mfrom: (0.1.3 squeeze) (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20090505202322-t5ce971trg0ojsfj
Tags: 0.8.0~bzr181-1
* Move to section vcs.
* New upstream snapshot.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2009 Canonical Ltd
 
2
#
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
#
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
#
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
 
 
17
"""Import processor that filters the input (and doesn't import)."""
 
18
 
 
19
 
 
20
from bzrlib import osutils
 
21
from bzrlib.trace import (
 
22
    warning,
 
23
    )
 
24
from bzrlib.plugins.fastimport import (
 
25
    commands,
 
26
    helpers,
 
27
    processor,
 
28
    )
 
29
 
 
30
 
 
31
class FilterProcessor(processor.ImportProcessor):
    """An import processor that filters the input to include/exclude objects.

    No changes to the current repository are made.

    Here are the supported parameters:

    * include_paths - a list of paths that commits must change in order to
      be kept in the output stream

    * exclude_paths - a list of paths that should not appear in the output
      stream
    """

    known_params = [
        'include_paths',
        'exclude_paths',
        ]

    def pre_process(self):
        """Set up the filtering state before any command is handled."""
        self.includes = self.params.get('include_paths')
        self.excludes = self.params.get('exclude_paths')
        # What's the new root, if any.
        # NOTE(review): helpers.common_directory is defined elsewhere; it is
        # handed None when no include_paths were given - confirm it copes.
        self.new_root = helpers.common_directory(self.includes)
        # Buffer of blobs until we know we need them: mark -> cmd
        self.blobs = {}
        # These are the commits we've output so far
        self.interesting_commits = set()
        # Map of commit-id to list of parents
        self.parents = {}

    def pre_handler(self, cmd):
        """Reset the per-command state before cmd is dispatched."""
        self.command = cmd
        # Should this command be included in the output or not?
        self.keep = False
        # Blobs to dump into the output before dumping the command itself
        self.referenced_blobs = []

    def post_handler(self, cmd):
        """Emit the command, preceded by its blobs, if it was kept."""
        if not self.keep:
            return
        # Blobs go first so the command that refers to them follows them
        # in the output.
        for blob_id in self.referenced_blobs:
            self._print_command(self.blobs[blob_id])
        self._print_command(self.command)

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # These always pass through
        self.keep = True

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # These never pass through directly. We buffer them and only
        # output them if referenced by an interesting command.
        self.blobs[cmd.id] = cmd
        self.keep = False

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # These always pass through
        self.keep = True

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit passes through if, after filtering, it still carries
        file commands other than a lone deleteall. A kept commit has its
        file commands, from_ and merges rewritten in place so that it only
        refers to things present in the output.
        """
        interesting_filecmds = self._filter_filecommands(cmd.file_iter)
        # If all we have is a single deleteall, skip this commit
        only_deleteall = (len(interesting_filecmds) == 1 and isinstance(
            interesting_filecmds[0], commands.FileDeleteAllCommand))
        if interesting_filecmds and not only_deleteall:
            # Remember just the interesting file commands
            self.keep = True
            cmd.file_iter = iter(interesting_filecmds)

            # Record the referenced blobs
            for fc in interesting_filecmds:
                if (isinstance(fc, commands.FileModifyCommand)
                        and fc.dataref is not None):
                    self.referenced_blobs.append(fc.dataref)

            # Update from and merges to refer to commits in the output
            cmd.from_ = self._find_interesting_from(cmd.from_)
            cmd.merges = self._find_interesting_merges(cmd.merges)
            self.interesting_commits.add(cmd.id)

        # Keep track of the parents, whether or not the commit was kept,
        # so later references can be walked back to a commit in the output.
        if cmd.from_ and cmd.merges:
            parents = [cmd.from_] + cmd.merges
        elif cmd.from_:
            parents = [cmd.from_]
        else:
            parents = None
        # The mark is optional in a fast-import stream; without one there
        # is no ":mark" key to record (and ":" + None would raise).
        if cmd.mark is not None:
            self.parents[":" + cmd.mark] = parents

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.from_ is None:
            # We pass through resets that init a branch because we have to
            # assume the branch might be interesting.
            self.keep = True
            return
        # Keep resets if they indirectly reference something we kept
        cmd.from_ = self._find_interesting_from(cmd.from_)
        self.keep = cmd.from_ is not None

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        # Keep tags if they indirectly reference something we kept
        cmd.from_ = self._find_interesting_from(cmd.from_)
        self.keep = cmd.from_ is not None

    def _print_command(self, cmd):
        """Write repr(cmd) to the output, newline-terminated.

        A newline is only appended when the text lacks one, which avoids
        adding unnecessary blank lines.
        """
        text = repr(cmd)
        self.outf.write(text)
        if not text.endswith("\n"):
            self.outf.write("\n")

    def _filter_filecommands(self, filecmd_iter):
        """Return the filecommands filtered by includes & excludes.

        :param filecmd_iter: a callable returning an iterable of
          FileCommand objects
        :return: a list of FileCommand objects
        """
        if self.includes is None and self.excludes is None:
            return list(filecmd_iter())

        # Do the filtering, adjusting for the new_root
        path_commands = (commands.FileModifyCommand,
                         commands.FileDeleteCommand)
        result = []
        for fc in filecmd_iter():
            if isinstance(fc, path_commands):
                if not self._path_to_be_kept(fc.path):
                    continue
                fc.path = self._adjust_for_new_root(fc.path)
            elif isinstance(fc, commands.FileDeleteAllCommand):
                # Kept as-is; commit_handler decides whether a lone
                # deleteall is worth outputting.
                pass
            elif isinstance(fc, commands.FileRenameCommand):
                fc = self._convert_rename(fc)
            elif isinstance(fc, commands.FileCopyCommand):
                fc = self._convert_copy(fc)
            else:
                warning("cannot handle FileCommands of class %s - ignoring",
                        fc.__class__)
                continue
            # The rename/copy conversions return None to drop the command
            if fc is not None:
                result.append(fc)
        return result

    def _path_to_be_kept(self, path):
        """Does the given path pass the filtering criteria?

        Excludes are checked first, so they win over includes. When no
        includes are configured, every non-excluded path is kept.
        """
        if self.excludes and (path in self.excludes
                or osutils.is_inside_any(self.excludes, path)):
            return False
        if self.includes:
            return (path in self.includes
                or osutils.is_inside_any(self.includes, path))
        return True

    def _adjust_for_new_root(self, path):
        """Adjust a path given the new root directory of the output.

        The new_root prefix is stripped when the path starts with it;
        otherwise the path is returned unchanged.
        """
        # NOTE(review): a plain prefix test - this presumably relies on
        # new_root ending with a path separator; confirm against
        # helpers.common_directory.
        if self.new_root is not None and path.startswith(self.new_root):
            return path[len(self.new_root):]
        return path

    def _find_interesting_parent(self, commit_ref):
        """Walk back from commit_ref to the nearest commit in the output.

        Only the first recorded parent is followed at each step.

        :return: the reference of a commit already output, or None if the
          walk runs out of recorded parents first
        """
        while True:
            if commit_ref in self.interesting_commits:
                return commit_ref
            parents = self.parents.get(commit_ref)
            if not parents:
                return None
            commit_ref = parents[0]

    def _find_interesting_from(self, commit_ref):
        """Map a from reference onto a commit in the output, if any.

        :return: None if commit_ref is None or nothing it leads back to
          was output
        """
        if commit_ref is None:
            return None
        return self._find_interesting_parent(commit_ref)

    def _find_interesting_merges(self, commit_refs):
        """Map merge references onto commits in the output.

        :return: the list of mapped references, dropping those that lead
          to nothing in the output, or None if that list would be empty
        """
        if commit_refs is None:
            return None
        candidates = [self._find_interesting_parent(ref)
                      for ref in commit_refs]
        merges = [parent for parent in candidates if parent is not None]
        return merges or None

    def _convert_rename(self, fc):
        """Convert a FileRenameCommand into a new FileCommand.

        :return: None if the rename is being ignored, otherwise a
          new FileCommand based on the whether the old and new paths
          are inside or outside of the interesting locations.
        """
        old = fc.old_path
        new = fc.new_path
        keep_old = self._path_to_be_kept(old)
        keep_new = self._path_to_be_kept(new)
        if keep_old and keep_new:
            fc.old_path = self._adjust_for_new_root(old)
            fc.new_path = self._adjust_for_new_root(new)
            return fc
        if keep_old:
            # The file has been renamed to a non-interesting location.
            # Delete it!
            return commands.FileDeleteCommand(self._adjust_for_new_root(old))
        if keep_new:
            # The file has been renamed into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all renames and a config file can be passed
            # into here ala fast-import?
            warning("cannot turn rename of %s into an add of %s yet",
                    old, new)
        return None

    def _convert_copy(self, fc):
        """Convert a FileCopyCommand into a new FileCommand.

        :return: None if the copy is being ignored, otherwise a
          new FileCommand based on the whether the source and destination
          paths are inside or outside of the interesting locations.
        """
        src = fc.src_path
        dest = fc.dest_path
        keep_src = self._path_to_be_kept(src)
        keep_dest = self._path_to_be_kept(dest)
        if keep_src and keep_dest:
            fc.src_path = self._adjust_for_new_root(src)
            fc.dest_path = self._adjust_for_new_root(dest)
            return fc
        if keep_src:
            # The file has been copied to a non-interesting location.
            # Ignore it!
            return None
        if keep_dest:
            # The file has been copied into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all copies and a config file can be passed
            # into here ala fast-import?
            warning("cannot turn copy of %s into an add of %s yet",
                    src, dest)
        return None