1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Import processor that filters the input (and doesn't import)."""
20
from bzrlib import osutils
21
from bzrlib.trace import (
24
from bzrlib.plugins.fastimport import (
31
class FilterProcessor(processor.ImportProcessor):
32
"""An import processor that filters the input to include/exclude objects.
34
No changes to the current repository are made.
36
Here are the supported parameters:
38
* include_paths - a list of paths that commits must change in order to
39
be kept in the output stream
41
* exclude_paths - a list of paths that should not appear in the output
50
def pre_process(self):
    """Set up the filtering state before any command is handled.

    Reads the ``include_paths`` and ``exclude_paths`` parameters and
    creates the containers the handlers rely on.
    """
    # NOTE(review): the ``self.blobs`` and ``self.parents`` initialisers
    # were missing from this copy of the file. They are restored as empty
    # dicts because blob_handler/post_handler index ``self.blobs`` and
    # commit_handler assigns into ``self.parents`` - confirm upstream.
    self.includes = self.params.get('include_paths')
    self.excludes = self.params.get('exclude_paths')
    # What's the new root, if any
    self.new_root = helpers.common_directory(self.includes)
    # Buffer of blobs until we know we need them: mark -> cmd
    self.blobs = {}
    # These are the commits we've output so far
    self.interesting_commits = set()
    # Map of commit-id to list of parents
    self.parents = {}
62
def pre_handler(self, cmd):
    """Reset the per-command state before ``cmd`` is dispatched.

    :param cmd: the command about to be handled
    """
    # NOTE(review): the two assignments below were missing from this copy
    # of the file; they are restored because post_handler prints
    # ``self.command`` and the handlers toggle ``self.keep`` - confirm.
    self.command = cmd
    # Should this command be included in the output or not?
    self.keep = False
    # Blobs to dump into the output before dumping the command itself
    self.referenced_blobs = []
69
def post_handler(self, cmd):
    """Write out the current command if its handler decided to keep it.

    Any blobs recorded in ``self.referenced_blobs`` are printed first, in
    order, followed by ``self.command``.

    :param cmd: the command that was just handled (unused; the command
        stored by pre_handler is what gets printed)
    """
    # NOTE(review): this guard was missing from this copy of the file;
    # without it every command would be emitted regardless of
    # ``self.keep`` - confirm against upstream.
    if not self.keep:
        return
    # print referenced blobs and the command
    for blob_id in self.referenced_blobs:
        self._print_command(self.blobs[blob_id])
    self._print_command(self.command)
77
def progress_handler(self, cmd):
    """Process a ProgressCommand."""
    # These always pass through
    # NOTE(review): the assignment was missing from this copy of the file;
    # restored to match the comment above - confirm.
    self.keep = True
82
def blob_handler(self, cmd):
    """Process a BlobCommand.

    :param cmd: the blob command; it is stored under ``cmd.id``
    """
    # These never pass through directly. We buffer them and only
    # output them if referenced by an interesting command.
    self.blobs[cmd.id] = cmd
    # NOTE(review): explicit reset restored (one statement is missing from
    # this copy of the file at this point) - confirm against upstream.
    self.keep = False
89
def checkpoint_handler(self, cmd):
    """Process a CheckpointCommand."""
    # These always pass through
    # NOTE(review): the assignment was missing from this copy of the file;
    # restored to match the comment above - confirm.
    self.keep = True
94
def commit_handler(self, cmd):
    """Process a CommitCommand.

    The commit is kept when at least one of its file commands survives
    filtering and the survivors are not just a single deleteall. A kept
    commit has its file commands, ``from_`` and ``merges`` rewritten to
    refer only to things present in the output.

    Whether or not the commit is kept, its parents are recorded in
    ``self.parents`` under ``":" + cmd.mark``.

    :param cmd: the commit command; mutated in place when kept
    """
    # NOTE(review): several short lines (``pass``/``else``,
    # ``self.keep = True`` and the ``elif``/``else`` arms of the parent
    # computation) were missing from this copy of the file and have been
    # reconstructed - confirm against upstream.
    # These pass through if they meet the filtering conditions
    interesting_filecmds = self._filter_filecommands(cmd.file_iter)
    if interesting_filecmds:
        # If all we have is a single deleteall, skip this commit
        only_deleteall = (
            len(interesting_filecmds) == 1 and
            isinstance(interesting_filecmds[0],
                       commands.FileDeleteAllCommand))
        if not only_deleteall:
            # Remember just the interesting file commands
            self.keep = True
            cmd.file_iter = iter(interesting_filecmds)

            # Record the referenced blobs
            for fc in interesting_filecmds:
                if isinstance(fc, commands.FileModifyCommand):
                    if (fc.dataref is not None and
                            fc.kind != 'directory'):
                        self.referenced_blobs.append(fc.dataref)

            # Update from and merges to refer to commits in the output
            cmd.from_ = self._find_interesting_from(cmd.from_)
            cmd.merges = self._find_interesting_merges(cmd.merges)
            self.interesting_commits.add(cmd.id)

    # Keep track of the parents
    if cmd.from_ and cmd.merges:
        parents = [cmd.from_] + cmd.merges
    elif cmd.from_:
        parents = [cmd.from_]
    else:
        parents = None
    self.parents[":" + cmd.mark] = parents
129
def reset_handler(self, cmd):
    """Process a ResetCommand.

    :param cmd: the reset command; ``cmd.from_`` is rewritten when set
    """
    # NOTE(review): ``self.keep = True`` and the ``else:`` were missing
    # from this copy of the file and have been reconstructed - confirm.
    if cmd.from_ is None:
        # We pass through resets that init a branch because we have to
        # assume the branch might be interesting.
        self.keep = True
    else:
        # Keep resets if they indirectly reference something we kept
        cmd.from_ = self._find_interesting_from(cmd.from_)
        self.keep = cmd.from_ is not None
140
def tag_handler(self, cmd):
    """Process a TagCommand.

    :param cmd: the tag command; ``cmd.from_`` is rewritten in place
    """
    # Keep tags if they indirectly reference something we kept
    cmd.from_ = self._find_interesting_from(cmd.from_)
    self.keep = cmd.from_ is not None
146
def feature_handler(self, cmd):
    """Process a FeatureCommand.

    A warning is issued when the feature name is not listed in
    ``commands.FEATURE_NAMES``; the command is kept either way.

    :param cmd: the feature command
    """
    # NOTE(review): the tail of the warning call and ``self.keep = True``
    # were missing from this copy of the file and have been reconstructed;
    # ``self.warning`` is presumably provided by the base class - confirm.
    feature = cmd.feature_name
    if feature not in commands.FEATURE_NAMES:
        self.warning("feature %s is not supported - parsing may fail"
            % (feature,))
    # These always pass through
    self.keep = True
155
def _print_command(self, cmd):
156
"""Wrapper to avoid adding unnecessary blank lines."""
158
self.outf.write(text)
159
if not text.endswith("\n"):
160
self.outf.write("\n")
162
def _filter_filecommands(self, filecmd_iter):
163
"""Return the filecommands filtered by includes & excludes.
165
:return: a list of FileCommand objects
167
if self.includes is None and self.excludes is None:
168
return list(filecmd_iter())
170
# Do the filtering, adjusting for the new_root
172
for fc in filecmd_iter():
173
if (isinstance(fc, commands.FileModifyCommand) or
174
isinstance(fc, commands.FileDeleteCommand)):
175
if self._path_to_be_kept(fc.path):
176
fc.path = self._adjust_for_new_root(fc.path)
179
elif isinstance(fc, commands.FileDeleteAllCommand):
181
elif isinstance(fc, commands.FileRenameCommand):
182
fc = self._convert_rename(fc)
183
elif isinstance(fc, commands.FileCopyCommand):
184
fc = self._convert_copy(fc)
186
warning("cannot handle FileCommands of class %s - ignoring",
193
def _path_to_be_kept(self, path):
194
"""Does the given path pass the filtering criteria?"""
195
if self.excludes and (path in self.excludes
196
or osutils.is_inside_any(self.excludes, path)):
199
return (path in self.includes
200
or osutils.is_inside_any(self.includes, path))
203
def _adjust_for_new_root(self, path):
204
"""Adjust a path given the new root directory of the output."""
205
if self.new_root is None:
207
elif path.startswith(self.new_root):
208
return path[len(self.new_root):]
212
def _find_interesting_parent(self, commit_ref):
214
if commit_ref in self.interesting_commits:
216
parents = self.parents.get(commit_ref)
219
commit_ref = parents[0]
221
def _find_interesting_from(self, commit_ref):
222
if commit_ref is None:
224
return self._find_interesting_parent(commit_ref)
226
def _find_interesting_merges(self, commit_refs):
227
if commit_refs is None:
230
for commit_ref in commit_refs:
231
parent = self._find_interesting_parent(commit_ref)
232
if parent is not None:
233
merges.append(parent)
239
def _convert_rename(self, fc):
240
"""Convert a FileRenameCommand into a new FileCommand.
242
:return: None if the rename is being ignored, otherwise a
243
new FileCommand based on the whether the old and new paths
244
are inside or outside of the interesting locations.
248
keep_old = self._path_to_be_kept(old)
249
keep_new = self._path_to_be_kept(new)
250
if keep_old and keep_new:
251
fc.old_path = self._adjust_for_new_root(old)
252
fc.new_path = self._adjust_for_new_root(new)
255
# The file has been renamed to a non-interesting location.
257
old = self._adjust_for_new_root(old)
258
return commands.FileDeleteCommand(old)
260
# The file has been renamed into an interesting location
261
# We really ought to add it but we don't currently buffer
262
# the contents of all previous files and probably never want
263
# to. Maybe fast-import-info needs to be extended to
264
# remember all renames and a config file can be passed
265
# into here ala fast-import?
266
warning("cannot turn rename of %s into an add of %s yet" %
270
def _convert_copy(self, fc):
271
"""Convert a FileCopyCommand into a new FileCommand.
273
:return: None if the copy is being ignored, otherwise a
274
new FileCommand based on the whether the source and destination
275
paths are inside or outside of the interesting locations.
279
keep_src = self._path_to_be_kept(src)
280
keep_dest = self._path_to_be_kept(dest)
281
if keep_src and keep_dest:
282
fc.src_path = self._adjust_for_new_root(src)
283
fc.dest_path = self._adjust_for_new_root(dest)
286
# The file has been copied to a non-interesting location.
290
# The file has been copied into an interesting location
291
# We really ought to add it but we don't currently buffer
292
# the contents of all previous files and probably never want
293
# to. Maybe fast-import-info needs to be extended to
294
# remember all copies and a config file can be passed
295
# into here ala fast-import?
296
warning("cannot turn copy of %s into an add of %s yet" %