# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Import processor that supports all Bazaar repository formats."""


import re
import sys
import time

from bzrlib import (
    bzrdir,
    debug,
    delta,
    errors,
    generate_ids,
    inventory,
    lru_cache,
    osutils,
    progress,
    revision,
    )
from bzrlib.repofmt import pack_repo
from bzrlib.trace import (
    error,
    mutter,
    note,
    warning,
    )
import bzrlib.util.configobj.configobj as configobj
from bzrlib.plugins.fastimport import (
    errors as plugin_errors,
    helpers,
    idmapfile,
    processor,
    revisionloader,
    )


# How many commits before automatically reporting progress
_DEFAULT_AUTO_PROGRESS = 1000

# How many commits before automatically checkpointing
_DEFAULT_AUTO_CHECKPOINT = 10000

# How many inventories to cache
_DEFAULT_INV_CACHE_SIZE = 10

class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * file and symlink commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows which blobs to cache intelligently.

    * trees - update the working trees before completing.
      By default, the importer updates the repository and
      branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.

    * import-marks - name of file to read mark information from

    * export-marks - name of file to write mark information to
    """
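
    # A minimal usage sketch (hypothetical wiring - the front-end that
    # builds the params dict and the command iterator lives in the
    # plugin's command layer, not in this class):
    #
    #   params = {'info': 'project.cfg', 'checkpoint': '1000', 'trees': True}
    #   proc = GenericProcessor(bzrdir, params=params, verbose=True)
    #   proc.process(command_iter)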

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
            mutter(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def _import_marks(self, filename):
        try:
            f = file(filename)
        except IOError:
            self.warning(
                "Could not open import-marks file, not importing marks")
            return
        firstline = f.readline()
        match = re.match(r'^format=(\d+)$', firstline)
        if not match:
            print >>sys.stderr, "%r doesn't look like a mark file" % \
                filename
            sys.exit(1)
        elif match.group(1) != '1':
            print >>sys.stderr, 'format version in mark file not supported'
            sys.exit(1)
        for string in f.readline().rstrip('\n').split('\0'):
            if string == '':
                continue
            name, integer = string.rsplit('.', 1)
            # We really can't do anything with the branch information, so we
            # just ignore it
        self.cache_mgr.revision_ids = {}
        for line in f:
            line = line.rstrip('\n')
            mark, revid = line.split(' ', 1)
            self.cache_mgr.revision_ids[mark] = revid
        f.close()
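
    # Mark files read above (and written by export_marks below) look
    # roughly like this (a sketch inferred from the parsing logic; the
    # NUL-separated branch line is read but ignored):
    #
    #   format=1
    #   <branch-info line>
    #   :1 revid-of-first-commit
    #   :2 revid-of-second-commit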

    def export_marks(self, filename):
        try:
            f = file(filename, 'w')
        except IOError:
            self.warning(
                "Could not open export-marks file, not exporting marks")
            return
        f.write('format=1\n')
        # no branch information is tracked so write an empty branch line
        f.write('\n')
        for mark, revid in self.cache_mgr.revision_ids.iteritems():
            f.write('%s %s\n' % (mark, revid))
        f.close()

    def pre_process(self):
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        if self.params.get("import-marks") is not None:
            self._import_marks(self.params.get("import-marks"))

        self.skip_total = False
        self.first_incremental_commit = True
        if len(self.repo.all_revision_ids()) > 0:
            # an existing repository: this is a restart or an incremental
            # import, so use the id-map to skip already loaded revisions
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        new_repo_api = hasattr(self.repo, 'revisions')
        if new_repo_api:
            self.loader = revisionloader.RevisionLoader2(self.repo)
        elif not self._experimental:
            self.loader = revisionloader.RevisionLoader1(self.repo)
        else:
            def fulltext_when(count):
                total = self.total_commits
                if total is not None and count == total:
                    fulltext = True
                else:
                    # Create an inventory fulltext every 200 revisions
                    fulltext = count % 200 == 0
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext

            self.loader = revisionloader.ImportRevisionLoader1(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every / 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately from the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            self.export_marks(self.params.get("export-marks"))

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                if self.verbose:
                    report = delta._ChangeReporter()
                else:
                    report = None
                for wt in trees:
                    wt.update(report)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "run 'bzr update' inside that branch.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository."""
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count < known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)
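
    # The id-map file is plain text, one "commit-id revision-id" pair per
    # line (see the class docstring), e.g. with illustrative values:
    #
    #   :1 jane@example.com-20080101000000-0123456789abcdef
    #   :2 jane@example.com-20080101000100-fedcba9876543210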

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids.keys()))
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self, revs=False):
        """Generate the file-id cache by searching repository inventories."""
        # Get the interesting revisions - the heads
        if revs:
            head_ids = revs
        else:
            head_ids = self.cache_mgr.heads.keys()
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            if cmd.from_ is not None:
                self._set_tag(tag_name, cmd.from_)
            else:
                self.warning("ignoring reset refs/tags/%s - no from clause"
                    % tag_name)
            return
        # FIXME: cmd.from_ is a committish and thus could reference
        # another branch. Create a method for resolving committishes.
        if cmd.from_ is not None:
            self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
        # Why is this required now vs at the end?
        #updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
        #    helpers.invert_dictset(self.cache_mgr.heads),
        #    self.cache_mgr.last_ref, self.tags)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        if cmd.from_ is not None:
            self._set_tag(cmd.id, cmd.from_)
        else:
            self.warning("ignoring tag %s - no from clause" % cmd.id)

    def _set_tag(self, name, from_):
        """Define a tag given a name and import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id


class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data."""
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data."""
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)
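
    # Behaviour sketch: blobs the info file marks as used multiple times
    # ('multi') are kept sticky and survive repeated fetches; any other
    # blob is popped on first fetch. For example, assuming ':1' is not
    # in _blobs_to_keep:
    #
    #   mgr.store_blob(':1', 'data')
    #   mgr.fetch_blob(':1')    # -> 'data' (and the blob is forgotten)
    #   mgr.fetch_blob(':1')    # -> KeyError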

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        self.file_ids[new_path] = self.file_ids[old_path]
        del self.file_ids[old_path]

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        if parents is not None:
            for parent in parents:
                refs = self.heads.get(parent)
                if refs:
                    refs.discard(cmd_ref)
                    if not refs:
                        del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref


def _track_heads(cmd, cache_mgr):
    """Track the repository heads given a CommitCommand.

    :return: the list of parents in terms of commit-ids
    """
    # Get the true set of parents
    if cmd.from_ is not None:
        parents = [cmd.from_]
    else:
        last_id = cache_mgr.last_ids.get(cmd.ref)
        if last_id is not None:
            parents = [last_id]
        else:
            parents = []
    parents.extend(cmd.merges)

    # Track the heads
    cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
    return parents
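
# Worked example: for a stream ":1" then ":2" (child of :1) on
# refs/heads/master, the second _track_heads() call computes
# parents == [':1'], discards ':1' as a head and records ':2', leaving
# cache_mgr.heads == {':2': set(['refs/heads/master'])}.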


class GenericCommitHandler(processor.CommitHandler):

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
            _experimental=False):
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.revision_ids[p]
            for p in parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Seed the inventory from the previous one
        if len(self.parents) == 0:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if self.repo.supports_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=self._escape_commit_message(self.command.message),
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.
        #
        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message
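
    # For example, a backspace control character in a commit message is
    # escaped rather than passed through to the XML serializer:
    #
    #   u"bad\x08message"  ->  u"bad\\x08message"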

    def modify_handler(self, filecmd):
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def _delete_recursive(self, path):
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        dirname, basename = osutils.split(path)
        if (fileid in self.inventory and
            isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
            for child_path in self.inventory[fileid].children.keys():
                self._delete_recursive(osutils.pathjoin(path, child_path))
        try:
            if self.inventory.id2path(fileid) == path:
                del self.inventory[fileid]
            else:
                # already added by some other name?
                if dirname in self.cache_mgr.file_ids:
                    parent_id = self.cache_mgr.file_ids[dirname]
                    del self.inventory[parent_id].children[basename]
        except KeyError:
            self._warn_unless_in_merges(fileid, path)
        except errors.NoSuchId:
            self._warn_unless_in_merges(fileid, path)
        except AttributeError, ex:
            if ex.args[0] == 'children':
                # A directory has changed into a file and then one
                # of its children is being deleted!
                self._warn_unless_in_merges(fileid, path)
            else:
                raise
        self.cache_mgr._delete_path(path)

    def delete_handler(self, filecmd):
        self._delete_recursive(filecmd.path)

    def _warn_unless_in_merges(self, fileid, path):
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        ie = self.inventory[file_id]
        lines = self.loader._get_lines(file_id, ie.revision)
        self.lines_for_commit[file_id] = lines
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)
        self.inventory[file_id].revision = self.revision_id

    def deleteall_handler(self, filecmd):
        self.debug("deleting all files (and also all directories)")
        # Would be nice to have an inventory.clear() method here
        root_items = [ie for (name, ie) in
            self.inventory.root.children.iteritems()]
        for root_item in root_items:
            self.inventory.remove_recursive_id(root_item.file_id)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            id = self.cache_mgr.file_ids[path]
            return id, False
        except KeyError:
            id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = id
            self.debug("Generated new file id %s for '%s'", id, path)
            return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if self.repo.supports_rich_root():
            # The very first root needs to have the right revision
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids, say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory."""
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'."""
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
            dir_basename, parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        #print "adding dir for %s" % path
        self.inventory.add(ie)
        return basename, ie


class GenericBranchUpdater(object):

    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
        """Create an object responsible for updating branches.

        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        """
        self.repo = repo
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref
        self.tags = tags

    def update(self):
"""Update the Bazaar branches and tips matching the heads.
1015
If the repository is shared, this routine creates branches
1016
as required. If it isn't, warnings are produced about the
1017
lost of information.
1019
:return: updated, lost_heads where
1020
updated = the list of branches updated
1021
lost_heads = a list of (bazaar-name,revision) for branches that
1022
would have been created had the repository been shared
1025
branch_tips, lost_heads = self._get_matching_branches()
1026
for br, tip in branch_tips:
1027
if self._update_branch(br, tip):
1029
return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch, tip) tuples, including the
            default branch if one is being updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        branch_tips = []
        lost_heads = []
        ref_names = self.heads_by_ref.keys()
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name
        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)
        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names.keys()):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                    branch_tips.append((br, tip))
                    continue
                except errors.BzrError, ex:
                    error("ERROR: failed to create branch %s: %s",
                        location, ex)
            lost_head = self.cache_mgr.revision_ids[tip]
            lost_info = (name, lost_head)
            lost_heads.append(lost_info)
        return branch_tips, lost_heads

    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk."""
        for candidate in ['refs/heads/master']:
            if candidate in ref_names:
                return candidate
        # Use the last reference in the import stream
        return self.last_ref

    def make_branch(self, location):
        """Make a branch in the repository if not already there."""
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            return bzrdir.BzrDir.create_branch_convenience(location)

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Generate Bazaar branch names from import ref names.

        :return: a dictionary with Bazaar names as keys and
          the original reference names as values.
        """
        bazaar_names = {}
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            full_name = "--".join(parts)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                if parts[0] == 'remotes':
                    bazaar_name += ".remote"
                else:
                    bazaar_name = full_name
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names
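
    # Example of the mapping above (a sketch, assuming the leading 'refs'
    # component is stripped as shown):
    #
    #   refs/heads/master   -> master
    #   refs/remotes/master -> master.remote  (the short name collided)
    #   refs/heads/a/b      -> b, or heads--a--b on a further collision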

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :return: whether the branch was changed or not
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(revs)
        existing_revno, existing_last_rev_id = br.last_revision_info()
        changed = False
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
            br.set_last_revision_info(revno, last_rev_id)
            changed = True
        # apply tags known in this branch
        my_tags = {}
        if self.tags:
            for tag, rev in self.tags.items():
                if rev in revs:
                    my_tags[tag] = rev
            if my_tags:
                br.tags._set_tag_dict(my_tags)
                changed = True
        if changed:
            tagno = len(my_tags)
            note("\t branch %s now has %d %s and %d %s", br.nick,
                revno, helpers.single_plural(revno, "revision", "revisions"),
                tagno, helpers.single_plural(tagno, "tag", "tags"))
        return changed