~jelmer/bzr-git/705807-dpush

252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
228 by Jelmer Vernooij
Split out map.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
19
from dulwich.objects import (
20
    Blob,
1029 by Jelmer Vernooij
Use dictionary with verifiers rather than requiring testament3-sha1 everywhere.
21
    Commit,
864 by Jelmer Vernooij
Cope with the first commit being pointless.
22
    Tree,
586 by Jelmer Vernooij
Fix issues pointed out by pyflakes.
23
    sha_to_hex,
1153 by Jelmer Vernooij
Import ZERO_SHA from dulwich.objects.
24
    ZERO_SHA,
260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
25
    )
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
26
from dulwich.object_store import (
457 by Jelmer Vernooij
Use BaseObjectStore.
27
    BaseObjectStore,
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
28
    )
249 by Jelmer Vernooij
Implement Tree.
29
260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
30
from bzrlib import (
324.1.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
31
    errors,
789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
32
    lru_cache,
478 by Jelmer Vernooij
Cope with disappeared revisions.
33
    trace,
260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
34
    ui,
773 by Jelmer Vernooij
Implement inventory_to_objects
35
    urlutils,
260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
36
    )
541 by Jelmer Vernooij
Cope with NULL_REVISION.
37
from bzrlib.revision import (
38
    NULL_REVISION,
39
    )
1023 by Jelmer Vernooij
Set and verify testament.
40
from bzrlib.testament import(
41
    StrictTestament3,
42
    )
228 by Jelmer Vernooij
Split out map.
43
229 by Jelmer Vernooij
More work on converter.
44
from bzrlib.plugins.git.mapping import (
463 by Jelmer Vernooij
Support remote dpush (except for references).
45
    default_mapping,
359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
46
    directory_to_tree,
548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
47
    extract_unusual_modes,
324.1.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
48
    mapping_registry,
795 by Jelmer Vernooij
simplify sha extraction for blobs, process multiple blobs at once.
49
    symlink_to_blob,
229 by Jelmer Vernooij
More work on converter.
50
    )
938 by Jelmer Vernooij
Rename shamap to cache, as it can also do content caching now.
51
from bzrlib.plugins.git.cache import (
842 by Jelmer Vernooij
Allow content cache to be provided.
52
    from_repository as cache_from_repository,
231 by Jelmer Vernooij
Partially fix pull.
53
    )
54
878 by Jelmer Vernooij
Fix determining of unusual file modes.
55
import posixpath
900.1.23 by Jelmer Vernooij
More work on roundtripping support.
56
import stat
878 by Jelmer Vernooij
Fix determining of unusual file modes.
57
228 by Jelmer Vernooij
Split out map.
58
452 by Jelmer Vernooij
Rename converter -> object_store, provide utility function for getting ObjectStore's.
59
def get_object_store(repo, mapping=None):
    """Return an object store for a repository.

    :param repo: Repository to get an object store for
    :param mapping: Mapping handed to BazaarObjectStore; unused when the
        repository carries its own ``_git`` attribute
    :return: ``repo._git.object_store`` if the repository has a ``_git``
        attribute that is not None, otherwise a new BazaarObjectStore
    """
    git_repo = getattr(repo, "_git", None)
    if git_repo is None:
        # No underlying git repository: wrap the Bazaar repository instead
        return BazaarObjectStore(repo, mapping)
    return git_repo.object_store
64
65
852 by Jelmer Vernooij
Cache trees rather than inventories.
66
# Upper bound passed as max_size to the tree cache in LRUTreeCache. Tree
# sizes there are estimated at 1k per inventory entry, so this amounts to
# 50 MiB of estimated tree data.
MAX_TREE_CACHE_SIZE = 50 * 1024 * 1024
67
68
69
class LRUTreeCache(object):
    """Size-bounded LRU cache of revision trees for a repository.

    Trees that are not cached are fetched from the repository and added
    to the cache.
    """

    def __init__(self, repository):
        """Create a tree cache.

        :param repository: Repository to fetch uncached trees from
        """
        def approx_tree_size(tree):
            # Very rough estimate, 1k per inventory entry
            return len(tree.inventory) * 1024
        self.repository = repository
        self._cache = lru_cache.LRUSizeCache(max_size=MAX_TREE_CACHE_SIZE,
            after_cleanup_size=None, compute_size=approx_tree_size)

    def revision_tree(self, revid):
        """Return the revision tree for a single revision.

        :param revid: Revision id
        :return: Revision tree, from the cache if present there
        """
        try:
            tree = self._cache[revid]
        except KeyError:
            tree = self.repository.revision_tree(revid)
            self.add(tree)
        assert tree.get_revision_id() == tree.inventory.revision_id
        return tree

    def iter_revision_trees(self, revids):
        """Iterate over the revision trees for a number of revisions.

        Cache misses are fetched from the repository in a single
        ``revision_trees()`` call before anything is returned.

        :param revids: Iterable of revision ids; may be a one-shot iterator
        :return: Generator of trees, in the order of ``revids``
        """
        # revids is walked twice (once to find the cache misses, once to
        # produce the result), so materialise it first. Without this a
        # generator argument would be exhausted by the first pass and the
        # result would silently be empty.
        revids = list(revids)
        trees = {}
        todo = []
        for revid in revids:
            try:
                tree = self._cache[revid]
            except KeyError:
                todo.append(revid)
            else:
                assert tree.get_revision_id() == revid
                assert tree.inventory.revision_id == revid
                trees[revid] = tree
        for tree in self.repository.revision_trees(todo):
            trees[tree.get_revision_id()] = tree
            self.add(tree)
        return (trees[r] for r in revids)

    def revision_trees(self, revids):
        """Return a list of the revision trees for a number of revisions.

        :param revids: Iterable of revision ids
        :return: List of trees, in the order of ``revids``
        """
        return list(self.iter_revision_trees(revids))

    def add(self, tree):
        """Add a tree to the cache, keyed by its revision id.

        :param tree: Revision tree to cache
        """
        self._cache.add(tree.get_revision_id(), tree)
789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
110
111
1053 by Jelmer Vernooij
Fix find_missing_bzr_revids.
112
def _find_missing_bzr_revids(graph, want, have):
    """Find the revisions that have to be pushed.

    :param graph: Graph object; its find_unique_ancestors() is called
        once for every wanted revision
    :param want: Revisions the target wants
    :param have: Revisions the target already has
    :return: Set of revisions to fetch, never containing NULL_REVISION
    """
    missing = set()
    for wanted_revid in want:
        ancestors = graph.find_unique_ancestors(wanted_revid, have)
        missing.update(ancestors)
    # The null revision is not a real revision and is never fetched
    missing.discard(NULL_REVISION)
    return missing
127
128
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
129
def _check_expected_sha(expected_sha, object):
797 by Jelmer Vernooij
Add docstring, fix formatting.
130
    """Check whether an object matches an expected SHA.
131
132
    :param expected_sha: None or expected SHA as either binary or as hex digest
133
    :param object: Object to verify
134
    """
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
135
    if expected_sha is None:
136
        return
137
    if len(expected_sha) == 40:
138
        if expected_sha != object.sha().hexdigest():
797 by Jelmer Vernooij
Add docstring, fix formatting.
139
            raise AssertionError("Invalid sha for %r: %s" % (object,
140
                expected_sha))
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
141
    elif len(expected_sha) == 20:
142
        if expected_sha != object.sha().digest():
797 by Jelmer Vernooij
Add docstring, fix formatting.
143
            raise AssertionError("Invalid sha for %r: %s" % (object,
144
                sha_to_hex(expected_sha)))
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
145
    else:
797 by Jelmer Vernooij
Add docstring, fix formatting.
146
        raise AssertionError("Unknown length %d for %r" % (len(expected_sha),
147
            expected_sha))
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
148
149
931 by Jelmer Vernooij
Update docstring, deal with kind changes appropriately in _tree_to_objects
150
def _tree_to_objects(tree, parent_trees, idmap, unusual_modes,
                     dummy_file_name=None):
    """Iterate over the objects that were introduced in a revision.

    Blobs are yielded first, followed by the tree objects of every
    directory that was touched and of all their ancestors, with
    directories yielded after the paths beneath them.

    :param tree: Revision tree to generate objects for
    :param idmap: id map
    :param parent_trees: Parent revision trees
    :param unusual_modes: Unusual file modes dictionary
    :param dummy_file_name: File name to use for dummy files
        in empty directories. None to skip empty directories
    :return: Yields (path, object, ie) entries
    """
    # path -> file id of directories whose tree object has to be rebuilt
    new_trees = {}
    # (path, ie) of files whose contents still have to be fetched
    new_blobs = []
    # file id -> sha of objects already known for this revision
    shamap = {}
    try:
        base_tree = parent_trees[0]
        other_parent_trees = parent_trees[1:]
    except IndexError:
        # No parents: compare against the tree of the null revision
        base_tree = tree._repository.revision_tree(NULL_REVISION)
        other_parent_trees = []
    def find_unchanged_parent_ie(ie, parent_trees):
        # Return the entry from the first parent tree that has the same
        # text sha1, kind and symlink target; KeyError if there is none.
        assert ie.kind in ("symlink", "file")
        for ptree in parent_trees:
            try:
                pie = ptree.inventory[ie.file_id]
            except errors.NoSuchId:
                pass
            else:
                if (pie.text_sha1 == ie.text_sha1 and
                    pie.kind == ie.kind and
                    pie.symlink_target == ie.symlink_target):
                    return pie
        raise KeyError

    # Find all the changed blobs
    # NOTE(review): in the tuples below index 0 presumably describes the
    # entry in base_tree and index 1 the entry in tree (bzrlib
    # iter_changes convention) - confirm.
    for (file_id, path, changed_content, versioned, parent, name, kind,
         executable) in tree.iter_changes(base_tree):
        if kind[1] == "file":
            ie = tree.inventory[file_id]
            if changed_content:
                # Changed relative to the base tree, but possibly identical
                # to the entry in one of the other parents.
                try:
                    pie = find_unchanged_parent_ie(ie, other_parent_trees)
                except KeyError:
                    pass
                else:
                    try:
                        shamap[ie.file_id] = idmap.lookup_blob_id(
                            pie.file_id, pie.revision)
                    except KeyError:
                        # no-change merge ?
                        # The sha is computed from the text; the blob
                        # itself is not yielded.
                        blob = Blob()
                        blob.data = tree.get_file_text(ie.file_id)
                        shamap[ie.file_id] = blob.id
            if not file_id in shamap:
                new_blobs.append((path[1], ie))
            new_trees[posixpath.dirname(path[1])] = parent[1]
        elif kind[1] == "symlink":
            ie = tree.inventory[file_id]
            if changed_content:
                blob = symlink_to_blob(ie)
                shamap[file_id] = blob.id
                # Only yield the blob if no other parent has the same entry
                try:
                    find_unchanged_parent_ie(ie, other_parent_trees)
                except KeyError:
                    yield path[1], blob, ie
            new_trees[posixpath.dirname(path[1])] = parent[1]
        elif kind[1] not in (None, "directory"):
            raise AssertionError(kind[1])
        if (path[0] not in (None, "") and
            parent[0] in tree.inventory and
            tree.inventory[parent[0]].kind == "directory"):
            # Removal
            # The directory at path[0]'s dirname has to be rebuilt as well,
            # provided parent[0] is still a directory in this tree.
            new_trees[posixpath.dirname(path[0])] = parent[0]

    # Fetch contents of the blobs that were changed
    for (path, ie), chunks in tree.iter_files_bytes(
        [(ie.file_id, (path, ie)) for (path, ie) in new_blobs]):
        obj = Blob()
        obj.chunked = chunks
        yield path, obj, ie
        # Recorded once the consumer resumes the generator
        shamap[ie.file_id] = obj.id

    # Directories containing entries with unusual modes are always rebuilt
    for path in unusual_modes:
        parent_path = posixpath.dirname(path)
        new_trees[parent_path] = tree.path2id(parent_path)

    # Walk from every touched directory up to the root (the entry whose
    # parent_id is None), collecting all directories on the way.
    trees = {}
    while new_trees:
        items = new_trees.items()
        new_trees = {}
        for path, file_id in items:
            parent_id = tree.inventory[file_id].parent_id
            if parent_id is not None:
                parent_path = urlutils.dirname(path)
                new_trees[parent_path] = parent_id
            trees[path] = file_id

    def ie_to_hexsha(ie):
        # Callback for directory_to_tree: return the sha for an inventory
        # entry, preferring objects already generated for this revision.
        try:
            return shamap[ie.file_id]
        except KeyError:
            # FIXME: Should be the same as in parent
            if ie.kind in ("file", "symlink"):
                try:
                    return idmap.lookup_blob_id(ie.file_id, ie.revision)
                except KeyError:
                    # no-change merge ?
                    blob = Blob()
                    blob.data = tree.get_file_text(ie.file_id)
                    return blob.id
            elif ie.kind == "directory":
                # Not all cache backends store the tree information,
                # calculate again from scratch
                ret = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
                    dummy_file_name)
                if ret is None:
                    return ret
                return ret.id
            else:
                raise AssertionError

    # Reverse order makes a directory's path come after the paths beneath
    # it, so the shas of subdirectories are in shamap when it is built.
    for path in sorted(trees.keys(), reverse=True):
        ie = tree.inventory[trees[path]]
        assert ie.kind == "directory"
        obj = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
            dummy_file_name)
        # directory_to_tree may return None, in which case nothing is
        # yielded for this directory.
        if obj is not None:
            yield path, obj, ie
            shamap[ie.file_id] = obj.id
279
280
457 by Jelmer Vernooij
Use BaseObjectStore.
281
class BazaarObjectStore(BaseObjectStore):
320 by Jelmer Vernooij
Handle lightweight checkouts.
282
    """A Git-style object store backed onto a Bazaar repository."""
228 by Jelmer Vernooij
Split out map.
283
284
    def __init__(self, repository, mapping=None):
        """Create an object store on top of a Bazaar repository.

        :param repository: Bazaar repository that backs the store
        :param mapping: Mapping to use; default_mapping if None
        """
        self.repository = repository
        if mapping is None:
            self.mapping = default_mapping
        else:
            self.mapping = mapping
        self._cache = cache_from_repository(repository)
        # Must be a one-element tuple: ("tree") without the trailing comma
        # is just the string "tree", on which membership tests would match
        # substrings such as "t" or "re".
        self._content_cache_types = ("tree",)
        # Write groups are delegated to the id map of the cache
        self.start_write_group = self._cache.idmap.start_write_group
        self.abort_write_group = self._cache.idmap.abort_write_group
        self.commit_write_group = self._cache.idmap.commit_write_group
        self.tree_cache = LRUTreeCache(self.repository)
228 by Jelmer Vernooij
Split out map.
296
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
297
    def _update_sha_map(self, stop_revision=None):
        """Add the revisions that are missing from the id map.

        Starting at the heads, ancestors are followed for as long as the
        id map reports them missing. The missing revisions present in the
        repository are then converted in topological order inside a
        single write group.

        :param stop_revision: Revision to start looking from; None to use
            the heads of all revisions in the repository
        """
        graph = self.repository.get_graph()
        if stop_revision is not None:
            heads = set([stop_revision])
        else:
            heads = graph.heads(self.repository.all_revision_ids())
        idmap = self._cache.idmap
        missing_revids = idmap.missing_revisions(heads)
        while heads:
            candidates = set()
            for parent_ids in graph.get_parent_map(heads).values():
                candidates.update(
                    [p for p in parent_ids if p not in missing_revids])
            # Only keep walking through parents that are missing themselves
            heads = idmap.missing_revisions(candidates)
            missing_revids.update(heads)
        if NULL_REVISION in missing_revids:
            missing_revids.remove(NULL_REVISION)
        # Restrict to the revisions the repository actually has
        missing_revids = self.repository.has_revisions(missing_revids)
        if not missing_revids:
            return
        self.start_write_group()
        try:
            pb = ui.ui_factory.nested_progress_bar()
            try:
                total = len(missing_revids)
                for i, revid in enumerate(
                        graph.iter_topo_order(missing_revids)):
                    trace.mutter('processing %r', revid)
                    pb.update("updating git map", i, total)
                    self._update_sha_map_revision(revid)
            finally:
                pb.finished()
        except:
            # Deliberately bare: abort the write group on any failure and
            # let the exception propagate.
            self.abort_write_group()
            raise
        self.commit_write_group()
229 by Jelmer Vernooij
More work on converter.
331
422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
332
    def __iter__(self):
        """Iterate over the SHA1s in the id map, after updating the map."""
        self._update_sha_map()
        known_sha1s = self._cache.idmap.sha1s()
        return iter(known_sha1s)
422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
335
1029 by Jelmer Vernooij
Use dictionary with verifiers rather than requiring testament3-sha1 everywhere.
336
    def _reconstruct_commit(self, rev, tree_sha, roundtrip, verifiers):
        """Build the Commit object for a revision.

        :param rev: Revision object
        :param tree_sha: SHA1 of the root tree object
        :param roundtrip: Whether or not to roundtrip bzr metadata
        :param verifiers: Verifiers for the commits
        :return: Commit object, as exported by the mapping
        """
        def sha_for_parent(parent_revid):
            # Parents for which no revision can be found map to None
            try:
                sha = self._lookup_revision_sha1(parent_revid)
            except errors.NoSuchRevision:
                sha = None
            return sha
        commit = self.mapping.export_commit(rev, tree_sha, sha_for_parent,
            roundtrip, verifiers)
        return commit
527.1.7 by Jelmer Vernooij
Cope with ghosts a bit better.
352
900.1.49 by Jelmer Vernooij
Avoid trying to set HEAD for remote branches.
353
    def _create_fileid_map_blob(self, inv):
        """Export the file ids that the mapping would not generate itself.

        :param inv: Inventory to take the paths and file ids from
        :return: Result of the mapping's export_fileid_map() for all paths
            whose file id differs from the generated one
        """
        # FIXME: This can probably be a lot more efficient,
        # not all files necessarily have to be processed.
        generate_file_id = self.mapping.generate_file_id
        custom_file_ids = dict(
            (path, entry.file_id)
            for (path, entry) in inv.iter_entries()
            if generate_file_id(path) != entry.file_id)
        return self.mapping.export_fileid_map(custom_file_ids)
361
900.1.4 by Jelmer Vernooij
More work on roundtripping.
362
    def _revision_to_objects(self, rev, tree, roundtrip):
        """Convert a revision to a set of git objects.

        :param rev: Bazaar revision object
        :param tree: Bazaar revision tree
        :param roundtrip: Whether to roundtrip all Bazaar revision data
        :return: Yields (path, object, ie) entries. The root tree is
            yielded with path "" after all other trees and blobs; the
            commit comes last, with None as path and ie.
        """
        unusual_modes = extract_unusual_modes(rev)
        # Only parents that are present in the repository are used
        present_parents = self.repository.has_revisions(rev.parent_ids)
        parent_trees = self.tree_cache.revision_trees(
            [p for p in rev.parent_ids if p in present_parents])
        root_tree = None
        for path, obj, ie in _tree_to_objects(tree, parent_trees,
                self._cache.idmap, unusual_modes, self.mapping.BZR_DUMMY_FILE):
            if path == "":
                # Held back: the file id map may still be added to it below
                root_tree = obj
                root_ie = ie
                # Don't yield just yet
            else:
                yield path, obj, ie
        if root_tree is None:
            # Pointless commit - get the tree sha elsewhere
            if not rev.parent_ids:
                root_tree = Tree()
            else:
                # Reuse the root tree of the first parent's commit
                base_sha1 = self._lookup_revision_sha1(rev.parent_ids[0])
                root_tree = self[self[base_sha1].tree]
            root_ie = tree.inventory.root
        if roundtrip and self.mapping.BZR_FILE_IDS_FILE is not None:
            b = self._create_fileid_map_blob(tree.inventory)
            if b is not None:
                # Stored in the root tree as a regular file with mode 0644
                root_tree[self.mapping.BZR_FILE_IDS_FILE] = ((stat.S_IFREG | 0644), b.id)
                yield self.mapping.BZR_FILE_IDS_FILE, b, None
        yield "", root_tree, root_ie
        if roundtrip:
            testament3 = StrictTestament3(rev, tree.inventory)
            verifiers = { "testament3-sha1": testament3.as_sha1() }
        else:
            verifiers = {}
        commit_obj = self._reconstruct_commit(rev, root_tree.id,
            roundtrip=roundtrip, verifiers=verifiers)
        # If the revision id parses as a foreign revision id, the
        # regenerated commit has to have exactly that sha.
        try:
            foreign_revid, mapping = mapping_registry.parse_revision_id(
                rev.revision_id)
        except errors.InvalidRevisionId:
            pass
        else:
            _check_expected_sha(foreign_revid, commit_obj)
        yield None, commit_obj, None
783 by Jelmer Vernooij
Move object generation into a separate function.
411
838 by Jelmer Vernooij
Add convenience object for updating the object store.
412
    def _get_updater(self, rev):
        """Return the cache's updater for a revision.

        :param rev: Revision object
        """
        cache = self._cache
        return cache.get_updater(rev)
838 by Jelmer Vernooij
Add convenience object for updating the object store.
414
783 by Jelmer Vernooij
Move object generation into a separate function.
415
    def _update_sha_map_revision(self, revid):
        """Convert a single revision and record its objects in the cache.

        :param revid: Id of the revision to convert
        :return: Id of the commit object returned by the updater
        """
        rev = self.repository.get_revision(revid)
        tree = self.tree_cache.revision_tree(rev.revision_id)
        updater = self._get_updater(rev)
        objects = self._revision_to_objects(rev, tree, roundtrip=True)
        for path, obj, ie in objects:
            if not isinstance(obj, Commit):
                extra = ie
            else:
                # Commits are added with their verifiers instead of an
                # inventory entry
                testament3 = StrictTestament3(rev, tree.inventory)
                extra = {"testament3-sha1": testament3.as_sha1()}
            updater.add_object(obj, extra, path)
        return updater.finish().id
229 by Jelmer Vernooij
More work on converter.
427
855 by Jelmer Vernooij
_get_ -> _reconstruct_.
428
    def _reconstruct_blobs(self, keys):
        """Yield Git Blob objects for file texts stored in bzr.

        :param keys: Iterable of (fileid, revision, expected_sha) tuples
        :return: Yields one Blob per text returned by the repository,
            after checking it against the expected sha
        """
        # The key itself is passed along as identifier so that it comes
        # back together with the file contents
        wanted = ((key[0], key[1], key) for key in keys)
        for key, chunks in self.repository.iter_files_bytes(wanted):
            (fileid, revision, expected_sha) = key
            blob = Blob()
            blob.chunked = chunks
            if blob.id != expected_sha:
                if blob.data == "":
                    # Perhaps it's a symlink ?
                    tree = self.tree_cache.revision_tree(revision)
                    entry = tree.inventory[fileid]
                    if entry.kind == 'symlink':
                        blob = symlink_to_blob(entry)
            _check_expected_sha(expected_sha, blob)
            yield blob
229 by Jelmer Vernooij
More work on converter.
447
855 by Jelmer Vernooij
_get_ -> _reconstruct_.
448
    def _reconstruct_tree(self, fileid, revid, inv, unusual_modes,
449
        expected_sha=None):
343 by Jelmer Vernooij
Use file ids consistently in map.
450
        """Return a Git Tree object from a file id and a revision stored in bzr.
249 by Jelmer Vernooij
Implement Tree.
451
343 by Jelmer Vernooij
Use file ids consistently in map.
452
        :param fileid: fileid in the tree.
249 by Jelmer Vernooij
Implement Tree.
453
        :param revision: Revision of the tree.
454
        """
776 by Jelmer Vernooij
Remove unnecessary lookups.
455
        def get_ie_sha1(entry):
456
            if entry.kind == "directory":
808 by Jelmer Vernooij
Avoid recalculating tree shas we already have.
457
                try:
859 by Jelmer Vernooij
Trivial cleanups.
458
                    return self._cache.idmap.lookup_tree_id(entry.file_id,
459
                        revid)
812 by Jelmer Vernooij
Catch KeyError from lookup_tree as well - some caches (such as sqlite) don't store all trees, only some.
460
                except (NotImplementedError, KeyError):
855 by Jelmer Vernooij
_get_ -> _reconstruct_.
461
                    obj = self._reconstruct_tree(entry.file_id, revid, inv,
808 by Jelmer Vernooij
Avoid recalculating tree shas we already have.
462
                        unusual_modes)
463
                    if obj is None:
464
                        return None
465
                    else:
466
                        return obj.id
776 by Jelmer Vernooij
Remove unnecessary lookups.
467
            elif entry.kind in ("file", "symlink"):
868 by Jelmer Vernooij
Cope with no-change merges.
468
                try:
469
                    return self._cache.idmap.lookup_blob_id(entry.file_id,
470
                        entry.revision)
471
                except KeyError:
472
                    # no-change merge?
473
                    return self._reconstruct_blobs(
474
                        [(entry.file_id, entry.revision, None)]).next().id
776 by Jelmer Vernooij
Remove unnecessary lookups.
475
            else:
476
                raise AssertionError("unknown entry kind '%s'" % entry.kind)
900.1.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
477
        tree = directory_to_tree(inv[fileid], get_ie_sha1, unusual_modes,
478
            self.mapping.BZR_DUMMY_FILE)
915 by Jelmer Vernooij
Cope with the fact that the old format didn't export file ids.
479
        if (inv.root.file_id == fileid and
480
            self.mapping.BZR_FILE_IDS_FILE is not None):
900.1.49 by Jelmer Vernooij
Avoid trying to set HEAD for remote branches.
481
            b = self._create_fileid_map_blob(inv)
482
            # If this is the root tree, add the file ids
483
            tree[self.mapping.BZR_FILE_IDS_FILE] = ((stat.S_IFREG | 0644), b.id)
793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
484
        _check_expected_sha(expected_sha, tree)
249 by Jelmer Vernooij
Implement Tree.
485
        return tree
229 by Jelmer Vernooij
More work on converter.
486
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
487
    def get_parents(self, sha):
        """Return the parents of the Git commit with the given SHA1.

        :param sha: SHA1 of the commit
        :raises: KeyError, NotCommitError
        """
        commit = self[sha]
        return commit.parents
494
364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
495
    def _lookup_revision_sha1(self, revid):
        """Return the SHA1 matching a Bazaar revision.

        The sources are tried in order: the cached id map, the revision
        id itself (when the mapping registry can parse it), and finally
        the id map again after updating the sha map for ``revid``.

        :param revid: Bazaar revision id
        :return: ZERO_SHA for NULL_REVISION, otherwise the matching SHA1
        """
        if revid == NULL_REVISION:
            return ZERO_SHA
        try:
            return self._cache.idmap.lookup_commit(revid)
        except KeyError:
            pass
        try:
            return mapping_registry.parse_revision_id(revid)[0]
        except errors.InvalidRevisionId:
            pass
        # Not cached and not parseable: convert the revision under a
        # read lock, after which the cache is consulted once more.
        self.repository.lock_read()
        try:
            self._update_sha_map(revid)
        finally:
            self.repository.unlock()
        return self._cache.idmap.lookup_commit(revid)
364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
511
310 by Jelmer Vernooij
Fix pull from remote branches.
512
    def get_raw(self, sha):
        """Return the raw representation of a Git object, given its SHA1.

        :param sha: SHA1 of the git object
        :return: Tuple with the object's ``type`` and its raw string
        """
        git_obj = self[sha]
        raw = git_obj.as_raw_string()
        return (git_obj.type, raw)
310 by Jelmer Vernooij
Fix pull from remote branches.
519
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
520
    def __contains__(self, sha):
521
        # See if sha is in map
522
        try:
1162.1.1 by Jelmer Vernooij
Initial work on supporting multiple results for git shas.
523
            for (type, type_data) in self.lookup_git_sha(sha):
524
                if type == "commit":
525
                    if self.repository.has_revision(type_data[0]):
526
                        return True
527
                elif type == "blob":
528
                    if self.repository.texts.has_key(type_data):
529
                        return True
530
                elif type == "tree":
531
                    if self.repository.has_revision(type_data[1]):
532
                        return True
533
                else:
534
                    raise AssertionError("Unknown object type '%s'" % type)
568 by Jelmer Vernooij
Properly check that matching bzr objects exist.
535
            else:
1162.1.1 by Jelmer Vernooij
Initial work on supporting multiple results for git shas.
536
                return False
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
537
        except KeyError:
538
            return False
539
898 by Jelmer Vernooij
Optimize finding of git shas.
540
    def lookup_git_shas(self, shas, update_map=True):
        """Look up the Bazaar objects behind a number of Git SHA1s.

        :param shas: Iterable of SHA1s to look up
        :param update_map: Whether the sha map may be updated (at most
            once per call) when a SHA1 is missing from the cache
        :return: Dictionary mapping every SHA1 that was found to a list
            of the entries the id map holds for it; SHA1s that could not
            be found are left out
        """
        def cached_entries(sha):
            return list(self._cache.idmap.lookup_git_sha(sha))

        found = {}
        may_update = update_map
        for sha in shas:
            if sha == ZERO_SHA:
                found[sha] = [("commit", (NULL_REVISION, None, {}))]
                continue
            try:
                found[sha] = cached_entries(sha)
            except KeyError:
                if not may_update:
                    continue
                # Add any unconverted revisions to the map and search for
                # the sha again; the update is not repeated for later shas.
                self._update_sha_map()
                may_update = False
                try:
                    found[sha] = cached_entries(sha)
                except KeyError:
                    pass
        return found
559
560
    def lookup_git_sha(self, sha, update_map=True):
        """Look up the Bazaar objects behind a single Git SHA1.

        :param sha: SHA1 to look up
        :param update_map: Passed on to lookup_git_shas
        :return: The entry stored for ``sha`` in the lookup_git_shas result
        :raises KeyError: If ``sha`` is absent from that result
        """
        results = self.lookup_git_shas([sha], update_map=update_map)
        return results[sha]
437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
562
563
    def __getitem__(self, sha):
        """Return the Git object with the given SHA1, rebuilt from bzr data.

        The content cache, when there is one, is consulted first. Otherwise
        the SHA1 is looked up in the sha map and the object is reconstructed
        from the matching Bazaar revision, text or tree.

        :param sha: SHA1 of the git object
        :return: The cached or reconstructed object
        :raises KeyError: If the SHA1 is unknown, or if the revision the
            map refers to is not present in the repository
        """
        if self._cache.content_cache is not None:
            try:
                return self._cache.content_cache[sha]
            except KeyError:
                # Not cached; fall through to reconstruction.
                pass
        # Every branch below returns or raises, so only the first entry
        # yielded by the lookup is ever acted upon.
        for (kind, type_data) in self.lookup_git_sha(sha):
            # convert object to git object
            if kind == "commit":
                (revid, tree_sha, verifiers) = type_data
                try:
                    rev = self.repository.get_revision(revid)
                except errors.NoSuchRevision:
                    trace.mutter('entry for %s %s in shamap: %r, but not '
                                 'found in repository', kind, sha, type_data)
                    raise KeyError(sha)
                commit = self._reconstruct_commit(rev, tree_sha, roundtrip=True,
                    verifiers=verifiers)
                _check_expected_sha(sha, commit)
                return commit
            elif kind == "blob":
                (fileid, revision) = type_data
                # The sha is passed along as the expected sha of the blob.
                return self._reconstruct_blobs([(fileid, revision, sha)]).next()
            elif kind == "tree":
                (fileid, revid) = type_data
                try:
                    tree = self.tree_cache.revision_tree(revid)
                    rev = self.repository.get_revision(revid)
                except errors.NoSuchRevision:
                    trace.mutter('entry for %s %s in shamap: %r, but not found in repository', kind, sha, type_data)
                    raise KeyError(sha)
                unusual_modes = extract_unusual_modes(rev)
                try:
                    return self._reconstruct_tree(fileid, revid,
                        tree.inventory, unusual_modes, expected_sha=sha)
                except errors.NoSuchRevision:
                    raise KeyError(sha)
            else:
                raise AssertionError("Unknown object type '%s'" % kind)
        else:
            # The lookup yielded no entries at all.
            raise KeyError(sha)
782 by Jelmer Vernooij
Add custom generate_pack_contents implementation.
604
900.1.37 by Jelmer Vernooij
Factor out some common code for finding refs to send.
605
    def generate_lossy_pack_contents(self, have, want, progress=None,
            get_tagged=None):
        """Call generate_pack_contents with ``lossy=True``.

        :param have: Passed through to generate_pack_contents
        :param want: Passed through to generate_pack_contents
        :param progress: Passed through to generate_pack_contents
        :param get_tagged: Passed through to generate_pack_contents
        :return: Whatever generate_pack_contents returns
        """
        generate = self.generate_pack_contents
        return generate(have, want, progress, get_tagged, lossy=True)
609
899 by Jelmer Vernooij
Add tests for find_missing_bzr_revids.
610
    def generate_pack_contents(self, have, want, progress=None,
            get_tagged=None, lossy=False):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Not used by this implementation
        :param get_tagged: Not used by this implementation
        :param lossy: When true, objects are generated with
            ``roundtrip=False``
        :return: List of (object, path) tuples for the revisions that
            have to be sent
        """
        # Membership is tested once per wanted sha; a set avoids scanning
        # the whole ``have`` list each time.
        already_present = set(have)
        shamap = self.lookup_git_shas(have + want)

        # Revisions the other side already has.
        processed = set()
        for commit_sha in have:
            try:
                entries = shamap[commit_sha]
            except KeyError:
                # Unknown sha; nothing to exclude for it.
                continue
            for (kind, type_data) in entries:
                assert kind == "commit"
                processed.add(type_data[0])

        # Revisions that were asked for.
        pending = set()
        for commit_sha in want:
            if commit_sha in already_present:
                continue
            try:
                entries = shamap[commit_sha]
            except KeyError:
                continue
            for (kind, type_data) in entries:
                assert kind == "commit"
                pending.add(type_data[0])

        graph = self.repository.get_graph()
        todo = _find_missing_bzr_revids(graph, pending, processed)
        trace.mutter('sending revisions %r', todo)
        objects = []
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for i, revid in enumerate(todo):
                pb.update("generating git objects", i, len(todo))
                try:
                    rev = self.repository.get_revision(revid)
                except errors.NoSuchRevision:
                    # Revision is not present in the repository; skip it.
                    continue
                tree = self.tree_cache.revision_tree(revid)
                for path, obj, ie in self._revision_to_objects(rev, tree,
                    roundtrip=not lossy):
                    objects.append((obj, path))
        finally:
            pb.finished()
        return objects
865.1.1 by Jelmer Vernooij
Implement ObjectStore.add_{thin_,}pack.
656
657
    def add_thin_pack(self):
        """Open a temporary pack file whose contents get imported into bzr.

        :return: Tuple with a writable file object for the pack data and
            a ``commit`` callable that is to be invoked once all data has
            been written. ``commit`` indexes the pack and imports its
            objects into the repository inside a write group; it does
            nothing further when the file is empty.
        """
        import tempfile
        import os
        fd, path = tempfile.mkstemp(suffix=".pack")
        f = os.fdopen(fd, 'wb')
        def commit():
            from dulwich.pack import PackData, Pack
            from bzrlib.plugins.git.fetch import import_git_objects
            # Push Python's own buffer to the OS first; fsync() only
            # covers data that has already reached the file descriptor.
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) == 0:
                return
            # NOTE(review): neither the temporary pack nor the index
            # written below is removed by this method.
            pd = PackData(path)
            # ``path`` ends in ".pack"; path[:-5] is the pack basename.
            pd.create_index_v2(path[:-5]+".idx", self.object_store.get_raw)

            p = Pack(path[:-5])
            self.repository.lock_write()
            try:
                self.repository.start_write_group()
                try:
                    import_git_objects(self.repository, self.mapping,
                        p.iterobjects(get_raw=self.get_raw),
                        self.object_store)
                except:
                    # Deliberately bare: the write group has to be
                    # aborted whatever went wrong; the error is re-raised.
                    self.repository.abort_write_group()
                    raise
                else:
                    self.repository.commit_write_group()
            finally:
                self.repository.unlock()
        return f, commit
688
689
    # The pack isn't kept around anyway, so there is no point
    # in treating full packs differently from thin packs.
    add_pack = add_thin_pack