~jelmer/bzr-git/705807-dpush

252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2007 Canonical Ltd
900.1.32 by Jelmer Vernooij
update copyright
2
# Copyright (C) 2008-2010 Jelmer Vernooij <jelmer@samba.org>
252 by Jelmer Vernooij
Clarify history, copyright.
3
# Copyright (C) 2008 John Carr
18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
19
"""Converters, etc for going between Bazaar and Git ids."""
20
635.1.1 by Jelmer Vernooij
Add support for parsing hg-git metadata in the experimental mappings.
21
import base64
359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
22
import stat
23
292 by Jelmer Vernooij
Fix formatting.
24
from bzrlib import (
926 by Jelmer Vernooij
Fix formatting, drop support for Bazaar < 2.0.
25
    bencode,
292 by Jelmer Vernooij
Fix formatting.
26
    errors,
27
    foreign,
490 by Jelmer Vernooij
Warn about unusual modes and escaped XML-invalid characters.
28
    trace,
292 by Jelmer Vernooij
Fix formatting.
29
    )
30
from bzrlib.inventory import (
31
    ROOT_ID,
32
    )
152 by Jelmer Vernooij
Fix syntax errors.
33
from bzrlib.foreign import (
695 by Jelmer Vernooij
Clean up trailing whitespace.
34
    ForeignVcs,
35
    VcsMappingRegistry,
292 by Jelmer Vernooij
Fix formatting.
36
    ForeignRevision,
37
    )
701 by Jelmer Vernooij
Fix check in git repos.
38
from bzrlib.revision import (
39
    NULL_REVISION,
40
    )
635.1.1 by Jelmer Vernooij
Add support for parsing hg-git metadata in the experimental mappings.
41
from bzrlib.plugins.git.hg import (
42
    format_hg_metadata,
43
    extract_hg_metadata,
44
    )
900.1.2 by Jelmer Vernooij
Add functions for adding metadata to revision messages.
45
from bzrlib.plugins.git.roundtrip import (
46
    extract_bzr_metadata,
900.1.4 by Jelmer Vernooij
More work on roundtripping.
47
    inject_bzr_metadata,
48
    BzrGitRevisionMetadata,
900.1.23 by Jelmer Vernooij
More work on roundtripping support.
49
    deserialize_fileid_map,
50
    serialize_fileid_map,
900.1.2 by Jelmer Vernooij
Add functions for adding metadata to revision messages.
51
    )
309 by Jelmer Vernooij
Add XML escaping to work around serialization bug in bzr.
52
359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
53
# Default file mode: a regular file (S_IFREG) with permission bits 0644.
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
345 by Jelmer Vernooij
Keep track of file modes to use.
54
97.1.1 by Jelmer Vernooij
Use foreign utility functions.
55
150 by Jelmer Vernooij
Abstract away file id generation.
56
def escape_file_id(file_id):
    """Escape underscores and spaces in a file id.

    Every underscore is doubled and every space is replaced by ``_s``.

    :param file_id: String to escape
    :return: The escaped string
    """
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
58
59
60
def unescape_file_id(file_id):
    """Undo the escaping applied by `escape_file_id`.

    :param file_id: Escaped string, in which ``__`` stands for an underscore
        and ``_s`` for a space
    :return: The unescaped string
    :raises AssertionError: if an underscore is followed by an unknown escape
        character, or is the last character of the string
    """
    escapes = {'_': "_", 's': " "}
    ret = []
    length = len(file_id)
    i = 0
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
            i += 1
            continue
        if i + 1 >= length:
            # A lone trailing underscore previously surfaced as an opaque
            # IndexError; report it like any other malformed escape.
            raise AssertionError(
                "unterminated escape sequence in %r" % (file_id,))
        escape_char = file_id[i+1]
        if escape_char not in escapes:
            raise AssertionError("unknown escape character %s" %
                escape_char)
        ret.append(escapes[escape_char])
        # Skip both the underscore and the escape character
        i += 2
    return "".join(ret)
150 by Jelmer Vernooij
Abstract away file id generation.
77
78
376 by Jelmer Vernooij
Make sure author and committer names pushed to git contain < and >, otherwise the git parser barfs.
79
def fix_person_identifier(text):
    """Make sure a person identifier contains both ``<`` and ``>``.

    :param text: Author or committer identifier
    :return: ``text`` unchanged when it already contains both characters,
        otherwise ``text <text>``
    """
    has_brackets = "<" in text and ">" in text
    if not has_brackets:
        return "%s <%s>" % (text, text)
    return text
83
84
490 by Jelmer Vernooij
Warn about unusual modes and escaped XML-invalid characters.
85
def warn_escaped(commit, num_escaped):
    """Warn that XML-invalid characters were escaped in a commit.

    :param commit: Commit the characters were found in
    :param num_escaped: Number of characters that were escaped
    """
    template = ("Escaped %d XML-invalid characters in %s. Will be unable "
                "to regenerate the SHA map.")
    trace.warning(template, num_escaped, commit)
88
89
90
def warn_unusual_mode(commit, path, mode):
    """Log (via trace.mutter) that a file has an unusual mode.

    :param commit: Commit the file was found in
    :param path: Path of the file
    :param mode: The unusual file mode, formatted as octal in the message
    """
    template = ("Unusual file mode %o for %s in %s. Storing as revision "
                "property. ")
    trace.mutter(template, mode, path, commit)
490 by Jelmer Vernooij
Warn about unusual modes and escaped XML-invalid characters.
93
94
97.1.1 by Jelmer Vernooij
Use foreign utility functions.
95
class BzrGitMapping(foreign.VcsMapping):
97 by Jelmer Vernooij
use mapping object.
96
    """Class that maps between Git and Bazaar semantics."""
97
    experimental = False
98
915 by Jelmer Vernooij
Cope with the fact that the old format didn't export file ids.
99
    BZR_FILE_IDS_FILE = None
900.1.23 by Jelmer Vernooij
More work on roundtripping support.
100
915 by Jelmer Vernooij
Cope with the fact that the old format didn't export file ids.
101
    BZR_DUMMY_FILE = None
900.1.26 by Jelmer Vernooij
Add is_control_file method to BzrGitMapping.
102
198 by Jelmer Vernooij
Cope with move of show_foreign_revid.
103
    def __init__(self):
        """Initialise the mapping, passing ``foreign_git`` to the base class."""
        super(BzrGitMapping, self).__init__(foreign_git)
105
195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
106
    def __eq__(self, other):
        """Two mappings are equal if they share exact type and revid prefix."""
        return (type(self) == type(other) and
                self.revid_prefix == other.revid_prefix)

    def __ne__(self, other):
        """Inverse of `__eq__`.

        Python 2 does not derive ``!=`` from ``__eq__``, so without this
        method two equal mappings would still compare as different with
        ``!=``.
        """
        return not self.__eq__(other)
195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
109
110
    @classmethod
    def revision_id_foreign_to_bzr(cls, git_rev_id):
        """Convert a git revision id handle to a Bazaar revision id.

        :param git_rev_id: Git revision id
        :return: NULL_REVISION if ``git_rev_id`` equals dulwich's ZERO_SHA,
            otherwise ``<revid_prefix>:<git_rev_id>``
        """
        from dulwich.protocol import ZERO_SHA
        if git_rev_id != ZERO_SHA:
            return "%s:%s" % (cls.revid_prefix, git_rev_id)
        return NULL_REVISION
97 by Jelmer Vernooij
use mapping object.
117
195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
118
    @classmethod
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
        """Convert a Bazaar revision id to a git revision id handle.

        :param bzr_rev_id: Bazaar revision id of the form
            ``<revid_prefix>:<git revision id>``
        :return: Tuple with the git revision id and a new instance of this
            mapping
        :raises InvalidRevisionId: if the revision id does not start with
            this mapping's prefix followed by a colon
        """
        expected_start = "%s:" % cls.revid_prefix
        if bzr_rev_id.startswith(expected_start):
            git_rev_id = bzr_rev_id[len(cls.revid_prefix)+1:]
            return git_rev_id, cls()
        raise errors.InvalidRevisionId(bzr_rev_id, cls)
97 by Jelmer Vernooij
use mapping object.
124
150 by Jelmer Vernooij
Abstract away file id generation.
125
    def generate_file_id(self, path):
        """Generate a file id for a path.

        :param path: Path, as a bytestring or unicode string
        :return: ROOT_ID for the empty path, otherwise the escaped path
            (unicode paths are encoded as UTF-8 first)
        """
        # Git paths are just bytestrings
        # We must just hope they are valid UTF-8..
        if path == "":
            return ROOT_ID
        encoded = path.encode("utf-8") if type(path) is unicode else path
        return escape_file_id(encoded)
150 by Jelmer Vernooij
Abstract away file id generation.
133
900.1.26 by Jelmer Vernooij
Add is_control_file method to BzrGitMapping.
134
    def is_control_file(self, path):
        """Check whether a path is one of this mapping's control files.

        :param path: Path to check
        :return: True if the path equals BZR_FILE_IDS_FILE or BZR_DUMMY_FILE
        """
        control_files = (self.BZR_FILE_IDS_FILE, self.BZR_DUMMY_FILE)
        return path in control_files
136
303.1.2 by Jelmer Vernooij
Fix versionedfiles.
137
    def parse_file_id(self, file_id):
        """Convert a file id back into a path.

        :param file_id: File id
        :return: The empty string for ROOT_ID, otherwise the unescaped
            file id
        """
        if file_id != ROOT_ID:
            return unescape_file_id(file_id)
        return ""
141
900.1.31 by Jelmer Vernooij
Properly escape revids when using them in ref names.
142
    def revid_as_refname(self, revid):
        """Build the ref name under ``refs/bzr/`` for a revision id.

        :param revid: Bazaar revision id
        :return: ``refs/bzr/`` followed by the URL-quoted revision id
        """
        import urllib
        quoted_revid = urllib.quote(revid)
        return "refs/bzr/%s" % quoted_revid
145
546 by Jelmer Vernooij
Add more docstrings, support storing unusual file modes.
146
    def import_unusual_file_modes(self, rev, unusual_file_modes):
        """Store unusual file modes in the 'file-modes' revision property.

        :param rev: Revision whose properties are updated
        :param unusual_file_modes: Dictionary mapping paths to file modes;
            nothing is stored when it is empty
        """
        if not unusual_file_modes:
            return
        # Sort by path before bencoding the (path, mode) pairs
        paths = sorted(unusual_file_modes.keys())
        pairs = [(path, unusual_file_modes[path]) for path in paths]
        rev.properties['file-modes'] = bencode.bencode(pairs)
546 by Jelmer Vernooij
Add more docstrings, support storing unusual file modes.
151
547 by Jelmer Vernooij
Support getting unusual file modes out of revision properties.
152
    def export_unusual_file_modes(self, rev):
        """Read unusual file modes from the 'file-modes' revision property.

        :param rev: Revision to read the property from
        :return: Dictionary mapping file ids to file modes; empty if the
            revision has no 'file-modes' property
        """
        try:
            file_modes = rev.properties['file-modes']
        except KeyError:
            return {}
        modes_by_file_id = {}
        for (path, mode) in bencode.bdecode(file_modes.encode("utf-8")):
            modes_by_file_id[self.generate_file_id(path)] = mode
        return modes_by_file_id
547 by Jelmer Vernooij
Support getting unusual file modes out of revision properties.
159
727 by Jelmer Vernooij
Cope with different encodings better, rather than just stripping out
160
    def _generate_git_svn_metadata(self, rev, encoding):
        """Format the 'git-svn-id' revision property as a message tail.

        :param rev: Revision to read the property from
        :param encoding: Encoding to apply to the property value
        :return: ``"\\ngit-svn-id: <id>\\n"``, or the empty string if the
            revision has no 'git-svn-id' property
        """
        try:
            git_svn_id = rev.properties["git-svn-id"]
        except KeyError:
            return ""
        return "\ngit-svn-id: %s\n" % git_svn_id.encode(encoding)
643 by Jelmer Vernooij
Attempt to parse git-svn-id metadata.
167
638 by Jelmer Vernooij
Abstract support for hg-git metadata.
168
    def _generate_hg_message_tail(self, rev):
        """Build the hg-git metadata tail for a revision's commit message.

        Collects the 'hg:extra:branch', other 'hg:extra:*' and 'hg:renames'
        revision properties and hands them to format_hg_metadata.

        :param rev: Revision to read the properties from
        :return: The str produced by format_hg_metadata
        """
        extra_prefix = 'hg:extra:'
        extra = {}
        renames = []
        branch = 'default'
        for name in rev.properties:
            if name == 'hg:extra:branch':
                branch = rev.properties['hg:extra:branch']
            elif name.startswith(extra_prefix):
                # Match the full prefix including the trailing colon, so
                # that the slice below never cuts into an unrelated
                # property name that merely begins with 'hg:extra'.
                extra[name[len(extra_prefix):]] = base64.b64decode(
                    rev.properties[name])
            elif name == 'hg:renames':
                renames = bencode.bdecode(base64.b64decode(
                    rev.properties['hg:renames']))
            # TODO: Export other properties as 'bzr:' extras?
        ret = format_hg_metadata(renames, branch, extra)
        assert isinstance(ret, str)
        return ret
638 by Jelmer Vernooij
Abstract support for hg-git metadata.
185
643 by Jelmer Vernooij
Attempt to parse git-svn-id metadata.
186
    def _extract_git_svn_metadata(self, rev, message):
        """Strip a trailing git-svn-id line from a commit message.

        When the message ends with a ``git-svn-id: <id>`` line followed by a
        newline, the id is stored in the 'git-svn-id' revision property and
        the message is returned without that line. Any other message is
        returned unchanged.

        :param rev: Revision whose properties are updated
        :param message: Commit message (bytestring)
        :return: The message without the git-svn-id line
        """
        lines = message.split("\n")
        # Require the ": " separator up front, so that the split below
        # always yields a value instead of raising IndexError.
        if not (len(lines) >= 2 and lines[-1] == "" and
                lines[-2].startswith("git-svn-id: ")):
            return message
        git_svn_id = lines[-2].split(": ", 1)[1]
        rev.properties['git-svn-id'] = git_svn_id
        # The parsed (url, revnum, uuid) is not used yet; the call is kept
        # because it may reject malformed ids (parse_git_svn_id is defined
        # elsewhere - TODO confirm). The result is deliberately not
        # unpacked, which used to clobber the ``rev`` parameter.
        parse_git_svn_id(git_svn_id)
        # FIXME: Convert this to converted-from property somehow..
        ret = "\n".join(lines[:-2])
        assert isinstance(ret, str)
        return ret
643 by Jelmer Vernooij
Attempt to parse git-svn-id metadata.
197
638 by Jelmer Vernooij
Abstract support for hg-git metadata.
198
    def _extract_hg_metadata(self, rev, message):
        """Move hg-git metadata from a commit message to revision properties.

        The branch is stored as 'hg:extra:branch', every extra as a
        base64-encoded 'hg:extra:<name>' property and the renames as a
        base64-encoded, bencoded 'hg:renames' property.

        :param rev: Revision whose properties are updated
        :param message: Commit message to parse
        :return: The message as returned by extract_hg_metadata
        """
        message, renames, branch, extra = extract_hg_metadata(message)
        props = rev.properties
        if branch is not None:
            props['hg:extra:branch'] = branch
        for name, value in extra.iteritems():
            props['hg:extra:' + name] = base64.b64encode(value)
        if renames:
            # Stored as (new, old) pairs
            pairs = [(new, old) for (old, new) in renames.iteritems()]
            props['hg:renames'] = base64.b64encode(bencode.bencode(pairs))
        return message
208
900.1.2 by Jelmer Vernooij
Add functions for adding metadata to revision messages.
209
    def _extract_bzr_metadata(self, rev, message):
        """Split bzr roundtripping metadata off a commit message.

        :param rev: Revision being imported (not used here)
        :param message: Commit message to parse
        :return: Tuple with the remaining message and the metadata, as
            produced by extract_bzr_metadata
        """
        (stripped_message, metadata) = extract_bzr_metadata(message)
        return stripped_message, metadata
900.1.2 by Jelmer Vernooij
Add functions for adding metadata to revision messages.
212
727 by Jelmer Vernooij
Cope with different encodings better, rather than just stripping out
213
    def _decode_commit_message(self, rev, message, encoding):
        """Decode a git commit message.

        :param rev: Revision being imported (not used here)
        :param message: Commit message as a bytestring
        :param encoding: Encoding to decode the message with
        :return: Tuple with the decoded message and a fresh, empty
            BzrGitRevisionMetadata
        """
        decoded = message.decode(encoding)
        return decoded, BzrGitRevisionMetadata()
635.1.1 by Jelmer Vernooij
Add support for parsing hg-git metadata in the experimental mappings.
215
727 by Jelmer Vernooij
Cope with different encodings better, rather than just stripping out
216
    def _encode_commit_message(self, rev, message, encoding):
        """Encode a revision message for use as a git commit message.

        :param rev: Revision being exported (not used here)
        :param message: Revision message
        :param encoding: Encoding to apply
        :return: The encoded message
        """
        encoded = message.encode(encoding)
        return encoded
635.1.1 by Jelmer Vernooij
Add support for parsing hg-git metadata in the experimental mappings.
218
900.1.22 by Jelmer Vernooij
Fix file id map (de)serialization.
219
    def export_fileid_map(self, fileid_map):
        """Export a file id map to a Git blob.

        :param fileid_map: File id map, mapping paths to file ids
        :return: A Git blob object holding the serialized map
        """
        from dulwich.objects import Blob
        blob = Blob()
        chunks = serialize_fileid_map(fileid_map)
        blob.set_raw_chunks(chunks)
        return blob
900.1.22 by Jelmer Vernooij
Fix file id map (de)serialization.
229
1023 by Jelmer Vernooij
Set and verify testament.
230
    def export_commit(self, rev, tree_sha, parent_lookup, roundtrip,
                      verifiers):
        """Turn a Bazaar revision into a Git commit.

        :param rev: Bazaar revision to convert
        :param tree_sha: Tree sha for the commit
        :param parent_lookup: Function for looking up the GIT sha equiv of a
            bzr revision; a parent for which it raises KeyError or returns
            None is left out of the commit's parents
        :param roundtrip: Whether to store roundtripping information.
        :param verifiers: Verifiers info
        :return: dulwich.objects.Commit representing the revision
        """
        from dulwich.objects import Commit
        props = rev.properties
        commit = Commit()
        commit.tree = tree_sha

        metadata = None
        if roundtrip:
            metadata = BzrGitRevisionMetadata()
            metadata.verifiers = verifiers

        parents = []
        for parent_revid in rev.parent_ids:
            try:
                parent_sha = parent_lookup(parent_revid)
            except KeyError:
                # The parent cannot be expressed as a git parent; when
                # roundtripping, record the full parent list in the metadata
                if metadata is not None:
                    metadata.explicit_parent_ids = rev.parent_ids
                continue
            if parent_sha is None:
                continue
            assert len(parent_sha) == 40, (
                "unexpected length for %r" % parent_sha)
            parents.append(parent_sha)
        commit.parents = parents

        # An explicit encoding wins over an implicit one; UTF-8 is the
        # fallback. Only the explicit encoding is recorded on the commit.
        if 'git-explicit-encoding' in props:
            encoding = props['git-explicit-encoding']
        else:
            encoding = props.get('git-implicit-encoding', 'utf-8')
        commit.encoding = props.get('git-explicit-encoding')

        committer = rev.committer.encode(encoding)
        commit.committer = fix_person_identifier(committer)
        author = rev.get_apparent_authors()[0].encode(encoding)
        commit.author = fix_person_identifier(author)

        commit.commit_time = long(rev.timestamp)
        if 'author-timestamp' in props:
            commit.author_time = long(props['author-timestamp'])
        else:
            commit.author_time = commit.commit_time

        commit._commit_timezone_neg_utc = (
            "commit-timezone-neg-utc" in props)
        commit.commit_timezone = rev.timezone
        commit._author_timezone_neg_utc = (
            "author-timezone-neg-utc" in props)
        if 'author-timezone' in props:
            commit.author_timezone = int(props['author-timezone'])
        else:
            commit.author_timezone = commit.commit_timezone

        commit.message = self._encode_commit_message(rev, rev.message,
            encoding)
        assert type(commit.message) == str

        if metadata is not None:
            # Only revision ids that no git mapping can parse need to be
            # stored explicitly
            try:
                mapping_registry.parse_revision_id(rev.revision_id)
            except errors.InvalidRevisionId:
                metadata.revision_id = rev.revision_id
            # Properties this method already handles above (or that are
            # handled as file modes) are not copied into the metadata
            mapping_properties = set(
                ['author', 'author-timezone', 'author-timezone-neg-utc',
                 'commit-timezone-neg-utc', 'git-implicit-encoding',
                 'git-explicit-encoding', 'author-timestamp', 'file-modes'])
            for name, value in props.iteritems():
                if name not in mapping_properties:
                    metadata.properties[name] = value

        if self.roundtripping:
            commit.message = inject_bzr_metadata(commit.message, metadata,
                                                 encoding)
        assert type(commit.message) == str
        return commit
302
900.1.22 by Jelmer Vernooij
Fix file id map (de)serialization.
303
    def import_fileid_map(self, blob):
        """Convert a git file id map blob.

        :param blob: Git blob object with fileid map
        :return: The result of deserializing the blob's data (a dictionary
            mapping paths to file ids)
        """
        raw_map = blob.data
        return deserialize_fileid_map(raw_map)
900.1.22 by Jelmer Vernooij
Fix file id map (de)serialization.
310
900.1.44 by Jelmer Vernooij
Properly look up Bazaar revision ids for revision parents in case they are round-tripped.
311
    def import_commit(self, commit, lookup_parent_revid):
        """Convert a git commit to a bzr revision.

        :param commit: Git commit object; must not be None
        :param lookup_parent_revid: Function mapping the sha of a git parent
            to a Bazaar revision id; used unless the commit's metadata
            carries explicit parent ids
        :return: Tuple with a `bzrlib.foreign.ForeignRevision` object, the
            roundtripped revision id (None if the commit has no metadata)
            and the verifiers dictionary
        :raises AssertionError: if commit is None
        """
        if commit is None:
            raise AssertionError("Commit object can't be None")
        rev = ForeignRevision(commit.id, self,
                self.revision_id_foreign_to_bzr(commit.id))
        rev.git_metadata = None

        def decode_with(encoding):
            # Fill in committer, author and message using one encoding;
            # raises UnicodeDecodeError if any of them cannot be decoded
            rev.committer = str(commit.committer).decode(encoding)
            if commit.committer != commit.author:
                rev.properties['author'] = str(commit.author).decode(encoding)
            rev.message, rev.git_metadata = self._decode_commit_message(
                rev, commit.message, encoding)

        if commit.encoding is not None:
            rev.properties['git-explicit-encoding'] = commit.encoding
            decode_with(commit.encoding)
        else:
            # No encoding declared: try UTF-8 first, then latin1, and
            # remember a non-UTF-8 choice as the implicit encoding
            for candidate in ('utf-8', 'latin1'):
                try:
                    decode_with(candidate)
                except UnicodeDecodeError:
                    continue
                if candidate != 'utf-8':
                    rev.properties['git-implicit-encoding'] = candidate
                break

        # Author time and timezone are only stored when they differ from
        # the committer's
        if commit.commit_time != commit.author_time:
            rev.properties['author-timestamp'] = str(commit.author_time)
        if commit.commit_timezone != commit.author_timezone:
            rev.properties['author-timezone'] = "%d" % commit.author_timezone
        if commit._author_timezone_neg_utc:
            rev.properties['author-timezone-neg-utc'] = ""
        if commit._commit_timezone_neg_utc:
            rev.properties['commit-timezone-neg-utc'] = ""
        rev.timestamp = commit.commit_time
        rev.timezone = commit.commit_timezone

        rev.parent_ids = None
        metadata = rev.git_metadata
        if metadata is None:
            roundtrip_revid = None
            verifiers = {}
        else:
            roundtrip_revid = metadata.revision_id
            if metadata.explicit_parent_ids:
                rev.parent_ids = metadata.explicit_parent_ids
            rev.properties.update(metadata.properties)
            verifiers = metadata.verifiers
        if rev.parent_ids is None:
            # No explicit parents in the metadata: map the git parents
            parent_revids = [lookup_parent_revid(parent_sha)
                             for parent_sha in commit.parents]
            rev.parent_ids = tuple(parent_revids)
        return rev, roundtrip_revid, verifiers
151 by Jelmer Vernooij
Support converting git objects to bzr objects.
365
900.1.43 by Jelmer Vernooij
Some refactoring, support proper file ids in revision deltas.
366
    def get_fileid_map(self, lookup_object, tree_sha):
        """Obtain a fileid map for a particular tree.

        :param lookup_object: Function for looking up an object
        :param tree_sha: SHA of the root tree
        :return: GitFileIdMap instance; it is built from an empty dictionary
            when looking up the BZR_FILE_IDS_FILE entry raises KeyError
        """
        try:
            root_tree = lookup_object(tree_sha)
            file_id_map_sha = root_tree[self.BZR_FILE_IDS_FILE][1]
        except KeyError:
            return GitFileIdMap({}, self)
        file_id_map_blob = lookup_object(file_id_map_sha)
        file_ids = self.import_fileid_map(file_id_map_blob)
        return GitFileIdMap(file_ids, self)
380
97 by Jelmer Vernooij
use mapping object.
381
190 by Jelmer Vernooij
Bless current mapping as v1.
382
class BzrGitMappingv1(BzrGitMapping):
    """Git mapping that uses the 'git-v1' revision id prefix."""

    # Prefix of the Bazaar revision ids generated by this mapping
    revid_prefix = 'git-v1'
    experimental = False

    def __str__(self):
        """Return the revision id prefix as the mapping's name."""
        return self.revid_prefix
388
190 by Jelmer Vernooij
Bless current mapping as v1.
389
390
class BzrGitMappingExperimental(BzrGitMappingv1):
    """Experimental git mapping with roundtripping enabled.

    In addition to the v1 behaviour it reads and writes hg-git, git-svn-id
    and bzr metadata in commit messages.
    """

    revid_prefix = 'git-experimental'
    experimental = True
    roundtripping = True

    # Names of the control files used by this mapping
    BZR_FILE_IDS_FILE = '.bzrfileids'

    BZR_DUMMY_FILE = '.bzrdummy'

    def _decode_commit_message(self, rev, message, encoding):
        """Strip the embedded metadata from a message and decode the rest.

        hg-git metadata is extracted first, then the git-svn-id line, then
        the bzr metadata.

        :return: Tuple with the decoded message and the bzr metadata
        """
        without_hg = self._extract_hg_metadata(rev, message)
        without_svn = self._extract_git_svn_metadata(rev, without_hg)
        remainder, metadata = self._extract_bzr_metadata(rev, without_svn)
        return remainder.decode(encoding), metadata

    def _encode_commit_message(self, rev, message, encoding):
        """Encode a message and append the hg-git and git-svn-id tails."""
        encoded = message.encode(encoding)
        hg_tail = self._generate_hg_message_tail(rev)
        svn_tail = self._generate_git_svn_metadata(rev, encoding)
        return encoded + hg_tail + svn_tail

    def import_commit(self, commit, lookup_parent_revid):
        """Import a commit and set the 'converted_revision' property.

        :return: The same tuple as the base class implementation
        """
        base = super(BzrGitMappingExperimental, self)
        rev, roundtrip_revid, verifiers = base.import_commit(
            commit, lookup_parent_revid)
        rev.properties['converted_revision'] = "git %s\n" % commit.id
        return rev, roundtrip_revid, verifiers
642 by Jelmer Vernooij
In experimental mappings, set 'converted_revision' property.
415
97 by Jelmer Vernooij
use mapping object.
416
195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
417
class GitMappingRegistry(VcsMappingRegistry):
    """Registry with available git mappings."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Translate a Bazaar revision id into its Git counterpart.

        :param bzr_revid: Bazaar revision id. Apart from NULL_REVISION it
            must look like "<mapping name>:<rest>", where the mapping name
            starts with "git-" and is registered in this registry.
        :return: ``(ZERO_SHA, None)`` for NULL_REVISION, otherwise whatever
            the selected mapping's ``revision_id_bzr_to_foreign`` returns
        :raises errors.InvalidRevisionId: If the id does not start with
            "git-" or has no ":" separating the mapping name from the rest
        """
        if bzr_revid == NULL_REVISION:
            # Imported lazily so dulwich is only needed when this is hit.
            from dulwich.protocol import ZERO_SHA
            return ZERO_SHA, None
        # A "git-" id without a separator used to escape as a bare
        # ValueError from tuple unpacking; report it like any other
        # malformed revision id instead.
        if not bzr_revid.startswith("git-") or ":" not in bzr_revid:
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version = bzr_revid.split(":", 1)[0]
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    # Same callable exposed under a second name.
    parse_revision_id = revision_id_bzr_to_foreign
431
432
433
# Module-wide registry of the Bazaar <-> Git mappings.  Mappings are
# registered lazily by module and class name.
mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy(
    'git-v1', "bzrlib.plugins.git.mapping", "BzrGitMappingv1")
mapping_registry.register_lazy(
    'git-experimental', "bzrlib.plugins.git.mapping",
    "BzrGitMappingExperimental")
mapping_registry.set_default('git-v1')
195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
439
440
441
class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    @property
    def branch_format(self):
        """A new GitBranchFormat instance (imported on first use)."""
        from bzrlib.plugins.git.branch import GitBranchFormat
        return GitBranchFormat()

    @property
    def repository_format(self):
        """A new GitRepositoryFormat instance (imported on first use)."""
        from bzrlib.plugins.git.repository import GitRepositoryFormat
        return GitRepositoryFormat()

    def __init__(self):
        """Set up the foreign VCS with the module's mapping registry."""
        super(ForeignGit, self).__init__(mapping_registry)
        self.abbreviation = "git"

    @classmethod
    def serialize_foreign_revid(cls, foreign_revid):
        """Serialize a foreign revision id.

        :param foreign_revid: Foreign revision id
        :return: ``foreign_revid`` itself, unchanged
        """
        return foreign_revid

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Describe a foreign revision id for display.

        :param foreign_revid: Foreign revision id
        :return: Dictionary mapping the label "git commit" to the id
        """
        return {"git commit": foreign_revid}
465
466
467
# Module-level ForeignGit instance.
foreign_git = ForeignGit()
637 by Jelmer Vernooij
Allow single place for configuration of default mapping.
468
# get_default() hands back a callable; calling it yields the mapping object
# that is kept here as the module-wide default.
default_mapping = mapping_registry.get_default()()
212 by Jelmer Vernooij
Move conversion functions to mapping, use fetch_objects() from repository if present.
469
470
354 by Jelmer Vernooij
Support symlinks in conversion to git.
471
def symlink_to_blob(entry):
    """Create a Git Blob holding the target of a symlink entry.

    :param entry: Object with a ``symlink_target`` attribute
    :return: dulwich Blob whose data is the symlink target; unicode targets
        are encoded as UTF-8 first, anything else is stored as-is
    """
    from dulwich.objects import Blob
    symlink_target = entry.symlink_target
    # isinstance rather than an exact type comparison, so that subclasses
    # of unicode get encoded as well.
    if isinstance(symlink_target, unicode):
        symlink_target = symlink_target.encode('utf-8')
    blob = Blob()
    blob.data = symlink_target
    return blob
479
546 by Jelmer Vernooij
Add more docstrings, support storing unusual file modes.
480
521 by Jelmer Vernooij
Abstract out kind mapping a bit, initial work on support tree-references.
481
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Unix file mode as an integer
    :return: True if any of the user, group or other execute bits is set
    """
    any_exec_bit = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (mode & any_exec_bit) != 0
484
546 by Jelmer Vernooij
Add more docstrings, support storing unusual file modes.
485
521 by Jelmer Vernooij
Abstract out kind mapping a bit, initial work on support tree-references.
486
def mode_kind(mode):
    """Determine the Bazaar inventory kind based on Unix file mode.

    :param mode: Unix file mode as an integer
    :return: One of 'directory', 'file', 'symlink' or 'tree-reference'
    :raises AssertionError: If the type bits of the mode are not recognized
    """
    # Sixth octal digit of the mode (bits 15-17): zero is reported as a
    # directory, one covers the file-like types told apart below.
    top_digit = (mode >> 15) & 0x7
    if top_digit == 0:
        return 'directory'
    if top_digit != 1:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))
    # Fifth octal digit of the mode (bits 12-14).
    file_kind = (mode >> 12) & 0x7
    if file_kind == 0:
        return 'file'
    if file_kind == 2:
        return 'symlink'
    if file_kind == 6:
        return 'tree-reference'
    raise AssertionError(
        "Unknown file kind %d, perms=%o." % (file_kind, mode,))
505
354 by Jelmer Vernooij
Support symlinks in conversion to git.
506
527.1.6 by Jelmer Vernooij
Support sending git am-style patches with "bzr send --format=git".
507
def object_mode(kind, executable):
    """Determine the git file mode for a Bazaar kind and executable flag.

    :param kind: Bazaar inventory kind: 'directory', 'symlink', 'file' or
        'tree-reference'
    :param executable: Whether the execute bits should be set; only looked
        at for files and symlinks
    :return: Integer file mode
    :raises AssertionError: If ``kind`` is not one of the supported kinds
    """
    if kind == 'directory':
        return stat.S_IFDIR
    elif kind == 'tree-reference':
        # Imported lazily so dulwich is only needed for submodules.
        from dulwich.objects import S_IFGITLINK
        return S_IFGITLINK
    elif kind == 'symlink':
        mode = stat.S_IFLNK
    elif kind == 'file':
        # Regular file with rw-r--r-- permissions.
        mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IROTH)
    else:
        # Name the offending kind so the failure can be diagnosed.
        raise AssertionError("Unknown kind %r" % (kind,))
    if executable:
        mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return mode
525
526
527.1.6 by Jelmer Vernooij
Support sending git am-style patches with "bzr send --format=git".
527
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Inventory entry; its ``kind`` and ``executable``
        attributes are read
    :return: Whatever ``object_mode`` returns for those two values
    """
    kind = entry.kind
    is_executable = entry.executable
    return object_mode(kind, is_executable)
530
531
900.1.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
532
def directory_to_tree(entry, lookup_ie_sha1, unusual_modes, empty_file_name):
    """Create a Git Tree object from a Bazaar directory.

    :param entry: Inventory entry
    :param lookup_ie_sha1: Lookup the Git SHA1 for a inventory entry
    :param unusual_modes: Dictionary with unusual file modes by file ids
    :param empty_file_name: Name to use for dummy files in empty directories,
        None to ignore empty directories.
    :return: The Tree object, or None when the resulting tree is empty,
        ``entry`` has a parent and ``empty_file_name`` is None
    """
    from dulwich.objects import Blob, Tree
    tree = Tree()
    # Iterate over (name, child) pairs directly instead of looking each
    # child up again by name.
    for name, ie in entry.children.iteritems():
        try:
            mode = unusual_modes[ie.file_id]
        except KeyError:
            mode = entry_mode(ie)
        hexsha = lookup_ie_sha1(ie)
        # Children without a SHA are left out of the tree.
        if hexsha is not None:
            tree.add(name.encode("utf-8"), mode, hexsha)
    if entry.parent_id is not None and len(tree) == 0:
        # Only the root can be an empty tree
        if empty_file_name is None:
            return None
        # Placeholder: an empty blob stored as a regular rw-r--r-- file.
        placeholder_mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                            stat.S_IRGRP | stat.S_IROTH)
        tree.add(empty_file_name, placeholder_mode, Blob().id)
    return tree
559
560
548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
561
def extract_unusual_modes(rev):
    """Return the unusual file modes recorded for a revision.

    :param rev: Revision object; its ``revision_id`` selects the mapping
    :return: Empty dictionary if the revision id is rejected with
        InvalidRevisionId, otherwise the result of the mapping's
        ``export_unusual_file_modes`` for ``rev``
    """
    try:
        _foreign_revid, mapping = mapping_registry.parse_revision_id(
            rev.revision_id)
    except errors.InvalidRevisionId:
        # Revision id is not accepted by the registry: nothing recorded.
        return {}
    return mapping.export_unusual_file_modes(rev)
569
570
652 by Jelmer Vernooij
Split out git-svn-id parser as separate function, implement ForeignGit.serialize_foreign_revid.
571
def parse_git_svn_id(text):
    """Split the value of a git-svn-id line into its components.

    The text is expected to look like "<url>@<revnum> <uuid>"; it is split
    on the last space and then on the last "@".

    :param text: git-svn-id value
    :return: Tuple with the URL, the revision number as an integer, and
        the UUID
    :raises ValueError: If a separator is missing or the revision number
        is not an integer
    """
    url_and_rev, repo_uuid = text.rsplit(" ", 1)
    url, revnum_text = url_and_rev.rsplit("@", 1)
    revnum = int(revnum_text)
    return (url, revnum, repo_uuid)
900.1.33 by Jelmer Vernooij
Fix file id map lookups.
575
576
577
class GitFileIdMap(object):
578
579
    def __init__(self, file_ids, mapping):
580
        self.file_ids = file_ids
581
        self.paths = None
582
        self.mapping = mapping
583
584
    def lookup_file_id(self, path):
984 by Jelmer Vernooij
Handle non-ascii characters in filenames.
585
        assert type(path) is str
900.1.33 by Jelmer Vernooij
Fix file id map lookups.
586
        try:
973 by Jelmer Vernooij
Add tests for generate_file_id.
587
            file_id = self.file_ids[path]
900.1.33 by Jelmer Vernooij
Fix file id map lookups.
588
        except KeyError:
973 by Jelmer Vernooij
Add tests for generate_file_id.
589
            file_id = self.mapping.generate_file_id(path)
590
        assert type(file_id) is str
591
        return file_id
900.1.33 by Jelmer Vernooij
Fix file id map lookups.
592
593
    def lookup_path(self, file_id):
594
        if self.paths is None:
595
            self.paths = {}
596
            for k, v in self.file_ids.iteritems():
597
                self.paths[v] = k
598
        try:
984 by Jelmer Vernooij
Handle non-ascii characters in filenames.
599
            path = self.paths[file_id]
900.1.33 by Jelmer Vernooij
Fix file id map lookups.
600
        except KeyError:
601
            return self.mapping.parse_file_id(file_id)
984 by Jelmer Vernooij
Handle non-ascii characters in filenames.
602
        else:
603
            assert type(path) is str
604
            return path