~launchpad-pqm/launchpad/devel

18228.2.1 by Colin Watson
Refactor CodeImportSourceDetails.fromCodeImport to CodeImportSourceDetails.fromCodeImportJob, to make it easier to issue macaroons in future.
1
# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
8687.15.17 by Karl Fogel
Add the copyright header block to the rest of the files under lib/lp/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
5670.3.1 by jml at canonical
Just the BazaarBranchStore
3
4
"""The code import worker. This imports code from foreign repositories."""
5
6
__metaclass__ = type
5670.2.9 by jml at canonical
Add support for storing foreign branches.
7
__all__ = [
8
    'BazaarBranchStore',
13756.4.1 by Jelmer Vernooij
Re-import bzr code imports.
9
    'BzrImportWorker',
7675.76.2 by Michael Hudson
small refactoring
10
    'BzrSvnImportWorker',
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
11
    'CSCVSImportWorker',
13756.3.7 by Jelmer Vernooij
Use SafeBranchOpener, but with AcceptAnythingPolicy.
12
    'CodeImportBranchOpenPolicy',
6055.1.3 by Michael Hudson
re-type what i had in my imploded loom :(
13
    'CodeImportSourceDetails',
10224.12.9 by Michael Hudson
self-review begins
14
    'CodeImportWorkerExitCode',
5670.2.29 by jml at canonical
ForeignBranchStore -> ForeignTreeStore
15
    'ForeignTreeStore',
7675.76.2 by Michael Hudson
small refactoring
16
    'GitImportWorker',
5821.1.5 by Michael Hudson
a test. which does not work
17
    'ImportWorker',
18228.2.2 by Colin Watson
Refactor various bits of the code import worker to be less Bazaar-specific.
18
    'ToBzrImportWorker',
5670.2.9 by jml at canonical
Add support for storing foreign branches.
19
    'get_default_bazaar_branch_store',
7675.76.2 by Michael Hudson
small refactoring
20
    ]
5670.3.1 by jml at canonical
Just the BazaarBranchStore
21
22
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
23
import io
5670.2.12 by jml at canonical
The worker itself! Finally. Still needs some cleanup.
24
import os
25
import shutil
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
26
import subprocess
27
from urlparse import (
28
    urlsplit,
29
    urlunsplit,
30
    )
5670.2.12 by jml at canonical
The worker itself! Finally. Still needs some cleanup.
31
14612.2.10 by William Grant
Fix some lp.codehosting imports to be first again. It loads bzr plugins :(
32
# FIRST Ensure correct plugins are loaded. Do not delete this comment or the
33
# line below this comment.
34
import lp.codehosting
35
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
36
from bzrlib.branch import (
37
    Branch,
38
    InterBranch,
39
    )
40
from bzrlib.bzrdir import (
41
    BzrDir,
42
    BzrDirFormat,
43
    )
44
from bzrlib.errors import (
13168.11.1 by Jelmer Vernooij
Initial work on supporting FAILURE_INVALID and FAILURE_UNSUPPORTED_FEATURE return types from imports.
45
    ConnectionError,
13168.11.3 by Jelmer Vernooij
Test classification of unsupported features.
46
    InvalidEntryName,
13261.7.11 by Jelmer Vernooij
Format imports.
47
    NoRepositoryPresent,
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
48
    NoSuchFile,
49
    NotBranchError,
13958.3.2 by Jelmer Vernooij
Follow redirects when opening branches.
50
    TooManyRedirections,
51
    )
52
from bzrlib.transport import (
14022.6.17 by Jelmer Vernooij
Reintroduce stacking support.
53
    get_transport_from_path,
54
    get_transport_from_url,
13958.3.2 by Jelmer Vernooij
Follow redirects when opening branches.
55
    )
13980.2.1 by Jeroen Vermeulen
Lint. Lots of lint.
56
import bzrlib.ui
13261.7.13 by Jelmer Vernooij
Update bzr-hg to newer version, fix tests.
57
from bzrlib.upgrade import upgrade
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
58
from bzrlib.urlutils import (
59
    join as urljoin,
60
    local_path_from_url,
61
    )
62
import cscvs
63
from cscvs.cmds import totla
64
import CVS
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
65
from dulwich.errors import GitProtocolError
66
from dulwich.protocol import (
67
    pkt_line,
68
    Protocol,
69
    )
13756.3.7 by Jelmer Vernooij
Use SafeBranchOpener, but with AcceptAnythingPolicy.
70
from lazr.uri import (
71
    InvalidURIError,
72
    URI,
73
    )
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
74
from pymacaroons import Macaroon
13811.1.1 by Jeroen Vermeulen
More lint.
75
import SCM
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
76
from zope.component import getUtility
77
from zope.security.proxy import removeSecurityProxy
13756.3.7 by Jelmer Vernooij
Use SafeBranchOpener, but with AcceptAnythingPolicy.
78
10224.12.9 by Michael Hudson
self-review begins
79
from lp.code.enums import RevisionControlSystems
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
80
from lp.code.interfaces.branch import (
81
    get_blacklisted_hostnames,
82
    IBranch,
83
    )
14022.6.17 by Jelmer Vernooij
Reintroduce stacking support.
84
from lp.code.interfaces.codehosting import (
85
    branch_id_alias,
86
    compose_public_url,
87
    )
16927.1.1 by William Grant
Kill off CSCVS Subversion imports in favour of bzr-svn.
88
from lp.codehosting.codeimport.foreigntree import CVSWorkingTree
13811.1.1 by Jeroen Vermeulen
More lint.
89
from lp.codehosting.codeimport.tarball import (
90
    create_tarball,
91
    extract_tarball,
92
    )
93
from lp.codehosting.codeimport.uifactory import LoggingUIFactory
13756.3.7 by Jelmer Vernooij
Use SafeBranchOpener, but with AcceptAnythingPolicy.
94
from lp.codehosting.safe_open import (
95
    BadUrl,
96
    BranchOpenPolicy,
97
    SafeBranchOpener,
98
    )
14612.2.1 by William Grant
format-imports on lib/. So many imports.
99
from lp.services.config import config
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
100
from lp.services.macaroons.interfaces import IMacaroonIssuer
11382.6.34 by Gavin Panella
Reformat imports in all files touched so far.
101
from lp.services.propertycache import cachedproperty
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
102
from lp.services.timeout import urlfetch
18228.3.4 by Colin Watson
Refactor common URL sanitisation code out to lp.services.utils.sanitise_urls.
103
from lp.services.utils import sanitise_urls
5670.3.1 by jml at canonical
Just the BazaarBranchStore
104
105
13756.3.7 by Jelmer Vernooij
Use SafeBranchOpener, but with AcceptAnythingPolicy.
106
class CodeImportBranchOpenPolicy(BranchOpenPolicy):
    """Policy deciding which URLs a code import is allowed to open.

    The rules enforced are:
     - branch references are followed,
     - Launchpad-hosted URLs are refused when the source and the target use
       the same revision control system (Bazaar to Bazaar, Git to Git),
     - URLs on blacklisted hosts are refused,
     - only the schemes listed in `allowed_schemes` may be opened.
    """

    allowed_schemes = ['http', 'https', 'svn', 'git', 'ftp', 'bzr']

    def __init__(self, rcstype, target_rcstype):
        """Remember the source and target revision control system types.

        :param rcstype: The type of the repository being imported from.
        :param target_rcstype: The type of the repository being imported to.
        """
        self.rcstype = rcstype
        self.target_rcstype = target_rcstype

    def shouldFollowReferences(self):
        """See `BranchOpenPolicy.shouldFollowReferences`.

        Branch references are always followed.
        """
        return True

    def transformFallbackLocation(self, branch, url):
        """See `BranchOpenPolicy.transformFallbackLocation`.

        The fallback location is resolved relative to the base of `branch`.
        The second element of the returned pair is always True, signalling
        that the resulting URL still has to be checked.
        """
        resolved_url = urljoin(branch.base, url)
        return resolved_url, True

    def checkOneURL(self, url):
        """See `BranchOpenPolicy.checkOneURL`.

        :raises BadUrl: if `url` cannot be parsed, if it is under the
            Launchpad main site domain while the source and target types
            are the same, if it is under a blacklisted hostname, or if its
            scheme is not one of `allowed_schemes`.
        """
        try:
            parsed = URI(url)
        except InvalidURIError:
            raise BadUrl(url)
        # Importing from Launchpad into Launchpad is only refused when no
        # conversion between revision control systems is involved.
        same_rcs = self.rcstype == self.target_rcstype
        if same_rcs and parsed.underDomain(config.vhost.mainsite.hostname):
            raise BadUrl(url)
        if any(parsed.underDomain(blacklisted)
               for blacklisted in get_blacklisted_hostnames()):
            raise BadUrl(url)
        if parsed.scheme not in self.allowed_schemes:
            raise BadUrl(url)
158
159
10224.12.9 by Michael Hudson
self-review begins
160
class CodeImportWorkerExitCode:
    """Exit codes used by the code import worker script.

    The numeric values are what the worker process exits with, so they must
    not be renumbered.
    """

    # Outcomes reported as some form of success.
    SUCCESS = 0
    SUCCESS_NOCHANGE = 2
    SUCCESS_PARTIAL = 3

    # Outcomes reported as some form of failure.
    FAILURE = 1
    FAILURE_INVALID = 4
    FAILURE_UNSUPPORTED_FEATURE = 5
    FAILURE_FORBIDDEN = 6
    FAILURE_REMOTE_BROKEN = 7
10224.12.9 by Michael Hudson
self-review begins
171
172
5670.3.1 by jml at canonical
Just the BazaarBranchStore
173
class BazaarBranchStore:
    """A place where Bazaar branches of code imports are kept.

    Every branch is stored on the store's transport under a name derived
    from the id of the corresponding database branch.
    """

    def __init__(self, transport):
        """Construct a Bazaar branch store based at `transport`."""
        self.transport = transport

    def _getMirrorURL(self, db_branch_id):
        """Return the URL at which the branch `db_branch_id` is stored.

        The id is rendered as zero-padded, eight-digit hexadecimal and
        joined onto the base URL of the store's transport.
        """
        hex_name = '%08x' % db_branch_id
        return urljoin(self.transport.base, hex_name)

    def pull(self, db_branch_id, target_path, required_format,
             needs_tree=False, stacked_on_url=None):
        """Pull down the Bazaar branch of an import to `target_path`.

        If nothing is stored for `db_branch_id` yet, a fresh branch and
        repository in `required_format` are created at `target_path`
        instead (stacked on `stacked_on_url` when one is given).

        :param db_branch_id: The id of the database branch.
        :param target_path: Local path to place the branch at.
        :param required_format: The format the local copy must end up in.
        :param needs_tree: Whether a working tree should be created.
        :param stacked_on_url: URL to stack a newly created branch on; it
            is not applied when an existing branch is copied down.
        :return: A Bazaar branch for the code import corresponding to the
            database branch with id `db_branch_id`.
        """
        stored_url = self._getMirrorURL(db_branch_id)
        try:
            stored_dir = BzrDir.open(stored_url)
        except NotBranchError:
            fresh_branch = BzrDir.create_branch_and_repo(
                target_path, format=required_format)
            if needs_tree:
                fresh_branch.bzrdir.create_workingtree()
            if stacked_on_url:
                fresh_branch.set_stacked_on_url(stacked_on_url)
            return fresh_branch
        # Calling "stored_dir.sprout()" would be the proper way to do this,
        # but a 2a fetch slowly works out which revisions are in the
        # ancestry of the remote tip.  We do not care about that, so the
        # control directory is simply copied wholesale at the vfs level.
        control_abspath = stored_dir.transport.abspath('.')
        control_relpath = stored_dir.root_transport.relpath(control_abspath)
        local_root = get_transport_from_path(target_path)
        local_control = local_root.clone(control_relpath)
        local_control.create_prefix()
        stored_dir.transport.copy_tree_to_transport(local_control)
        local_dir = BzrDir.open_from_transport(local_root)
        if local_dir.needs_format_conversion(format=required_format):
            # Clear out any backup left by an earlier upgrade before
            # upgrading the copy.
            try:
                local_dir.root_transport.delete_tree('backup.bzr')
            except NoSuchFile:
                pass
            upgrade(target_path, required_format, clean_up=True)
        if needs_tree:
            local_dir.create_workingtree()
        return local_dir.open_branch()

    def push(self, db_branch_id, bzr_branch, required_format,
             stacked_on_url=None):
        """Push up `bzr_branch` as the stored branch for `db_branch_id`.

        :param db_branch_id: The id of the database branch.
        :param bzr_branch: The local Bazaar branch to store.
        :param required_format: The format the stored branch must be in.
        :param stacked_on_url: If not None, the URL the stored branch is
            made to stack on.
        :return: A boolean that is true if the push was non-trivial
            (i.e. actually transferred revisions).
        """
        self.transport.create_prefix()
        stored_url = self._getMirrorURL(db_branch_id)
        # Only set when the stored branch has to be replaced by one in a
        # newer format.
        outdated_branch = None
        try:
            stored_branch = Branch.open(stored_url)
        except NotBranchError:
            stored_branch = BzrDir.create_branch_and_repo(
                stored_url, format=required_format)
        else:
            if stored_branch.bzrdir.needs_format_conversion(required_format):
                # For upgrades, push to a new branch in the new format.
                # Once the push is done, the old .bzr directory is retired
                # and the new one renamed into its place.
                outdated_branch = stored_branch
                staging_url = urljoin(stored_url, "backup.bzr")
                try:
                    stored_branch.bzrdir.root_transport.delete_tree(
                        'backup.bzr')
                except NoSuchFile:
                    pass
                stored_branch = BzrDir.create_branch_and_repo(
                    staging_url, format=required_format)
        # This can be done safely, since only modern formats are used to
        # import to.
        if stacked_on_url is not None:
            stored_branch.set_stacked_on_url(stacked_on_url)
        outcome = stored_branch.pull(bzr_branch, overwrite=True)
        # Because of the way incremental imports work, the repository may
        # hold revisions that are not in the ancestry of the branch tip.
        # Those have to be transferred as well.
        stored_branch.repository.fetch(bzr_branch.repository)
        if outdated_branch is not None:
            # The format has changed; move the new format branch in place.
            root = outdated_branch.bzrdir.root_transport
            root.delete_tree('.bzr')
            root.rename("backup.bzr/.bzr", ".bzr")
            root.rmdir("backup.bzr")
        return outcome.old_revid != outcome.new_revid
5670.3.1 by jml at canonical
Just the BazaarBranchStore
274
275
276
def get_default_bazaar_branch_store():
    """Return the default `BazaarBranchStore`.

    The store is based on a transport opened at the URL found in
    ``config.codeimport.bazaar_branch_store``.
    """
    store_transport = get_transport_from_url(
        config.codeimport.bazaar_branch_store)
    return BazaarBranchStore(store_transport)
5670.2.9 by jml at canonical
Add support for storing foreign branches.
280
281
6015.2.4 by Michael Hudson
docstrings and some renaming
282
class CodeImportSourceDetails:
    """The information needed to process an import.

    As the worker doesn't talk to the database, we don't use
    `CodeImport` objects for this.

    The 'fromArguments' and 'asArguments' methods convert to and from a form
    of the information suitable for passing around on executables' command
    lines.

    :ivar target_id: The id of the Bazaar branch or the path of the Git
        repository associated with this code import, used for locating the
        existing import and the foreign tree.
    :ivar rcstype: 'cvs', 'git', 'bzr-svn', 'bzr' as appropriate.
    :ivar target_rcstype: 'bzr' or 'git' as appropriate.
    :ivar url: The branch URL if rcstype in ['bzr-svn', 'git', 'bzr'], None
        otherwise.
    :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise.
    :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise.
    :ivar stacked_on_url: The URL of the branch that the associated branch
        is stacked on, if any.
    :ivar macaroon: A macaroon granting authority to push to the target
        repository if target_rcstype == 'git', None otherwise.
    """

    def __init__(self, target_id, rcstype, target_rcstype, url=None,
                 cvs_root=None, cvs_module=None, stacked_on_url=None,
                 macaroon=None):
        """Store the given details; see the class docstring for meanings."""
        self.target_id = target_id
        self.rcstype = rcstype
        self.target_rcstype = target_rcstype
        self.url = url
        self.cvs_root = cvs_root
        self.cvs_module = cvs_module
        self.stacked_on_url = stacked_on_url
        self.macaroon = macaroon

    @classmethod
    def fromArguments(cls, arguments):
        """Convert command line-style arguments to an instance.

        The expected layout is the one produced by `asArguments`: the
        target id, then ``rcstype`` or ``rcstype:target_rcstype``, then the
        source-specific values (a URL optionally followed by a stacked-on
        URL, or the CVS root and module), then a serialized macaroon when
        the target is Git.

        :param arguments: A sequence of strings.  It is copied before being
            parsed, so the caller's sequence is left unmodified.
        :return: A new instance of this class.
        :raises AssertionError: if the source or target type is unknown.
        :raises ValueError: if a CVS import is not followed by exactly a
            root and a module.
        """
        # Parsing consumes items with pop(0); do that on a private copy so
        # the caller does not find its argument list emptied afterwards.
        arguments = list(arguments)
        target_id = arguments.pop(0)
        rcstype = arguments.pop(0)
        # XXX cjwatson 2016-10-12: Remove compatibility code once the
        # scheduler always passes both source and target types.
        if ':' in rcstype:
            rcstype, target_rcstype = rcstype.split(':', 1)
        else:
            target_rcstype = 'bzr'
        if rcstype in ['bzr-svn', 'git', 'bzr']:
            url = arguments.pop(0)
            if target_rcstype == 'bzr':
                # The stacked-on URL is optional for Bazaar targets.
                try:
                    stacked_on_url = arguments.pop(0)
                except IndexError:
                    stacked_on_url = None
            else:
                stacked_on_url = None
            cvs_root = cvs_module = None
        elif rcstype == 'cvs':
            url = None
            stacked_on_url = None
            [cvs_root, cvs_module] = arguments
        else:
            raise AssertionError("Unknown rcstype %r." % rcstype)
        if target_rcstype == 'bzr':
            # Bazaar targets are identified by their numeric branch id.
            target_id = int(target_id)
            macaroon = None
        elif target_rcstype == 'git':
            macaroon = Macaroon.deserialize(arguments.pop(0))
        else:
            raise AssertionError("Unknown target_rcstype %r." % target_rcstype)
        return cls(
            target_id, rcstype, target_rcstype, url, cvs_root, cvs_module,
            stacked_on_url, macaroon)

    @classmethod
    def fromCodeImportJob(cls, job):
        """Convert a `CodeImportJob` to an instance.

        :param job: The job whose ``code_import`` describes the import.
        :return: A new instance of this class.
        :raises AssertionError: if the import's rcs_type is not one of the
            known `RevisionControlSystems` values.
        """
        code_import = job.code_import
        target = code_import.target
        if IBranch.providedBy(target):
            # Only stack on branches that are not private.
            if target.stacked_on is not None and not target.stacked_on.private:
                stacked_path = branch_id_alias(target.stacked_on)
                stacked_on_url = compose_public_url('http', stacked_path)
            else:
                stacked_on_url = None
            target_id = target.id
        else:
            # We don't have a better way to identify the target repository
            # than the mutable unique name, but the macaroon constrains
            # pushes tightly enough that the worst case is an authentication
            # failure.
            # NOTE(review): stacked_on_url is left unbound on this path, so
            # the BZR_SVN and BZR cases below presumably never see a
            # non-Bazaar target -- confirm.
            target_id = target.unique_name
        if code_import.rcs_type == RevisionControlSystems.BZR_SVN:
            return cls(
                target_id, 'bzr-svn', 'bzr', str(code_import.url),
                stacked_on_url=stacked_on_url)
        elif code_import.rcs_type == RevisionControlSystems.CVS:
            return cls(
                target_id, 'cvs', 'bzr',
                cvs_root=str(code_import.cvs_root),
                cvs_module=str(code_import.cvs_module))
        elif code_import.rcs_type == RevisionControlSystems.GIT:
            if IBranch.providedBy(target):
                return cls(
                    target_id, 'git', 'bzr', str(code_import.url),
                    stacked_on_url=stacked_on_url)
            else:
                # Git targets need a macaroon issued for this job.
                issuer = getUtility(IMacaroonIssuer, 'code-import-job')
                macaroon = removeSecurityProxy(issuer).issueMacaroon(job)
                return cls(
                    target_id, 'git', 'git', str(code_import.url),
                    macaroon=macaroon)
        elif code_import.rcs_type == RevisionControlSystems.BZR:
            return cls(
                target_id, 'bzr', 'bzr', str(code_import.url),
                stacked_on_url=stacked_on_url)
        else:
            raise AssertionError("Unknown rcstype %r." % code_import.rcs_type)

    def asArguments(self):
        """Return a list of arguments suitable for passing to a child process.

        The source and target types are always emitted in the combined
        ``rcstype:target_rcstype`` form.

        :raises AssertionError: if `rcstype` is unknown.
        """
        result = [
            str(self.target_id), '%s:%s' % (self.rcstype, self.target_rcstype)]
        if self.rcstype in ['bzr-svn', 'git', 'bzr']:
            result.append(self.url)
            if self.stacked_on_url is not None:
                result.append(self.stacked_on_url)
        elif self.rcstype == 'cvs':
            result.append(self.cvs_root)
            result.append(self.cvs_module)
        else:
            raise AssertionError("Unknown rcstype %r." % self.rcstype)
        if self.target_rcstype == 'git':
            # XXX cjwatson 2016-10-12: Consider arranging for this to be
            # passed to worker processes in the environment instead.
            result.append(self.macaroon.serialize())
        return result
421
6015.2.2 by Michael Hudson
move foreign tree away from database objects
422
8615.12.4 by Michael Hudson
extract class that stores and retrieves branches from a transport from ForeignTreeStore
423
class ImportDataStore:
    """A store for data associated with an import.

    Import workers can store and retrieve files into and from the store using
    `put()` and `fetch()`.

    So this store can find files stored by previous versions of this code, the
    files are stored at ``<BRANCH ID IN HEX>.<EXT>`` where BRANCH ID comes
    from the CodeImportSourceDetails used to construct the instance and EXT
    comes from the local name passed to `put` or `fetch`.
    """

    def __init__(self, transport, source_details):
        """Initialize an `ImportDataStore`.

        :param transport: The transport files will be stored on.
        :param source_details: The `CodeImportSourceDetails` object, used to
            know where to store files on the remote transport.
        """
        self.source_details = source_details
        self._transport = transport
        self._target_id = source_details.target_id

    def _getRemoteName(self, local_name):
        """Convert `local_name` to the name used to store a file.

        The algorithm is a little stupid for historical reasons: we chop off
        the extension and stick that on the end of the branch id from the
        source_details we were constructed with, in hex padded to 8
        characters.  For example 'tree.tar.gz' might become '0000a23d.tar.gz'
        or 'git.db' might become '000003e4.db'.

        :param local_name: The local name of the file to be stored.
        :return: The name to store the file as on the remote transport.
        :raise AssertionError: if `local_name` contains a '/' or has no
            extension.
        """
        if '/' in local_name:
            raise AssertionError("local_name must be a name, not a path")
        # Use find() rather than index(): index() raises ValueError when
        # there is no dot, which made the check below unreachable.
        dot_index = local_name.find('.')
        if dot_index < 0:
            raise AssertionError("local_name must have an extension.")
        # Everything from the first dot onwards counts as the extension, so
        # 'tree.tar.gz' keeps '.tar.gz'.
        ext = local_name[dot_index:]
        return '%08x%s' % (self._target_id, ext)

    def fetch(self, filename, dest_transport=None):
        """Retrieve `filename` from the store.

        :param filename: The name of the file to retrieve (must be a filename,
            not a path).
        :param dest_transport: The transport to retrieve the file to,
            defaulting to ``get_transport_from_path('.')``.
        :return: A boolean, true if the file was found and retrieved, false
            otherwise.
        """
        if dest_transport is None:
            dest_transport = get_transport_from_path('.')
        remote_name = self._getRemoteName(filename)
        if not self._transport.has(remote_name):
            return False
        remote_file = self._transport.get(remote_name)
        try:
            dest_transport.put_file(filename, remote_file)
        finally:
            # Mirror put(): always release the file handle we opened.
            remote_file.close()
        return True

    def put(self, filename, source_transport=None):
        """Put `filename` into the store.

        :param filename: The name of the file to store (must be a filename,
            not a path).
        :param source_transport: The transport to look for the file on,
            defaulting to ``get_transport_from_path('.')``.
        """
        if source_transport is None:
            source_transport = get_transport_from_path('.')
        remote_name = self._getRemoteName(filename)
        local_file = source_transport.get(filename)
        try:
            # Inside the try so that a failure to create the remote
            # directory does not leak the already-opened local file.
            self._transport.create_prefix()
            self._transport.put_file(remote_name, local_file)
        finally:
            local_file.close()
503
504
5670.2.29 by jml at canonical
ForeignBranchStore -> ForeignTreeStore
505
class ForeignTreeStore:
    """Manages retrieving and storing foreign working trees.

    The code import system stores tarballs of foreign working trees on
    another system. The tarballs are kept in predictable locations based on
    the ID of the branch associated to the `CodeImport`.

    The tarballs are all kept in one directory. The filename of a tarball is
    XXXXXXXX.tar.gz, where 'XXXXXXXX' is the ID of the `CodeImport`'s branch
    in hex.

    Only CVS working trees are handled by `_getForeignTree`.
    """

    def __init__(self, import_data_store):
        """Construct a `ForeignTreeStore`.

        :param import_data_store: The `ImportDataStore` used to put and
            fetch the tarballs; its `source_details` describe the foreign
            tree.
        """
        self.import_data_store = import_data_store

    def _getForeignTree(self, target_path):
        """Return a foreign tree object for `target_path`.

        :raise AssertionError: if the source details' `rcstype` is not
            'cvs'.
        """
        source_details = self.import_data_store.source_details
        if source_details.rcstype == 'cvs':
            return CVSWorkingTree(
                source_details.cvs_root, source_details.cvs_module,
                target_path)
        else:
            raise AssertionError(
                "unknown RCS type: %r" % source_details.rcstype)

    def archive(self, foreign_tree):
        """Archive the foreign tree.

        A tarball of `foreign_tree.local_path` is created in the current
        directory and then put into the import data store.
        """
        local_name = 'foreign_tree.tar.gz'
        # Use the same name for creating and uploading the tarball so the
        # two cannot drift apart.
        create_tarball(foreign_tree.local_path, local_name)
        self.import_data_store.put(local_name)

    def fetch(self, target_path):
        """Fetch the foreign branch for `source_details` to `target_path`.

        If there is no tarball archived for `source_details`, then try to
        download (i.e. checkout) the foreign tree from its source repository,
        generally on a third party server.
        """
        try:
            return self.fetchFromArchive(target_path)
        except NoSuchFile:
            return self.fetchFromSource(target_path)

    def fetchFromSource(self, target_path):
        """Fetch the foreign tree for `source_details` to `target_path`.

        :return: The foreign tree object, after `checkout()` has been called
            on it.
        """
        branch = self._getForeignTree(target_path)
        branch.checkout()
        return branch

    def fetchFromArchive(self, target_path):
        """Fetch the foreign tree for `source_details` from the archive.

        :return: The foreign tree object, after the archived tarball has been
            extracted to `target_path` and `update()` has been called on it.
        :raise NoSuchFile: if the store holds no tarball for this import.
        """
        local_name = 'foreign_tree.tar.gz'
        if not self.import_data_store.fetch(local_name):
            raise NoSuchFile(local_name)
        extract_tarball(local_name, target_path)
        tree = self._getForeignTree(target_path)
        tree.update()
        return tree
570
571
5670.2.12 by jml at canonical
The worker itself! Finally. Still needs some cleanup.
572
class ImportWorker:
    """Oversees the actual work of a code import."""

    def __init__(self, source_details, logger, opener_policy):
        """Construct an `ImportWorker`.

        :param source_details: A `CodeImportSourceDetails` object.
        :param logger: A `Logger` to pass to cscvs.
        :param opener_policy: Policy object that decides what branches can
             be imported
        """
        self.source_details = source_details
        self._logger = logger
        self._opener_policy = opener_policy

    def getWorkingDirectory(self):
        """The directory we should change to and store all scratch files in.
        """
        base = config.codeimportworker.working_directory_root
        dirname = 'worker-for-branch-%s' % self.source_details.target_id
        return os.path.join(base, dirname)

    def run(self):
        """Run the code import job.

        This is the primary public interface to the `ImportWorker`. This
        method:

         1. Retrieves an up-to-date foreign tree to import.
         2. Gets the Bazaar branch to import into.
         3. Imports the foreign tree into the Bazaar branch. If we've
            already imported this before, we synchronize the imported Bazaar
            branch with the latest changes to the foreign tree.
         4. Publishes the newly-updated Bazaar branch, making it available to
            Launchpad users.
         5. Archives the foreign tree, so that we can update it quickly next
            time.

        The work itself is delegated to `_doImport`, which runs with a
        freshly-created working directory as the current directory.  The
        previous current directory is restored and the working directory
        removed afterwards, whether or not the import succeeded.

        :return: Whatever `_doImport` returns.
        """
        working_directory = self.getWorkingDirectory()
        if os.path.exists(working_directory):
            shutil.rmtree(working_directory)
        os.makedirs(working_directory)
        saved_pwd = os.getcwd()
        os.chdir(working_directory)
        try:
            return self._doImport()
        finally:
            # Leave the working directory before deleting it.  This way the
            # cwd is restored even if the removal fails, and a relative
            # working directory path is resolved against the directory it
            # was created in.
            try:
                os.chdir(saved_pwd)
            finally:
                shutil.rmtree(working_directory)

    def _doImport(self):
        """Perform the import.

        Subclasses must override this.

        :return: A CodeImportWorkerExitCode
        """
        raise NotImplementedError()
628
629
18228.2.2 by Colin Watson
Refactor various bits of the code import worker to be less Bazaar-specific.
630
class ToBzrImportWorker(ImportWorker):
    """An `ImportWorker` whose import target is a Bazaar branch."""

    # Where the Bazaar working tree will be stored.
    BZR_BRANCH_PATH = 'bzr_branch'

    # Should `getBazaarBranch` create a working tree?
    needs_bzr_tree = True

    required_format = BzrDirFormat.get_default_format()

    def __init__(self, source_details, import_data_transport,
                 bazaar_branch_store, logger, opener_policy):
        """Construct a `ToBzrImportWorker`.

        :param source_details: A `CodeImportSourceDetails` object.
        :param import_data_transport: The transport handed to the
            `ImportDataStore` created for this worker.
        :param bazaar_branch_store: A `BazaarBranchStore`. The import worker
            uses this to fetch and store the Bazaar branches that are created
            and updated during the import process.
        :param logger: A `Logger` to pass to cscvs.
        :param opener_policy: Policy object that decides what branches can
             be imported
        """
        super(ToBzrImportWorker, self).__init__(
            source_details, logger, opener_policy)
        self.bazaar_branch_store = bazaar_branch_store
        data_store = ImportDataStore(
            import_data_transport, self.source_details)
        self.import_data_store = data_store

    def getBazaarBranch(self):
        """Return the Bazaar `Branch` that we are importing into.

        Any existing directory at `BZR_BRANCH_PATH` is removed before the
        branch is pulled from the branch store.
        """
        branch_path = self.BZR_BRANCH_PATH
        if os.path.isdir(branch_path):
            shutil.rmtree(branch_path)
        details = self.source_details
        return self.bazaar_branch_store.pull(
            details.target_id, self.BZR_BRANCH_PATH, self.required_format,
            self.needs_bzr_tree, stacked_on_url=details.stacked_on_url)

    def pushBazaarBranch(self, bazaar_branch):
        """Push the updated Bazaar branch to the server.

        :return: True if revisions were transferred.
        """
        details = self.source_details
        return self.bazaar_branch_store.push(
            details.target_id, bazaar_branch, self.required_format,
            stacked_on_url=details.stacked_on_url)
677
678
679
class CSCVSImportWorker(ToBzrImportWorker):
    """An ImportWorker for imports that use CSCVS.

    Besides driving cscvs to perform the import, this class looks after the
    foreign working tree that cscvs reads from.
    """

    # Where the foreign working tree will be stored.
    FOREIGN_WORKING_TREE_PATH = 'foreign_working_tree'

    @cachedproperty
    def foreign_tree_store(self):
        """The `ForeignTreeStore` built on this worker's import data store."""
        return ForeignTreeStore(self.import_data_store)

    def getForeignTree(self):
        """Return the foreign branch object that we are importing from.

        The directory at `FOREIGN_WORKING_TREE_PATH` is recreated empty
        before the tree is fetched into it.

        :return: A `CVSWorkingTree`.
        """
        tree_path = self.FOREIGN_WORKING_TREE_PATH
        if os.path.isdir(tree_path):
            shutil.rmtree(tree_path)
        os.mkdir(tree_path)
        return self.foreign_tree_store.fetch(self.FOREIGN_WORKING_TREE_PATH)

    def importToBazaar(self, foreign_tree, bazaar_branch):
        """Actually import `foreign_tree` into `bazaar_branch`.

        :param foreign_tree: A `CVSWorkingTree`.
        :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
            colocated working tree.
        """
        foreign_directory = foreign_tree.local_path
        working_tree = bazaar_branch.bzrdir.open_workingtree()
        bzr_directory = str(working_tree.basedir)

        scm_branch = SCM.branch(bzr_directory)

        # A branch without any identifiable CSCVS revisions has to be
        # "initialized" by CSCVS first.
        if cscvs.findLastCscvsCommit(scm_branch) is None:
            self._runToBaz(
                foreign_directory, "-SI", "MAIN.1", bzr_directory)

        # Synchronise: import every revision after the last CSCVS commit.
        # The commit is looked up again because initialization above may
        # just have created it; in that case *all* revisions get imported.
        last_commit = cscvs.findLastCscvsCommit(scm_branch)
        revision_range = "%s::" % last_commit
        self._runToBaz(
            foreign_directory, "-SC", revision_range, bzr_directory)

    def _runToBaz(self, source_dir, flags, revisions, bazpath):
        """Actually run the CSCVS utility that imports revisions.

        :param source_dir: The directory containing the foreign working tree
            that we are importing from.
        :param flags: Flags to pass to `totla.totla`.
        :param revisions: The revisions to import.
        :param bazpath: The directory containing the Bazaar working tree that
            we are importing into.
        """
        # XXX: JonathanLange 2008-02-08: We need better documentation for
        # `flags` and `revisions`.
        cvs_config = CVS.Config(source_dir)
        cvs_config.args = [
            "--strict", "-b", bazpath, flags, revisions, bazpath]
        totla.totla(
            cvs_config, self._logger, cvs_config.args, SCM.tree(source_dir))

    def _doImport(self):
        """Import the foreign tree, push the result and archive the tree.

        :return: `CodeImportWorkerExitCode.SUCCESS` if the push transferred
            revisions, `CodeImportWorkerExitCode.SUCCESS_NOCHANGE` otherwise.
        """
        foreign_tree = self.getForeignTree()
        bazaar_branch = self.getBazaarBranch()
        self.importToBazaar(foreign_tree, bazaar_branch)
        revisions_pushed = self.pushBazaarBranch(bazaar_branch)
        self.foreign_tree_store.archive(foreign_tree)
        if not revisions_pushed:
            return CodeImportWorkerExitCode.SUCCESS_NOCHANGE
        return CodeImportWorkerExitCode.SUCCESS
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
757
758
18228.2.2 by Colin Watson
Refactor various bits of the code import worker to be less Bazaar-specific.
759
class PullingImportWorker(ToBzrImportWorker):
    """An import worker for imports that can be done by a bzr plugin.

    Subclasses need to implement `probers`, along with the three exception
    classification properties used by `_doImport`.
    """

    needs_bzr_tree = False

    @property
    def invalid_branch_exceptions(self):
        """Exceptions that indicate no (valid) remote branch is present."""
        raise NotImplementedError

    @property
    def unsupported_feature_exceptions(self):
        """The exceptions to consider for unsupported features."""
        raise NotImplementedError

    @property
    def broken_remote_exceptions(self):
        """The exceptions to consider for broken remote branches."""
        raise NotImplementedError

    @property
    def probers(self):
        """The probers that should be tried for this import."""
        raise NotImplementedError

    def getRevisionLimit(self):
        """Return maximum number of revisions to fetch (None for no limit).
        """
        return None

    def _doImport(self):
        """Open the remote branch, fetch from it and push the result.

        The bzrlib UI factory is replaced with a `LoggingUIFactory` for the
        duration of the import and restored afterwards.

        :return: A CodeImportWorkerExitCode.
        """
        self._logger.info("Starting job.")
        previous_ui_factory = bzrlib.ui.ui_factory
        opener = SafeBranchOpener(self._opener_policy, self.probers)
        bzrlib.ui.ui_factory = LoggingUIFactory(logger=self._logger)
        try:
            self._logger.info(
                "Getting exising bzr branch from central store.")
            bazaar_branch = self.getBazaarBranch()

            # Failures to open the remote branch are reported as exit codes
            # rather than propagated.
            try:
                remote_branch = opener.open(self.source_details.url)
            except TooManyRedirections:
                self._logger.info("Too many redirections.")
                return CodeImportWorkerExitCode.FAILURE_INVALID
            except NotBranchError:
                self._logger.info("No branch found at remote location.")
                return CodeImportWorkerExitCode.FAILURE_INVALID
            except BadUrl as e:
                self._logger.info("Invalid URL: %s" % e)
                return CodeImportWorkerExitCode.FAILURE_FORBIDDEN
            except ConnectionError as e:
                self._logger.info("Unable to open remote branch: %s" % e)
                return CodeImportWorkerExitCode.FAILURE_INVALID

            try:
                remote_branch_tip = remote_branch.last_revision()
                inter_branch = InterBranch.get(remote_branch, bazaar_branch)
                self._logger.info("Importing branch.")
                inter_branch.fetch(limit=self.getRevisionLimit())
                tip_fetched = bazaar_branch.repository.has_revision(
                    remote_branch_tip)
                if not tip_fetched:
                    # The fetch stopped before reaching the remote tip.
                    result = CodeImportWorkerExitCode.SUCCESS_PARTIAL
                else:
                    pull_result = inter_branch.pull(overwrite=True)
                    if pull_result.old_revid == pull_result.new_revid:
                        result = CodeImportWorkerExitCode.SUCCESS_NOCHANGE
                    else:
                        result = CodeImportWorkerExitCode.SUCCESS
            except Exception as e:
                # Classification is by exact class, not by isinstance.
                if e.__class__ in self.unsupported_feature_exceptions:
                    self._logger.info(
                        "Unable to import branch because of limitations in "
                        "Bazaar.")
                    self._logger.info(str(e))
                    return (
                        CodeImportWorkerExitCode.FAILURE_UNSUPPORTED_FEATURE)
                if e.__class__ in self.invalid_branch_exceptions:
                    self._logger.info("Branch invalid: %s", str(e))
                    return CodeImportWorkerExitCode.FAILURE_INVALID
                if e.__class__ in self.broken_remote_exceptions:
                    self._logger.info("Remote branch broken: %s", str(e))
                    return CodeImportWorkerExitCode.FAILURE_REMOTE_BROKEN
                raise

            self._logger.info("Pushing local import branch to central store.")
            self.pushBazaarBranch(bazaar_branch)
            self._logger.info("Job complete.")
            return result
        finally:
            bzrlib.ui.ui_factory = previous_ui_factory
7675.76.2 by Michael Hudson
small refactoring
851
852
853
class GitImportWorker(PullingImportWorker):
    """An import worker that pulls Git repositories into Bazaar.

    Besides what `PullingImportWorker` does, this worker carries bzr-git's
    caches between runs through the import data store: the legacy 'git.db'
    file and the newer 'git' cache directory (stored as a tarball).
    """

    @property
    def invalid_branch_exceptions(self):
        """Exception classes reported as `FAILURE_INVALID` by the import."""
        return [NoRepositoryPresent, NotBranchError, ConnectionError]

    @property
    def unsupported_feature_exceptions(self):
        """Exception classes that denote an unsupported feature."""
        # Imported at call time rather than at module level.
        from bzrlib.plugins.git.fetch import SubmodulesRequireSubtrees
        return [InvalidEntryName, SubmodulesRequireSubtrees]

    @property
    def broken_remote_exceptions(self):
        """Exception classes reported as `FAILURE_REMOTE_BROKEN` (none)."""
        return []

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        # Imported at call time rather than at module level.
        from bzrlib.plugins.git import LocalGitProber, RemoteGitProber
        return [LocalGitProber, RemoteGitProber]

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`."""
        return config.codeimport.git_revisions_import_limit

    def getBazaarBranch(self):
        """See `ToBzrImportWorker.getBazaarBranch`.

        After obtaining the branch from the superclass, pull bzr-git's
        caches out of the import data store and place them inside the
        Bazaar repository directory: the legacy cache as 'git.db' and the
        modern one unpacked into 'git'.
        """
        bzr_branch = PullingImportWorker.getBazaarBranch(self)
        # Legacy cache: a single file, fetched straight onto the
        # repository transport if the store has it.
        self.import_data_store.fetch(
            'git.db', bzr_branch.repository._transport)
        # Modern cache: a directory, kept in the store as a tarball.
        tarball_name = 'git-cache.tar.gz'
        if not self.import_data_store.fetch(tarball_name):
            return bzr_branch
        transport = bzr_branch.repository._transport
        transport.mkdir('git')
        cache_dir = os.path.join(local_path_from_url(transport.base), 'git')
        extract_tarball(tarball_name, cache_dir)
        return bzr_branch

    def pushBazaarBranch(self, bazaar_branch):
        """See `ToBzrImportWorker.pushBazaarBranch`.

        After the superclass has pushed the branch, tar up bzr-git's cache
        directory ('git' inside the repository directory) and put it in
        the import data store.  The superclass' result is returned.
        """
        pushed_non_trivial = PullingImportWorker.pushBazaarBranch(
            self, bazaar_branch)
        repo_path = local_path_from_url(
            bazaar_branch.repository._transport.base)
        tarball_name = 'git-cache.tar.gz'
        create_tarball(os.path.join(repo_path, 'git'), tarball_name)
        self.import_data_store.put(tarball_name)
        return pushed_non_trivial
7675.76.2 by Michael Hudson
small refactoring
926
927
928
class BzrSvnImportWorker(PullingImportWorker):
    """An import worker that pulls Subversion branches using bzr-svn."""

    @property
    def invalid_branch_exceptions(self):
        """Exception classes reported as `FAILURE_INVALID` by the import."""
        return [NoRepositoryPresent, NotBranchError, ConnectionError]

    @property
    def unsupported_feature_exceptions(self):
        """Exception classes that denote an unsupported feature."""
        # Imported at call time rather than at module level.
        from bzrlib.plugins.svn.errors import InvalidFileName
        return [InvalidEntryName, InvalidFileName]

    @property
    def broken_remote_exceptions(self):
        """Exception classes reported as `FAILURE_REMOTE_BROKEN`."""
        # Imported at call time rather than at module level.
        from bzrlib.plugins.svn.errors import IncompleteRepositoryHistory
        broken = [IncompleteRepositoryHistory]
        return broken

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`."""
        return config.codeimport.svn_revisions_import_limit

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        # Imported at call time rather than at module level.
        from bzrlib.plugins.svn import SvnRemoteProber
        svn_probers = [SvnRemoteProber]
        return svn_probers
13756.4.1 by Jelmer Vernooij
Re-import bzr code imports.
961
962
963
class BzrImportWorker(PullingImportWorker):
    """An import worker that mirrors branches which are already Bazaar."""

    # Exception classes used to classify import failures; here they are
    # plain class attributes.
    invalid_branch_exceptions = [NotBranchError, ConnectionError]
    unsupported_feature_exceptions = []
    broken_remote_exceptions = []

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`."""
        # No limit: fetch the entire branch in one go for now.  bzr can do
        # fetch(limit=), but not very efficiently at the moment.
        return None

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        # Imported at call time rather than at module level.
        from bzrlib.bzrdir import (
            BzrProber,
            RemoteBzrProber,
            )
        return [BzrProber, RemoteBzrProber]
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
985
986
987
class GitToGitImportWorker(ImportWorker):
988
    """An import worker for imports from Git to Git."""
989
990
    def _runGit(self, *args, **kwargs):
        """Run git with `args`, logging each line of its output.

        stdout and stderr are merged; every line is decoded as UTF-8 (with
        replacement of undecodable bytes), passed through `sanitise_urls`
        and logged at INFO level.

        :param args: Arguments appended to the "git" command.
        :param kwargs: Extra keyword arguments for `subprocess.Popen`
            (e.g. `cwd`).
        :raises subprocess.CalledProcessError: if git exits non-zero.
        """
        argv = ["git"]
        argv.extend(args)
        proc = subprocess.Popen(
            argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            **kwargs)
        for raw_line in proc.stdout:
            text = raw_line.decode("UTF-8", "replace").rstrip("\n")
            self._logger.info(sanitise_urls(text))
        exit_status = proc.wait()
        if exit_status:
            raise subprocess.CalledProcessError(exit_status, argv)
1001
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
1002
    def _getHead(self, repository, remote_name):
        """Get HEAD from a configured remote in a local repository.

        The returned ref name will be adjusted in such a way that it can be
        passed to `_setHead` (e.g. refs/remotes/origin/master ->
        refs/heads/master).

        :param repository: Path of the local repository; used as the
            working directory of every git command run here.
        :param remote_name: Name of the remote whose HEAD is wanted.
        :return: The HEAD target as a "refs/heads/..." ref name.
        :raises subprocess.CalledProcessError: if any git command fails.
        :raises GitProtocolError: if the remote HEAD ends up pointing
            outside refs/remotes/<remote_name>/.
        """
        # This is a bit weird, but set-head will bail out if the target
        # doesn't exist in the correct remotes namespace.  git 2.8.0 has
        # "git ls-remote --symref <repository> HEAD" which would involve
        # less juggling.
        self._runGit(
            "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
            cwd=repository)
        self._runGit(
            "remote", "set-head", remote_name, "--auto", cwd=repository)
        ref_prefix = "refs/remotes/%s/" % remote_name
        target_ref = subprocess.check_output(
            ["git", "symbolic-ref", ref_prefix + "HEAD"],
            cwd=repository, universal_newlines=True).rstrip("\n")
        if not target_ref.startswith(ref_prefix):
            raise GitProtocolError(
                "'git remote set-head %s --auto' did not leave remote HEAD "
                "under %s" % (remote_name, ref_prefix))
        real_target_ref = "refs/heads/" + target_ref[len(ref_prefix):]
        # Ensure the result is a valid ref name, just in case.  Run it in
        # the repository we were given, like the commands above, rather
        # than in a hard-coded directory name.
        self._runGit("check-ref-format", real_target_ref, cwd=repository)
        return real_target_ref
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
1030
1031
    def _setHead(self, target_url, target_ref):
        """Point HEAD of the repository at `target_url` to `target_ref`.

        This relies on the turnip-set-symbolic-ref extension: a pkt-line
        encoded request is POSTed to that service and the first pkt-line of
        the reply is inspected.

        :param target_url: URL of the remote repository.
        :param target_ref: Ref name that HEAD should point at.
        :raises GitProtocolError: if the request fails, the reply has an
            unexpected Content-Type, or the reply is anything other than
            an "ACK HEAD" packet.
        """
        service = "turnip-set-symbolic-ref"
        endpoint = urljoin(target_url, service)
        request_headers = {
            "Content-Type": "application/x-%s-request" % service,
            }
        request_body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
        try:
            response = urlfetch(
                endpoint, method="POST", headers=request_headers,
                data=request_body)
            response.raise_for_status()
        except Exception as exc:
            # Any failure to talk to the server is reported uniformly.
            raise GitProtocolError(str(exc))
        content_type = response.headers.get("Content-Type")
        if content_type != ("application/x-%s-result" % service):
            raise GitProtocolError(
                "Invalid Content-Type from server: %s" % content_type)
        reply = io.BytesIO(response.content)
        packet = Protocol(reply.read, None).read_pkt_line()
        if packet is None:
            raise GitProtocolError("Unexpected flush-pkt from server")
        if packet.rstrip(b"\n") == b"ACK HEAD":
            # Success; a trailing newline on the ACK is tolerated.
            return
        if packet.startswith(b"ERR "):
            error_text = packet[len(b"ERR "):].rstrip(b"\n")
            raise GitProtocolError(error_text.decode("UTF-8"))
        raise GitProtocolError("Unexpected packet %r from server" % packet)
18250.3.2 by Colin Watson
Fix the Git-to-Git import worker to synchronise HEAD if possible.
1063
18250.3.5 by Colin Watson
Delete refs in a more careful way that will work with packed-refs.
1064
    def _deleteRefs(self, repository, pattern):
        """Delete all refs in `repository` matching `pattern`.

        :param repository: Path of the local repository; used as the
            working directory of the git commands run here.
        :param pattern: A `git for-each-ref` pattern selecting the refs to
            delete.
        :raises subprocess.CalledProcessError: if a git command fails.
        """
        # XXX cjwatson 2016-11-08: We might ideally use something like:
        # "git for-each-ref --format='delete %(refname)%00%(objectname)%00' \
        #   <pattern> | git update-ref --stdin -z
        # ... which would be faster, but that requires git 1.8.5.
        # Both commands run in the repository we were given rather than in
        # a hard-coded directory name.
        remote_refs = subprocess.check_output(
            ["git", "for-each-ref", "--format=%(refname)", pattern],
            cwd=repository).splitlines()
        for remote_ref in remote_refs:
            self._runGit("update-ref", "-d", remote_ref, cwd=repository)
1075
18228.3.1 by Colin Watson
Add a Git-to-Git import worker.
1076
    def _doImport(self):
        """Mirror the source Git repository into the hosting service.

        The steps are: check the source URL against the opener policy,
        clone the existing target repository, fetch all refs from the
        source into it, work out the source's default branch if possible,
        and push everything back to the target (default branch first).

        :return: A `CodeImportWorkerExitCode` value: `FAILURE_FORBIDDEN`
            for a rejected source URL, `FAILURE` if cloning from or
            pushing to the hosting service fails, `FAILURE_INVALID` if
            fetching from the source fails, otherwise `SUCCESS`.
        """
        workdir = "repository"
        stale_refs = "refs/remotes/source/**"
        source_url = self.source_details.url

        self._logger.info("Starting job.")
        try:
            self._opener_policy.checkOneURL(source_url)
        except BadUrl as exc:
            self._logger.info("Invalid URL: %s" % exc)
            return CodeImportWorkerExitCode.FAILURE_FORBIDDEN

        # Build the target URL with the macaroon as the password part.
        parts = urlsplit(urljoin(
            config.codehosting.git_browse_root,
            self.source_details.target_id))
        token = self.source_details.macaroon.serialize()
        netloc = ":%s@%s" % (token, parts.hostname)
        if parts.port:
            netloc = "%s:%s" % (netloc, parts.port)
        target_url = urlunsplit(
            [parts.scheme, netloc, parts.path, "", ""])
        # XXX cjwatson 2016-10-11: Ideally we'd put credentials in a
        # credentials store instead.  However, git only accepts credentials
        # that have both a non-empty username and a non-empty password.

        self._logger.info("Getting existing repository from hosting service.")
        try:
            self._runGit("clone", "--mirror", target_url, workdir)
        except subprocess.CalledProcessError as exc:
            # Only the exit status is logged, so that the macaroon in the
            # command line does not leak into the log.
            self._logger.info(
                "Unable to get existing repository from hosting service: "
                "git clone exited %s" % exc.returncode)
            return CodeImportWorkerExitCode.FAILURE

        self._logger.info("Fetching remote repository.")
        try:
            self._runGit("config", "gc.auto", "0", cwd=workdir)
            # Remove any stray remote-tracking refs from the last time round.
            self._deleteRefs(workdir, stale_refs)
            self._runGit("remote", "add", "source", source_url, cwd=workdir)
            self._runGit(
                "fetch", "--prune", "source", "+refs/*:refs/*", cwd=workdir)
            # Failing to determine the default branch is not fatal; the
            # import just carries on without synchronising HEAD.
            try:
                new_head = self._getHead(workdir, "source")
            except (subprocess.CalledProcessError,
                    GitProtocolError) as head_exc:
                self._logger.info(
                    "Unable to fetch default branch: %s" % head_exc)
                new_head = None
            self._runGit("remote", "rm", "source", cwd=workdir)
            # XXX cjwatson 2016-11-03: For some reason "git remote rm"
            # doesn't actually remove the refs.
            self._deleteRefs(workdir, stale_refs)
        except subprocess.CalledProcessError as exc:
            self._logger.info("Unable to fetch remote repository: %s" % exc)
            return CodeImportWorkerExitCode.FAILURE_INVALID

        self._logger.info("Pushing repository to hosting service.")
        try:
            if new_head is not None:
                # Push the target of HEAD first to ensure that it is always
                # available.
                head_refspec = "+%s:%s" % (new_head, new_head)
                self._runGit("push", target_url, head_refspec, cwd=workdir)
                try:
                    self._setHead(target_url, new_head)
                except GitProtocolError as exc:
                    self._logger.info("Unable to set default branch: %s" % exc)
            self._runGit("push", "--mirror", target_url, cwd=workdir)
        except subprocess.CalledProcessError as exc:
            # As above, log just the exit status to keep the macaroon out
            # of the log.
            self._logger.info(
                "Unable to push to hosting service: git push exited %s" %
                exc.returncode)
            return CodeImportWorkerExitCode.FAILURE
        return CodeImportWorkerExitCode.SUCCESS