# (page header left over from the source listing) ~vmiklos/bzr-fastimport/darcs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

r"""FastImport Plugin
=================

The fastimport plugin provides stream-based importing of data into Bazaar.
A bzr-fast-export.py script is also included providing exporting of data
out of Bazaar to the same format. As well as enabling interchange between
multiple VCS tools, fastimport/export can be useful for complex branch
operations, e.g. partitioning off part of a code base in order to Open
Source it.

The normal import recipe is::

  bzr init-repo .
  front-end | bzr fast-import -

Numerous front-ends are provided in the exporters directory where
the plugin is installed. The list of known front-ends and their
status is documented on http://bazaar-vcs.org/BzrFastImport/FrontEnds.
For further details, see http://bazaar-vcs.org/BzrFastImport and the
online help for the commands::

  bzr help fast-import
  bzr help fast-export
  bzr help fast-import-filter
  bzr help fast-import-info
  bzr help fast-import-query

To report bugs or publish enhancements, visit the bzr-fastimport project
page on Launchpad, https://launchpad.net/bzr-fastimport.
"""

# Version of this plugin. The five fields look like the sys.version_info
# layout (major, minor, micro, release level, serial) -- TODO confirm
# against the plugin version conventions used by bzrlib.
version_info = (0, 8, 0, 'dev', 0)

from bzrlib.commands import Command, register_command
from bzrlib.option import Option, ListOption


def test_suite():
    """Return the result of calling ``test_suite()`` in the ``tests`` module.

    The ``tests`` module is imported lazily, i.e. only when this function
    is actually called.
    """
    import tests as suite_module
    build_suite = suite_module.test_suite
    return build_suite()


def _run(source, processor_factory, control, params, verbose):
    """Create and run a processor.

    :param source: a filename or '-' for standard input
    :param processor_factory: a callable for creating a processor; it is
      called as ``processor_factory(control, params=..., verbose=...)``
    :param control: the BzrDir of the destination or None if no
      destination is expected
    :param params: the parameters forwarded to processor_factory as its
      ``params`` keyword argument
    :param verbose: verbosity flag forwarded to both the processor and
      the parser
    :return: whatever the processor's ``process()`` method returns
    """
    # Import the parser first so that a failure here cannot leak a file
    # handle opened below.
    import parser
    if source == '-':
        import sys
        stream = sys.stdin
        # Standard input is owned by the caller; never close it here.
        opened_here = False
        try:
            import os
            if os.name == 'nt':
                # On Windows, switch stdin to binary mode so the stream is
                # read without text-mode translation.
                fileno = getattr(sys.stdin, 'fileno', None)
                if fileno:
                    no = fileno()
                    if no >= 0:     # -1 means we're working as subprocess
                        import msvcrt
                        msvcrt.setmode(no, os.O_BINARY)
        except ImportError:
            # Best effort only: carry on with stdin as it is.
            pass
    else:
        stream = open(source, "rb")
        opened_here = True
    try:
        proc = processor_factory(control, params=params, verbose=verbose)
        p = parser.ImportParser(stream, verbose=verbose)
        # The bound method itself (not its result) is handed to the
        # processor, which decides when to start iterating.
        return proc.process(p.iter_commands)
    finally:
        # Release the file handle we opened, even if processing failed.
        if opened_here:
            stream.close()


class cmd_fast_import(Command):
    """Backend for fast Bazaar data importers.

    This command reads a mixed command/data stream and
    creates branches in the current repository accordingly.
    To specify standard input as the input stream, use a
    source name of '-'.
    
    The usual recipe is::

      bzr init-repo .
      front-end | bzr fast-import -

    If run inside a branch using a shared repository, then
    the current branch is made the trunk and other branches,
    if any, are created in sister directories. If run inside
    a standalone tree, the current branch is also made the
    trunk, but warnings are output about other branches found.

    Git reference names are mapped to bzr branch names as follows:
      
    * refs/heads/foo is mapped to foo
    * refs/remotes/origin/foo is mapped to foo.remote
    * refs/tags/foo is mapped to foo.tag
    * */master is mapped to trunk, trunk.remote, etc.
    * */trunk is mapped to git-trunk, git-trunk.remote, etc.

    The stream format is upwardly compatible with git-fast-import
    so existing front-ends for that tool can typically be reused
    without changes. See http://bazaar-vcs.org/BzrFastImport for
    links to matching exporters from Subversion, CVS, Git,
    Mercurial, Darcs, Perforce and SCCS.

    While reusing an existing format with existing frontends is
    great, it does mean a slightly more complex recipe when
    importing large projects via exporters that reuse blob data
    across commits, namely::

      bzr init-repo .
      front-end > xxx.fi
      bzr fast-import-info -v xxx.fi > xxx.cfg
      bzr fast-import xxx.fi --info xxx.cfg

    In this scenario, the xxx.cfg file generated by the first pass
    holds caching hints that the second pass uses to lower memory
    usage.

    At checkpoints and on completion, the commit-id -> revision-id
    map is saved to a file called 'fastimport-id-map' in the control
    directory for the repository (e.g. .bzr/repository). If the import
    is interrupted or unexpectedly crashes, it can be started again
    and this file will be used to skip over already loaded revisions.
    As long as subsequent exports from the original source begin
    with exactly the same revisions, you can use this feature to
    maintain a mirror of a repository managed by a foreign tool.
    If and when Bazaar is used to manage the repository, this file
    can be safely deleted.

    If you wish to write a custom exporter for your project, see
    http://bazaar-vcs.org/BzrFastImport for the detailed protocol
    specification. In many cases, exporters can be written quite
    quickly using whatever scripting/programming language you like.

    Examples::

     cd /git/repo/path
     git-fast-export --signed-tags=warn | bzr fast-import -

        Import a Git repository into Bazaar.

     svn-fast-export.py /svn/repo/path | bzr fast-import -

        Import a Subversion repository into Bazaar.

     hg-fast-export.py -r /hg/repo/path | bzr fast-import -

        Import a Mercurial repository into Bazaar.
    """
    # The class attributes below are presumably read by bzrlib's Command
    # framework (help output, argument and option parsing) -- confirm
    # against bzrlib.commands.
    hidden = False
    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
    takes_args = ['source']
    takes_options = [
        'verbose',
        Option('info', type=str,
            help="Path to file containing caching hints."),
        Option('trees',
            help="Update working trees."),
        Option('checkpoint', type=int,
            help="Checkpoint automatically every N revisions."),
        Option('count', type=int,
            help="Import this many revisions then exit."),
        Option('inv-cache', type=int,
            help="Number of inventories to cache."),
        Option('experimental',
            help="Enable experimental features."),
        Option('import-marks', type=str,
            help="Import marks from file."),
        Option('export-marks', type=str,
            help="Export marks to file."),
        ]
    aliases = []

    def run(self, source, verbose=False, info=None, trees=False,
        checkpoint=10000, count=-1, inv_cache=10,
        experimental=False, import_marks=None, export_marks=None):
        """Feed the stream named by ``source`` to a GenericProcessor.

        The destination is the BzrDir containing the current directory.
        Every option value is passed on to the processor in a dictionary
        keyed by the option's command-line name.
        """
        from bzrlib import bzrdir
        from bzrlib.plugins.fastimport.processors import generic_processor
        # open_containing() returns a pair; only the first element (the
        # control directory) is needed here.
        destination = bzrdir.BzrDir.open_containing('.')[0]
        option_values = (
            ('info', info),
            ('trees', trees),
            ('checkpoint', checkpoint),
            ('count', count),
            ('inv-cache', inv_cache),
            ('experimental', experimental),
            ('import-marks', import_marks),
            ('export-marks', export_marks),
            )
        return _run(source, generic_processor.GenericProcessor, destination,
            dict(option_values), verbose)


class cmd_fast_import_filter(Command):
    """Filter a fast-import stream to include/exclude files & directories.

    This command is useful for splitting a subdirectory or bunch of
    files out from a project to create a new project complete with history
    for just those files. It can also be used to create a new project
    repository that removes all references to files that should not have
    been committed, e.g. security-related information (like passwords),
    commercially sensitive material, files with an incompatible license or
    large binary files like CD images.

    When filtering out a subdirectory (or file), the new stream uses the
    subdirectory (or subdirectory containing the file) as the root. As
    fast-import doesn't know in advance whether a path is a file or
    directory in the stream, you need to specify a trailing '/' on
    directories passed to the --include_paths (-i) option. If multiple
    files or directories are given, the new root is the deepest common
    directory.

    To specify standard input as the input stream, use a source
    name of '-'.

    Note: If a path has been renamed, take care to specify the *original*
    path name, not the final name that it ends up with.

    Examples::

      Create a new project from a library. (Note the trailing / on the
      directory name of the library.)

        front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
        bzr init-repo mylibrary
        cd mylibrary
        bzr fast-import ../xxx.fi
        (lib/xxx/foo is now foo)

      Create a new repository without a sensitive file.

        front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
        bzr init-repo project.clean
        cd project.clean
        bzr fast-import ../clean.fi
    """
    # The class attributes below are presumably read by bzrlib's Command
    # framework (help output, argument and option parsing) -- confirm
    # against bzrlib.commands.
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source']
    takes_options = ['verbose',
                    ListOption('include_paths', short_name='i', type=str,
                        help="Only include commits affecting these paths."
                             " Directories should have a trailing /."
                        ),
                    ListOption('exclude_paths', short_name='x', type=str,
                        help="Exclude these paths from commits."
                        ),
                     ]
    aliases = []
    # NOTE(review): presumably asks bzrlib to leave the output stream
    # un-re-encoded, since the examples redirect the filtered stream to a
    # file -- confirm against bzrlib.commands.Command.
    encoding_type = 'exact'

    def run(self, source, verbose=False, include_paths=None,
        exclude_paths=None):
        """Run a FilterProcessor over the stream named by ``source``.

        :param source: a filename or '-' for standard input
        :param verbose: verbosity flag forwarded to ``_run``
        :param include_paths: value of the ``include_paths`` option, or None
        :param exclude_paths: value of the ``exclude_paths`` option, or None
        :return: the result of ``_run``
        """
        from bzrlib.plugins.fastimport.processors import filter_processor
        params = {
            'include_paths': include_paths,
            'exclude_paths': exclude_paths,
            }
        # No destination BzrDir is involved, hence the None control.
        return _run(source, filter_processor.FilterProcessor, None, params,
            verbose)


class cmd_fast_import_info(Command):
    """Output information about a fast-import stream.

    This command reads a fast-import stream and outputs
    statistics and interesting properties about what it finds.
    When run in verbose mode, the information is output as a
    configuration file that can be passed to fast-import to
    assist it in intelligently caching objects.

    To specify standard input as the input stream, use a source
    name of '-'.

    Examples::

     front-end | bzr fast-import-info -

        Display statistics about the import stream produced by front-end.

     front-end | bzr fast-import-info -v - > front-end.cfg

       Create a hints file for running fast-import on a large repository.
    """
    # The class attributes below are presumably read by bzrlib's Command
    # framework (help output, argument and option parsing) -- confirm
    # against bzrlib.commands.
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source']
    takes_options = ['verbose']
    aliases = []

    def run(self, source, verbose=False):
        """Run an InfoProcessor over the stream named by ``source``.

        No destination BzrDir and no processor parameters are supplied.
        """
        from bzrlib.plugins.fastimport.processors import info_processor
        processor_class = info_processor.InfoProcessor
        return _run(source, processor_class, None, {}, verbose)


class cmd_fast_import_query(Command):
    """Query a fast-import stream displaying selected commands.

    To specify standard input as the input stream, use a source
    name of '-'. To specify the commands to display, use the -C
    option one or more times. To specify just some fields for
    a command, use the syntax::

      command=field1,...

    By default, the nominated fields for the nominated commands
    are displayed tab separated. To see the information in
    a name:value format, use verbose mode.

    Note: Binary fields (e.g. data for blobs) are masked out
    so it is generally safe to view the output in a terminal.

    Examples::

      front-end > xxx.fi
      bzr fast-import-query xxx.fi -Creset -Ctag

        Show all the fields of the reset and tag commands.

      bzr fast-import-query xxx.fi -Ccommit=mark,merge

        Show the mark and merge fields of the commit commands.
    """
    # The class attributes below are presumably read by bzrlib's Command
    # framework (help output, argument and option parsing) -- confirm
    # against bzrlib.commands.
    hidden = True
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source']
    takes_options = [
        'verbose',
        ListOption('commands', short_name='C', type=str,
            help="Display fields for these commands."),
        ]
    aliases = []

    def run(self, source, verbose=False, commands=None):
        """Run a QueryProcessor over the stream named by ``source``.

        The values given to the ``commands`` option are converted by
        ``helpers.defines_to_dict`` and the result is handed to the
        processor as its parameters.
        """
        from bzrlib.plugins.fastimport.processors import query_processor
        from bzrlib.plugins.fastimport import helpers
        query_fields = helpers.defines_to_dict(commands)
        processor_class = query_processor.QueryProcessor
        return _run(source, processor_class, None, query_fields, verbose)


class cmd_fast_export(Command):
    """Generate a fast-import stream from a Bazaar branch.

    This program generates a stream from a bzr branch in the format required by
    git-fast-import(1). It preserves merges correctly, even merged branches with
    no common history (`bzr merge -r 0..-1`).

    To import several unmerged but related branches into the same repository,
    use the --{export,import}-marks options, and specify a name for the git
    branch like this::
    
        % bzr-fast-export --export-marks=marks.bzr project.dev |
              GIT_DIR=project/.git git-fast-import --export-marks=marks.git

        % bzr-fast-export --import-marks=marks.bzr -b other project.other |
              GIT_DIR=project/.git git-fast-import --import-marks=marks.git

    If you get a "Missing space after source" error from git-fast-import,
    see the top of the commands.py module for a work-around.
    """
    # The class attributes below are presumably read by bzrlib's Command
    # framework (help output, argument and option parsing) -- confirm
    # against bzrlib.commands.
    hidden = False
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source']
    takes_options = ['verbose',
                    # The value is a branch name, not a file, so the
                    # placeholder shown in the help is NAME.
                    Option('git-branch', short_name='b', type=str,
                        argname='NAME',
                        help='Name of the git branch to create (default=master).'
                        ),
                    Option('checkpoint', type=int, argname='N',
                        help="Checkpoint every N revisions (default=1000)."
                        ),
                    Option('marks', type=str, argname='FILE',
                        help="Import marks from and export marks to file."
                        ),
                    Option('import-marks', type=str, argname='FILE',
                        help="Import marks from file."
                        ),
                    Option('export-marks', type=str, argname='FILE',
                        help="Export marks to file."
                        ),
                     ]
    aliases = []

    def run(self, source, verbose=False, git_branch="master", checkpoint=1000,
        marks=None, import_marks=None, export_marks=None):
        """Export ``source`` as a fast-import stream via BzrFastExporter.

        :param source: forwarded as the first argument of BzrFastExporter
        :param verbose: accepted for the 'verbose' option; not used here
        :param git_branch: name of the git branch to create
        :param checkpoint: checkpoint interval, in revisions
        :param marks: when given, used as both the import and the export
          marks file, overriding ``import_marks`` and ``export_marks``
        :param import_marks: file to import marks from, or None
        :param export_marks: file to export marks to, or None
        :return: the result of the exporter's ``run()`` method
        """
        from bzrlib.plugins.fastimport import bzr_exporter

        # --marks is shorthand for giving the same file to both
        # --import-marks and --export-marks.
        if marks:
            import_marks = export_marks = marks
        exporter = bzr_exporter.BzrFastExporter(source,
            git_branch=git_branch, checkpoint=checkpoint,
            import_marks_file=import_marks, export_marks_file=export_marks)
        return exporter.run()


# Register every command class defined in this module, in declaration order.
for _command_class in (
        cmd_fast_import,
        cmd_fast_import_filter,
        cmd_fast_import_info,
        cmd_fast_import_query,
        cmd_fast_export,
        ):
    register_command(_command_class)
# Do not leave the loop variable behind in the module namespace.
del _command_class