~zyga/bzr-fastimport/fixes

« back to all changes in this revision

Viewing changes to processors/info_processor.py

  • Committer: Ian Clatworthy
  • Date: 2008-02-14 06:28:42 UTC
  • Revision ID: ian.clatworthy@internode.on.net-20080214062842-x2yy4rk70ny1ounp
1st cut: gfi parser + --info processing method

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2008 Canonical Ltd
 
2
#
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
#
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
#
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
 
 
17
"""Import processor that dump stats about the input (and doesn't import)."""
 
18
 
 
19
 
 
20
from bzrlib.trace import (
 
21
    note,
 
22
    warning,
 
23
    )
 
24
from bzrlib.plugins.fastimport import (
 
25
    commands,
 
26
    processor,
 
27
    )
 
28
 
 
29
 
 
30
# Maximum number of parents for a merge commit
 
31
# InfoProcessor.pre_process seeds its parent-count table with one zeroed
# entry for each parent count in 0 .. _MAX_PARENTS - 1.
_MAX_PARENTS = 16
 
32
 
 
33
 
 
34
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.

    The statistics are kept in these attributes, all (re)created by
    pre_process():

    * cmd_counts - command name -> number of times it was handled
    * file_cmd_counts - file command name -> number of times it was seen
    * parent_counts - number of parents -> number of commits with
      that many parents
    * committers - set of the distinct committer values seen
    * separate_authors_found, symlinks_found, executables_found -
      flags set once the corresponding feature is seen in a commit
    """

    def __init__(self, target=None, verbose=False):
        """Create the processor.

        :param target: passed through to ImportProcessor; defaults to
            None so the processor can be created without a target.
        :param verbose: passed through to ImportProcessor.
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, verbose)

    def pre_process(self):
        """Reset every statistic to its initial (empty/zero) state."""
        # Every known command name starts at zero so post_process can
        # report all of them, including those that never occurred.
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(
            commands.FILE_COMMAND_NAMES, 0)
        # Pre-seed the common parent counts; commit_handler adds further
        # keys on demand if a commit has more parents than this.
        self.parent_counts = dict.fromkeys(range(_MAX_PARENTS), 0)
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False

    def post_process(self):
        """Dump the collected statistics using note()."""
        note("Command counts:")
        for cmd in commands.COMMAND_NAMES:
            note("\t%d\t%s", self.cmd_counts[cmd], cmd)
        note("File command counts:")
        for fc in commands.FILE_COMMAND_NAMES:
            note("\t%d\t%s", self.file_cmd_counts[fc], fc)
        # The remaining sections only make sense if commits were seen.
        if self.cmd_counts['commit']:
            note("Parent counts:")
            # Walk the keys actually recorded (in numeric order) rather
            # than a fixed range, so unusually wide merges are reported.
            for num_parents in sorted(self.parent_counts):
                count = self.parent_counts[num_parents]
                if count > 0:
                    note("\t%d\t%d", count, num_parents)
            note("Other information:")
            note("\t%d\t%s", len(self.committers), 'unique committers')
            note("\t%s\t%s", _found(self.separate_authors_found),
                'separate authors')
            note("\t%s\t%s", _found(self.executables_found), 'executables')
            note("\t%s\t%s", _found(self.symlinks_found), 'symlinks')

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Updates the command count, the parent-count table, the set of
        committers and the author/executable/symlink flags, and counts
        every file command yielded by cmd.file_iter().
        """
        self.cmd_counts[cmd.name] += 1
        # Only 0 .. _MAX_PARENTS - 1 are pre-seeded, so use get() to
        # avoid a KeyError on a commit with more parents than that.
        num_parents = len(cmd.parents)
        self.parent_counts[num_parents] = (
            self.parent_counts.get(num_parents, 0) + 1)
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1
 
121
 
 
122
 
 
123
def _found(b):
 
124
    """Format a found boolean as a string."""
 
125
    return ['no', 'found'][b]