1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
#
# Copyright (C) 2008, 2009 Canonical Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""Cache the whole history data needed by loggerhead about a branch."""
import gc
import logging
import time
from bzrlib.revision import is_null, NULL_REVISION
from loggerhead.util import StaticTuple
def _strip_NULL_ghosts(revision_graph):
    """
    Copied over from bzrlib meant as a temporary workaround for
    deprecated methods.

    Drops the null: pseudo-revision and any ghost parents (parents with
    no entry of their own) from *revision_graph*.  The mapping is
    mutated in place and also returned for convenience.
    """
    # Remove the null: entry up front so it also disappears as a parent.
    revision_graph.pop(NULL_REVISION, None)
    as_st = StaticTuple.from_sequence
    # keys() gives us a snapshot; only values are rebound below, which is
    # safe while iterating in any case.
    for key in revision_graph.keys():
        kept_parents = [p for p in revision_graph[key]
                        if p in revision_graph]
        revision_graph[key] = as_st(kept_parents)
    return revision_graph
def compute_whole_history_data(branch):
    """Compute _rev_info and _rev_indices for a branch.

    See History.__doc__ for what these data structures mean.
    """
    start = time.time()
    tip_revid = branch.last_revision()
    logger = logging.getLogger('loggerhead.%s' %
                               (branch.get_config().get_nickname(),))
    # An empty branch has nothing to compute.
    if is_null(tip_revid):
        return ([], {})
    # Profiling shows that the bulk of the time spent here is reading the
    # data out of the indexes, rather than time building and sorting the
    # graph.  Disabling the garbage collector while we build gives a
    # measurable speedup; restore it no matter what happens.
    gc_was_enabled = gc.isenabled()
    if gc_was_enabled:
        gc.disable()
    try:
        rev_info, rev_indices = _compute_graph(branch, tip_revid)
    finally:
        if gc_was_enabled:
            gc.enable()
    logger.info('built revision graph cache: %.3f secs' % (time.time() - start,))
    return (rev_info, rev_indices)
def _compute_graph(branch, last_revid):
    """Do the actual work of computing the graph information.

    Returns a (rev_info, rev_indices) pair: rev_info is a merge-sorted
    list of (basic_info, child_ids, parent_ids) StaticTuples, and
    rev_indices maps each revision id to its position in rev_info.
    """
    # get_known_graph_ancestry drops us from 2.3s on bzr.dev down to 0.9s
    # (and with gc disabled by the caller, to 0.7s).  The win is even
    # bigger on mysql (4.353 -> 2.563 -> 1.634).
    tip_key = (last_revid,)
    graph = branch.repository.revisions.get_known_graph_ancestry([tip_key])
    # What about ghosts?
    merge_sorted = graph.merge_sort(tip_key)

    rev_info = []
    rev_indices = {}
    # Hoist method lookups out of the loop.
    parents_of = graph.get_parent_keys
    children_of = graph.get_child_keys
    # Using StaticTuple shows a memory reduction (85.6MB => 81.1MB peak on
    # a MySQL branch) with no measurable build-time difference, and it
    # reduces overall gc overhead in unrelated code.
    as_st = StaticTuple.from_sequence
    for seq, node in enumerate(merge_sorted):
        # node.key is a 1-tuple; switch back to a simple string rev_id.
        rev_id = node.key[-1]
        revno_str = '.'.join(map(str, node.revno))
        parent_ids = as_st([p[-1] for p in parents_of(node.key)])
        # Exactly one entry is appended per iteration, so the index of
        # this revision in rev_info is just seq.
        rev_indices[rev_id] = seq
        # TODO: Try using the original merge_sorted object.  Gives us a
        #       nice Object.foo rather than entry[0][1] syntax, but would
        #       need special handling in the caching layer.
        basic_info = StaticTuple(seq, rev_id, node.merge_depth, revno_str,
                                 node.end_of_merge)
        if node.merge_depth == 0:
            # Mainline revisions don't need their children recorded.
            child_ids = StaticTuple()
        else:
            child_ids = as_st([c[-1] for c in children_of(node.key)])
        rev_info.append(StaticTuple(basic_info, child_ids, parent_ids))
    return rev_info, rev_indices
|