# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Store history information in a database."""
20
from sqlite3 import dbapi2
22
from pysqlite2 import dbapi2
24
from collections import defaultdict, deque
37
from loggerhead import history_db_schema as schema
40
# Sentinel parent tuple: a revision whose recorded parents are exactly
# (NULL_REVISION,) has no real ancestry (it is a tree root).  Compared
# against directly in _find_known_ancestors.
NULL_PARENTS = (revision.NULL_REVISION,)
44
"""Create a query string representing N parameters.
50
return ', '.join('?'*n)
53
def _add_n_params(query, n):
    """Fill the single '%s' in *query* with n comma-separated parameters.

    The caller supplies a query containing exactly one '%s'; it is
    replaced by the '?, ?, ...' placeholder list built by _n_params.
    """
    return query % (_n_params(n),)
61
def _get_result_for_many(cursor, query, params):
62
"""Get the results for a query with a lot of parameters.
64
SQLite has a limit on how many parameters can be passed (often for a good
65
reason). However, we don't want to have to think about it right now. So
66
this just loops over params based on a maximum allowed per query. Then
67
return the whole result in one list.
69
:param query: An SQL query that should contain something like:
71
:param params: A list of parameters to supply to the function.
75
for start in xrange(0, len(params), MAX_COUNT):
76
next_params = params[start:start+MAX_COUNT]
78
cursor.execute(_add_n_params(query, len(next_params)),
79
next_params).fetchall())
83
class Importer(object):
84
"""Import data from bzr into the history_db."""
86
_MAINLINE_PARENT_RANGE_LEN = 100
88
def __init__(self, db_path, a_branch, tip_revision_id=None,
89
incremental=False, validate=False):
90
db_conn = dbapi2.connect(db_path)
91
self._incremental = incremental
92
self._validate = validate
93
self._db_conn = db_conn
95
self._cursor = self._db_conn.cursor()
96
self._branch = a_branch
97
if tip_revision_id is None:
98
self._branch_tip_rev_id = a_branch.last_revision()
100
self._branch_tip_rev_id = tip_revision_id
101
self._branch_tip_key = (self._branch_tip_rev_id,)
103
if not self._incremental:
105
self._rev_id_to_db_id = {}
106
self._db_id_to_rev_id = {}
107
self._stats = defaultdict(lambda: 0)
108
# Map child_id => [parent_db_ids]
109
self._db_parent_map = {}
111
def set_max_cache_size(self, size):
    """Set SQLite's internal page-cache budget.

    ``PRAGMA cache_size`` is measured in pages, so ``size`` is divided
    by the database's page size to get a page count -- i.e. the
    arithmetic treats ``size`` as a byte count.

    NOTE(review): the original docstring said "megabytes"; if callers
    really pass megabytes this drastically under-sizes the cache.
    Confirm what callers actually pass.
    """
    page_size = self._db_conn.execute('PRAGMA page_size').fetchone()[0]
    # Integer division (Python 2) yields the whole number of pages.
    self._db_conn.execute("PRAGMA cache_size = %d" % (size / page_size,))
117
def _ensure_schema(self):
    """Create the history_db tables if the database is still empty."""
    if schema.is_initialized(self._db_conn, dbapi2.OperationalError):
        return
    schema.create_sqlite_db(self._db_conn)
    if 'history_db' in debug.debug_flags:
        trace.note('history_db initialized database')
    # A freshly created schema contains no data, so there is nothing for
    # an incremental import to build on.
    #self._incremental = False
126
def _ensure_revisions(self, revision_ids):
    """Make sure every id in revision_ids is present in the revision table.

    Passes the two id-lookup caches and the graph straight through to
    schema.ensure_revisions (see history_db_schema for the allocation
    logic).
    """
    schema.ensure_revisions(self._cursor, revision_ids,
                            self._rev_id_to_db_id, self._db_id_to_rev_id,
                            self._graph)
131
def _ensure_graph(self):
132
if self._graph is not None:
134
repo = self._branch.repository
135
self._graph = repo.revisions.get_known_graph_ancestry(
136
[self._branch_tip_key])
138
def _is_imported(self, tip_rev_id):
    """Report whether tip_rev_id already has a dotted_revno self-entry.

    A revision imported as a tip has a dotted_revno row whose
    tip_revision and merged_revision both point at its own db id.
    """
    row = self._cursor.execute(
        "SELECT tip_revision FROM dotted_revno JOIN revision"
        " ON dotted_revno.tip_revision = revision.db_id"
        " WHERE revision_id = ?"
        " AND tip_revision = merged_revision",
        (tip_rev_id,)).fetchone()
    return row is not None
147
def _insert_nodes(self, tip_rev_id, nodes):
148
"""Insert all of the nodes mentioned into the database."""
149
self._stats['_insert_node_calls'] += 1
150
rev_to_db = self._rev_id_to_db_id
151
tip_db_id = rev_to_db[tip_rev_id]
152
self._stats['total_nodes_inserted'] += len(nodes)
154
st = static_tuple.StaticTuple
155
def build_revno_info():
156
for dist, node in enumerate(nodes):
157
# TODO: Do we need to track the 'end_of_merge' and 'merge_depth'
159
db_id = rev_to_db[node.key[0]]
160
revno_entries.append((tip_db_id,
162
'.'.join(map(str, node.revno)),
168
schema.create_dotted_revnos(self._cursor, revno_entries)
169
except dbapi2.IntegrityError:
170
# Revisions already exist
174
def _update_parents(self, nodes):
175
"""Update parent information for all these nodes."""
176
# Get the keys and their parents
177
parent_keys = self._graph.get_parent_keys
178
parent_map = dict([(n.key[0], [p[0] for p in parent_keys(n.key)])
180
self._insert_parent_map(parent_map)
182
def _insert_parent_map(self, parent_map):
183
"""Insert all the entries in this parent map into the parent table."""
184
r_to_d = self._rev_id_to_db_id
185
def _ensure_parent_map_keys():
186
rev_ids = set([r for r in parent_map if r not in r_to_d])
187
rev_ids_update = rev_ids.update
188
for parent_keys in parent_map.itervalues():
189
rev_ids_update([p for p in parent_keys if p not in r_to_d])
190
self._ensure_revisions(rev_ids)
191
_ensure_parent_map_keys()
193
stuple = static_tuple.StaticTuple.from_sequence
194
for rev_id, parent_ids in parent_map.iteritems():
195
db_id = r_to_d[rev_id]
196
if db_id in self._db_parent_map:
197
# This has already been imported, skip it
199
parent_db_ids = stuple([r_to_d[p_id] for p_id in parent_ids])
200
self._db_parent_map[db_id] = parent_db_ids
201
for idx, parent_db_id in enumerate(parent_db_ids):
202
data.append((db_id, parent_db_id, idx))
203
# Inserting the data in db-sorted order actually improves perf a fair
204
# amount. ~10%. My guess is that it keeps locality for uniqueness
207
self._cursor.executemany("INSERT OR IGNORE INTO parent"
208
" (child, parent, parent_idx)"
209
"VALUES (?, ?, ?)", data)
212
if revision.is_null(self._branch_tip_rev_id):
214
merge_sorted = self._import_tip(self._branch_tip_rev_id)
215
self._db_conn.commit()
217
def _get_merge_sorted_tip(self, tip_revision_id):
218
if self._incremental:
219
self._update_ancestry(tip_revision_id)
220
self._ensure_revisions([tip_revision_id])
221
tip_db_id = self._rev_id_to_db_id[tip_revision_id]
222
inc_merger = _IncrementalMergeSort(self, tip_db_id)
223
merge_sorted = inc_merger.topo_order()
224
# Map db_ids back to the keys that self._graph would generate
225
# Assert that the result is valid
228
actual_ms = self._graph.merge_sort((tip_revision_id,))
229
actual_ms_iter = iter(actual_ms)
231
actual_ms_iter = None
233
def assert_is_equal(x, y):
235
import pdb; pdb.set_trace()
236
db_to_rev = self._db_id_to_rev_id
237
for node in merge_sorted:
239
node.key = (db_to_rev[node.key],)
240
except KeyError: # Look this one up in the db
241
rev_res = self._cursor.execute(
242
"SELECT revision_id FROM revision WHERE db_id = ?",
243
(node.key,)).fetchone()
245
self._db_id_to_rev_id[node.key] = rev_id
246
self._rev_id_to_db_id[rev_id] = node.key
248
if actual_ms_iter is None:
250
actual_node = actual_ms_iter.next()
251
assert_is_equal(node.key, actual_node.key)
252
assert_is_equal(node.revno, actual_node.revno)
253
assert_is_equal(node.merge_depth, actual_node.merge_depth)
254
assert_is_equal(node.end_of_merge, actual_node.end_of_merge)
255
if actual_ms_iter is not None:
257
actual_node = actual_ms_iter.next()
258
except StopIteration:
259
# no problem they both say they've finished
262
# The next revision must have already been imported
263
assert self._is_imported(actual_node.key[0])
265
merge_sorted = self._graph.merge_sort((tip_revision_id,))
268
def _import_tip(self, tip_revision_id, suppress_progress_and_commit=False):
269
if suppress_progress_and_commit:
272
pb = ui.ui_factory.nested_progress_bar()
274
pb.update('getting merge_sorted')
275
merge_sorted = self._get_merge_sorted_tip(tip_revision_id)
276
if not self._incremental:
277
# If _incremental all the revisions will have already been ensured
278
# by the _update_ancestry code.
280
pb.update('allocating revisions', 0,
282
self._ensure_revisions([n.key[0] for n in merge_sorted])
284
pb.update('updating parents', 0,
286
self._update_parents(merge_sorted)
288
last_mainline_rev_id = None
290
for idx, node in enumerate(merge_sorted):
291
if pb is not None and idx & 0xFF == 0:
292
pb.update('importing', idx, len(merge_sorted))
293
if last_mainline_rev_id is None:
295
assert node.merge_depth == 0, \
296
"We did not start at a mainline?"
297
last_mainline_rev_id = node.key[0]
298
new_nodes.append(node)
300
if node.merge_depth == 0:
301
# We're at a new mainline. Insert the nodes for the
302
# previous mainline. If this has already been inserted, we
303
# assume all previous ones are also. Safe as long as we
304
# wait to commit() until we insert all parents.
305
if not self._insert_nodes(last_mainline_rev_id, new_nodes):
306
# This data has already been imported.
309
last_mainline_rev_id = node.key[0]
311
new_nodes.append(node)
313
assert last_mainline_rev_id is not None
314
self._insert_nodes(last_mainline_rev_id, new_nodes)
316
self._build_one_mainline(tip_revision_id)
322
def _update_ancestry(self, new_tip_rev_id):
323
"""Walk the parents of this tip, updating 'revision' and 'parent'
325
self._rev_id_to_db_id will be updated.
328
children) = self._find_known_ancestors(new_tip_rev_id)
329
self._compute_gdfo_and_insert(known, children, parent_map)
330
self._insert_parent_map(parent_map)
331
# This seems to slow things down a fair amount. On bzrtools, we end up
332
# calling it 75 times, and it ends up taking 800ms. vs a total rutime
333
# of 1200ms otherwise.
334
# self._db_conn.commit()
336
def _find_known_ancestors(self, new_tip_rev_id):
337
"""Starting at tip, find ancestors we already have"""
338
needed = [new_tip_rev_id]
339
all_needed = set(new_tip_rev_id)
343
pb = ui.ui_factory.nested_progress_bar()
346
pb.update('Finding ancestry', len(all_needed), len(all_needed))
347
rev_id = needed.pop()
349
# We may add particular parents multiple times, just ignore
350
# them once they've been found
352
res = self._cursor.execute(
353
"SELECT gdfo FROM revision WHERE revision_id = ?",
356
known[rev_id] = res[0]
358
# We don't have this entry recorded yet, add the parents to the
360
pmap = self._branch.repository.get_parent_map([rev_id])
361
parent_map.update(pmap)
362
parent_ids = pmap.get(rev_id, None)
363
if parent_ids is None or parent_ids == NULL_PARENTS:
364
# We can insert this rev directly, because we know its
365
# gdfo, as it has no parents.
366
parent_map[rev_id] = ()
367
self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"
368
" VALUES (?, ?)", (rev_id, 1))
369
# Wrap around to populate known quickly
370
needed.append(rev_id)
371
if parent_ids is None:
372
# This is a ghost, add it to the table
373
self._cursor.execute("INSERT INTO ghost (db_id)"
374
" SELECT db_id FROM revision"
375
" WHERE revision_id = ?",
378
for parent_id in pmap[rev_id]:
379
if parent_id not in known:
380
if parent_id not in all_needed:
381
needed.append(parent_id)
382
all_needed.add(parent_id)
383
children.setdefault(parent_id, []).append(rev_id)
386
return known, parent_map, children
388
def _compute_gdfo_and_insert(self, known, children, parent_map):
389
# At this point, we should have walked to all known parents, and should
390
# be able to build up the gdfo and parent info for all keys.
391
pending = [(gdfo, rev_id) for rev_id, gdfo in known.iteritems()]
393
gdfo, rev_id = pending.pop()
394
for child_id in children.get(rev_id, []):
395
if child_id in known:
396
# XXX: Already numbered?
397
assert known[child_id] > gdfo
399
parent_ids = parent_map[child_id]
401
for parent_id in parent_ids:
403
this_gdfo = known[parent_id]
405
# One parent hasn't been computed yet
407
if this_gdfo > max_gdfo:
410
# All parents have their gdfo known
411
# assert gdfo == max_gdfo
412
child_gdfo = max_gdfo + 1
413
known[child_id] = child_gdfo
414
self._cursor.execute(
415
"INSERT INTO revision (revision_id, gdfo)"
417
(child_id, child_gdfo))
418
# Put this into the pending queue so that *its* children
420
pending.append((child_gdfo, child_id))
421
if self._graph is not None:
422
for rev_id, gdfo in known.iteritems():
423
assert gdfo == self._graph._nodes[(rev_id,)].gdfo
425
def _get_db_id(self, revision_id):
426
db_res = self._cursor.execute('SELECT db_id FROM revision'
427
' WHERE revision_id = ?',
428
[revision_id]).fetchone()
433
def _update_dotted(self, new_tip_rev_id):
434
"""We have a new 'tip' revision, Update the dotted_revno table."""
435
# Just make sure the db has valid info for all the existing entries
436
self._update_ancestry(new_tip_rev_id)
438
def _get_mainline_range_count(self, head_db_id):
439
"""Does the given head_db_id already have a range defined using it."""
440
res = self._cursor.execute("SELECT pkey, count, tail"
441
" FROM mainline_parent_range"
443
" ORDER BY count DESC LIMIT 1",
444
[head_db_id]).fetchone()
446
return None, None, None
449
def _get_mainline_range(self, range_key):
450
"""Get the revisions in the mainline range specified."""
451
res = self._cursor.execute("SELECT revision FROM mainline_parent"
453
" ORDER BY dist DESC", [range_key])
454
return [r[0] for r in res.fetchall()]
456
def _get_lh_parent_db_id(self, revision_db_id):
457
parent_res = self._cursor.execute("""
460
WHERE parent.child = ?
462
LIMIT 1 -- hint to the db, should always be only 1
463
""", (revision_db_id,)).fetchone()
464
# self._stats['lh_parent_step'] += 1
465
if parent_res is None:
469
def _insert_range(self, range_db_ids, tail_db_id):
470
head_db_id = range_db_ids[0]
471
self._cursor.execute("INSERT INTO mainline_parent_range"
472
" (head, tail, count) VALUES (?, ?, ?)",
473
(head_db_id, tail_db_id, len(range_db_ids)))
474
# Note: This works for sqlite, does it work for pgsql?
475
range_key = self._cursor.lastrowid
476
self._stats['ranges_inserted'] += 1
477
# Note that 'tail' is explicitly not included in the range
478
self._stats['revs_in_ranges'] += len(range_db_ids)
479
self._cursor.executemany(
480
"INSERT INTO mainline_parent (range, revision, dist)"
482
[(range_key, d, idx) for idx, d in enumerate(range_db_ids)])
484
def _build_one_mainline(self, head_rev_id):
485
# 1) Walk backward until you find an existing entry in the
486
# mainline_parent_range table (or you reach the end)
487
# 2) If the range has less than X revisions, include it in the
488
# revisions to be added
489
# 3) chop the list into X revision sections, and insert them
491
# This should ensure that any given ancestry has at most 1 section
492
# which has less than X revisions, and it should preserve convergence.
493
self._ensure_revisions([head_rev_id])
494
cur_db_id = self._rev_id_to_db_id[head_rev_id]
496
while cur_db_id is not None:
497
(range_key, next_count,
498
tail) = self._get_mainline_range_count(cur_db_id)
499
if range_key is not None:
500
# This tip is already present in mainline_parent_range
503
and next_count < self._MAINLINE_PARENT_RANGE_LEN):
504
range_db_ids.extend(self._get_mainline_range(range_key))
508
range_db_ids.append(cur_db_id)
509
cur_db_id = self._get_lh_parent_db_id(cur_db_id)
510
# We now have a list of db ids that need to be split up into
513
tail_db_ids = range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:]
514
del range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:]
515
self._insert_range(tail_db_ids, cur_db_id)
516
cur_db_id = tail_db_ids[0]
519
class _MergeSortNode(object):
520
"""A simple object that represents one entry in the merge sorted graph."""
522
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge',
523
'_left_parent', '_left_pending_parent',
524
'_pending_parents', '_is_first',
527
def __init__(self, key, merge_depth, left_parent, pending_parents,
530
self.merge_depth = merge_depth
532
self.end_of_merge = None
533
self._left_parent = left_parent
534
self._left_pending_parent = left_parent
535
self._pending_parents = pending_parents
536
self._is_first = is_first
539
return '%s(%s, %s, %s, %s [%s %s %s %s])' % (
540
self.__class__.__name__,
541
self.key, self.revno, self.end_of_merge, self.merge_depth,
542
self._left_parent, self._left_pending_parent,
543
self._pending_parents, self._is_first)
546
class _IncrementalMergeSort(object):
547
"""Context for importing partial history."""
548
# Note: all of the ids in this object are database ids. the revision_ids
549
# should have already been imported before we get to this step.
551
def __init__(self, importer, tip_db_id):
552
self._importer = importer
553
self._tip_db_id = tip_db_id
554
self._mainline_db_ids = None
555
self._imported_mainline_id = None
556
self._cursor = importer._cursor
557
self._stats = importer._stats
560
self._known_gdfo = {}
561
# db_ids that we know are ancestors of mainline_db_ids that are not
562
# ancestors of pre_mainline_id
563
self._interesting_ancestor_ids = set()
565
# Information from the dotted_revno table for revisions that are in the
566
# already-imported mainline.
567
self._imported_dotted_revno = {}
568
# What dotted revnos have been loaded
569
self._known_dotted = set()
570
# This is the gdfo of the current mainline revision search tip. This is
571
# the threshold such that
572
self._imported_gdfo = None
574
# Revisions that we are walking, to see if they are interesting, or
576
self._search_tips = None
577
# mainline revno => number of child branches
578
self._revno_to_branch_count = {}
579
# (revno, branch_num) => oldest seen child
580
self._branch_to_child_count = {}
582
self._depth_first_stack = None
583
self._scheduled_stack = None
584
self._seen_parents = None
585
# Map from db_id => parent_ids
586
self._parent_map = self._importer._db_parent_map
588
# We just populate all known ghosts here.
589
# TODO: Ghosts are expected to be rare. If we find a case where probing
590
# for them at runtime is better than grabbing them all at once,
591
# re-evaluate this decision.
594
def _find_needed_mainline(self):
595
"""Find mainline revisions that need to be filled out.
597
:return: ([mainline_not_imported], most_recent_imported)
599
db_id = self._tip_db_id
601
while db_id is not None and not self._is_imported_db_id(db_id):
603
db_id = self._importer._get_lh_parent_db_id(db_id)
604
self._mainline_db_ids = needed
605
self._interesting_ancestor_ids.update(self._mainline_db_ids)
606
self._imported_mainline_id = db_id
608
def _get_initial_search_tips(self):
609
"""Grab the right-hand parents of all the interesting mainline.
611
We know we already searched all of the left-hand parents, so just grab
612
the right-hand parents.
614
# TODO: Split this into a loop, since sqlite has a maximum number of
616
res = _get_result_for_many(self._cursor,
617
"SELECT parent, gdfo FROM parent, revision"
618
" WHERE parent.parent = revision.db_id"
619
" AND parent_idx != 0"
620
" AND child IN (%s)",
621
self._mainline_db_ids)
622
self._search_tips = set([r[0] for r in res])
623
self._stats['num_search_tips'] += len(self._search_tips)
624
self._known_gdfo.update(res)
625
# We know that we will eventually need at least 1 step of the mainline
626
# (it gives us the basis for numbering everything). We do it now,
627
# because it increases the 'cheap' filtering we can do right away.
628
self._stats['step mainline initial'] += 1
629
self._step_mainline()
630
ghost_res = self._cursor.execute("SELECT db_id FROM ghost").fetchall()
631
self._ghosts = set([g[0] for g in ghost_res])
633
def _is_imported_db_id(self, tip_db_id):
634
res = self._cursor.execute(
635
"SELECT count(*) FROM dotted_revno"
636
" WHERE tip_revision = ?"
637
" AND tip_revision = merged_revision",
638
(tip_db_id,)).fetchone()
641
def _split_search_tips_by_gdfo(self, unknown):
642
"""For these search tips, mark ones 'interesting' based on gdfo.
644
All search tips are ancestors of _mainline_db_ids. So if their gdfo
645
indicates that they could not be merged into already imported
646
revisions, then we know automatically that they are
647
new-and-interesting. Further, if they are present in
648
_imported_dotted_revno, then we know they are not interesting, and
649
we will stop searching them.
651
Otherwise, we don't know for sure which category they fall into, so
652
we return them for further processing.
654
:return: still_unknown - search tips that aren't known to be
655
interesting, and aren't known to be in the ancestry of already
660
for db_id in unknown:
661
if (db_id in self._imported_dotted_revno
662
or db_id == self._imported_mainline_id):
663
# This should be removed as a search tip, we know it isn't
664
# interesting, it is an ancestor of an imported revision
665
self._stats['split already imported'] += 1
666
self._search_tips.remove(db_id)
668
gdfo = self._known_gdfo[db_id]
669
if gdfo >= self._imported_gdfo:
670
self._stats['split gdfo'] += 1
671
self._interesting_ancestor_ids.add(db_id)
673
still_unknown.append(db_id)
676
def _split_interesting_using_children(self, unknown_search_tips):
677
"""Find children of these search tips.
679
For each search tip, we find all of its known children. We then filter
681
a) child is ignored if child in _interesting_ancestor_ids
682
b) child is ignored if gdfo(child) > _imported_gdfo
683
or (gdfo(child) == _imported_gdfo and child !=
684
_imported_mainline_id)
685
The reason for the extra check is because for the ancestry
686
left-to-be-searched, with tip at _imported_mainline_id, *only*
687
_imported_mainline_id is allowed to have gdfo = _imported_gdfo.
688
for each search tip, if there are no interesting children, then this
689
search tip is definitely interesting (there is no path for it to be
690
merged into a previous mainline entry.)
692
:return: still_unknown
693
still_unknown are the search tips that are still have children that
694
could be possibly merged.
696
interesting = self._interesting_ancestor_ids
697
parent_child_res = self._cursor.execute(_add_n_params(
698
"SELECT parent, child FROM parent"
699
" WHERE parent IN (%s)",
700
len(unknown_search_tips)), unknown_search_tips).fetchall()
701
parent_to_children = {}
702
already_imported = set()
703
for parent, child in parent_child_res:
704
if (child in self._imported_dotted_revno
705
or child == self._imported_mainline_id):
706
# This child is already imported, so obviously the parent is,
708
self._stats['split child imported'] += 1
709
already_imported.add(parent)
710
already_imported.add(child)
711
parent_to_children.setdefault(parent, []).append(child)
712
self._search_tips.difference_update(already_imported)
713
possibly_merged_children = set(
714
[c for p, c in parent_child_res
715
if c not in interesting and p not in already_imported])
716
known_gdfo = self._known_gdfo
717
unknown_gdfos = [c for c in possibly_merged_children
718
if c not in known_gdfo]
719
# TODO: Is it more wasteful to join this table early, or is it better
720
# because we avoid having to pass N parameters back in?
721
# TODO: Would sorting the db ids help? They are the primary key for the
722
# table, so we could potentially fetch in a better order.
724
res = self._cursor.execute(_add_n_params(
725
"SELECT db_id, gdfo FROM revision WHERE db_id IN (%s)",
726
len(unknown_gdfos)), unknown_gdfos)
727
known_gdfo.update(res)
728
min_gdfo = self._imported_gdfo
729
# Remove all of the children who have gdfo >= min. We already handled
730
# the == min case in the first loop.
731
possibly_merged_children.difference_update(
732
[c for c in possibly_merged_children if known_gdfo[c] >= min_gdfo])
734
for parent in unknown_search_tips:
735
if parent in already_imported:
736
self._stats['split parent imported'] += 1
738
for c in parent_to_children[parent]:
739
if c in possibly_merged_children:
740
still_unknown.append(parent)
742
else: # All children could not be possibly merged
743
self._stats['split children interesting'] += 1
744
interesting.add(parent)
747
def _step_mainline(self):
748
"""Move the mainline pointer by one, updating the data."""
749
self._stats['step mainline'] += 1
750
res = self._cursor.execute(
751
"SELECT merged_revision, revno, end_of_merge, merge_depth"
752
" FROM dotted_revno WHERE tip_revision = ? ORDER BY dist",
753
[self._imported_mainline_id]).fetchall()
754
stuple = static_tuple.StaticTuple.from_sequence
755
st = static_tuple.StaticTuple
756
dotted_info = [st(r[0], st(stuple(map(int, r[1].split('.'))),
759
self._stats['step mainline cache missed'] += 1
760
self._stats['step mainline added'] += len(dotted_info)
761
self._update_info_from_dotted_revno(dotted_info)
762
# TODO: We could remove search tips that show up as newly merged
763
# though that can wait until the next
764
# _split_search_tips_by_gdfo
765
# new_merged_ids = [r[0] for r in res]
766
res = self._cursor.execute("SELECT parent, gdfo"
767
" FROM parent, revision"
768
" WHERE parent = db_id"
769
" AND parent_idx = 0"
771
[self._imported_mainline_id]).fetchone()
773
# Walked off the mainline...
774
# TODO: Make sure this stuff is tested
775
self._imported_mainline_id = None
776
self._imported_gdfo = 0
778
self._imported_mainline_id, self._imported_gdfo = res
779
self._known_gdfo[self._imported_mainline_id] = self._imported_gdfo
781
def _step_search_tips(self):
782
"""Move the search tips to their parents."""
783
self._stats['step search tips'] += 1
784
res = _get_result_for_many(self._cursor,
785
"SELECT parent, gdfo FROM parent, revision"
786
" WHERE parent=db_id AND child IN (%s)",
787
list(self._search_tips))
788
# TODO: We could use this time to fill out _parent_map, rather than
789
# waiting until _push_node and duplicating a request to the
790
# parent table. It may be reasonable to wait on gdfo also...
792
# Filter out search tips that we've already searched via a different
793
# path. By construction, if we are stepping the search tips, we know
794
# that all previous search tips are either in
795
# self._imported_dotted_revno or in self._interesting_ancestor_ids.
796
# _imported_dotted_revno will be filtered in the first
797
# _split_search_tips_by_gdfo call, so we just filter out already
799
interesting = self._interesting_ancestor_ids
800
self._search_tips = set([r[0] for r in res if r[0] not in interesting])
801
# TODO: For search tips we will be removing, we don't need to join
802
# against revision since we should already have them. There may
803
# be other ways that we already know gdfo. It may be cheaper to
805
self._stats['num_search_tips'] += len(self._search_tips)
806
self._known_gdfo.update(res)
808
def _ensure_lh_parent_info(self):
809
"""LH parents of interesting_ancestor_ids is either present or pending.
811
Either the data should be in _imported_dotted_revno, or the lh parent
812
should be in interesting_ancestor_ids (meaning we will number it).
814
#XXX REMOVE: pmap = self._parent_map
815
missing_parent_ids = set()
816
for db_id in self._interesting_ancestor_ids:
817
parent_ids = self._get_parents(db_id)
818
if not parent_ids: # no parents, nothing to add
820
lh_parent = parent_ids[0]
821
if lh_parent in self._interesting_ancestor_ids:
823
if lh_parent in self._imported_dotted_revno:
825
missing_parent_ids.add(lh_parent)
826
missing_parent_ids.difference_update(self._ghosts)
827
while missing_parent_ids:
828
self._stats['step mainline ensure LH'] += 1
829
self._step_mainline()
830
missing_parent_ids = missing_parent_ids.difference(
831
self._imported_dotted_revno)
833
def _find_interesting_ancestry(self):
834
self._find_needed_mainline()
835
self._get_initial_search_tips()
836
while self._search_tips:
837
# We don't know whether these search tips are known interesting, or
838
# known uninteresting
839
unknown = list(self._search_tips)
841
unknown = self._split_search_tips_by_gdfo(unknown)
844
unknown = self._split_interesting_using_children(unknown)
847
# The current search tips are the 'newest' possible tips right
848
# now. If we can't classify them as definitely being
849
# interesting, then we need to step the mainline until we can.
850
# This means that the current search tips have children that
851
# could be merged into an earlier mainline, walk the mainline
852
# to see if we can resolve that.
853
# Note that relying strictly on gdfo is a bit of a waste here,
854
# because you may have a rev with 10 children before it lands
855
# in mainline, but all 11 revs will be in the dotted_revno
856
# cache for that mainline.
857
self._stats['step mainline unknown'] += 1
858
self._step_mainline()
859
# All search_tips are known to either be interesting or
860
# uninteresting. Walk any search tips that remain.
861
self._step_search_tips()
862
# We're now sure we have all of the now-interesting revisions. To
863
# number them, we need their left-hand parents to be in
864
# _imported_dotted_revno
865
self._ensure_lh_parent_info()
867
def _update_info_from_dotted_revno(self, dotted_info):
868
"""Update info like 'child_seen' from the dotted_revno info."""
869
# TODO: We can move this iterator into a parameter, and have it
870
# continuously updated from _step_mainline()
871
self._imported_dotted_revno.update(dotted_info)
872
self._known_dotted.update([i[1][0] for i in dotted_info])
873
for db_id, (revno, eom, depth) in dotted_info:
874
if len(revno) > 1: # dotted revno, make sure branch count is right
875
base_revno = revno[0]
876
if (base_revno not in self._revno_to_branch_count
877
or revno[1] > self._revno_to_branch_count[base_revno]):
878
self._revno_to_branch_count[base_revno] = revno[1]
879
branch_key = revno[:2]
880
mini_revno = revno[2]
884
mini_revno = revno[0]
885
# We found a mainline branch, make sure it is marked as such
886
self._revno_to_branch_count.setdefault(0, 0)
887
if (branch_key not in self._branch_to_child_count
888
or mini_revno > self._branch_to_child_count[branch_key]):
889
self._branch_to_child_count[branch_key] = mini_revno
891
def _is_first_child(self, parent_id):
892
"""Is this the first child seen for the given parent?"""
893
if parent_id in self._seen_parents:
895
# We haven't seen this while walking, but perhaps the already merged
897
self._seen_parents.add(parent_id)
898
if parent_id not in self._imported_dotted_revno:
899
# Haven't seen this parent merged before, so we can't have seen
902
revno = self._imported_dotted_revno[parent_id][0]
904
branch_key = revno[:2]
905
mini_revno = revno[2]
908
mini_revno = revno[0]
909
if self._branch_to_child_count.get(branch_key, 0) > mini_revno:
910
# This revision shows up in the graph, but another revision in this
911
# branch shows up later, so this revision must have already been
914
# If we got this far, it doesn't appear to have been seen.
917
def _get_parents(self, db_id):
918
if db_id in self._parent_map:
919
parent_ids = self._parent_map[db_id]
921
parent_res = self._cursor.execute(
922
"SELECT parent FROM parent WHERE child = ?"
923
" ORDER BY parent_idx", (db_id,)).fetchall()
924
parent_ids = static_tuple.StaticTuple.from_sequence(
925
[r[0] for r in parent_res])
926
self._parent_map[db_id] = parent_ids
929
def _push_node(self, db_id, merge_depth):
930
# TODO: Check if db_id is a ghost (not allowed on the stack)
931
self._stats['pushed'] += 1
932
if db_id not in self._interesting_ancestor_ids:
933
# This is a parent that we really don't need to number
934
self._stats['pushed uninteresting'] += 1
936
parent_ids = self._get_parents(db_id)
937
if len(parent_ids) <= 0:
939
# We are dealing with a 'new root' possibly because of a ghost,
940
# possibly because of merging a new ancestry.
941
# KnownGraph.merge_sort just always says True here, so stick with
945
left_parent = parent_ids[0]
946
if left_parent in self._ghosts:
950
is_first = self._is_first_child(left_parent)
951
# Note: we don't have to filter out self._ghosts here, as long as we do
953
pending_parents = static_tuple.StaticTuple.from_sequence(
954
[p for p in parent_ids[1:] if p not in self._ghosts])
955
# v- logically probably better as a tuple or object. We currently
956
# modify it in place, so we use a list
957
self._depth_first_stack.append(
958
_MergeSortNode(db_id, merge_depth, left_parent, pending_parents,
961
def _step_to_latest_branch(self, base_revno):
962
"""Step the mainline until we've loaded the latest sub-branch.
964
This is used when we need to create a new child branch. We need to
965
ensure that we've loaded the most-recently-merged branch, so that we
966
can generate the correct branch counter.
968
For example, if you have a revision whose left-hand parent is 1.2.3,
969
you need to load mainline revisions until you find some revision like
970
(1.?.1). This will ensure that you have the most recent branch merged
971
to mainline that was branched from the revno=1 revision in mainline.
973
Note that if we find (1,3,1) before finding (1,2,1) that is ok. As long
974
as we have found the first revision of any sub-branch, we know that
975
we've found the most recent (since we walk backwards).
977
:param base_revno: The revision that this branch is based on. 0 means
978
that this is a new-root branch.
981
self._stats['step to latest'] += 1
983
start_point = self._imported_mainline_id
985
while self._imported_mainline_id is not None:
986
if (base_revno,) in self._known_dotted:
987
# We have walked far enough to load the original revision,
988
# which means we've loaded all children.
989
self._stats['step to latest found base'] += 1
990
found = (base_revno,)
992
# Estimate what is the most recent branch, and see if we have read
994
branch_count = self._revno_to_branch_count.get(base_revno, 0)
995
root_of_branch_revno = (base_revno, branch_count, 1)
996
# Note: if branch_count == 0, that means we haven't seen any
997
# other branches for this revision.
998
if root_of_branch_revno in self._known_dotted:
999
found = root_of_branch_revno
1001
self._stats['step mainline to-latest'] += 1
1003
self._stats['step mainline to-latest NULL'] += 1
1004
self._step_mainline()
1007
def _pop_node(self):
    """Move the last node from the _depth_first_stack to _scheduled_stack.

    This is the most left-hand node that we are able to find. As it is
    scheduled, the node is assigned its dotted revno and its end_of_merge
    flag, and both are recorded in _imported_dotted_revno/_known_dotted.
    """
    node = self._depth_first_stack.pop()
    if node._left_parent is not None:
        parent_revno = self._imported_dotted_revno[node._left_parent][0]
        if node._is_first: # We simply number as parent + 1
            if len(parent_revno) == 1:
                mini_revno = parent_revno[0] + 1
                revno = (mini_revno,)
                # Not sure if we *have* to maintain it, but it does keep
                # our data-structures consistent
                if mini_revno > self._branch_to_child_count[0]:
                    self._branch_to_child_count[0] = mini_revno
            else:
                revno = parent_revno[:2] + (parent_revno[2] + 1,)
        else:
            # we need a new branch number. To get this correct, we have to
            # make sure that the beginning of this branch has been loaded
            if len(parent_revno) > 1:
                # if parent_revno is a mainline, then
                # _ensure_lh_parent_info should have already loaded enough
                # data. So we only do this when the parent is a merged
                # revision.
                self._step_to_latest_branch(parent_revno[0])
            base_revno = parent_revno[0]
            branch_count = (
                self._revno_to_branch_count.get(base_revno, 0) + 1)
            self._revno_to_branch_count[base_revno] = branch_count
            revno = (base_revno, branch_count, 1)
    else:
        # We found a new root. There are 2 cases:
        #  a) This is the very first revision in the branch. In which case
        #     self._revno_to_branch_count won't have any counts for
        #     'revno' 0.
        #  b) This is a ghost / the first revision in a branch that was
        #     merged. We need to allocate a new branch number.
        # This distinction is pretty much the same as the 'is_first'
        # check for revs with a parent if you consider the NULL revision
        # the common base.
        # We have to walk back far enough to be sure that we have the
        # most-recent merged new-root. This can be detected by finding
        # any new root's first revision. And, of course, we should find
        # the last one first while walking backwards.
        # When you see (0,X,1) you've reached the point where the X
        # number was chosen. A hypothetical (0,X+1,1) revision would
        # only be numbered X+1 if it was merged after (0,X,1). Thus
        # the *first* (0,?,1) revision you find merged must be the
        # most recent.
        self._step_to_latest_branch(0)
        branch_count = self._revno_to_branch_count.get(0, -1) + 1
        self._revno_to_branch_count[0] = branch_count
        if branch_count == 0: # This is the mainline
            revno = (1,)
            self._branch_to_child_count[0] = 1
        else:
            revno = (0, branch_count, 1)
    if not self._scheduled_stack:
        # For all but mainline revisions, we break on the end-of-merge. So
        # when we start new numbering, end_of_merge is True. For mainline
        # revisions, this is only true when we don't have a parent.
        end_of_merge = True
        if node._left_parent is not None and node.merge_depth == 0:
            end_of_merge = False
    else:
        prev_node = self._scheduled_stack[-1]
        if prev_node.merge_depth < node.merge_depth:
            end_of_merge = True
        elif (prev_node.merge_depth == node.merge_depth
              and prev_node.key not in self._parent_map[node.key]):
            # Next node is not a direct parent
            end_of_merge = True
        else:
            end_of_merge = False
    revno = static_tuple.StaticTuple.from_sequence(revno)
    node.end_of_merge = end_of_merge
    self._imported_dotted_revno[node.key] = static_tuple.StaticTuple(
        revno, end_of_merge, node.merge_depth)
    self._known_dotted.add(revno)
    node._pending_parents = None
    self._scheduled_stack.append(node)
def _compute_merge_sort(self):
1095
self._depth_first_stack = []
1096
self._scheduled_stack = []
1097
self._seen_parents = set()
1098
if not self._mainline_db_ids:
1101
self._push_node(self._mainline_db_ids[0], 0)
1103
while self._depth_first_stack:
1104
last = self._depth_first_stack[-1]
1105
if last._left_pending_parent is None and not last._pending_parents:
1106
# The parents have been processed, pop the node
1109
while (last._left_pending_parent is not None
1110
or last._pending_parents):
1111
if last._left_pending_parent is not None:
1112
# Push on the left-hand-parent
1113
next_db_id = last._left_pending_parent
1114
last._left_pending_parent = None
1116
pending_parents = last._pending_parents
1117
next_db_id = pending_parents[-1]
1118
last._pending_parents = pending_parents[:-1]
1119
if next_db_id in self._imported_dotted_revno:
1121
if next_db_id == last._left_parent: #Is the left-parent?
1122
next_merge_depth = last.merge_depth
1124
next_merge_depth = last.merge_depth + 1
1125
self._push_node(next_db_id, next_merge_depth)
1126
# And switch to the outer loop
1129
def topo_order(self):
    """Return the merge-sorted nodes, oldest-first.

    _compute_merge_sort schedules nodes newest-first, so the scheduled
    stack is reversed before returning.
    """
    self._find_interesting_ancestry()
    self._compute_merge_sort()
    return list(reversed(self._scheduled_stack))
class Querier(object):
    """Perform queries on an existing history db.

    Wraps a sqlite connection to the history database and answers
    revno/revision-id questions for a single branch, importing the branch
    tip on demand via Importer.
    """

    def __init__(self, db_path, a_branch):
        self._db_path = db_path
        self._db_conn = None
        self._cursor = None
        self._importer_lock = None
        self._branch = a_branch
        self._branch_tip_rev_id = a_branch.last_revision()
        self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
        self._tip_is_imported = False
        self._stats = defaultdict(lambda: 0)

    def set_importer_lock(self, lock):
        """Add a thread-lock for building and running an Importer.

        The DB back-end is generally single-writer, so add a thread lock to
        avoid having two writers trying to access it at the same time.

        This will be used as part of _import_tip. Note that it doesn't (yet?)
        support anything like timeout.
        """
        self._importer_lock = lock

    def _get_cursor(self):
        """Return a cursor, lazily opening the db connection."""
        if self._cursor is not None:
            return self._cursor
        db_conn = dbapi2.connect(self._db_path)
        self._db_conn = db_conn
        self._cursor = self._db_conn.cursor()
        return self._cursor

    def ensure_branch_tip(self):
        """Ensure that the branch tip has been imported.

        This will run Importer if it has not.
        """
        if self._branch_tip_db_id is not None and self._tip_is_imported:
            return
        if self._branch_tip_db_id is None:
            # This revision has not been seen by the DB, so we know it isn't
            # imported yet.
            self._import_tip()
            return
        if self._is_imported_db_id(self._branch_tip_db_id):
            # This revision was seen, and imported
            self._tip_is_imported = True
            return
        self._import_tip()

    def _import_tip(self):
        """Run an Importer for the branch tip, then adopt its connection."""
        if self._cursor is not None:
            # Drop our connection so the (single-writer) importer can have it
            self.close()
        if self._importer_lock is not None:
            self._importer_lock.acquire()
        try:
            t = time.time()
            # NOTE(review): trailing Importer kwargs reconstructed — confirm
            # against upstream loggerhead.
            importer = Importer(self._db_path, self._branch,
                                tip_revision_id=self._branch_tip_rev_id,
                                incremental=True)
            importer.do_import()
            tdelta = time.time() - t
            if 'history_db' in debug.debug_flags:
                trace.note('imported %d nodes on-the-fly in %.3fs'
                           % (importer._stats.get('total_nodes_inserted', 0),
                              tdelta))
            self._db_conn = importer._db_conn
            self._cursor = importer._cursor
            self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
            self._tip_is_imported = True
        finally:
            if self._importer_lock is not None:
                self._importer_lock.release()

    def _is_imported_db_id(self, tip_db_id):
        """Return True if tip_db_id has a dotted_revno entry for itself."""
        res = self._get_cursor().execute(
            "SELECT count(*) FROM dotted_revno"
            " WHERE tip_revision = ?"
            " AND tip_revision = merged_revision",
            (tip_db_id,)).fetchone()
        return res[0] > 0

    def close(self):
        """Close the db connection, if open."""
        if self._db_conn is not None:
            self._db_conn.close()
            self._db_conn = None
            self._cursor = None

    def _get_db_id(self, revision_id):
        """Map revision_id -> db_id, or None if unknown / db uninitialized."""
        try:
            db_res = self._get_cursor().execute(
                'SELECT db_id FROM revision'
                ' WHERE revision_id = ?',
                [revision_id]).fetchone()
        except dbapi2.OperationalError:
            # The schema hasn't been created yet (no `revision` table)
            return None
        if db_res is None:
            return None
        return db_res[0]

    def get_lh_parent_rev_id(self, revision_id):
        """Return the left-hand parent revision_id, or None."""
        parent_res = self._get_cursor().execute("""
            SELECT p.revision_id
            FROM parent, revision as c, revision as p
            WHERE parent.child = c.db_id
            AND parent.parent = p.db_id
            AND c.revision_id = ?
            AND parent.parent_idx = 0
            """, (revision_id,)).fetchone()
        self._stats['lh_parent_step'] += 1
        if parent_res is None:
            return None
        return parent_res[0]

    def get_children(self, revision_id):
        """Returns all the children the db knows about for this revision_id.

        (we should probably try to filter it based on ancestry of
        self._branch_tip_rev_id...)
        """
        # One option for people who care, is to just have them turn around a
        # request for get_dotted_revnos(), and things that aren't there are not
        # in the ancestry.
        cursor = self._get_cursor()
        res = cursor.execute("SELECT c.revision_id"
                             " FROM revision p, parent, revision c"
                             " WHERE child = c.db_id"
                             " AND parent = p.db_id"
                             " AND p.revision_id = ?",
                             (revision_id,)).fetchall()
        return [r[0] for r in res]

    def _get_lh_parent_db_id(self, revision_db_id):
        """Return the left-hand parent db_id, or None."""
        parent_res = self._get_cursor().execute("""
            SELECT parent.parent
            FROM parent
            WHERE parent.child = ?
            AND parent.parent_idx = 0
            """, (revision_db_id,)).fetchone()
        self._stats['lh_parent_step'] += 1
        if parent_res is None:
            return None
        return parent_res[0]

    def _get_range_key_and_tail(self, tip_db_id):
        """Return the best range w/ head = tip_db_id or None.

        :return: (range_key_or_None, next_db_id). When no range exists,
            next_db_id is the left-hand parent instead of the range tail.
        """
        range_res = self._get_cursor().execute(
            "SELECT pkey, tail"
            " FROM mainline_parent_range"
            " WHERE head = ?"
            " ORDER BY count DESC LIMIT 1",
            (tip_db_id,)).fetchone()
        if range_res is None:
            tail = self._get_lh_parent_db_id(tip_db_id)
            return None, tail
        return range_res

    def get_dotted_revnos(self, revision_ids):
        """Determine the dotted revno, using the range info, etc."""
        self.ensure_branch_tip()
        t = time.time()
        cursor = self._get_cursor()
        tip_db_id = self._branch_tip_db_id
        if tip_db_id is None:
            return {}
        db_ids = set()
        db_id_to_rev_id = {}
        for rev_id in revision_ids:
            db_id = self._get_db_id(rev_id)
            if db_id is None:
                # TODO(review): debugger hook left in upstream; should
                # probably skip/raise instead of dropping into pdb.
                import pdb; pdb.set_trace()
            db_ids.add(db_id)
            db_id_to_rev_id[db_id] = rev_id
        revnos = {}
        while tip_db_id is not None and db_ids:
            self._stats['num_steps'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
            if range_key is None:
                revno_res = cursor.execute(_add_n_params(
                    "SELECT merged_revision, revno FROM dotted_revno"
                    " WHERE tip_revision = ?"
                    " AND merged_revision IN (%s)",
                    len(db_ids)),
                    [tip_db_id] + list(db_ids)).fetchall()
            else:
                revno_res = cursor.execute(_add_n_params(
                    "SELECT merged_revision, revno"
                    " FROM dotted_revno, mainline_parent"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND mainline_parent.range = ?"
                    " AND merged_revision IN (%s)",
                    len(db_ids)),
                    [range_key] + list(db_ids)).fetchall()
            tip_db_id = next_db_id
            for db_id, revno in revno_res:
                db_ids.discard(db_id)
                revnos[db_id_to_rev_id[db_id]] = tuple(map(int,
                    revno.split('.')))
        self._stats['query_time'] += (time.time() - t)
        return revnos

    def get_revision_ids(self, revnos):
        """Map from a dotted-revno back into a revision_id."""
        self.ensure_branch_tip()
        t = time.time()
        tip_db_id = self._branch_tip_db_id
        # TODO: If tip_db_id is None, maybe we want to raise an exception here?
        # To indicate that the branch has not been imported yet
        revno_strs = set(['.'.join(map(str, revno)) for revno in revnos])
        revno_map = {}
        cursor = self._get_cursor()
        while tip_db_id is not None and revno_strs:
            self._stats['num_steps'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
            if range_key is None:
                revision_res = cursor.execute(_add_n_params(
                    "SELECT revision_id, revno"
                    " FROM dotted_revno, revision"
                    " WHERE merged_revision = revision.db_id"
                    " AND tip_revision = ?"
                    " AND revno IN (%s)", len(revno_strs)),
                    [tip_db_id] + list(revno_strs)).fetchall()
            else:
                revision_res = cursor.execute(_add_n_params(
                    "SELECT revision_id, revno"
                    " FROM dotted_revno, mainline_parent, revision"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND merged_revision = revision.db_id"
                    " AND mainline_parent.range = ?"
                    " AND revno IN (%s)", len(revno_strs)),
                    [range_key] + list(revno_strs)).fetchall()
            tip_db_id = next_db_id
            for revision_id, revno_str in revision_res:
                dotted = tuple(map(int, revno_str.split('.')))
                revno_strs.discard(revno_str)
                revno_map[dotted] = revision_id
        self._stats['query_time'] += (time.time() - t)
        return revno_map

    def get_mainline_where_merged(self, revision_ids):
        """Determine what mainline revisions merged the given revisions."""
        self.ensure_branch_tip()
        t = time.time()
        tip_db_id = self._branch_tip_db_id
        if tip_db_id is None:
            return {}
        cursor = self._get_cursor()
        db_ids = set()
        db_id_to_rev_id = {}
        for rev_id in revision_ids:
            db_id = self._get_db_id(rev_id)
            if db_id is None:
                # TODO(review): debugger hook left in upstream; should
                # probably skip/raise instead of dropping into pdb.
                import pdb; pdb.set_trace()
            db_ids.add(db_id)
            db_id_to_rev_id[db_id] = rev_id
        revision_to_mainline_map = {}
        while tip_db_id is not None and db_ids:
            self._stats['num_steps'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
            if range_key is None:
                mainline_res = cursor.execute(_add_n_params(
                    "SELECT revision_id, merged_revision"
                    " FROM dotted_revno, revision"
                    " WHERE tip_revision = ?"
                    " AND tip_revision = revision.db_id"
                    " AND merged_revision IN (%s)",
                    len(db_ids)),
                    [tip_db_id] + list(db_ids)).fetchall()
            else:
                mainline_res = cursor.execute(_add_n_params(
                    "SELECT revision_id, merged_revision"
                    " FROM dotted_revno, mainline_parent, revision"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND tip_revision = db_id"
                    " AND mainline_parent.range = ?"
                    " AND merged_revision IN (%s)",
                    len(db_ids)),
                    [range_key] + list(db_ids)).fetchall()
            tip_db_id = next_db_id
            for mainline_revision_id, merged_db_id in mainline_res:
                db_ids.discard(merged_db_id)
                revision_to_mainline_map[db_id_to_rev_id[merged_db_id]] = \
                    mainline_revision_id
        self._stats['query_time'] += (time.time() - t)
        return revision_to_mainline_map

    def _get_mainline_range_starting_at(self, head_db_id):
        """Try to find a range at this tip.

        If a range cannot be found, just find the next parent.
        :return: (range_or_None, next_db_id)
        """
        cursor = self._get_cursor()
        # Was referencing an undefined 'tip_db_id'; the parameter is
        # head_db_id.
        range_key, next_db_id = self._get_range_key_and_tail(head_db_id)
        if range_key is None:
            return None, next_db_id
        # TODO: Is ORDER BY dist ASC expensive? We know a priori that the list
        # is probably already in sorted order, but does sqlite know that?
        range_db_ids = cursor.execute(
            "SELECT revision FROM mainline_parent"
            " WHERE range = ? ORDER BY dist ASC",
            (range_key,)).fetchall()
        db_ids = [r[0] for r in range_db_ids]
        return db_ids, next_db_id

    def walk_mainline(self):
        """Walk all mainline db_ids, using ranges where available."""
        t = time.time()
        db_id = self._get_db_id(self._branch_tip_rev_id)
        all_ids = []
        while db_id is not None:
            self._stats['num_steps'] += 1
            next_range, next_db_id = self._get_mainline_range_starting_at(db_id)
            if next_range is None:
                # No range, so switch to using by-parent search
                all_ids.append(db_id)
            else:
                assert next_range[0] == db_id
                all_ids.extend(next_range)
            db_id = next_db_id
        self._stats['query_time'] += (time.time() - t)
        return all_ids

    def walk_ancestry(self):
        """Walk the whole ancestry.

        Use the information from the dotted_revno table and the mainline_parent
        table to speed things up.
        """
        db_id = self._get_db_id(self._branch_tip_rev_id)
        all_ancestors = set()
        t = time.time()
        cursor = self._get_cursor()
        while db_id is not None:
            self._stats['num_steps'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(db_id)
            if range_key is None:
                merged_revs = cursor.execute(
                    "SELECT merged_revision FROM dotted_revno"
                    " WHERE tip_revision = ?",
                    (db_id,)).fetchall()
                all_ancestors.update([r[0] for r in merged_revs])
            else:
                merged_revs = cursor.execute(
                    "SELECT merged_revision FROM dotted_revno, mainline_parent"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND mainline_parent.range = ?",
                    [range_key]).fetchall()
                all_ancestors.update([r[0] for r in merged_revs])
            db_id = next_db_id
        self._stats['query_time'] += (time.time() - t)
        return all_ancestors

    def _find_tip_containing(self, tip_db_id, merged_db_id):
        """Walk backwards until you find the tip that contains the given id."""
        cursor = self._get_cursor()
        while tip_db_id is not None:
            if tip_db_id == merged_db_id:
                # A tip obviously contains itself
                self._stats['step_find_tip_as_merged'] += 1
                return tip_db_id
            self._stats['num_steps'] += 1
            self._stats['step_find_tip_containing'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
            if range_key is None:
                present_res = cursor.execute(
                    "SELECT 1 FROM dotted_revno"
                    " WHERE tip_revision = ?"
                    " AND merged_revision = ?",
                    [tip_db_id, merged_db_id]).fetchone()
            else:
                present_res = cursor.execute(
                    "SELECT 1"
                    " FROM dotted_revno, mainline_parent"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND mainline_parent.range = ?"
                    " AND merged_revision = ?",
                    [range_key, merged_db_id]).fetchone()
            if present_res is not None:
                # We found a tip that contains merged_db_id
                return tip_db_id
            tip_db_id = next_db_id
        # Walked off the history without finding it
        return None

    def _get_merge_sorted_range(self, tip_db_id, start_db_id, stop_db_id):
        """Starting at the given tip, read all merge_sorted data until stop.

        Generator of (revision_id, merge_depth, revno_tuple, end_of_merge);
        stop_db_id is exclusive.
        """
        if start_db_id is None or start_db_id == tip_db_id:
            found_start = True
        else:
            found_start = False
        cursor = self._get_cursor()
        while tip_db_id is not None:
            self._stats['num_steps'] += 1
            self._stats['step_get_merge_sorted'] += 1
            range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
            if range_key is None:
                merged_res = cursor.execute(
                    "SELECT db_id, revision_id, merge_depth, revno,"
                    "       end_of_merge"
                    " FROM dotted_revno, revision"
                    " WHERE tip_revision = ?"
                    " AND db_id = merged_revision"
                    " ORDER BY dist",
                    (tip_db_id,)).fetchall()
            else:
                # NOTE: Adding the ORDER BY costs us 981ms - 844ms = 137ms when
                # doing 'bzr log -n0 -r -10..-1' on bzr.dev.
                # That seems like a lot. Extracting them without sorting
                # on them costs about the same amount. So the guess is
                # that adding the extra columns requires more I/O.
                # At the moment, SELECT order == INSERT order, so we don't
                # strictly need it. I don't know that we can trust that,
                # though.
                merged_res = cursor.execute(
                    "SELECT db_id, revision_id, merge_depth, revno,"
                    "       end_of_merge"
                    # " , mainline_parent.dist as mp_dist"
                    # " , dotted_revno.dist as dr_dist"
                    " FROM dotted_revno, revision, mainline_parent"
                    " WHERE tip_revision = mainline_parent.revision"
                    " AND mainline_parent.range = ?"
                    " AND db_id = merged_revision",
                    # " ORDER BY mainline_parent.dist, dotted_revno.dist",
                    [range_key]).fetchall()
            if found_start:
                for db_id, r_id, depth, revno_str, eom in merged_res:
                    if stop_db_id is not None and db_id == stop_db_id:
                        return
                    revno = tuple(map(int, revno_str.split('.')))
                    yield r_id, depth, revno, eom
            else:
                for info in merged_res:
                    if not found_start and info[0] == start_db_id:
                        found_start = True
                    if found_start:
                        if stop_db_id is not None and info[0] == stop_db_id:
                            return
                        db_id, r_id, depth, revno_str, eom = info
                        revno = tuple(map(int, revno_str.split('.')))
                        yield r_id, depth, revno, eom
            tip_db_id = next_db_id

    def iter_merge_sorted_revisions(self, start_revision_id=None,
                                    stop_revision_id=None):
        """See Branch.iter_merge_sorted_revisions()

        Note that start and stop differ from the Branch implementation, because
        stop is *always* exclusive. You can simulate the rest by careful
        selection of stop.
        """
        self.ensure_branch_tip()
        t = time.time()
        tip_db_id = self._branch_tip_db_id
        if tip_db_id is None:
            return []
        if start_revision_id is not None:
            start_db_id = self._get_db_id(start_revision_id)
        else:
            start_db_id = tip_db_id
        stop_db_id = None
        if stop_revision_id is not None:
            stop_db_id = self._get_db_id(stop_revision_id)
        # Seek fast until we find start_db_id
        tip_db_id = self._find_tip_containing(tip_db_id, start_db_id)
        # Now that you have found the first tip containing the given start
        # revision, pull in data until you walk off the history, or you find
        # the stop revision
        merge_sorted = list(
            self._get_merge_sorted_range(tip_db_id, start_db_id, stop_db_id))
        self._stats['query_time'] += (time.time() - t)
        return merge_sorted

    def heads(self, revision_ids):
        """Compute Graph.heads() on the given data."""
        raise NotImplementedError(self.heads)