201
209
self._lock.release()
212
# Process-wide cache mapping (branch_tip, revno_str) <-> revision ids.
# lru_cache.LRUCache is not thread-safe (even get() mutates its LRU
# bookkeeping), so every access must hold _revno_revid_lock — see
# RevnoRevidMemoryCache, which wraps this pair.
_raw_revno_revid_cache = lru_cache.LRUCache(10000)
_revno_revid_lock = threading.RLock()
216
class RevnoRevidMemoryCache(object):
    """A store that maps revnos to revids based on the branch it is in.

    Entries are stored in both directions (revid -> revno_str and
    revno_str -> revid), keyed off the branch tip, so entries cached
    under an old tip are simply never hit again.
    """

    def __init__(self, cache, lock, branch_tip):
        """Wrap a shared cache for one branch tip.

        :param cache: dict-like shared store (e.g. lru_cache.LRUCache).
        :param lock: lock guarding every access to `cache`.
        :param branch_tip: tip revision id, mixed into every cache key.
        """
        # Note: what we'd really like is something that knew how long it takes
        # to produce a revno * how often it is accessed.  Since some revnos
        # take 100x longer to produce than others.  Could we cheat and just loop
        # (NOTE(review): the rest of this sentence was lost to corruption.)
        # There are also other possible layouts.  A per-branch cache, with an
        # LRU around the whole thing, etc.  I chose this for simplicity.
        self._branch_tip = branch_tip
        self._cache = cache
        # lru_cache is not thread-safe, so we need to lock all accesses.
        # It is even modified when doing a get() on it.
        self._lock = lock

    def get(self, key):
        """Return the data associated with `key`.

        Otherwise return None.

        :param key: Can be a revno_str or a revid.
        """
        # Even a read mutates the LRU bookkeeping, hence the lock.
        with self._lock:
            return self._cache.get((self._branch_tip, key))

    def set(self, revid, revno_str):
        """Store the bidirectional revid <-> revno_str mapping."""
        with self._lock:
            # TODO: StaticTuples ?  Probably only useful if we cache more than
            #       10k of them. 100k/1M is probably useful.
            self._cache[(self._branch_tip, revid)] = revno_str
            self._cache[(self._branch_tip, revno_str)] = revid
203
258
# Used to store locks that prevent multiple threads from building a
# revision graph for the same branch at the same time, because that can
# cause severe performance issues that are so bad that the system seems
# to hang.  (NOTE(review): the tail of this sentence was lost to
# corruption; reconstructed.)
revision_graph_locks = {}
revision_graph_check_lock = threading.Lock()

# sqlite is single-writer, so serialize history-db imports; see
# History.__init__, which hands this lock to the Querier.
history_db_importer_lock = threading.Lock()
210
266
class History(object):
    """Decorate a branch to provide information for rendering.

    :ivar _file_change_cache: An object that caches information about the
        files that changed between two revisions.
    :ivar _rev_info: A list of information about revisions.  This is by far
        the most cryptic data structure in loggerhead.  At the top level, it
        is a list of 3-tuples [(merge-info, where-merged, parents)].
        `merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) --
        like a merged sorted list, but the revno is stringified.
        `where-merged` is a tuple of revisions that have this revision as a
        non-lefthand parent.  Finally, `parents` is just the usual list of
        parents of this revision.
    :ivar _rev_indices: A dictionary mapping each revision id to the index of
        the information about it in _rev_info.
    :ivar _querier: A HistoryDB.Querier instance, allowing us to query for
        information in the ancestry of the branch.
    :ivar _revno_revid: A dictionary mapping stringified revnos to revision
        ids.

    NOTE(review): this docstring merges attribute documentation from two
    interleaved revisions of the file (the whole-history attributes and
    the history-db querier); confirm which set the final code keeps.
    """
234
def _load_whole_history_data(self, caches, cache_key):
    """Set the attributes relating to the whole history of the branch.

    :param caches: a list of caches with interfaces like
        `RevInfoMemoryCache` and be ordered from fastest to slowest.
    :param cache_key: the key to use with the caches.
    """
    self._rev_indices = None
    self._rev_info = None

    missed_caches = []
    def update_missed_caches():
        # Backfill every faster cache that missed with the data we found.
        for cache in missed_caches:
            cache.set(cache_key, self.last_revid, self._rev_info)

    # Theoretically, it's possible for two threads to race in creating
    # the Lock() object for their branch, so we put a lock around
    # creating the per-branch Lock().
    revision_graph_check_lock.acquire()
    try:
        if cache_key not in revision_graph_locks:
            revision_graph_locks[cache_key] = threading.Lock()
    finally:
        revision_graph_check_lock.release()

    revision_graph_locks[cache_key].acquire()
    try:
        for cache in caches:
            data = cache.get(cache_key, self.last_revid)
            if data is not None:
                self._rev_info = data
                update_missed_caches()
                break
            else:
                missed_caches.append(cache)
        else:
            # No cache had it: compute the expensive whole-history data
            # from scratch, then backfill every cache that missed.
            whole_history_data = compute_whole_history_data(self._branch)
            self._rev_info, self._rev_indices = whole_history_data
            update_missed_caches()
    finally:
        revision_graph_locks[cache_key].release()

    if self._rev_indices is not None:
        self._revno_revid = {}
        for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
            self._revno_revid[revno_str] = revid
    else:
        # Cached data carries only _rev_info; rebuild both indices.
        self._revno_revid = {}
        self._rev_indices = {}
        for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
            self._rev_indices[revid] = seq
            self._revno_revid[revno_str] = revid
287
282
def __init__(self, branch, whole_history_data_cache, file_cache=None,
             revinfo_disk_cache=None, cache_key=None,
             show_merge_points=True,
             cache_path=None):
    """Decorate `branch` (which must be read-locked) for rendering.

    NOTE(review): reconstructed from a corrupted interleave of two file
    revisions; the history-db variant of the constructor is kept.
    Verify the exact statement order against upstream loggerhead.
    """
    assert branch.is_locked(), (
        "Can only construct a History object with a read-locked branch.")
    if file_cache is not None:
        self._file_change_cache = file_cache
    else:
        self._file_change_cache = None
    self._branch = branch
    self._branch_tags = None
    self._inventory_cache = {}
    self._show_merge_points = show_merge_points
    # Map from (tip_revision, revision_id) => revno_str
    # and from (tip_revision, revno_str) => revision_id
    self._querier = _get_querier(branch)
    if self._querier is None:
        # History-db is not configured for this branch, do it ourselves
        assert cache_path is not None
        self._querier = history_db.Querier(
            os.path.join(cache_path, 'historydb.sql'), branch)
    # sqlite is single-writer, so block concurrent updates.
    # Note that this was even done in the past because of perf issues, even
    # without a disk requirement.
    self._querier.set_importer_lock(history_db_importer_lock)
    # TODO: Is this being premature? It makes the rest of the code
    #       simpler, since the branch tip is then always importable.
    self._querier.ensure_branch_tip()
    self._branch_nick = self._branch.get_config().get_nickname()
    self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))
    self.last_revid = branch.last_revision()
    self._revno_revid_cache = RevnoRevidMemoryCache(
        _raw_revno_revid_cache, _revno_revid_lock,
        self._branch.last_revision())
310
319
def has_revisions(self):
    """Return True when the branch has any history at all.

    NOTE(review): body lost to corruption; reconstructed as the
    conventional null-revision check -- confirm against upstream.
    """
    return not bzrlib.revision.is_null(self.last_revid)

def get_config(self):
    """Return the config object of the decorated branch."""
    return self._branch.get_config()
316
325
def get_revno(self, revid):
    """Return the dotted revno string ('1.2.3') for `revid`.

    Revisions the querier cannot resolve (ghosts) yield 'unknown',
    matching get_revnos().
    """
    revno_str = self._revno_revid_cache.get(revid)
    if revno_str is not None:
        return revno_str
    revnos = self._querier.get_dotted_revno_range_multi([revid])
    try:
        dotted_revno = revnos[revid]
    except KeyError:
        # Resolves the original "Should probably handle KeyError?" TODO:
        # report ghosts as 'unknown', consistent with get_revnos().
        return 'unknown'
    revno_str = '.'.join(map(str, dotted_revno))
    self._revno_revid_cache.set(revid, revno_str)
    return revno_str
338
def get_revnos(self, revids):
    """Get a map of revid => revno for all revisions.

    Revisions that cannot be resolved (ghosts) map to 'unknown'.
    """
    revno_map = {}
    unknown = []
    for revid in revids:
        if revid is None:
            # NOTE(review): guard reconstructed from corrupted text; a
            # None entry cannot be looked up, so report it as 'unknown'.
            revno_map[revid] = 'unknown'
            continue
        revno_str = self._revno_revid_cache.get(revid)
        if revno_str is not None:
            revno_map[revid] = revno_str
            continue
        unknown.append(revid)
    if not unknown:
        return revno_map
    # querier returns dotted revno tuples
    query_revno_map = self._querier.get_dotted_revno_range_multi(
        unknown)
    ghosts = set(unknown)
    # .items() rather than py2-only .iteritems(): identical iteration
    # behavior on py2, and portable.
    for revid, dotted_revno in query_revno_map.items():
        revno_str = '.'.join(map(str, dotted_revno))
        self._revno_revid_cache.set(revid, revno_str)
        revno_map[revid] = revno_str
        ghosts.discard(revid)
    # Anything the querier could not resolve is reported as 'unknown'.
    revno_map.update([(n, 'unknown') for n in ghosts])
    return revno_map
366
def get_revid_for_revno(self, revno_str):
    """Return the revision id for a dotted revno string like '1.2.3'."""
    revid = self._revno_revid_cache.get(revno_str)
    if revid is not None:
        return revid
    dotted_revno = tuple(map(int, revno_str.split('.')))
    revnos = self._querier.get_revision_ids([dotted_revno])
    revnos = dict([('.'.join(map(str, drn)), ri)
                   for drn, ri in revnos.items()])
    # BUGFIX(review): the original loop reused the names `revno_str` and
    # `revid`, clobbering the revno we were asked about before the final
    # `revnos[revno_str]` lookup.  Distinct loop names keep the request
    # intact.  (.items() instead of py2-only .iteritems() is behavior-
    # identical on py2 and portable.)
    for found_revno_str, found_revid in revnos.items():
        self._revno_revid_cache.set(found_revid, found_revno_str)
    return revnos[revno_str]
378
def _get_lh_parent(self, revid):
    """Get the left-hand parent of a given revision id."""
    # TODO: Move this into a public method on Querier
    # TODO: Possibly look into caching some of this info in memory, and
    #       between HTTP requests.
    self._querier.ensure_branch_tip()
    return self._querier._get_lh_parent_rev_id(revid)
386
def _get_children(self, revid):
    """Get the children of the given revision id."""
    # XXX: We should be filtering this based on self._branch's ancestry...
    # TODO: We also should be using a method on Querier, instead of doing
    #       the query ourselves.
    c = self._querier._get_cursor()
    res = c.execute("SELECT c.revision_id"
                    " FROM revision p, parent, revision c"
                    " WHERE child = c.db_id"
                    " AND parent = p.db_id"
                    " AND p.revision_id = ?",
                    (revid,))
    return [r[0] for r in res]
324
400
def get_revids_from(self, revid_list, start_revid):
    """Yield the mainline (wrt start_revid) revisions that merged each
    revid in revid_list.
    """
    tip_revid = start_revid
    if revid_list is None:
        # This returns the mainline of start_revid
        # TODO: We could use Querier for this
        # Note: Don't use self._branch.revision_history, as that always
        #       grabs the full history, and we now support stopping early.
        history = self._branch.repository.iter_reverse_revision_history(
            start_revid)
        for revid in history:
            yield revid
        return
    revid_set = set(revid_list)

    while tip_revid is not None and revid_set:
        parent_revid = self._get_lh_parent(tip_revid)
        # TODO: Consider caching this, especially between HTTP requests
        introduced = self._querier.iter_merge_sorted_revisions(
            start_revision_id=tip_revid, stop_revision_id=parent_revid)
        introduced_revs = set([i[0] for i in introduced
                               if i[0] in revid_set])
        if introduced_revs:
            # tip_revid is the mainline revision that merged (at least)
            # one of the requested revisions.
            revid_set.difference_update(introduced_revs)
            yield tip_revid
        tip_revid = parent_revid
353
430
def get_short_revision_history_by_fileid(self, file_id):
354
431
# FIXME: would be awesome if we could get, for a folder, the list of
366
443
del possible_keys, next_keys
369
def get_revision_history_since(self, revid_list, date):
    """Return the subset of revid_list that is not older than `date`.

    NOTE(review): the reverse/bisect bookkeeping between the visible
    lines was lost to corruption and reconstructed -- confirm against
    upstream loggerhead.
    """
    # if a user asks for revisions starting at 01-sep, they mean inclusive,
    # so start at midnight on 02-sep.
    date = date + datetime.timedelta(days=1)
    # our revid list is sorted in REVERSE date order,
    # so go thru some hoops here...
    revid_list.reverse()
    index = bisect.bisect(_RevListToTimestamps(revid_list,
                                               self._branch.repository),
                          date)
    if index == 0:
        return []
    revid_list.reverse()
    # Convert the position in the (ascending) list into a tail slice of
    # the original (descending) list.
    index = -index
    return revid_list[index:]
385
def get_search_revid_list(self, query, revid_list):
    """
    given a "quick-search" query, try a few obvious possible meanings:

        - revision id or # ("128.1.3")
        - date (US style "mm/dd/yy", earth style "dd-mm-yy", or \
iso style "yyyy-mm-dd")
        - comment text as a fallback

    and return a revid list that matches.

    NOTE(review): reconstructed from corrupted text; the comment-text
    fallback mentioned in the docstring is not visible in this chunk.
    """
    # FIXME: there is some silliness in this action.  we have to look up
    # all the relevant changes (time-consuming) only to return a list of
    # revids which will be used to fetch a set of changes again.

    # if they entered a revid, just jump straight there;
    # ignore the passed-in revid_list
    revid = self.fix_revid(query)
    if revid is not None:
        if isinstance(revid, unicode):
            revid = revid.encode('utf-8')
        changes = self.get_changes([revid])
        if (changes is not None) and (len(changes) > 0):
            return [revid]

    date = None
    m = self.us_date_re.match(query)
    if m is not None:
        date = datetime.datetime(util.fix_year(int(m.group(3))),
                                 int(m.group(1)),
                                 int(m.group(2)))
    else:
        m = self.earth_date_re.match(query)
        if m is not None:
            date = datetime.datetime(util.fix_year(int(m.group(3))),
                                     int(m.group(2)),
                                     int(m.group(1)))
        else:
            m = self.iso_date_re.match(query)
            if m is not None:
                date = datetime.datetime(util.fix_year(int(m.group(1))),
                                         int(m.group(2)),
                                         int(m.group(3)))
    if date is not None:
        if revid_list is None:
            # if no limit to the query was given,
            # search only the direct-parent path.
            revid_list = list(self.get_revids_from(None, self.last_revid))
        return self.get_revision_history_since(revid_list, date)
435
446
revno_re = re.compile(r'^[\d\.]+$')
# the date regex are without a final '$' so that queries like
# "2006-11-30 12:15" still mostly work.  (i think it's better to give
# them 90% of what they want instead of nothing at all.)
# BUGFIX(review): the year group read (\d\d(\d\d?)), which *requires* a
# third digit and so rejected two-digit years -- even though every caller
# runs the match through util.fix_year(), whose whole purpose is fixing
# two-digit years.  The second digit pair is now optional: (\d\d(\d\d)?).
us_date_re = re.compile(r'^(\d{1,2})/(\d{1,2})/(\d\d(\d\d)?)')
earth_date_re = re.compile(r'^(\d{1,2})-(\d{1,2})-(\d\d(\d\d)?)')
iso_date_re = re.compile(r'^(\d\d\d\d)-(\d\d)-(\d\d)')
443
448
def fix_revid(self, revid):
444
449
# if a "revid" is actually a dotted revno, convert it to a revid
464
474
if revid is None:
465
475
revid = self.last_revid
466
476
if file_id is not None:
467
# since revid is 'start_revid', possibly should start the path
468
# tracing from revid... FIXME
469
revlist = list(self.get_short_revision_history_by_fileid(file_id))
470
revlist = list(self.get_revids_from(revlist, revid))
478
self.get_short_revision_history_by_fileid(file_id, revid))
479
revlist = self.get_revids_from(revlist, revid)
472
revlist = list(self.get_revids_from(None, revid))
481
revlist = self.get_revids_from(None, revid)
475
def get_view(self, revid, start_revid, file_id, query=None):
484
def _iterate_sufficiently(self, iterable, stop_at, extra_rev_count):
    """Return a list of iterable.

    If extra_rev_count is None, fully consume iterable.
    Otherwise, stop at 'stop_at' + extra_rev_count.

    Example:
      _iterate_sufficiently([rev1, rev2, rev3, rev4], rev2, 10)
      iterate until you find stop_at, then iterate 10 more times.

    NOTE(review): the middle of this function was lost to corruption and
    reconstructed; confirm the stop_at comparison and the extra-count
    boundary against upstream loggerhead.
    """
    if extra_rev_count is None:
        return list(iterable)
    result = []
    found = False
    for item in iterable:
        result.append(item)
        if item == stop_at:
            found = True
            break
    if found:
        # Take exactly extra_rev_count further items, if available.
        for count, item in enumerate(iterable):
            if count >= extra_rev_count:
                break
            result.append(item)
    return result
509
def get_view(self, revid, start_revid, file_id, query=None,
510
extra_rev_count=None):
477
512
use the URL parameters (revid, start_revid, file_id, and query) to
478
513
determine the revision list we're viewing (start_revid, file_id, query)
625
670
if len(changes) == 0:
628
# some data needs to be recalculated each time, because it may
629
# change as new revisions are added.
673
needed_revnos = set()
630
674
for change in changes:
675
needed_revnos.add(change.revid)
676
needed_revnos.update([p_id for p_id in change.parents])
677
revno_map = self.get_revnos(needed_revnos)
679
def merge_points_callback(a_change, attr):
631
680
merge_revids = self.simplify_merge_point_list(
632
self.get_merge_point_list(change.revid))
633
change.merge_points = [
634
util.Container(revid=r,
635
revno=self.get_revno(r)) for r in merge_revids]
681
self.get_merge_point_list(a_change.revid))
684
revno_map = self.get_revnos(merge_revids)
685
return [util.Container(revid=r, revno=revno_map[r])
686
for r in merge_revids]
689
for change in changes:
690
change._set_property('merge_points', merge_points_callback)
636
691
if len(change.parents) > 0:
637
change.parents = [util.Container(revid=r,
638
revno=self.get_revno(r)) for r in change.parents]
639
change.revno = self.get_revno(change.revid)
642
for change in changes:
692
change.parents = [util.Container(revid=r, revno=revno_map[r])
693
for r in change.parents]
694
change.revno = revno_map[change.revid]
643
695
change.parity = parity