1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
26
from dulwich.objects import (
32
btree_index as _mod_btree_index,
39
from bzrlib.transport import (
46
from xdg.BaseDirectory import xdg_cache_home
48
from bzrlib.config import config_dir
49
ret = os.path.join(config_dir(), "git")
51
ret = os.path.join(xdg_cache_home, "bazaar", "git")
52
if not os.path.isdir(ret):
57
def get_remote_cache_transport():
    """Return a transport opened on the directory named by get_cache_dir()."""
    cache_location = get_cache_dir()
    return get_transport(cache_location)
61
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: sqlite module (or any object exposing a
        ``sqlite_version_info`` sequence of integers) to check
    :raise bzrlib.errors.BzrError: if the sqlite library is older than 3.3;
        a warning is emitted through ``trace`` first
    """
    # Tuples compare element by element, so this orders on the major
    # version first and on the minor version only when the majors are equal.
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
74
check_pysqlite_version(sqlite3)
75
except (ImportError, bzrlib.errors.BzrError), e:
76
from pysqlite2 import dbapi2 as sqlite3
77
check_pysqlite_version(sqlite3)
79
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
81
raise bzrlib.errors.BzrError("missing sqlite library")
84
_mapdbs = threading.local()
86
"""Get a cache for this thread's db connections."""
89
except AttributeError:
94
class GitShaMap(object):
    """Git<->Bzr revision id mapping database."""

    def lookup_git_sha(self, sha):
        """Find what a Git sha refers to.

        Subclasses must override this.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Find the Git blob SHA for a file id.

        Subclasses must override this.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Find the Git tree SHA for a file id.

        Subclasses must override this.
        """
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """Iterate over the known revision ids (subclasses must override)."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return the set of given revisions that are not present.

        :param revids: Iterable of revision ids; a ``set`` is used as is,
            anything else is converted to a set first
        :return: Set of the revision ids not reported by ``revids()``
        """
        known = set(self.revids())
        wanted = revids if isinstance(revids, set) else set(revids)
        return wanted - known

    def sha1s(self):
        """Iterate over the known SHA1s (subclasses must override)."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of changes; does nothing in this base class."""

    def commit_write_group(self):
        """Commit pending changes; does nothing in this base class."""

    def abort_write_group(self):
        """Abort pending changes; does nothing in this base class."""
143
class ContentCache(object):
    """Object that can cache Git objects."""

    def add(self, object):
        """Store one object; subclasses must override this."""
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Store several objects by handing each one to add() in turn."""
        for item in objects:
            self.add(item)

    def __getitem__(self, sha):
        """Fetch a cached item by its SHA; subclasses must override this."""
        raise NotImplementedError(self.__getitem__)
160
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Create a new instance of this cache format at transport.

        Writes this format's format string to the 'format' file on the
        transport.
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The 'format' file on the transport names the format to open. If that
        file does not exist, the format registered as 'default' is
        initialized on the transport and then opened.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        # Only the read of the 'format' file is expected to raise NoSuchFile,
        # so the registry lookups are kept outside the try body.
        try:
            format_name = transport.get_bytes('format')
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        else:
            cache_format = formats.get(format_name)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the user's global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                # The cache directory is already there; reuse it.
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
216
class CacheUpdater(object):
    """Base class for objects that can update a bzr-git cache."""

    def add_object(self, obj, ie, path):
        """Record one object in the cache; subclasses must override this."""
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; subclasses must override this."""
        raise NotImplementedError(self.finish)
226
class BzrGitCache(object):
    """Caching backend."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Bundle an id map, a content cache and an updater class.

        :param idmap: Object stored as ``self.idmap``
        :param content_cache: Object stored as ``self.content_cache``
        :param cache_updater_klass: Callable invoked by get_updater() as
            ``cache_updater_klass(cache, rev)``
        """
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Build an updater for this cache and the given revision.

        :param rev: Revision handed on to the updater class
        :return: Whatever the configured updater class returns when called
            with this cache and ``rev``
        """
        make_updater = self._cache_updater_klass
        return make_updater(self, rev)
241
# Factory: builds a BzrGitCache around a fresh DictGitShaMap, with no content
# cache (None) and DictCacheUpdater as the updater class.
DictBzrGitCache = lambda: BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
244
class DictCacheUpdater(CacheUpdater):
245
"""Cache updater for dict-based caches."""
247
def __init__(self, cache, rev):
249
self.revid = rev.revision_id
250
self.parent_revids = rev.parent_ids
254
def add_object(self, obj, ie, path):
255
if obj.type_name == "commit":
258
type_data = (self.revid, self._commit.tree)
259
self.cache.idmap._by_revid[self.revid] = obj.id
260
elif obj.type_name in ("blob", "tree"):
262
if obj.type_name == "blob":
263
revision = ie.revision
265
revision = self.revid
266
type_data = (ie.file_id, revision)
267
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
271
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
274
if self._commit is None:
275
raise AssertionError("No commit object added")
279
class DictGitShaMap(GitShaMap):
280
"""Git SHA map that uses a dictionary."""
287
def lookup_blob_id(self, fileid, revision):
288
return self._by_fileid[revision][fileid]
290
def lookup_git_sha(self, sha):
291
return self._by_sha[sha]
293
def lookup_tree_id(self, fileid, revision):
294
return self._by_fileid[revision][fileid]
296
def lookup_commit(self, revid):
297
return self._by_revid[revid]
300
for key, (type, type_data) in self._by_sha.iteritems():
305
return self._by_sha.iterkeys()
308
class SqliteCacheUpdater(CacheUpdater):
310
def __init__(self, cache, rev):
312
self.db = self.cache.idmap.db
313
self.revid = rev.revision_id
318
def add_object(self, obj, ie, path):
319
if obj.type_name == "commit":
322
elif obj.type_name == "tree":
324
self._trees.append((obj.id, ie.file_id, self.revid))
325
elif obj.type_name == "blob":
327
self._blobs.append((obj.id, ie.file_id, ie.revision))
332
if self._commit is None:
333
raise AssertionError("No commit object added")
335
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
338
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
341
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
342
(self._commit.id, self.revid, self._commit.tree))
346
# Factory: builds a BzrGitCache around SqliteGitShaMap(p), with no content
# cache (None) and SqliteCacheUpdater as the updater class.
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
349
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that opens its sha map from an 'idmap.db' file."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the cache whose 'idmap.db' lives under the transport.

        When the transport has no local path (NotLocalUrl), the directory
        returned by get_cache_dir() is used instead.
        """
        try:
            directory = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            directory = get_cache_dir()
        db_path = os.path.join(directory, "idmap.db")
        return SqliteBzrGitCache(db_path)
362
class SqliteGitShaMap(GitShaMap):
363
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
365
def __init__(self, path=None):
368
self.db = sqlite3.connect(":memory:")
370
if not mapdbs().has_key(path):
371
mapdbs()[path] = sqlite3.connect(path)
372
self.db = mapdbs()[path]
373
self.db.text_factory = str
374
self.db.executescript("""
375
create table if not exists commits(
376
sha1 text not null check(length(sha1) == 40),
378
tree_sha text not null check(length(tree_sha) == 40)
380
create index if not exists commit_sha1 on commits(sha1);
381
create unique index if not exists commit_revid on commits(revid);
382
create table if not exists blobs(
383
sha1 text not null check(length(sha1) == 40),
384
fileid text not null,
387
create index if not exists blobs_sha1 on blobs(sha1);
388
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
389
create table if not exists trees(
390
sha1 text unique not null check(length(sha1) == 40),
391
fileid text not null,
394
create unique index if not exists trees_sha1 on trees(sha1);
395
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
399
return "%s(%r)" % (self.__class__.__name__, self.path)
401
def lookup_commit(self, revid):
402
cursor = self.db.execute("select sha1 from commits where revid = ?",
404
row = cursor.fetchone()
409
def commit_write_group(self):
412
def lookup_blob_id(self, fileid, revision):
413
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
416
raise KeyError(fileid)
418
def lookup_tree_id(self, fileid, revision):
419
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
422
raise KeyError(fileid)
424
def lookup_git_sha(self, sha):
425
"""Lookup a Git sha in the database.
427
:param sha: Git object sha
428
:return: (type, type_data) with type_data:
429
revision: revid, tree sha
431
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
433
return ("commit", row)
434
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
437
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
443
"""List the revision ids known."""
444
return (row for (row,) in self.db.execute("select revid from commits"))
447
"""List the SHA1s."""
448
for table in ("blobs", "commits", "trees"):
449
for (sha,) in self.db.execute("select sha1 from %s" % table):
453
class TdbCacheUpdater(CacheUpdater):
454
"""Cache updater for tdb-based caches."""
456
def __init__(self, cache, rev):
458
self.db = cache.idmap.db
459
self.revid = rev.revision_id
460
self.parent_revids = rev.parent_ids
464
def add_object(self, obj, ie, path):
465
sha = obj.sha().digest()
466
if obj.type_name == "commit":
467
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
468
type_data = (self.revid, obj.tree)
471
elif obj.type_name == "blob":
474
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
475
type_data = (ie.file_id, ie.revision)
476
elif obj.type_name == "tree":
479
type_data = (ie.file_id, self.revid)
482
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
485
if self._commit is None:
486
raise AssertionError("No commit object added")
490
# Factory: builds a BzrGitCache around TdbGitShaMap(p), with no content
# cache (None) and TdbCacheUpdater as the updater class.
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
492
class TdbGitCacheFormat(BzrGitCacheFormat):
493
"""Cache format for tdb-based caches."""
495
def get_format_string(self):
496
return 'bzr-git sha map version 3 using tdb\n'
498
def open(self, transport):
500
basepath = transport.local_abspath(".")
501
except bzrlib.errors.NotLocalUrl:
502
basepath = get_cache_dir()
504
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
507
"Unable to open existing bzr-git cache because 'tdb' is not "
511
class TdbGitShaMap(GitShaMap):
512
"""SHA Map that uses a TDB database.
516
"git <sha1>" -> "<type> <type-data1> <type-data2>"
517
"commit revid" -> "<sha1> <tree-id>"
518
"tree fileid revid" -> "<sha1>"
519
"blob fileid revid" -> "<sha1>"
523
TDB_HASH_SIZE = 50000
525
def __init__(self, path=None):
531
if not mapdbs().has_key(path):
532
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
533
os.O_RDWR|os.O_CREAT)
534
self.db = mapdbs()[path]
536
if int(self.db["version"]) not in (2, 3):
537
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
538
self.db["version"], self.TDB_MAP_VERSION)
542
self.db["version"] = str(self.TDB_MAP_VERSION)
544
def start_write_group(self):
545
"""Start writing changes."""
546
self.db.transaction_start()
548
def commit_write_group(self):
549
"""Commit any pending changes."""
550
self.db.transaction_commit()
552
def abort_write_group(self):
553
"""Abort any pending changes."""
554
self.db.transaction_cancel()
557
return "%s(%r)" % (self.__class__.__name__, self.path)
559
def lookup_commit(self, revid):
560
return sha_to_hex(self.db["commit\0" + revid][:20])
562
def lookup_blob_id(self, fileid, revision):
563
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
565
def lookup_git_sha(self, sha):
566
"""Lookup a Git sha in the database.
568
:param sha: Git object sha
569
:return: (type, type_data) with type_data:
570
revision: revid, tree sha
573
sha = hex_to_sha(sha)
574
data = self.db["git\0" + sha].split("\0")
575
return (data[0], (data[1], data[2]))
577
def missing_revisions(self, revids):
580
if self.db.get("commit\0" + revid) is None:
585
"""List the revision ids known."""
586
for key in self.db.iterkeys():
587
if key.startswith("commit\0"):
591
"""List the SHA1s."""
592
for key in self.db.iterkeys():
593
if key.startswith("git\0"):
594
yield sha_to_hex(key[4:])
597
class VersionedFilesContentCache(ContentCache):
599
def __init__(self, vf):
603
self._vf.insert_record_stream(
604
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
605
obj.as_legacy_object_chunks())])
607
def __getitem__(self, sha):
608
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
609
entry = stream.next()
610
if entry.storage_kind == 'absent':
612
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
615
class GitObjectStoreContentCache(ContentCache):
    """Content cache that forwards every operation to an object store."""

    def __init__(self, store):
        """Remember the store that all operations are forwarded to."""
        self.store = store

    def add_multi(self, objs):
        """Pass ``objs`` unchanged to the store's add_objects()."""
        backend = self.store
        backend.add_objects(objs)

    def add(self, obj, path):
        """Add ``obj`` to the store; ``path`` is accepted but not used."""
        backend = self.store
        backend.add_object(obj)

    def __getitem__(self, sha):
        """Look ``sha`` up in the store."""
        backend = self.store
        return backend[sha]
630
class IndexCacheUpdater(CacheUpdater):
632
def __init__(self, cache, rev):
634
self.revid = rev.revision_id
635
self.parent_revids = rev.parent_ids
638
self._cache_objs = set()
640
def add_object(self, obj, ie, path):
641
if obj.type_name == "commit":
644
self.cache.idmap._add_git_sha(obj.id, "commit",
645
(self.revid, obj.tree))
646
self.cache.idmap._add_node(("commit", self.revid, "X"),
647
" ".join((obj.id, obj.tree)))
648
self._cache_objs.add((obj, path))
649
elif obj.type_name == "blob":
650
self.cache.idmap._add_git_sha(obj.id, "blob",
651
(ie.file_id, ie.revision))
652
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
653
if ie.kind == "symlink":
654
self._cache_objs.add((obj, path))
655
elif obj.type_name == "tree":
656
self.cache.idmap._add_git_sha(obj.id, "tree",
657
(ie.file_id, self.revid))
658
self._cache_objs.add((obj, path))
663
self.cache.content_cache.add_multi(self._cache_objs)
667
class IndexBzrGitCache(BzrGitCache):
669
def __init__(self, transport=None):
670
mapper = versionedfile.ConstantMapper("trees")
671
shamap = IndexGitShaMap(transport.clone('index'))
672
#trees_store = knit.make_file_factory(True, mapper)(transport)
673
#content_cache = VersionedFilesContentCache(trees_store)
674
from bzrlib.plugins.git.transportgit import TransportObjectStore
675
store = TransportObjectStore(transport.clone('objects'))
676
content_cache = GitObjectStoreContentCache(store)
677
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
681
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format opened as an IndexBzrGitCache."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Set up a new cache of this format on the transport.

        After the base class initialization, creates the 'index' and
        'objects' directories and initializes a TransportObjectStore in
        'objects'.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for subdir in ('index', 'objects'):
            transport.mkdir(subdir)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Open the cache stored on the transport."""
        return IndexBzrGitCache(transport)
697
class IndexGitShaMap(GitShaMap):
698
"""SHA Map that uses the Bazaar APIs to store a cache.
700
BTree Index file with the following contents:
702
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
703
("commit", <revid>) -> "<sha1> <tree-id>"
704
("blob", <fileid>, <revid>) -> <sha1>
708
def __init__(self, transport=None):
709
if transport is None:
710
self._transport = None
711
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
712
self._builder = self._index
715
self._transport = transport
716
self._index = _mod_index.CombinedGraphIndex([])
717
for name in self._transport.list_dir("."):
718
if not name.endswith(".rix"):
720
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
721
self._transport.stat(name).st_size)
722
self._index.insert_index(0, x)
725
def from_repository(cls, repository):
726
transport = getattr(repository, "_transport", None)
727
if transport is not None:
729
transport.mkdir('git')
730
except bzrlib.errors.FileExists:
732
return cls(transport.clone('git'))
733
from bzrlib.transport import get_transport
734
return cls(get_transport(get_cache_dir()))
737
if self._transport is not None:
738
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
740
return "%s()" % (self.__class__.__name__)
743
assert self._builder is None
744
self.start_write_group()
745
for _, key, value in self._index.iter_all_entries():
746
self._builder.add_node(key, value)
748
for name in self._transport.list_dir('.'):
749
if name.endswith('.rix'):
750
to_remove.append(name)
751
self.commit_write_group()
752
del self._index.indices[1:]
753
for name in to_remove:
754
self._transport.rename(name, name + '.old')
756
def start_write_group(self):
757
assert self._builder is None
758
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
759
self._name = osutils.sha()
761
def commit_write_group(self):
762
assert self._builder is not None
763
stream = self._builder.finish()
764
name = self._name.hexdigest() + ".rix"
765
size = self._transport.put_file(name, stream)
766
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
767
self._index.insert_index(0, index)
771
def abort_write_group(self):
772
assert self._builder is not None
776
def _add_node(self, key, value):
778
self._builder.add_node(key, value)
779
except bzrlib.errors.BadIndexDuplicateKey:
780
# Multiple bzr objects can have the same contents
785
def _get_entry(self, key):
786
entries = self._index.iter_entries([key])
788
return entries.next()[2]
789
except StopIteration:
790
if self._builder is None:
792
entries = self._builder.iter_entries([key])
794
return entries.next()[2]
795
except StopIteration:
798
def _iter_keys_prefix(self, prefix):
799
for entry in self._index.iter_entries_prefix([prefix]):
801
if self._builder is not None:
802
for entry in self._builder.iter_entries_prefix([prefix]):
805
def lookup_commit(self, revid):
806
return self._get_entry(("commit", revid, "X"))[:40]
808
def _add_git_sha(self, hexsha, type, type_data):
809
if hexsha is not None:
810
self._name.update(hexsha)
811
self._add_node(("git", hexsha, "X"),
812
" ".join((type, type_data[0], type_data[1])))
814
# This object is not represented in Git - perhaps an empty
816
self._name.update(type + " ".join(type_data))
818
def lookup_blob_id(self, fileid, revision):
819
return self._get_entry(("blob", fileid, revision))
821
def lookup_git_sha(self, sha):
823
sha = sha_to_hex(sha)
824
data = self._get_entry(("git", sha, "X")).split(" ", 2)
825
return (data[0], (data[1], data[2]))
828
"""List the revision ids known."""
829
for key in self._iter_keys_prefix(("commit", None, None)):
832
def missing_revisions(self, revids):
833
"""Return set of all the revisions that are not present."""
834
missing_revids = set(revids)
835
for _, key, value in self._index.iter_entries((
836
("commit", revid, "X") for revid in revids)):
837
missing_revids.remove(key[1])
838
return missing_revids
841
"""List the SHA1s."""
842
for key in self._iter_keys_prefix(("git", None, None)):
846
formats = registry.Registry()
847
formats.register(TdbGitCacheFormat().get_format_string(),
849
formats.register(SqliteGitCacheFormat().get_format_string(),
850
SqliteGitCacheFormat())
851
formats.register(IndexGitCacheFormat().get_format_string(),
852
IndexGitCacheFormat())
853
# In the future, this will become the default:
854
# formats.register('default', IndexGitCacheFormat())
858
formats.register('default', SqliteGitCacheFormat())
860
formats.register('default', TdbGitCacheFormat())
864
def migrate_ancient_formats(repo_transport):
    """Move an old top-level cache file into the 'git' subdirectory.

    If 'git.db' exists it is migrated to 'git/idmap.db' after initializing
    the sqlite format in 'git'; otherwise, if 'git.tdb' exists, it is
    migrated to 'git/idmap.tdb' after initializing the tdb format. When
    neither file exists nothing is done.

    :param repo_transport: Transport on which the old files are looked up
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        old_name, new_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        old_name, new_name = "git.tdb", "git/idmap.tdb"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(old_name, new_name)
875
def remove_readonly_transport_decorator(transport):
    """Return the transport wrapped by a read-only transport.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if not transport.is_readonly():
        return transport
    return transport._decorated
881
def from_repository(repository):
882
"""Open a cache file for a repository.
884
If the repository is remote and there is no transport available from it
885
this will use a local file in the users cache directory
886
(typically ~/.cache/bazaar/git/)
888
:param repository: A repository object
890
repo_transport = getattr(repository, "_transport", None)
891
if repo_transport is not None:
892
# Migrate older cache formats
893
repo_transport = remove_readonly_transport_decorator(repo_transport)
895
repo_transport.mkdir("git")
896
except bzrlib.errors.FileExists:
899
migrate_ancient_formats(repo_transport)
900
return BzrGitCacheFormat.from_repository(repository)