1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Parameterised loading of revisions into a repository."""
20
from bzrlib import errors, knit, lru_cache, osutils
21
from bzrlib import revision as _mod_revision
24
class AbstractRevisionLoader(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must provide ``_load_texts``; the other hooks
    (``_add_inventory``, ``_add_revision``) have default implementations
    that delegate to the repository.
    """
    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
             inventories_provider=None):
        """Load a revision into a repository.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids (it is called with ``rev.parent_ids``), that
            returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
                         text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                                                     inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberate best-effort: an inventory that is already stored
            # is not an error; carry on and record the revision itself.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        # The repository lives on the instance; a bare ``repo`` name is not
        # defined in this scope.
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a ``(present, inventories)`` tuple: the subset of
            revision_ids the repository has, and one inventory per
            requested revision-id (for a missing revision, the inventory
            of ``revision_tree(None)``).
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
116
class RevisionLoader1(AbstractRevisionLoader):
    """A RevisionLoader that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Adds, via the repository's weave store, the text of every entry
        whose ``revision`` equals revision_id.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not last changed in
            revision_id.
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for path, ie in entries:
            # Asking the weave whether ie.revision is already stored
            # (get_weave_or_empty + membership test) is *really* slow:
            # over 50% of import time.
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            # Collect the distinct last-changed revisions of this file in
            # the parent inventories, preserving parent order.
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            lines = text_provider(ie.file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines of file_id as stored for revision_id."""
        tx = self.repo.get_transaction()
        # Look the weave up by the file_id argument; no inventory entry
        # (``ie``) exists in this scope.
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """See AbstractRevisionLoader._add_revision().

        ``inv`` is accepted for interface compatibility but not used here.
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())
168
class RevisionLoader2(AbstractRevisionLoader):
    """A RevisionLoader that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Adds to ``self.repo.texts`` every ``(file_id, revision)`` key from
        entries that ``get_parent_map`` does not already report.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not last changed in
            revision_id.
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                # text_parents holds (file_id, revision) keys, so the
                # duplicate check must compare the whole key; testing the
                # bare revision id against the list never matches.
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines of the text stored under (file_id, revision_id).

        :raises errors.RevisionNotPresent: if the record stream reports the
            key as absent.
        """
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    def _add_revision(self, rev, inv):
        """See AbstractRevisionLoader._add_revision().

        ``inv`` is accepted for interface compatibility but not used here.
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._add_revision(rev)
212
class ImportRevisionLoader1(RevisionLoader1):
    """A RevisionLoader (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    # NOTE(review): the signature continuation line is missing from the
    # reviewed text; ``random_ids`` is required by the assignment below but
    # its default (True) is an assumption - confirm against the real file.
    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
                 random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: stored and later passed as ``random_id`` to the
          inventory versioned file's ``_check_add``
        """
        RevisionLoader1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents):
        """See AbstractRevisionLoader._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, _, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        # Remember the content just added so a later call can pass it on
        # as a parent text.
        self.inv_parent_texts[revision_id] = parent_text
        # NOTE(review): the return statement is missing from the reviewed
        # text; the caller stores this method's result as the inventory
        # sha1, so the digest is returned - confirm.
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
                             parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the version (revision) id being added
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of cached parent contents, or None
        :returns: a ``(digest, text_length, content)`` tuple
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            # NOTE(review): this branch is missing from the reviewed text;
            # presumably the versioned file's own default applies - confirm.
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                # NOTE(review): statement missing from the reviewed text;
                # presumably the joined bytes are kept in step - confirm.
                line_bytes += '\n'

        # Deliberately skipped here (unlike the code this was taken from):
        # limiting the delta chain to a fixed number of deltas via
        # inv_vf._check_should_delta(present_parents), which would minimize
        # both I/O and the time spent applying deltas.

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        # NOTE(review): the trailing arguments of the three _record_to_data
        # calls and of add_versions are missing from the reviewed text and
        # are reconstructed below - confirm against the real file.
        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, raw_bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, raw_bytes = inv_vf._data._record_to_data(version_id,
                    digest, lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, raw_bytes = inv_vf._data._record_to_data(version_id,
                    digest, store_lines)

        access_memo = inv_vf._data.add_raw_records([size], raw_bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content