1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tests for repositories that support CHK indices."""
25
from bzrlib import (
    btree_index,
    errors,
    osutils,
    repository,
    )
from bzrlib.versionedfile import VersionedFiles
from bzrlib.tests.per_repository_chk import TestCaseWithRepositoryCHK
29
class TestCHKSupport(TestCaseWithRepositoryCHK):
    """Exercise the ``chk_bytes`` store of a CHK-capable repository."""

    def test_chk_bytes_attribute_is_VersionedFiles(self):
        """``repo.chk_bytes`` is a VersionedFiles instance."""
        repo = self.make_repository('.')
        self.assertIsInstance(repo.chk_bytes, VersionedFiles)

    def test_add_bytes_to_chk_bytes_store(self):
        """Lines added to chk_bytes are keyed by sha1 and stay listed.

        The key must be reported inside the write group, after the group is
        committed and the repository re-locked, and after the repository is
        reopened from disk.
        """
        expected_keys = set(
            [('sha1:4e48e2c9a3d2ca8a708cb0cc545700544efb5021',)])
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                # The middle element (a length) is not checked here; it is
                # named so as not to shadow the ``len`` builtin.
                sha1, _length, _ = repo.chk_bytes.add_lines((None,),
                    None, ["foo\n", "bar\n"], random_id=True)
                self.assertEqual('4e48e2c9a3d2ca8a708cb0cc545700544efb5021',
                    sha1)
                self.assertEqual(expected_keys, repo.chk_bytes.keys())
            except:
                # Any failure (including a failed assertion) must not leave
                # the write group open; abort it and re-raise unchanged.
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
        finally:
            repo.unlock()
        # And after an unlock/lock pair
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.bzrdir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_pack_preserves_chk_bytes_store(self):
        """Packing keeps every chk record that was committed.

        An internal node and the leaf it points at are committed in two
        separate write groups; after ``pack()`` both keys must still be
        listed, both on the same object and on a reopened repository.
        """
        leaf_lines = ["chkleaf:\n", "0\n", "1\n", "0\n", "\n"]
        leaf_sha1 = osutils.sha_strings(leaf_lines)
        node_lines = ["chknode:\n", "0\n", "1\n", "1\n", "foo\n",
                      "\x00sha1:%s\n" % (leaf_sha1,)]
        node_sha1 = osutils.sha_strings(node_lines)
        expected_set = set([('sha1:' + leaf_sha1,), ('sha1:' + node_sha1,)])
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                # Internal node pointing at a leaf.
                repo.chk_bytes.add_lines((None,), None, node_lines,
                    random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            repo.start_write_group()
            try:
                # Leaf in a separate pack.
                repo.chk_bytes.add_lines((None,), None, leaf_lines,
                    random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            repo.pack()
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.bzrdir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_chk_bytes_are_fully_buffered(self):
        """The chk_bytes index keeps its leaf nodes in a plain dict.

        This reaches into private attributes of the index, so it is tied to
        the current BTreeGraphIndex-based implementation.
        """
        repo = self.make_repository('.')
        repo.lock_write()
        self.addCleanup(repo.unlock)
        repo.start_write_group()
        try:
            sha1, _length, _ = repo.chk_bytes.add_lines((None,),
                None, ["foo\n", "bar\n"], random_id=True)
            self.assertEqual('4e48e2c9a3d2ca8a708cb0cc545700544efb5021',
                sha1)
            self.assertEqual(
                set([('sha1:4e48e2c9a3d2ca8a708cb0cc545700544efb5021',)]),
                repo.chk_bytes.keys())
        except:
            repo.abort_write_group()
            raise
        else:
            repo.commit_write_group()
        # This may not always be correct if we change away from BTreeGraphIndex
        # in the future. But for now, lets check that chk_bytes are fully
        # buffered
        index = repo.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        self.assertIs(type(index._leaf_node_cache), dict)
        # Re-opening the repository should also have a repo with everything
        # fully buffered
        repo2 = repository.Repository.open(self.get_url())
        repo2.lock_read()
        self.addCleanup(repo2.unlock)
        index = repo2.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        self.assertIs(type(index._leaf_node_cache), dict)
147
class TestCommitWriteGroupIntegrityCheck(TestCaseWithRepositoryCHK):
    """Tests that commit_write_group prevents various kinds of invalid data
    from being committed to a CHK repository.

    Each test copies a deliberately incomplete set of records into a fresh
    repository, then checks that committing raises BzrCheckError both
    directly and after the write group has been suspended and resumed.
    """

    def reopen_repo_and_resume_write_group(self, repo):
        """Suspend repo's write group and resume it on a reopened repository.

        :param repo: a write-locked repository with an open write group.
        :return: a freshly opened, write-locked repository object with the
            write group resumed. Its unlock is registered as a cleanup.
        """
        resume_tokens = repo.suspend_write_group()
        # Release our lock before the reopened object takes its own.
        repo.unlock()
        reopened_repo = repo.bzrdir.open_repository()
        reopened_repo.lock_write()
        self.addCleanup(reopened_repo.unlock)
        reopened_repo.resume_write_group(resume_tokens)
        return reopened_repo

    def test_missing_chk_root_for_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing.
        """
        repo = self.make_repository('damaged-repo')
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        repo.lock_write()
        repo.start_write_group()
        # Now, add the objects manually
        text_keys = [('file-id', 'A-id'), ('root-id', 'A-id')]
        # Directly add the texts, inventory, and revision object for 'A-id' --
        # but don't add the chk_bytes.
        src_repo = b.repository
        repo.texts.insert_record_stream(src_repo.texts.get_record_stream(
            text_keys, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('A-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('A-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_unchanged_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing, even if the parent inventory is present
        and has identical content (i.e. the same chk root).

        A stacked repository containing only a revision with an identical
        inventory to its parent will still have the chk root records for those
        inventories.

        (In principle the chk records are unnecessary in this case, but in
        practice bzr 2.0rc1 (at least) expects to find them.)
        """
        repo = self.make_repository('damaged-repo')
        # Make a branch where the last two revisions have identical
        # inventories.
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, [])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        # check our setup: B-id and C-id should have identical chk root keys.
        inv_b = b.repository.get_inventory('B-id')
        inv_c = b.repository.get_inventory('C-id')
        self.assertEqual(inv_b.id_to_entry.key(), inv_c.id_to_entry.key())
        # Now, manually insert objects for a stacked repo with only revision
        # C-id:
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_leaf_for_inventory(self):
        """commit_write_group fails with BzrCheckError when a non-root chk
        record for an inventory of a new revision is missing.
        """
        repo = self.make_repository('damaged-repo')
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop the non-root chk records.
        inv_b = src_repo.get_inventory('B-id')
        inv_c = src_repo.get_inventory('C-id')
        chk_root_keys_only = [
            inv_b.id_to_entry.key(), inv_b.parent_id_basename_to_file_id.key(),
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        all_chks = src_repo.chk_bytes.keys()
        # Pick a non-root key to drop
        key_to_drop = all_chks.difference(chk_root_keys_only).pop()
        all_chks.discard(key_to_drop)
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                all_chks, 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                src_repo.texts.keys(), 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_parent_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a parent inventory of a new revision is missing.
        """
        repo = self.make_repository('damaged-repo')
        b = self.make_branch_with_multiple_chk_nodes()
        b.lock_read()
        self.addCleanup(b.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* the chk root entry for the parent inventory.
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        inv_c = b.repository.get_inventory('C-id')
        chk_keys_for_c_only = [
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                chk_keys_for_c_only, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def make_branch_with_multiple_chk_nodes(self):
        """Build a three-revision branch (A-id, B-id, C-id) with huge file ids.

        A-id adds three files, B-id changes nothing, and C-id modifies all
        three files.

        :return: the branch produced by the branch builder.
        """
        # add and modify files with very long file-ids, so that the chk map
        # will need more than just a root node.
        builder = self.make_branch_builder('simple-branch')
        file_adds = []
        file_modifies = []
        for char in 'abc':
            name = char * 10000
            file_adds.append(
                ('add', ('file-' + name, 'file-%s-id' % name, 'file',
                         'content %s\n' % name)))
            file_modifies.append(
                ('modify', ('file-%s-id' % name, 'new content %s\n' % name)))
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None))] +
            file_adds)
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, file_modifies)
        return builder.get_branch()

    def test_missing_text_record(self):
        """commit_write_group fails with BzrCheckError when a text is missing.
        """
        repo = self.make_repository('damaged-repo')
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop one changed text.
        all_texts = src_repo.texts.keys()
        all_texts.remove(('file-%s-id' % ('c'*10000,), 'C-id'))
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                src_repo.chk_bytes.keys(), 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                all_texts, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()