# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""A dedicated ReconcilePacker with a single intent: fix bug 277537.

We rely on a previous successful bzr check that gave us the relevant
file revision ids to reconcile.
"""

from bzrlib import (
    errors,
    index as _mod_index,
    knit,
    osutils,
    reconcile,
    revision as _mod_revision,
    tsort,
    )
from bzrlib.repofmt import (
    pack_repo,
    )


class InventoryAncestryReconcilePacker(pack_repo.ReconcilePacker):

    def __init__(self, pack_collection, packs, suffix, file_rev_ids,
                 revision_ids=None):
        super(InventoryAncestryReconcilePacker, self).__init__(
            pack_collection, packs, suffix, revision_ids=revision_ids)
        self._file_rev_ids = file_rev_ids
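        # `file_rev_ids` is assumed to be the collection of
        # (file_id, revision_id) text keys that the preceding bzr check
        # flagged as having inconsistent ancestry (see the module docstring).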

    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        # be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) ad hoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        # bad_texts holds (text_key, ideal_parents) tuples; text_key is
        # (file_id, revision_id), so key[0][1] is the revision id.
        bad_texts.sort(key=lambda key: rev_order[key[0][1]])
        transaction = repo.get_transaction()
        file_id_index = _mod_index.GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            # dummy prefix and missing-key width, as in the stock ReconcilePacker
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = knit._DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = knit.KnitVersionedFiles(
            knit._KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # Deltas may be computed against texts already written to the new
            # pack, so flush it to make them readable.  A possible improvement
            # would be to catch errors on short reads and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack._external_compression_parents_of_texts()
        if missing_text_keys:
            raise errors.BzrError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()


class InventoryAncestryReconciler(reconcile.PackReconciler):

    def __init__(self, repo, file_rev_ids,
                 other=None, # This isn't used
                 thorough=False):
        super(InventoryAncestryReconciler, self).__init__(
            repo, other=other, thorough=thorough)
        self._file_rev_ids = file_rev_ids

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        if not self.thorough:
            return
        collection = self.repo._pack_collection
        collection.ensure_loaded()
        collection.lock_names()
        try:
            packs = collection.all_packs()
            all_revisions = self.repo.all_revision_ids()
            total_inventories = len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
            if len(all_revisions):
                self._packer = InventoryAncestryReconcilePacker(
                    collection, packs, ".reconcile", self._file_rev_ids,
                    revision_ids=all_revisions)
                new_pack = self._packer.pack(pb=self.pb)
                if new_pack is not None:
                    self._discard_and_save(packs)
            else:
                # only make a new pack when there is data to copy.
                self._discard_and_save(packs)
            self.garbage_inventories = total_inventories - len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
        finally:
            collection._unlock_names()
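

# A minimal usage sketch (an assumption about the surrounding plugin wiring,
# not part of this module): `repo` is a pack repository and `file_rev_ids`
# comes from a prior `bzr check` run.  `thorough=True` is required because
# _reconcile_steps returns early otherwise; reconcile() is inherited from the
# bzrlib reconciler base class and drives _reconcile_steps under a write lock.
#
#   reconciler = InventoryAncestryReconciler(repo, file_rev_ids, thorough=True)
#   reconciler.reconcile()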