~vila/bzr/fix277537 : revision 5

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

16

17

"""A dedicated ReconcilePacker with a single intent: fix bug 277537.

18

19

We rely on a previous successful bzr check that gave us the relevant

20

information.

21

"""

22

23

from bzrlib import (

24

index as _mod_index,

25

knit,

26

osutils,

27

reconcile,

28

revision as _mod_revision,

29

tsort,

30

)

31

32

from bzrlib.repofmt import (

33

pack_repo,

34

)

35

36

37

class InventoryAncestryReconcilePacker(pack_repo.ReconcilePacker):

38

39

def __init__(self, pack_collection, packs, suffix, file_rev_ids,

40

revision_ids=None):

41

super(InventoryAncestryReconcilePacker, self).__init__(

42

pack_collection, packs, suffix, revision_ids=revision_ids)

43

self._file_rev_ids = file_rev_ids

44

45

def _copy_text_texts(self):

46

"""generate what texts we should have and then copy."""

47

self.pb.update("Copying content texts", 3)

48

# we have three major tasks here:

49

# 1) generate the ideal index

50

repo = self._pack_collection.repo

51

ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for

52

_1, key, _2, refs in

53

self.new_pack.revision_index.iter_all_entries()])

54

ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)

55

# 2) generate a text_nodes list that contains all the deltas that can

56

# be used as-is, with corrected parents.

57

ok_nodes = []

58

bad_texts = []

59

discarded_nodes = []

60

NULL_REVISION = _mod_revision.NULL_REVISION

61

text_index_map, text_nodes = self._get_text_nodes()

62

for node in text_nodes:

63

# 0 - index

64

# 1 - key

65

# 2 - value

66

# 3 - refs

67

try:

68

ideal_parents = tuple(ideal_index[node[1]])

69

except KeyError:

70

discarded_nodes.append(node)

71

self._data_changed = True

72

else:

73

if ideal_parents == (NULL_REVISION,):

74

ideal_parents = ()

75

if ideal_parents == node[3][0]:

76

# no change needed.

77

ok_nodes.append(node)

78

elif ideal_parents[0:1] == node[3][0][0:1]:

79

# the left most parent is the same, or there are no parents

80

# today. Either way, we can preserve the representation as

81

# long as we change the refs to be inserted.

82

self._data_changed = True

83

ok_nodes.append((node[0], node[1], node[2],

84

(ideal_parents, node[3][1])))

85

self._data_changed = True

86

else:

87

# Reinsert this text completely

88

bad_texts.append((node[1], ideal_parents))

89

self._data_changed = True

90

# we're finished with some data.

91

del ideal_index

92

del text_nodes

93

# 3) bulk copy the ok data

94

total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)

95

list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,

96

self.new_pack.text_index, readv_group_iter, total_items))

97

# 4) adhoc copy all the other texts.

98

# We have to topologically insert all texts otherwise we can fail to

99

# reconcile when parts of a single delta chain are preserved intact,

100

# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be

101

# reinserted, and if d3 has incorrect parents it will also be

102

# reinserted. If we insert d3 first, d2 is present (as it was bulk

103

# copied), so we will try to delta, but d2 is not currently able to be

104

# extracted because it's basis d1 is not present. Topologically sorting

105

# addresses this. The following generates a sort for all the texts that

106

# are being inserted without having to reference the entire text key

107

# space (we only topo sort the revisions, which is smaller).

108

topo_order = tsort.topo_sort(ancestors)

109

rev_order = dict(zip(topo_order, range(len(topo_order))))

110

bad_texts.sort(key=lambda key:rev_order[key[0][1]])

111

transaction = repo.get_transaction()

112

file_id_index = _mod_index.GraphIndexPrefixAdapter(

113

self.new_pack.text_index,

114

('blank', ), 1,

115

add_nodes_callback=self.new_pack.text_index.add_nodes)

116

data_access = knit._DirectPackAccess(

117

{self.new_pack.text_index:self.new_pack.access_tuple()})

118

data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,

119

self.new_pack.access_tuple())

120

output_texts = knit.KnitVersionedFiles(

121

knit._KnitGraphIndex(self.new_pack.text_index,

122

add_callback=self.new_pack.text_index.add_nodes,

123

deltas=True, parents=True, is_locked=repo.is_locked),

124

data_access=data_access, max_delta_chain=200)

125

for key, parent_keys in bad_texts:

126

# We refer to the new pack to delta data being output.

127

# A possible improvement would be to catch errors on short reads

128

# and only flush then.

129

self.new_pack.flush()

130

parents = []

131

for parent_key in parent_keys:

132

if parent_key[0] != key[0]:

133

# Graph parents must match the fileid

134

raise errors.BzrError('Mismatched key parent %r:%r' %

135

(key, parent_keys))

136

parents.append(parent_key[1])

137

text_lines = osutils.split_lines(repo.texts.get_record_stream(

138

[key], 'unordered', True).next().get_bytes_as('fulltext'))

139

output_texts.add_lines(key, parent_keys, text_lines,

140

random_id=True, check_content=False)

141

# 5) check that nothing inserted has a reference outside the keyspace.

142

missing_text_keys = self.new_pack._external_compression_parents_of_texts()

143

if missing_text_keys:

144

raise errors.BzrError('Reference to missing compression parents %r'

145

% (missing_text_keys,))

146

self._log_copied_texts()

147

148

149

class InventoryAncestryReconciler(reconcile.PackReconciler):

150

151

def __init__(self, repo, file_rev_ids,

152

other=None, # This isn't used

153

thorough=False):

154

super(InventoryAncestryReconciler, self).__init__(

155

repo, other=other, thorough=thorough)

156

self._file_rev_ids = file_rev_ids

157

158

def _reconcile_steps(self):

159

"""Perform the steps to reconcile this repository."""

160

if not self.thorough:

161

return

162

collection = self.repo._pack_collection

163

collection.ensure_loaded()

164

collection.lock_names()

165

try:

166

packs = collection.all_packs()

167

all_revisions = self.repo.all_revision_ids()

168

total_inventories = len(list(

169

collection.inventory_index.combined_index.iter_all_entries()))

170

if len(all_revisions):

171

self._packer = InventoryAncestryReconcilePacker(

172

collection, packs, ".reconcile", self._file_rev_ids,

173

all_revisions)

174

new_pack = self._packer.pack(pb=self.pb)

175

if new_pack is not None:

176

self._discard_and_save(packs)

177

else:

178

# only make a new pack when there is data to copy.

179

self._discard_and_save(packs)

180

self.garbage_inventories = total_inventories - len(list(

181

collection.inventory_index.combined_index.iter_all_entries()))

182

finally:

183

collection._unlock_names()

184

185