~ubuntu-branches/ubuntu/utopic/bzr-fastimport/utopic-proposed

« back to all changes in this revision

Viewing changes to revisionloader.py

Committer: Bazaar Package Importer
Author(s): Jelmer Vernooij
Date: 2009-01-25 03:13:48 UTC
Revision ID: james.westby@ubuntu.com-20090125031348-pykgsz5j6xpgfw8a

Tags: upstream-0.0.1~bzr112

Import upstream version 0.0.1~bzr112

files added:

COPYING.txt

NEWS

README.txt

__init__.py

commands.py

dates.py

doc/notes.txt

errors.py

exporters

exporters/Makefile

exporters/bzr-fast-export

exporters/bzr-fast-export.LICENSE

exporters/bzr-fast-export.README

exporters/hg-fast-export.README

exporters/hg-fast-export.py

exporters/hg-fast-export.sh

exporters/hg2git.py

exporters/svn-archive.c

exporters/svn-fast-export.c

exporters/svn-fast-export.py

helpers.py

idmapfile.py

parser.py

processor.py

processors

processors/__init__.py

processors/generic_processor.py

processors/info_processor.py

processors/query_processor.py

revisionloader.py

setup.py

tests

tests/__init__.py

tests/test_errors.py

tests/test_parser.py

tests/test_processor.py

Show diffs side-by-side

added added

removed removed

revisionloader.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Parameterised loading of revisions into a repository."""

from bzrlib import errors, knit, lru_cache, osutils

from bzrlib import revision as _mod_revision

class AbstractRevisionLoader(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must implement _load_texts(). The remaining hooks
    (_add_inventory, _add_revision, _default_inventories_provider) have
    default implementations that delegate to the repository.
    """

    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
            inventories_provider=None):
        """Load a revision into a repository.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information, or None if there is none
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
            * the list of revision-ids present in the repository
            * the list of inventories for the revision-id's,
              including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberate best-effort: the inventory is already stored, so
            # rev.inventory_sha1 is left as supplied by the caller.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method must be implemented by subclasses.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param revision_id: the revision identifier to store inv under
        :param inv: the inventory
        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :returns: The validator(which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        # The repository lives on the instance; a bare ``repo`` is not
        # defined in this scope.
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple: the revision-ids the
            repository has, and one inventory per requested revision-id
            in the same order as revision_ids
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                # Missing revision: use the tree for revision id None,
                # which load() documents as supplying an empty inventory.
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories

114

115

116

class RevisionLoader1(AbstractRevisionLoader):
    """A RevisionLoader that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Only entries whose revision equals revision_id are stored; each is
        added to its per-file weave with the distinct parent revisions
        found in parent_invs.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not last changed in
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for path, ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            lines = text_provider(ie.file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines of file_id as stored for revision_id.

        :param file_id: the file identifier whose weave is read
        :param revision_id: the version to extract from that weave
        """
        tx = self.repo.get_transaction()
        # Look the weave up by the file_id argument; there is no inventory
        # entry (``ie``) in scope here.
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """See AbstractRevisionLoader._add_revision().

        inv is accepted for interface compatibility but is not used.
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())

166

167

168

class RevisionLoader2(AbstractRevisionLoader):
    """A RevisionLoader that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Texts are keyed by (file_id, revision). Only keys missing from the
        parent map returned by repo.texts.get_parent_map() are added.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not last changed in
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                # text_parents holds (file_id, revision) keys, so the
                # duplicate check must compare keys, not bare revision ids.
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines of file_id as stored for revision_id.

        :param file_id: the file identifier
        :param revision_id: the revision of the text to fetch
        :raises errors.RevisionNotPresent: if the repository reports the
            requested text as absent
        """
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    def _add_revision(self, rev, inv):
        """See AbstractRevisionLoader._add_revision().

        inv is accepted for interface compatibility but is not used.
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._add_revision(rev)

210

211

212

class ImportRevisionLoader1(RevisionLoader1):
    """A RevisionLoader (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
            random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        :param random_ids: passed through as the random_id flag when
            inventory versions are added
        """
        RevisionLoader1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents):
        """See AbstractRevisionLoader._add_inventory.

        The content object produced for this inventory is remembered in
        self.inv_parent_texts, keyed by revision_id, for later calls.
        """
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        # The text length is returned as well but is not needed here.
        sha1, _num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the version (revision id) being added
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of cached parent content objects,
            or None
        :returns: a (digest, text_length, content) tuple, where digest and
            text_length are computed before any end-of-line fix-up
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        # Measured before the trailing newline (if any) is appended below.
        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas.  This should minimize both
        #    # I/O and the time spend applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        # ``data`` rather than ``bytes`` so the builtin is not shadowed.
        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content

Older »