~bzr/bzr-fastimport/0.6

« back to all changes in this revision

Viewing changes to processors/generic_processor.py

Committer: Ian Clatworthy
Date: 2008-02-15 07:55:31 UTC
Revision ID: ian.clatworthy@internode.on.net-20080215075531-6diyqa855c5lwpe3

first cut at generic processing method

files added:
revisionloader.py

files modified:
dates.py

processors/generic_processor.py

Show diffs side-by-side

added added

removed removed

processors/generic_processor.py

"""Import processor that supports all Bazaar repository formats."""

from bzrlib import (

errors,

generate_ids,

inventory,

lru_cache,

osutils,

revision,

revisiontree,

)

from bzrlib.trace import (

note,

warning,

)

from bzrlib.plugins.fastimport import processor

from bzrlib.plugins.fastimport import (

processor,

revisionloader,

)

class GenericProcessor(processor.ImportProcessor):
    """Import processor that supports all Bazaar repository formats.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * TODO: commit handling
    * LATER: branch support
    * checkpoints and tags are ignored
    * some basic statistics are dumped on completion.

    Other commands produce errors.
    """

    def pre_process(self):
        """Reset the statistics counters and the blob cache."""
        # Statistics
        self._revision_count = 0
        self._branch_count = 0
        self._tag_count = 0
        self._file_count = 0
        self._dir_count = 0
        self._symlink_count = 0

        # dataref -> data. dataref is either :mark or the sha-1.
        # Once a blob is used, it should be deleted from here.
        self.blob_cache = {}

    def post_process(self):
        """Report the statistics gathered during the import."""
        # Dump statistics
        note("Imported %d revisions into %d branches with %d tags.",
            self._revision_count, self._branch_count, self._tag_count)
        # The per-kind counters are initialised in pre_process() but nothing
        # in this class updates them yet, so reporting them stays disabled.
        #note("%d files, %d directories, %d symlinks.",
        #    self._file_count, self._dir_count, self._symlink_count)

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob's data is kept in ``self.blob_cache`` under ``:<mark>`` when
        the command has a mark, otherwise under the SHA-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # The data is treated as one string throughout this module (its
            # len() is used as the text size), so hash it in a single call.
            # sha_strings() would feed it to the hash one character at a
            # time and arrive at the same digest.
            dataref = osutils.sha_string(cmd.data)
        self.blob_cache[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The work is delegated to a GenericCommitHandler created for this
        command; the revision counter is bumped once it returns.
        """
        handler = GenericCommitHandler(cmd, self.target, self.blob_cache)
        handler.process()
        self._revision_count += 1

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        # Imported here because the module-level imports visible for this
        # file do not include datetime.
        import datetime
        # %S is the seconds directive; lower-case %s is not a portable
        # strftime code.
        return datetime.datetime.now().strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        warning("multiple branches are not supported yet"
            " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        warning("tags are not supported yet - ignoring tag '%s'", cmd.id)

104

105

106

class GenericCommitHandler(processor.CommitHandler):
    """Build the revision and inventory for one commit command.

    The file command handlers accumulate an inventory delta in
    ``self.inv_delta``; ``post_process_files`` applies it to a copy of the
    first parent's inventory (or to a new inventory when there are no
    parents).

    NOTE(review): GenericProcessor.commit_handler creates a new handler for
    every commit, so the caches and lookup tables set up in ``__init__``
    start out empty each time -- confirm whether they are meant to be shared
    across commits.
    """

    def __init__(self, command, repo, blob_cache, inventory_cache_size=100):
        """Create a handler for a single commit command.

        :param command: the commit command, passed on to the base class
        :param repo: the repository handed to the RevisionLoader and used to
            look up revisions that are not cached
        :param blob_cache: dataref -> data mapping of blobs not yet used
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        # cache of blobs until they are referenced
        self.blob_cache = blob_cache
        # revision-id -> Inventory cache
        self.inventory_cache = lru_cache.LRUCache(inventory_cache_size)
        # smart loader that uses this cache
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)
        # directory-path -> inventory-entry lookup table
        self._directory_entries = {}
        # import-ref to revision-id lookup table
        self.revision_id_by_import_ref = {}

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.text_for_commit = {}

    def post_process_files(self):
        """Save the revision.

        Builds the Revision object from the committer details, derives the
        new inventory by applying ``self.inv_delta`` and records both the
        inventory and the revision id for later lookups. Loading into the
        repository is still disabled (see the end of this method).
        """
        rev = revision.Revision(self.revision_id)
        committer = self.command.committer
        rev.committer = "%s <%s>" % (committer[0], committer[1])
        rev.timestamp = committer[2]
        rev.timezone = committer[3]
        print("loading revision %r" % (rev,))

        # Derive the inventory from the previous one
        parents = self.command.parents
        if not parents:
            new_inventory = inventory.Inventory()
        else:
            # use the bzr revision id to look up the inventory cache
            parent_id = self.revision_id_by_import_ref[parents[0]]
            _present, parent_inventories = self._get_inventories([parent_id])
            new_inventory = parent_inventories[0].copy()
        new_inventory.apply_delta(self.inv_delta)
        # Keep the inventory where _get_inventories() looks for it, and map
        # the import ref to the revision id (not to the inventory itself),
        # which is what the parent lookup above expects to find.
        # NOTE(review): parents are looked up by the refs listed in
        # command.parents while this stores under command.ref -- confirm
        # that both use the same kind of reference.
        self.inventory_cache[self.revision_id] = new_inventory
        self.revision_id_by_import_ref[self.command.ref] = self.revision_id

        # debug trace ...
        print("applied inventory delta ...")
        for entry in self.inv_delta:
            print(" %r" % (entry,))
        print("creating inventory ...")
        for entry in new_inventory:
            print(" %r" % (entry,))

        ## Uncomment once the rest is working
        # NOTE(review): 'revision' below names the imported module;
        # presumably 'rev' is what should be passed -- confirm when enabling.
        # self.loader.load(revision, new_inventory, None,
        #     lambda file_id: self._get_text(file_id))

    def modify_handler(self, filecmd):
        """Record the addition or modification of an item.

        The content comes from the blob cache when the command refers to a
        blob, otherwise from the command itself.
        """
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice
            data = self.blob_cache.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Record the removal of the item at the command's path."""
        path = filecmd.path
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Copying is not implemented yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Renaming is not implemented yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Deleting everything is not implemented yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        return generate_ids.gen_file_id(path)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the revision ids
            that were cached or found in the repository, and inventories has
            one inventory per requested revision id, in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.inventory_cache[revision_id]
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                is_present = self.repo.has_revision(revision_id)
                if is_present:
                    inv = self.repo.revision_tree(revision_id).inventory
                    self.inventory_cache[revision_id] = inv
                else:
                    # Fall back to the tree for revision None. It is not
                    # cached: a cached entry counts as present, and this
                    # revision is not.
                    inv = self.repo.revision_tree(None).inventory
            else:
                is_present = True
            if is_present:
                present.append(revision_id)
            inventories.append(inv)
        return present, inventories

    def _get_text(self, file_id):
        """Get the text for a file-id."""
        return self.text_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the entry made for ``kind`` is neither a
            file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        # Stamp the entry with this commit, as _ensure_directory() does for
        # the directories it creates.
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            # data is a single string here (its len() is the text size), so
            # hash it in one call.
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            ie.executable = is_executable
            self.text_for_commit[file_id] = data
        elif isinstance(ie, inventory.InventoryLink):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an entry delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively and added to the
        inventory delta.

        :return: (basename, parent_id) where basename is the last component
            of path and parent_id is the file id of its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self._directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_id)
        ie.revision = self.revision_id
        self._directory_entries[dirname] = ie
        # The delta entry describes the directory itself, so it is recorded
        # under the directory's path rather than the path of the item in it.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie.file_id

Older »