~ubuntu-branches/ubuntu/trusty/duplicity/trusty

Viewing changes to duplicity/backends/_boto_multi.py

Committer: Package Import Robot
Author(s): Michael Terry
Date: 2011-12-06 14:15:01 UTC
mfrom: (1.9.4)
Revision ID: package-import@ubuntu.com-20111206141501-nvfaaauqivpwyb7f

Tags: 0.6.17-0ubuntu1

* New upstream release
* debian/patches/06_use_passphrase.dpatch,
  debian/patches/07_large_rackspace_list.dpatch,
  debian/patches/08_check_volumes.dpatch:
  - Dropped, applied upstream
* debian/rules:
  - Run new upstream test suite during build
* debian/control:
  - Add rdiff as a build-dep to run above test suite
* debian/patches/06testfixes.dpatch:
  - Fix a few tests to not fail erroneously
* debian/patches/07fixincresume.dpatch:
  - Fix a bug with resuming an incremental backup that would result in
    a bogus error.  Also patches in a test for it.
* debian/tests/full-cycle-local:
  - New DEP-8 test script that backs up locally, restores, and checks files
* debian/tests/full-cycle-u1:
  - New DEP-8 test script that does the same as above, but to Ubuntu One
* debian/tests/control:
  - Start of DEP-8 test suite.  Only enable above full-cycle-local test
    for automatic execution.  The other is for manual testing right now.

files added:
.bzrignore

README-LOG

README-REPO

bin/duplicity

bin/duplicity.1

bin/rdiffdir

bin/rdiffdir.1

debian/patches/06testfixes.dpatch

debian/patches/07fixincresume.dpatch

debian/tests

debian/tests/control

debian/tests/full-cycle-local

debian/tests/full-cycle-u1

dist

dist/duplicity.spec.template

dist/makedist

dist/makerpm

dist/mkGNUChangelog.sh

dist/setup.py

duplicity/GnuPGInterface.py

duplicity/__init__.py

duplicity/_librsyncmodule.c

duplicity/asyncscheduler.py

duplicity/backend.py

duplicity/backends

duplicity/backends/__init__.py

duplicity/backends/_boto_multi.py

duplicity/backends/_boto_single.py

duplicity/backends/botobackend.py

duplicity/backends/cloudfilesbackend.py

duplicity/backends/ftpbackend.py

duplicity/backends/ftpsbackend.py

duplicity/backends/gdocsbackend.py

duplicity/backends/giobackend.py

duplicity/backends/hsibackend.py

duplicity/backends/imapbackend.py

duplicity/backends/localbackend.py

duplicity/backends/rsyncbackend.py

duplicity/backends/sshbackend.py

duplicity/backends/tahoebackend.py

duplicity/backends/u1backend.py

duplicity/backends/webdavbackend.py

duplicity/collections.py

duplicity/commandline.py

duplicity/compilec.py

duplicity/diffdir.py

duplicity/dup_temp.py

duplicity/dup_threading.py

duplicity/dup_time.py

duplicity/errors.py

duplicity/file_naming.py

duplicity/filechunkio.py

duplicity/globals.py

duplicity/gpg.py

duplicity/lazy.py

duplicity/librsync.py

duplicity/log.py

duplicity/manifest.py

duplicity/misc.py

duplicity/patchdir.py

duplicity/path.py

duplicity/pexpect.py

duplicity/robust.py

duplicity/selection.py

duplicity/static.py

duplicity/statistics.py

duplicity/tarfile.py

duplicity/tempdir.py

duplicity/urlparse_2_5.py

duplicity/util.py

po/LINGUAS

po/Makevars

po/POTFILES.in

po/POTFILES.skip

po/bg.po

po/de

po/de.po

po/de/de.po

po/de/duplicity.mo

po/el

po/el.po

po/el/duplicity.mo

po/el/el.po

po/en_AU

po/en_AU.po

po/en_AU/duplicity.mo

po/en_AU/en_AU.po

po/en_GB.po

po/eo.po

po/es.po

po/fr

po/fr.po

po/fr/duplicity.mo

po/fr/fr.po

po/he

po/he.po

po/he/duplicity.mo

po/he/he.po

po/hu

po/hu.po

po/hu/duplicity.mo

po/hu/hu.po

po/id

po/id.po

po/id/duplicity.mo

po/id/id.po

po/io.po

po/it

po/it.po

po/it/duplicity.mo

po/it/it.po

po/oc

po/oc.po

po/oc/duplicity.mo

po/oc/oc.po

po/pl

po/pl.po

po/pl/duplicity.mo

po/pl/pl.po

po/pt.po

po/pt_BR

po/pt_BR.po

po/pt_BR/duplicity.mo

po/pt_BR/pt_BR.po

po/ru

po/ru.po

po/ru/duplicity.mo

po/ru/ru.po

po/sl

po/sl.po

po/sl/duplicity.mo

po/sl/sl.po

po/sq

po/sq.po

po/sq/duplicity.mo

po/sq/sq.po

po/sr

po/sr.po

po/sr/duplicity.mo

po/sr/sr.po

po/sv

po/sv.po

po/sv/duplicity.mo

po/sv/sv.po

po/tr

po/tr.po

po/tr/duplicity.mo

po/tr/tr.po

po/ug

po/ug.po

po/ug/duplicity.mo

po/ug/ug.po

po/update-pot

po/zh_CN.po

tarfile-CHANGES

testing

testing/gnupg

testing/gnupg/README

testing/gnupg/pubring.gpg

testing/gnupg/secring.gpg

testing/gnupg/trustdb.gpg

testing/helpers

testing/helpers/helper.py

testing/manual

testing/manual/backendtest.py

testing/manual/config.py.tmpl

testing/manual/manual-ctrl-c-test.sh

testing/manual/roottest.py

testing/manual/run-coverage.sh

testing/rootfiles.tar.gz

testing/run-tests

testing/run-tests-ve

testing/testfiles.tar.gz

testing/tests

testing/tests/GnuPGInterfacetest.py

testing/tests/badupload.py

testing/tests/cleanuptest.py

testing/tests/collectionstest.py

testing/tests/diffdirtest.py

testing/tests/dup_temptest.py

testing/tests/dup_timetest.py

testing/tests/file_namingtest.py

testing/tests/finaltest.py

testing/tests/gpgtest.py

testing/tests/lazytest.py

testing/tests/logtest.py

testing/tests/manifesttest.py

testing/tests/misctest.py

testing/tests/parsedurltest.py

testing/tests/patchdirtest.py

testing/tests/pathtest.py

testing/tests/rdiffdirtest.py

testing/tests/restarttest.py

testing/tests/selectiontest.py

testing/tests/statictest.py

testing/tests/statisticstest.py

testing/tests/tempdirtest.py

testing/tests/test_tarfile.py

testing/testtar.tar

files removed:
LOG-README

REPO-README

_librsyncmodule.c

debian/patches/06_use_passphrase.dpatch

debian/patches/07_large_rackspace_list.dpatch

debian/patches/08_check_volumes.dpatch

duplicity.1

rdiffdir

rdiffdir.1

src/GnuPGInterface.py

src/__init__.py

src/asyncscheduler.py

src/backend.py

src/backends

src/backends/__init__.py

src/backends/botobackend.py

src/backends/cloudfilesbackend.py

src/backends/ftpbackend.py

src/backends/ftpsbackend.py

src/backends/gdocsbackend.py

src/backends/giobackend.py

src/backends/hsibackend.py

src/backends/imapbackend.py

src/backends/localbackend.py

src/backends/rsyncbackend.py

src/backends/sshbackend.py

src/backends/tahoebackend.py

src/backends/u1backend.py

src/backends/webdavbackend.py

src/collections.py

src/commandline.py

src/diffdir.py

src/dup_temp.py

src/dup_threading.py

src/dup_time.py

src/errors.py

src/file_naming.py

src/globals.py

src/gpg.py

src/lazy.py

src/librsync.py

src/log.py

src/manifest.py

src/misc.py

src/patchdir.py

src/path.py

src/pexpect.py

src/robust.py

src/selection.py

src/static.py

src/statistics.py

src/tarfile.py

src/tempdir.py

src/urlparse_2_5.py

src/util.py

files modified:
CHANGELOG

Changelog.GNU

README

debian/changelog

debian/control

debian/duplicity.docs

debian/patches/00list

debian/patches/01pexpect.dpatch

debian/patches/02cachedesync.dpatch

debian/patches/03forcecleanup.dpatch

debian/patches/05upstreamgpgintf.dpatch

debian/rules

duplicity

po/bg/bg.po

po/bg/duplicity.mo

po/duplicity.pot

po/en_GB/duplicity.mo

po/en_GB/en_GB.po

po/eo/duplicity.mo

po/eo/eo.po

po/es/duplicity.mo

po/es/es.po

po/io/duplicity.mo

po/io/io.po

po/pt/duplicity.mo

po/pt/pt.po

po/zh_CN/duplicity.mo

po/zh_CN/zh_CN.po

setup.py

tarfile-LICENSE

Show diffs side-by-side

added added

removed removed

duplicity/backends/_boto_multi.py

# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-

# This file is part of duplicity.

# Duplicity is free software; you can redistribute it and/or modify it

# under the terms of the GNU General Public License as published by the

# Free Software Foundation; either version 2 of the License, or (at your

# option) any later version.

# Duplicity is distributed in the hope that it will be useful, but

# WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

# General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with duplicity; if not, write to the Free Software Foundation,

# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import os

import sys

import time

import duplicity.backend

from duplicity import globals

from duplicity import log

from duplicity.errors import * #@UnusedWildImport

from duplicity.util import exception_traceback

from duplicity.backend import retry

from duplicity.filechunkio import FileChunkIO

# Oldest boto release this backend accepts (compared against boto.Version).
BOTO_MIN_VERSION = "1.6a"

# Process-based multiprocessing is only used on Linux and Mac OS X; every
# other platform (e.g. the *BSDs) gets the thread-backed
# multiprocessing.dummy module, which replicates the multiprocessing API.
# sys.platform is matched by prefix because it reads 'linux2' or 'linux3'
# on Python 2 but plain 'linux' on Python 3.3 and later.
if sys.platform == 'darwin' or sys.platform.startswith('linux'):
    import multiprocessing
else:
    from multiprocessing import dummy as multiprocessing
    log.Debug('Multiprocessing is not supported on %s, will use threads instead.' % sys.platform)

def get_connection(scheme, parsed_url):
    """
    Open a boto S3 connection for the given URL scheme.

    scheme is either 's3+http' (use boto's default Amazon S3 host) or 's3'
    (connect to parsed_url.hostname).  The connection is secure unless
    globals.s3_unencrypted_connection is set, and the bucket addressing
    style (calling format) follows globals.s3_use_new_style.

    Returns the boto S3Connection.  A missing or too old boto library, and
    inconsistent calling-format support, are reported via log.FatalError.
    """
    try:
        import boto
        from boto.s3.connection import S3Connection
        # Explicit checks rather than asserts: an AssertionError would slip
        # past the ImportError handler below, and asserts vanish under -O.
        if boto.Version < BOTO_MIN_VERSION or not hasattr(S3Connection, 'lookup'):
            raise ImportError("boto %s is too old" % boto.Version)
    except ImportError:
        # NOTE(review): log.FatalError presumably terminates the program;
        # the code below relies on S3Connection being importable.
        log.FatalError("This backend (s3) requires boto library, version %s or later, "
                       "(http://code.google.com/p/boto/)." % BOTO_MIN_VERSION,
                       log.ErrorCode.boto_lib_too_old)

    # Newer versions of boto default to using virtual hosting for buckets
    # as a result of upstream deprecation of the old-style access method by
    # Amazon S3. This change is not backwards compatible (in particular
    # with respect to upper case characters in bucket names); so we default
    # to forcing use of the old-style method unless the user has explicitly
    # asked us to use new-style bucket access.
    #
    # Note that if the user wants to use new-style buckets, we use the
    # subdomain calling form rather than giving the option of both
    # subdomain and vhost. The reason being that anything addressable as a
    # vhost is also addressable as a subdomain. Seeing as the latter is
    # mostly a convenience method of allowing browsable content
    # semi-invisibly being hosted on S3, the former format makes a lot more
    # sense for us to use - being explicit about what is happening (the
    # fact that we are talking to S3 servers).
    try:
        from boto.s3.connection import OrdinaryCallingFormat
        from boto.s3.connection import SubdomainCallingFormat
        cfs_supported = True
    except ImportError:
        cfs_supported = False

    if not cfs_supported:
        calling_format = None
        if globals.s3_use_new_style:
            log.FatalError("Use of new-style (subdomain) S3 bucket addressing was "
                           "requested, but does not seem to be supported by the "
                           "boto library. Either you need to upgrade your boto "
                           "library or duplicity has failed to correctly detect "
                           "the appropriate support.",
                           log.ErrorCode.boto_old_style)
    elif globals.s3_use_new_style:
        calling_format = SubdomainCallingFormat()
    else:
        calling_format = OrdinaryCallingFormat()

    is_secure = not globals.s3_unencrypted_connection
    if scheme == 's3+http':
        # Use the default Amazon S3 host.
        conn = S3Connection(is_secure=is_secure)
    else:
        assert scheme == 's3'
        conn = S3Connection(host=parsed_url.hostname, is_secure=is_secure)

    # A connection without a calling_format attribute comes from a boto
    # that has no calling-format support at all; it is returned unchanged.
    if hasattr(conn, 'calling_format'):
        if calling_format is None:
            log.FatalError("It seems we previously failed to detect support for calling "
                           "formats in the boto library, yet the support is there. This is "
                           "almost certainly a duplicity bug.",
                           log.ErrorCode.boto_calling_format)
        else:
            conn.calling_format = calling_format

    return conn

131

132

133

class BotoBackend(duplicity.backend.Backend):
    """
    Backend for Amazon's Simple Storage Service, (aka Amazon S3), through
    the use of the boto module, (http://code.google.com/p/boto/).

    To make use of this backend you must set aws_access_key_id
    and aws_secret_access_key in your ~/.boto or /etc/boto.cfg
    with your Amazon Web Services key id and secret respectively.
    Alternatively you can export the environment variables
    AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
    """

    def __init__(self, parsed_url):
        """
        Derive the bucket name and key prefix from parsed_url.path and
        open the connection.

        Raises BackendException when the URL does not name a bucket.
        """
        duplicity.backend.Backend.__init__(self, parsed_url)

        from boto.s3.key import Key
        # Not referenced below.  NOTE(review): presumably imported so that a
        # boto without multipart support fails right here -- confirm.
        from boto.s3.multipart import MultiPartUpload

        # This folds the null prefix and all null parts, which means that:
        #  //MyBucket/ and //MyBucket are equivalent.
        #  //MyBucket//My///My/Prefix/ and //MyBucket/My/Prefix are equivalent.
        # A real list is required because the bucket name is popped off it.
        self.url_parts = [part for part in parsed_url.path.split('/') if part != '']

        if self.url_parts:
            self.bucket_name = self.url_parts.pop(0)
        else:
            # Duplicity hangs if boto gets a null bucket name.
            # HC: Caught a socket error, trying to recover
            raise BackendException('Boto requires a bucket name.')

        self.scheme = parsed_url.scheme

        self.key_class = Key

        if self.url_parts:
            self.key_prefix = '%s/' % '/'.join(self.url_parts)
        else:
            self.key_prefix = ''

        self.straight_url = duplicity.backend.strip_auth_from_url(parsed_url)
        self.parsed_url = parsed_url
        self.resetConnection()

    def resetConnection(self):
        """Open a fresh connection and look the bucket up again."""
        self.bucket = None
        self.conn = get_connection(self.scheme, self.parsed_url)
        self.bucket = self.conn.lookup(self.bucket_name)

    def put(self, source_path, remote_filename=None):
        """
        Upload source_path under key_prefix + remote_filename.

        remote_filename defaults to source_path.get_filename().  The bucket
        is looked up, and created if it does not exist, first.  The upload
        is attempted up to globals.num_retries times; BackendException is
        raised when every attempt fails.
        """
        from boto.s3.connection import Location
        if globals.s3_european_buckets:
            if not globals.s3_use_new_style:
                log.FatalError("European bucket creation was requested, but not new-style "
                               "bucket addressing (--s3-use-new-style)",
                               log.ErrorCode.s3_bucket_not_style)
        # Network glitch may prevent first few attempts of creating/looking up a bucket
        for n in range(1, globals.num_retries+1):
            if self.bucket:
                break
            if n > 1:
                time.sleep(30)
            try:
                try:
                    self.bucket = self.conn.get_bucket(self.bucket_name, validate=True)
                except Exception as e:
                    if "NoSuchBucket" in str(e):
                        if globals.s3_european_buckets:
                            self.bucket = self.conn.create_bucket(self.bucket_name,
                                                                  location=Location.EU)
                        else:
                            self.bucket = self.conn.create_bucket(self.bucket_name)
                    else:
                        raise e
            except Exception as e:
                log.Warn("Failed to create bucket (attempt #%d) '%s' failed (reason: %s: %s)"
                         "" % (n, self.bucket_name,
                               e.__class__.__name__,
                               str(e)))
                self.resetConnection()

        if not remote_filename:
            remote_filename = source_path.get_filename()
        key = self.key_prefix + remote_filename

        # Neither value changes between attempts.
        if globals.s3_use_rrs:
            storage_class = 'REDUCED_REDUNDANCY'
        else:
            storage_class = 'STANDARD'
        headers = {
            'Content-Type': 'application/octet-stream',
            'x-amz-storage-class': storage_class,
        }

        for n in range(1, globals.num_retries+1):
            if n > 1:
                # sleep before retry (new connection to a **hopeful** new host, so no need to wait so long)
                time.sleep(10)

            log.Info("Uploading %s/%s to %s Storage" % (self.straight_url, remote_filename, storage_class))
            try:
                self.upload(source_path.name, key, headers)
                self.resetConnection()
                return
            except Exception as e:
                # The attempt number fills the '#%d' placeholder.
                log.Warn("Upload '%s/%s' failed (attempt #%d, reason: %s: %s)"
                         "" % (self.straight_url,
                               remote_filename,
                               n,
                               e.__class__.__name__,
                               str(e)))
                log.Debug("Backtrace of previous error: %s" % (exception_traceback(),))
                self.resetConnection()
        log.Warn("Giving up trying to upload %s/%s after %d attempts" %
                 (self.straight_url, remote_filename, globals.num_retries))
        raise BackendException("Error uploading %s/%s" % (self.straight_url, remote_filename))

    def get(self, remote_filename, local_path):
        """
        Download key_prefix + remote_filename into local_path.name.

        Attempted up to globals.num_retries times; BackendException is
        raised when every attempt fails.
        """
        for n in range(1, globals.num_retries+1):
            if n > 1:
                # sleep before retry (new connection to a **hopeful** new host, so no need to wait so long)
                time.sleep(10)
            log.Info("Downloading %s/%s" % (self.straight_url, remote_filename))
            try:
                # Built per attempt so the key is bound to the bucket object
                # obtained by the most recent resetConnection().
                key = self.key_class(self.bucket)
                key.key = self.key_prefix + remote_filename
                key.get_contents_to_filename(local_path.name)
                local_path.setdata()
                self.resetConnection()
                return
            except Exception as e:
                log.Warn("Download %s/%s failed (attempt #%d, reason: %s: %s)"
                         "" % (self.straight_url,
                               remote_filename,
                               n,
                               e.__class__.__name__,
                               str(e)), 1)
                log.Debug("Backtrace of previous error: %s" % (exception_traceback(),))
                self.resetConnection()
        log.Warn("Giving up trying to download %s/%s after %d attempts" %
                 (self.straight_url, remote_filename, globals.num_retries))
        raise BackendException("Error downloading %s/%s" % (self.straight_url, remote_filename))

    def list(self):
        """
        Return the filenames stored under key_prefix.

        Returns an empty list when there is no bucket.  Attempted up to
        globals.num_retries times; BackendException is raised when every
        attempt fails.
        """
        if not self.bucket:
            return []

        for n in range(1, globals.num_retries+1):
            if n > 1:
                # sleep before retry
                time.sleep(30)
            log.Info("Listing %s" % self.straight_url)
            try:
                return self._list_filenames_in_bucket()
            except Exception as e:
                log.Warn("List %s failed (attempt #%d, reason: %s: %s)"
                         "" % (self.straight_url,
                               n,
                               e.__class__.__name__,
                               str(e)), 1)
                log.Debug("Backtrace of previous error: %s" % (exception_traceback(),))
        log.Warn("Giving up trying to list %s after %d attempts" %
                 (self.straight_url, globals.num_retries))
        raise BackendException("Error listing %s" % self.straight_url)

    def _list_filenames_in_bucket(self):
        """Return the key names under key_prefix with the prefix removed."""
        # We add a 'd' to the prefix to make sure it is not null (for boto) and
        # to optimize the listing of our filenames, which always begin with 'd'.
        # This will cause a failure in the regression tests as below:
        #       FAIL: Test basic backend operations
        #       <traceback snipped>
        #       AssertionError: Got list: []
        #       Wanted: ['testfile']
        # Because of the need for this optimization, it should be left as is.
        #for k in self.bucket.list(prefix = self.key_prefix + 'd', delimiter = '/'):
        # NOTE(review): the active call below does NOT append the 'd'
        # described above; only the commented-out variant does.
        filename_list = []
        for k in self.bucket.list(prefix=self.key_prefix, delimiter='/'):
            try:
                filename = k.key.replace(self.key_prefix, '', 1)
            except AttributeError:
                # Entries lacking a usable .key are skipped (presumably the
                # pseudo-directory entries produced by the delimiter).
                continue
            filename_list.append(filename)
            log.Debug("Listed %s/%s" % (self.straight_url, filename))
        return filename_list

    def delete(self, filename_list):
        """Delete each of the given filenames (relative to key_prefix)."""
        for filename in filename_list:
            self.bucket.delete_key(self.key_prefix + filename)
            log.Debug("Deleted %s/%s" % (self.straight_url, filename))

    @retry
    def _query_file_info(self, filename, raise_errors=False):
        """
        Return {'size': n} for the given filename.

        The size is -1 when the key does not exist.  When the lookup fails
        the connection is reset and the error is either re-raised (if
        raise_errors is true) or reported as {'size': None}.
        """
        try:
            key = self.bucket.lookup(self.key_prefix + filename)
            if key is None:
                return {'size': -1}
            return {'size': key.size}
        except Exception as e:
            log.Warn("Query %s/%s failed: %s"
                     "" % (self.straight_url,
                           filename,
                           str(e)))
            self.resetConnection()
            if raise_errors:
                raise e
            else:
                return {'size': None}

    def upload(self, filename, key, headers=None):
        """
        Upload the local file to the bucket key as an S3 multipart upload.

        The file is cut into chunks of globals.s3_multipart_chunk_size
        bytes (raised to globals.s3_multipart_minimum_chunk_size if
        smaller) and each chunk is handed to multipart_upload_worker in a
        pool with one worker per chunk.

        Returns the result of completing the multipart upload.  Raises
        BackendException, after cancelling the upload, when fewer parts
        than chunks arrived.
        """
        chunk_size = globals.s3_multipart_chunk_size

        # Check minimum chunk size for S3
        if chunk_size < globals.s3_multipart_minimum_chunk_size:
            log.Warn("Minimum chunk size is %d, but %d specified." % (
                globals.s3_multipart_minimum_chunk_size, chunk_size))
            chunk_size = globals.s3_multipart_minimum_chunk_size

        # Decide in how many chunks to upload: the size divided by the chunk
        # size, rounded up, and never fewer than one (even for empty files).
        size = os.path.getsize(filename)
        chunks = max(1, (size + chunk_size - 1) // chunk_size)

        log.Debug("Uploading %d bytes in %d chunks" % (size, chunks))

        mp = self.bucket.initiate_multipart_upload(key, headers)

        pool = multiprocessing.Pool(processes=chunks)
        for n in range(chunks):
            # The keys must match multipart_upload_worker's parameter names:
            # a mismatch only surfaces as a TypeError stored in the unread
            # async result, i.e. as a silently missing part.
            params = {
                'scheme': self.scheme,
                'parsed_url': self.parsed_url,
                'bucket_name': self.bucket_name,
                'multipart_id': mp.id,
                'filename': filename,
                'offset': n,
                'bytes': chunk_size,
                'num_retries': globals.num_retries,
            }
            pool.apply_async(multipart_upload_worker, kwds=params)
        pool.close()
        pool.join()

        # Worker failures are not propagated; they are detected by counting
        # the parts that actually arrived.
        if len(mp.get_all_parts()) < chunks:
            mp.cancel_upload()
            raise BackendException("Multipart upload failed. Aborted.")

        return mp.complete_upload()

381

382

383

def multipart_upload_worker(scheme, parsed_url, bucket_name, multipart_id, filename,
                            offset, bytes, num_retries):
    """
    Worker method for uploading a file chunk to S3 using multipart upload.
    Note that the file chunk is read into memory, so it's important to keep
    this number reasonably small.

    offset is the zero-based chunk index: the chunk starts at byte
    offset * bytes of filename, is at most bytes long and is uploaded as
    part number offset + 1 of the multipart upload multipart_id.

    A failed attempt is retried up to num_retries more times on a fresh
    connection; the last error is re-raised once retries are exhausted.
    """
    import traceback

    worker_name = multiprocessing.current_process().name
    part_number = offset + 1

    def _upload_callback(uploaded, total):
        log.Debug("%s: Uploaded %s/%s bytes" % (worker_name, uploaded, total))

    retries_left = num_retries
    while True:
        log.Debug("%s: Uploading chunk %d" % (worker_name, part_number))
        try:
            conn = get_connection(scheme, parsed_url)
            bucket = conn.lookup(bucket_name)

            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(filename, 'r', offset=offset * bytes, bytes=bytes) as fd:
                        mp.upload_part_from_file(fd, part_number, cb=_upload_callback)
                    break
            else:
                # Without this the chunk would be reported as complete
                # although nothing was uploaded.
                raise BackendException("Multipart upload %s not found in bucket %s" % (
                    multipart_id, bucket_name))
        except Exception as e:
            traceback.print_exc()
            if not retries_left:
                log.Debug("%s: Upload of chunk %d failed. Aborting..." % (
                    worker_name, part_number))
                raise e
            log.Debug("%s: Upload of chunk %d failed. Retrying %d more times..." % (
                worker_name, part_number, retries_left))
            retries_left -= 1
        else:
            log.Debug("%s: Upload of chunk %d complete" % (worker_name, part_number))
            return

420

421

# Both S3 URL schemes are served by the same backend class.
for _scheme in ("s3", "s3+http"):
    duplicity.backend.register_backend(_scheme, BotoBackend)

Older »