# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2016 Stefan Breunig <stefan-duplicity@breunig.xyz>
# Based on the backend onedrivebackend.py
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import json
import os.path
import re
import sys
import time
from io import DEFAULT_BUFFER_SIZE

import duplicity.backend
from duplicity import globals
from duplicity import log
from duplicity.errors import BackendException
35
class ADBackend(duplicity.backend.Backend):
    """
    Backend for Amazon Drive. It communicates directly with Amazon Drive using
    their RESTful API and does not rely on externally setup software (like
    acd_cli).
    """

    # Cached OAuth2 token is stored on disk between duplicity runs.
    OAUTH_TOKEN_PATH = os.path.expanduser('~/.duplicity_ad_oauthtoken.json')

    OAUTH_AUTHORIZE_URL = 'https://www.amazon.com/ap/oa'
    OAUTH_TOKEN_URL = 'https://api.amazon.com/auth/o2/token'
    # NOTE: Amazon requires https, which is why I am using my domain/setup
    # instead of Duplicity's. Mail me at stefan-duplicity@breunig.xyz once it is
    # available through https and I will whitelist the new URL.
    OAUTH_REDIRECT_URL = 'https://breunig.xyz/duplicity/copy.html'
    OAUTH_SCOPE = ['clouddrive:read_other', 'clouddrive:write']

    CLIENT_ID = 'amzn1.application-oa2-client.791c9c2d78444e85a32eb66f92eb6bcc'
    CLIENT_SECRET = '5b322c6a37b25f16d848a6a556eddcc30314fc46ae65c87068ff1bc4588d715b'

    # Fixed boundary string for the multipart/form-data upload body.
    MULTIPART_BOUNDARY = 'DuplicityFormBoundaryd66364f7f8924f7e9d478e19cf4b871d114a1e00262542'
def __init__(self, parsed_url):
58
duplicity.backend.Backend.__init__(self, parsed_url)
60
self.metadata_url = 'https://drive.amazonaws.com/drive/v1/'
61
self.content_url = 'https://content-na.drive.amazonaws.com/cdproxy/'
63
self.names_to_ids = {}
64
self.backup_target_id = None
65
self.backup_target = parsed_url.path.lstrip('/')
67
if globals.volsize > (10 * 1024 * 1024 * 1024):
68
# https://forums.developer.amazon.com/questions/22713/file-size-limits.html
69
# https://forums.developer.amazon.com/questions/22038/support-for-chunked-transfer-encoding.html
71
'Your --volsize is bigger than 10 GiB, which is the maximum '
72
'file size on Amazon Drive that does not require work arounds.')
78
from requests_oauthlib import OAuth2Session
80
raise BackendException(
81
'Amazon Drive backend requires python-requests and '
82
'python-requests-oauthlib to be installed.\n\n'
83
'For Debian and derivates use:\n'
84
' apt-get install python-requests python-requests-oauthlib\n'
85
'For Fedora and derivates use:\n'
86
' yum install python-requests python-requests-oauthlib')
88
self.initialize_oauth2_session()
89
self.resolve_backup_target()
91
def initialize_oauth2_session(self):
92
"""Setup or refresh oauth2 session with Amazon Drive"""
94
def token_updater(token):
95
"""Stores oauth2 token on disk"""
97
with open(self.OAUTH_TOKEN_PATH, 'w') as f:
99
except Exception as err:
100
log.Error('Could not save the OAuth2 token to %s. This means '
101
'you may need to do the OAuth2 authorization '
102
'process again soon. Original error: %s' % (
103
self.OAUTH_TOKEN_PATH, err))
107
with open(self.OAUTH_TOKEN_PATH) as f:
109
except IOError as err:
110
log.Notice('Could not load OAuth2 token. '
111
'Trying to create a new one. (original error: %s)' % err)
113
self.http_client = OAuth2Session(
115
scope=self.OAUTH_SCOPE,
116
redirect_uri=self.OAUTH_REDIRECT_URL,
118
auto_refresh_kwargs={
119
'client_id': self.CLIENT_ID,
120
'client_secret': self.CLIENT_SECRET,
122
auto_refresh_url=self.OAUTH_TOKEN_URL,
123
token_updater=token_updater)
125
if token is not None:
126
self.http_client.refresh_token(self.OAUTH_TOKEN_URL)
128
endpoints_response = self.http_client.get(self.metadata_url +
130
if endpoints_response.status_code != requests.codes.ok:
134
if not sys.stdout.isatty() or not sys.stdin.isatty():
135
log.FatalError('The OAuth2 token could not be loaded from %s '
136
'and you are not running duplicity '
137
'interactively, so duplicity cannot possibly '
138
'access Amazon Drive.' % self.OAUTH_TOKEN_PATH)
139
authorization_url, _ = self.http_client.authorization_url(
140
self.OAUTH_AUTHORIZE_URL)
143
print ('In order to allow duplicity to access Amazon Drive, please '
144
'open the following URL in a browser and copy the URL of the '
145
'page you see after authorization here:')
146
print authorization_url
149
redirected_to = (raw_input('URL of the resulting page: ')
150
.replace('http://', 'https://', 1))
152
token = self.http_client.fetch_token(
153
self.OAUTH_TOKEN_URL,
154
client_secret=self.CLIENT_SECRET,
155
authorization_response=redirected_to)
157
endpoints_response = self.http_client.get(self.metadata_url +
159
endpoints_response.raise_for_status()
162
urls = endpoints_response.json()
163
if 'metadataUrl' not in urls or 'contentUrl' not in urls:
164
log.FatalError('Could not retrieve endpoint URLs for this account')
165
self.metadata_url = urls['metadataUrl']
166
self.content_url = urls['contentUrl']
168
def resolve_backup_target(self):
169
"""Resolve node id for remote backup target folder"""
171
response = self.http_client.get(
172
self.metadata_url + 'nodes?filters=kind:FOLDER AND isRoot:true')
173
parent_node_id = response.json()['data'][0]['id']
175
for component in [x for x in self.backup_target.split('/') if x]:
176
# There doesn't seem to be escaping support, so cut off filter
177
# after first unsupported character
178
query = re.search('^[A-Za-z0-9_-]*', component).group(0)
179
if component != query:
182
matches = self.read_all_pages(
183
self.metadata_url + 'nodes?filters=kind:FOLDER AND name:%s '
184
'AND parents:%s' % (query, parent_node_id))
185
candidates = [f for f in matches if f.get('name') == component]
187
if len(candidates) >= 2:
188
log.FatalError('There are multiple folders with the same name '
189
'below one parent.\nParentID: %s\nFolderName: '
190
'%s' % (parent_node_id, component))
191
elif len(candidates) == 1:
192
parent_node_id = candidates[0]['id']
194
log.Debug('Folder %s does not exist yet. Creating.' % component)
195
parent_node_id = self.mkdir(parent_node_id, component)
197
log.Debug("Backup target folder has id: %s" % parent_node_id)
198
self.backup_target_id = parent_node_id
200
def get_file_id(self, remote_filename):
201
"""Find id of remote file in backup target folder"""
203
if remote_filename not in self.names_to_ids:
206
return self.names_to_ids.get(remote_filename)
208
def mkdir(self, parent_node_id, folder_name):
209
"""Create a new folder as a child of a parent node"""
211
data = {'name': folder_name, 'parents': [parent_node_id], 'kind': 'FOLDER'}
212
response = self.http_client.post(
213
self.metadata_url + 'nodes',
214
data=json.dumps(data))
215
response.raise_for_status()
216
return response.json()['id']
218
def multipart_stream(self, metadata, source_path):
219
"""Generator for multipart/form-data file upload from source file"""
221
boundary = self.MULTIPART_BOUNDARY
223
yield str.encode('--%s\r\nContent-Disposition: form-data; '
224
'name="metadata"\r\n\r\n' % boundary +
225
'%s\r\n' % json.dumps(metadata) +
226
'--%s\r\n' % boundary)
227
yield b'Content-Disposition: form-data; name="content"; filename="i_love_backups"\r\n'
228
yield b'Content-Type: application/octet-stream\r\n\r\n'
230
with source_path.open() as stream:
232
f = stream.read(DEFAULT_BUFFER_SIZE)
238
yield str.encode('\r\n--%s--\r\n' % boundary +
239
'multipart/form-data; boundary=%s' % boundary)
241
def read_all_pages(self, url):
242
"""Iterates over nodes API URL until all pages were read"""
246
token_param = '&startToken=' if '?' in url else '?startToken='
249
paginated_url = url + token_param + next_token
250
response = self.http_client.get(paginated_url)
251
if response.status_code != 200:
252
raise BackendException("Pagination failed with status=%s on "
253
"URL=%s" % (response.status_code, url))
255
parsed = response.json()
256
if 'data' in parsed and len(parsed['data']) > 0:
257
result.extend(parsed['data'])
261
# Do not make another HTTP request if everything is here already
262
if len(result) >= parsed['count']:
265
if 'nextToken' not in parsed:
267
next_token = parsed['nextToken']
271
def raise_for_existing_file(self, remote_filename):
272
"""Report error when file already existed in location and delete it"""
274
self._delete(remote_filename)
275
raise BackendException('Upload failed, because there was a file with '
276
'the same name as %s already present. The file was '
277
'deleted, and duplicity will retry the upload unless '
278
'the retry limit has been reached.' % remote_filename)
280
def _put(self, source_path, remote_filename):
281
"""Upload a local file to Amazon Drive"""
283
quota = self.http_client.get(self.metadata_url + 'account/quota')
284
quota.raise_for_status()
285
available = quota.json()['available']
287
source_size = os.path.getsize(source_path.name)
289
if source_size > available:
290
raise BackendException(
291
'Out of space: trying to store "%s" (%d bytes), but only '
292
'%d bytes available on Amazon Drive.' % (
293
source_path.name, source_size, available))
295
# Just check the cached list, to avoid _list for every new file being
297
if remote_filename in self.names_to_ids:
298
log.Debug('File %s seems to already exist on Amazon Drive. Deleting '
299
'before attempting to upload it again.' % remote_filename)
300
self._delete(remote_filename)
302
metadata = {'name': remote_filename, 'kind': 'FILE',
303
'parents': [self.backup_target_id]}
304
headers = {'Content-Type': 'multipart/form-data; boundary=%s'
305
% self.MULTIPART_BOUNDARY}
306
data = self.multipart_stream(metadata, source_path)
308
response = self.http_client.post(
309
self.content_url + 'nodes?suppress=deduplication',
313
if response.status_code == 409: # "409 : Duplicate file exists."
314
self.raise_for_existing_file(remote_filename)
315
elif response.status_code == 201:
316
log.Debug('%s uploaded successfully' % remote_filename)
317
elif response.status_code == 408 or response.status_code == 504:
318
log.Info('%s upload failed with timeout status code=%d. Speculatively '
319
'waiting for %d seconds to see if Amazon Drive finished the '
320
'upload anyway' % (remote_filename, response.status_code,
322
tries = globals.timeout / 15
327
remote_size = self._query(remote_filename)['size']
328
if source_size == remote_size:
329
log.Debug('Upload turned out to be successful after all.')
331
elif remote_size == -1:
332
log.Debug('Uploaded file is not yet there, %d tries left.'
336
self.raise_for_existing_file(remote_filename)
337
raise BackendException('%s upload failed and file did not show up '
338
'within time limit.' % remote_filename)
340
log.Debug('%s upload returned an undesirable status code %s'
341
% (remote_filename, response.status_code))
342
response.raise_for_status()
344
parsed = response.json()
345
if 'id' not in parsed:
346
raise BackendException('%s was uploaded, but returned JSON does not '
347
'contain ID of new file. Retrying.\nJSON:\n\n%s'
348
% (remote_filename, parsed))
350
# XXX: The upload may be considered finished before the file shows up
351
# in the file listing. As such, the following is required to avoid race
352
# conditions when duplicity calls _query or _list.
353
self.names_to_ids[parsed['name']] = parsed['id']
355
def _get(self, remote_filename, local_path):
356
"""Download file from Amazon Drive"""
358
with local_path.open('wb') as local_file:
359
file_id = self.get_file_id(remote_filename)
361
raise BackendException(
362
'File "%s" cannot be downloaded: it does not exist' %
365
response = self.http_client.get(
366
self.content_url + '/nodes/' + file_id + '/content', stream=True)
367
response.raise_for_status()
368
for chunk in response.iter_content(chunk_size=DEFAULT_BUFFER_SIZE):
370
local_file.write(chunk)
373
def _query(self, remote_filename):
374
"""Retrieve file size info from Amazon Drive"""
376
file_id = self.get_file_id(remote_filename)
379
response = self.http_client.get(self.metadata_url + 'nodes/' + file_id)
380
response.raise_for_status()
382
return {'size': response.json()['contentProperties']['size']}
385
"""List files in Amazon Drive backup folder"""
387
files = self.read_all_pages(
388
self.metadata_url + 'nodes/' + self.backup_target_id +
389
'/children?filters=kind:FILE')
391
self.names_to_ids = {f['name']: f['id'] for f in files}
393
return self.names_to_ids.keys()
395
def _delete(self, remote_filename):
396
"""Delete file from Amazon Drive"""
398
file_id = self.get_file_id(remote_filename)
400
raise BackendException(
401
'File "%s" cannot be deleted: it does not exist' % (
403
response = self.http_client.put(self.metadata_url + 'trash/' + file_id)
404
response.raise_for_status()
405
del self.names_to_ids[remote_filename]
407
# Register under the ad:// scheme. NOTE: the class defined above is named
# ADBackend; registering the non-existent name AmazonDriveBackend would
# raise NameError at import time.
duplicity.backend.register_backend('ad', ADBackend)