~tribaal/txaws/xss-hardening

Viewing changes to txaws/storage/client.py

  • Committer: Duncan McGreggor
  • Date: 2009-11-22 02:20:42 UTC
  • mto: (44.3.2 484858-s3-scripts)
  • mto: This revision was merged to the branch mainline in revision 52.
  • Revision ID: duncan@canonical.com-20091122022042-4zi231hxni1z53xd
* Updated the LICENSE file with copyright information.
* Updated the README with license information.

Diff shown below in unified form: lines prefixed with "+" were added, lines prefixed with "-" were removed, lines prefixed with a space are unchanged context, and a line containing only "..." marks a run of omitted unchanged lines.

+# Copyright (C) 2008 Tristan Seligmann <mithrandi@mithrandi.net>
+# Copyright (C) 2009 Canonical Ltd
+# Copyright (C) 2009 Duncan McGreggor <oubiwann@adytum.us>
+# Copyright (C) 2012 New Dream Network (DreamHost)
+# Licenced under the txaws licence available at /LICENSE in the txaws source.
+
 """
 Client wrapper for Amazon's Simple Storage Service.
 
...
 Various API-incompatible changes are planned in order to expose missing
 functionality in this wrapper.
 """
+import mimetypes
+
-
-from epsilon.extime import Time
-
-from twisted.web.client import getPage
 from twisted.web.http import datetimeToString
 
+from dateutil.parser import parse as parseTime
+
+from txaws.client.base import BaseClient, BaseQuery, error_wrapper
+from txaws.s3.acls import AccessControlPolicy
+from txaws.s3.model import (
+    Bucket, BucketItem, BucketListing, ItemOwner, LifecycleConfiguration,
+    LifecycleConfigurationRule, NotificationConfiguration, RequestPayment,
+    VersioningConfiguration, WebsiteConfiguration, MultipartInitiationResponse,
+    MultipartCompletionResponse)
+from txaws.s3.exception import S3Error
+from txaws.service import AWSServiceEndpoint, S3_ENDPOINT
-from txaws.service import AWSServiceEndpoint
 from txaws.util import XML, calculate_md5
 
 
+def s3_error_wrapper(error):
+    error_wrapper(error, S3Error)
+
+
+class URLContext(object):
+    """
+    The hosts and the paths that form an S3 endpoint change depending upon the
+    context in which they are called.  While S3 supports bucket names in the
+    host name, we use the convention of providing it in the path so that
+    using IP addresses and alternative implementations of S3 actually works
+    (e.g. Walrus).
+    """
+    def __init__(self, service_endpoint, bucket="", object_name=""):
+        self.endpoint = service_endpoint
+        self.bucket = bucket
+        self.object_name = object_name
+
+    def get_host(self):
+        return self.endpoint.get_host()
+
+    def get_path(self):
+        path = "/"
+        if self.bucket is not None:
+            path += self.bucket
+        if self.bucket is not None and self.object_name:
+            if not self.object_name.startswith("/"):
+                path += "/"
+            path += self.object_name
+        elif self.bucket is not None and not path.endswith("/"):
+            path += "/"
+        return path
+
+    def get_url(self):
+        if self.endpoint.port is not None:
+            return "%s://%s:%d%s" % (
+                self.endpoint.scheme, self.get_host(), self.endpoint.port,
+                self.get_path())
+        else:
+            return "%s://%s%s" % (
+                self.endpoint.scheme, self.get_host(), self.get_path())
+
+
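A reading aid, outside the diff itself: a minimal sketch of what the path-style addressing in URLContext yields. The endpoint object below is a hypothetical stand-in exposing only the attributes URLContext actually reads (scheme, port, get_host()); the real code passes an AWSServiceEndpoint.

    class FakeEndpoint(object):
        # Hypothetical stand-in: only the pieces URLContext touches.
        scheme = "https"
        port = None

        def get_host(self):
            return "s3.amazonaws.com"

    context = URLContext(FakeEndpoint(), bucket="mybucket",
                         object_name="photos/cat.jpg")
    # Bucket in the path rather than the host name, so IP endpoints and
    # S3 work-alikes such as Walrus keep working:
    print context.get_url()  # https://s3.amazonaws.com/mybucket/photos/cat.jpg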
 
 
+class S3Client(BaseClient):
+    """A client for S3."""
+
+    def __init__(self, creds=None, endpoint=None, query_factory=None,
+                 receiver_factory=None):
+        if query_factory is None:
+            query_factory = Query
+        super(S3Client, self).__init__(creds, endpoint, query_factory,
+                                       receiver_factory=receiver_factory)
+
+    def list_buckets(self):
+        """
+        List all buckets.
+
+        Returns a list of all the buckets owned by the authenticated sender of
+        the request.
+        """
+        query = self.query_factory(
+            action="GET", creds=self.creds, endpoint=self.endpoint,
+            receiver_factory=self.receiver_factory)
+        d = query.submit()
+        return d.addCallback(self._parse_list_buckets)
+
+    def _parse_list_buckets(self, xml_bytes):
+        """
+        Parse XML bucket list response.
+        """
+        root = XML(xml_bytes)
+        buckets = []
+        for bucket_data in root.find("Buckets"):
+            name = bucket_data.findtext("Name")
+            date_text = bucket_data.findtext("CreationDate")
+            date_time = parseTime(date_text)
+            bucket = Bucket(name, date_time)
+            buckets.append(bucket)
+        return buckets
+
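A reading aid, outside the diff itself: _parse_list_buckets above consumes S3's ListAllMyBucketsResult document. A rough, self-contained sketch of that shape using plain ElementTree and a hand-written sample (XML namespace omitted; the real code goes through txaws.util.XML):

    from xml.etree import ElementTree
    from dateutil.parser import parse as parseTime

    SAMPLE = """
    <ListAllMyBucketsResult>
      <Owner><ID>abc123</ID><DisplayName>someone</DisplayName></Owner>
      <Buckets>
        <Bucket>
          <Name>mybucket</Name>
          <CreationDate>2009-11-22T02:20:42.000Z</CreationDate>
        </Bucket>
      </Buckets>
    </ListAllMyBucketsResult>
    """

    root = ElementTree.fromstring(SAMPLE)
    for bucket_data in root.find("Buckets"):
        # One <Bucket> element per bucket, exactly what the loop above expects.
        print bucket_data.findtext("Name"), parseTime(
            bucket_data.findtext("CreationDate"))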
 
 
+    def create_bucket(self, bucket):
+        """
+        Create a new bucket.
+        """
+        query = self.query_factory(
+            action="PUT", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket)
+        return query.submit()
+
+    def delete_bucket(self, bucket):
+        """
+        Delete a bucket.
+
+        The bucket must be empty before it can be deleted.
+        """
+        query = self.query_factory(
+            action="DELETE", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket)
+        return query.submit()
+
+    def get_bucket(self, bucket):
+        """
+        Get a list of all the objects in a bucket.
+        """
+        query = self.query_factory(
+            action="GET", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, receiver_factory=self.receiver_factory)
+        d = query.submit()
+        return d.addCallback(self._parse_get_bucket)
+
+    def _parse_get_bucket(self, xml_bytes):
+        root = XML(xml_bytes)
+        name = root.findtext("Name")
+        prefix = root.findtext("Prefix")
+        marker = root.findtext("Marker")
+        max_keys = root.findtext("MaxKeys")
+        is_truncated = root.findtext("IsTruncated")
+        contents = []
+
+        for content_data in root.findall("Contents"):
+            key = content_data.findtext("Key")
+            date_text = content_data.findtext("LastModified")
+            modification_date = parseTime(date_text)
+            etag = content_data.findtext("ETag")
+            size = content_data.findtext("Size")
+            storage_class = content_data.findtext("StorageClass")
+            owner_id = content_data.findtext("Owner/ID")
+            owner_display_name = content_data.findtext("Owner/DisplayName")
+            owner = ItemOwner(owner_id, owner_display_name)
+            content_item = BucketItem(key, modification_date, etag, size,
+                                      storage_class, owner)
+            contents.append(content_item)
+
+        common_prefixes = []
+        for prefix_data in root.findall("CommonPrefixes"):
+            common_prefixes.append(prefix_data.text)
+
+        return BucketListing(name, prefix, marker, max_keys, is_truncated,
+                             contents, common_prefixes)
+
+    def get_bucket_location(self, bucket):
+        """
+        Get the location (region) of a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will fire with the bucket's region.
+        """
+        query = self.query_factory(action="GET", creds=self.creds,
+                                   endpoint=self.endpoint, bucket=bucket,
+                                   object_name="?location",
+                                   receiver_factory=self.receiver_factory)
+        d = query.submit()
+        return d.addCallback(self._parse_bucket_location)
+
+    def _parse_bucket_location(self, xml_bytes):
+        """Parse a C{LocationConstraint} XML document."""
+        root = XML(xml_bytes)
+        return root.text or ""
+
+    def get_bucket_lifecycle(self, bucket):
+        """
+        Get the lifecycle configuration of a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will fire with the bucket's lifecycle
+        configuration.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?lifecycle',
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_lifecycle_config)
+
+    def _parse_lifecycle_config(self, xml_bytes):
+        """Parse a C{LifecycleConfiguration} XML document."""
+        root = XML(xml_bytes)
+        rules = []
+
+        for content_data in root.findall("Rule"):
+            id = content_data.findtext("ID")
+            prefix = content_data.findtext("Prefix")
+            status = content_data.findtext("Status")
+            expiration = int(content_data.findtext("Expiration/Days"))
+            rules.append(
+                LifecycleConfigurationRule(id, prefix, status, expiration))
+
+        return LifecycleConfiguration(rules)
+
+    def get_bucket_website_config(self, bucket):
+        """
+        Get the website configuration of a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will fire with the bucket's website
+        configuration.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?website',
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_website_config)
+
+    def _parse_website_config(self, xml_bytes):
+        """Parse a C{WebsiteConfiguration} XML document."""
+        root = XML(xml_bytes)
+        index_suffix = root.findtext("IndexDocument/Suffix")
+        error_key = root.findtext("ErrorDocument/Key")
+
+        return WebsiteConfiguration(index_suffix, error_key)
+
+    def get_bucket_notification_config(self, bucket):
+        """
+        Get the notification configuration of a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will request the bucket's notification
+        configuration.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?notification',
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_notification_config)
+
+    def _parse_notification_config(self, xml_bytes):
+        """Parse a C{NotificationConfiguration} XML document."""
+        root = XML(xml_bytes)
+        topic = root.findtext("TopicConfiguration/Topic")
+        event = root.findtext("TopicConfiguration/Event")
+
+        return NotificationConfiguration(topic, event)
+
+    def get_bucket_versioning_config(self, bucket):
+        """
+        Get the versioning configuration of a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will request the bucket's versioning
+        configuration.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?versioning',
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_versioning_config)
+
+    def _parse_versioning_config(self, xml_bytes):
+        """Parse a C{VersioningConfiguration} XML document."""
+        root = XML(xml_bytes)
+        mfa_delete = root.findtext("MfaDelete")
+        status = root.findtext("Status")
+
+        return VersioningConfiguration(mfa_delete=mfa_delete, status=status)
+
+    def get_bucket_acl(self, bucket):
+        """
+        Get the access control policy for a bucket.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?acl',
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_acl)
+
+    def put_bucket_acl(self, bucket, access_control_policy):
+        """
+        Set access control policy on a bucket.
+        """
+        data = access_control_policy.to_xml()
+        query = self.query_factory(
+            action='PUT', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='?acl', data=data,
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_acl)
+
+    def _parse_acl(self, xml_bytes):
+        """
+        Parse an C{AccessControlPolicy} XML document and convert it into an
+        L{AccessControlPolicy} instance.
+        """
+        return AccessControlPolicy.from_xml(xml_bytes)
+
+    def put_object(self, bucket, object_name, data=None, content_type=None,
+                   metadata={}, amz_headers={}, body_producer=None):
+        """
+        Put an object in a bucket.
+
+        An existing object with the same name will be replaced.
+
+        @param bucket: The name of the bucket.
+        @param object_name: The name of the object.
+        @param data: The data to write.
+        @param content_type: The type of data being written.
+        @param metadata: A C{dict} used to build C{x-amz-meta-*} headers.
+        @param amz_headers: A C{dict} used to build C{x-amz-*} headers.
+        @return: A C{Deferred} that will fire with the result of the request.
+        """
+        query = self.query_factory(
+            action="PUT", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=object_name, data=data,
+            content_type=content_type, metadata=metadata,
+            amz_headers=amz_headers, body_producer=body_producer,
+            receiver_factory=self.receiver_factory)
+        return query.submit()
+
+    def copy_object(self, source_bucket, source_object_name, dest_bucket=None,
+                    dest_object_name=None, metadata={}, amz_headers={}):
+        """
+        Copy an object stored in S3 from a source bucket to a destination
+        bucket.
+
+        @param source_bucket: The S3 bucket to copy the object from.
+        @param source_object_name: The name of the object to copy.
+        @param dest_bucket: Optionally, the S3 bucket to copy the object to.
+            Defaults to C{source_bucket}.
+        @param dest_object_name: Optionally, the name of the new object.
+            Defaults to C{source_object_name}.
+        @param metadata: A C{dict} used to build C{x-amz-meta-*} headers.
+        @param amz_headers: A C{dict} used to build C{x-amz-*} headers.
+        @return: A C{Deferred} that will fire with the result of the request.
+        """
+        dest_bucket = dest_bucket or source_bucket
+        dest_object_name = dest_object_name or source_object_name
+        amz_headers["copy-source"] = "/%s/%s" % (source_bucket,
+                                                 source_object_name)
+        query = self.query_factory(
+            action="PUT", creds=self.creds, endpoint=self.endpoint,
+            bucket=dest_bucket, object_name=dest_object_name,
+            metadata=metadata, amz_headers=amz_headers,
+            receiver_factory=self.receiver_factory)
+        return query.submit()
+
+    def get_object(self, bucket, object_name):
+        """
+        Get an object from a bucket.
+        """
+        query = self.query_factory(
+            action="GET", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=object_name,
+            receiver_factory=self.receiver_factory)
+        return query.submit()
+
+    def head_object(self, bucket, object_name):
+        """
+        Retrieve object metadata only.
+        """
+        query = self.query_factory(
+            action="HEAD", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=object_name)
+        d = query.submit()
+        return d.addCallback(query.get_response_headers)
+
+    def delete_object(self, bucket, object_name):
+        """
+        Delete an object from a bucket.
+
+        Once deleted, there is no method to restore or undelete an object.
+        """
+        query = self.query_factory(
+            action="DELETE", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=object_name)
+        return query.submit()
+
+    def put_object_acl(self, bucket, object_name, access_control_policy):
+        """
+        Set access control policy on an object.
+        """
+        data = access_control_policy.to_xml()
+        query = self.query_factory(
+            action='PUT', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='%s?acl' % object_name, data=data,
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_acl)
+
+    def get_object_acl(self, bucket, object_name):
+        """
+        Get the access control policy for an object.
+        """
+        query = self.query_factory(
+            action='GET', creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name='%s?acl' % object_name,
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_acl)
+
+    def put_request_payment(self, bucket, payer):
+        """
+        Set request payment configuration on bucket to payer.
+
+        @param bucket: The name of the bucket.
+        @param payer: The name of the payer.
+        @return: A C{Deferred} that will fire with the result of the request.
+        """
+        data = RequestPayment(payer).to_xml()
+        query = self.query_factory(
+            action="PUT", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name="?requestPayment", data=data,
+            receiver_factory=self.receiver_factory)
+        return query.submit()
+
+    def get_request_payment(self, bucket):
+        """
+        Get the request payment configuration on a bucket.
+
+        @param bucket: The name of the bucket.
+        @return: A C{Deferred} that will fire with the name of the payer.
+        """
+        query = self.query_factory(
+            action="GET", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name="?requestPayment",
+            receiver_factory=self.receiver_factory)
+        return query.submit().addCallback(self._parse_get_request_payment)
+
+    def _parse_get_request_payment(self, xml_bytes):
+        """
+        Parse a C{RequestPaymentConfiguration} XML document and extract the
+        payer.
+        """
+        return RequestPayment.from_xml(xml_bytes).payer
+
+    def init_multipart_upload(self, bucket, object_name, content_type=None,
+                              amz_headers={}, metadata={}):
+        """
+        Initiate a multipart upload to a bucket.
+
+        @param bucket: The name of the bucket
+        @param object_name: The object name
+        @param content_type: The Content-Type for the object
+        @param metadata: C{dict} containing additional metadata
+        @param amz_headers: A C{dict} used to build C{x-amz-*} headers.
+        @return: C{str} upload_id
+        """
+        objectname_plus = '%s?uploads' % object_name
+        query = self.query_factory(
+            action="POST", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=objectname_plus, data='',
+            content_type=content_type, amz_headers=amz_headers,
+            metadata=metadata)
+        d = query.submit()
+        return d.addCallback(MultipartInitiationResponse.from_xml)
+
+    def upload_part(self, bucket, object_name, upload_id, part_number,
+                    data=None, content_type=None, metadata={},
+                    body_producer=None):
+        """
+        Upload a part of data corresponding to a multipart upload.
+
+        @param bucket: The bucket name
+        @param object_name: The object name
+        @param upload_id: The multipart upload id
+        @param part_number: The part number
+        @param data: Data (optional, requires body_producer if not specified)
+        @param content_type: The Content-Type
+        @param metadata: Additional metadata
+        @param body_producer: an C{IBodyProducer} (optional, requires data if
+            not specified)
+        @return: the C{Deferred} from underlying query.submit() call
+        """
+        parms = 'partNumber=%s&uploadId=%s' % (str(part_number), upload_id)
+        objectname_plus = '%s?%s' % (object_name, parms)
+        query = self.query_factory(
+            action="PUT", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=objectname_plus, data=data,
+            content_type=content_type, metadata=metadata,
+            body_producer=body_producer, receiver_factory=self.receiver_factory)
+        d = query.submit()
+        return d.addCallback(query.get_response_headers)
+
+    def complete_multipart_upload(self, bucket, object_name, upload_id,
+                                  parts_list, content_type=None, metadata={}):
+        """
+        Complete a multipart upload.
+
+        N.B. This can possibly be a slow operation.
+
+        @param bucket: The bucket name
+        @param object_name: The object name
+        @param upload_id: The multipart upload id
+        @param parts_list: A list of all the parts
+            (2-tuples of part sequence number and etag)
+        @param content_type: The Content-Type of the object
+        @param metadata: C{dict} containing additional metadata
+        @return: a C{Deferred} that fires after request is complete
+        """
+        data = self._build_complete_multipart_upload_xml(parts_list)
+        objectname_plus = '%s?uploadId=%s' % (object_name, upload_id)
+        query = self.query_factory(
+            action="POST", creds=self.creds, endpoint=self.endpoint,
+            bucket=bucket, object_name=objectname_plus, data=data,
+            content_type=content_type, metadata=metadata)
+        d = query.submit()
+        # TODO - handle error responses
+        return d.addCallback(MultipartCompletionResponse.from_xml)
+
+    def _build_complete_multipart_upload_xml(self, parts_list):
+        xml = []
+        parts_list.sort(key=lambda p: int(p[0]))
+        xml.append('<CompleteMultipartUpload>')
+        for pt in parts_list:
+            xml.append('<Part>')
+            xml.append('<PartNumber>%s</PartNumber>' % pt[0])
+            xml.append('<ETag>%s</ETag>' % pt[1])
+            xml.append('</Part>')
+        xml.append('</CompleteMultipartUpload>')
+        return '\n'.join(xml)
+
+
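A reading aid, outside the diff itself: for a parts list such as [(1, '"etag-a"'), (2, '"etag-b"')], _build_complete_multipart_upload_xml above sorts by part number and emits one element per line, so complete_multipart_upload would POST a body equal to:

    expected_body = (
        '<CompleteMultipartUpload>\n'
        '<Part>\n'
        '<PartNumber>1</PartNumber>\n'
        '<ETag>"etag-a"</ETag>\n'
        '</Part>\n'
        '<Part>\n'
        '<PartNumber>2</PartNumber>\n'
        '<ETag>"etag-b"</ETag>\n'
        '</Part>\n'
        '</CompleteMultipartUpload>'
    )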
 
 
+class Query(BaseQuery):
+    """A query for submission to the S3 service."""
+
+    def __init__(self, bucket=None, object_name=None, data="",
+                 content_type=None, metadata={}, amz_headers={},
+                 body_producer=None, *args, **kwargs):
+        super(Query, self).__init__(*args, **kwargs)
-class S3Request(object):
-
-    def __init__(self, verb, bucket=None, object_name=None, data="",
-                 content_type=None, metadata={}, creds=None, endpoint=None):
-        self.verb = verb
         self.bucket = bucket
         self.object_name = object_name
         self.data = data
+        self.body_producer = body_producer
         self.content_type = content_type
         self.metadata = metadata
+        self.amz_headers = amz_headers
-        self.creds = creds
-        self.endpoint = endpoint
         self.date = datetimeToString()
+        if not self.endpoint or not self.endpoint.host:
+            self.endpoint = AWSServiceEndpoint(S3_ENDPOINT)
+        self.endpoint.set_method(self.action)
+
+    def set_content_type(self):
+        """
+        Set the content type based on the file extension used in the object
+        name.
+        """
+        if self.object_name and not self.content_type:
+            # XXX nothing is currently done with the encoding... we may
+            # need to in the future
+            self.content_type, encoding = mimetypes.guess_type(
+                self.object_name, strict=False)
-
-    def get_path(self):
-        path = "/"
-        if self.bucket is not None:
-            path += self.bucket
-            if self.object_name is not None:
-                path += "/" + self.object_name
-        return path
-
-    def get_uri(self):
-        if self.endpoint is None:
-            self.endpoint = AWSServiceEndpoint()
-        self.endpoint.set_path(self.get_path())
-        return self.endpoint.get_uri()
 
     def get_headers(self):
+        """
+        Build the list of headers needed in order to perform S3 operations.
+        """
+        if self.body_producer:
+            content_length = self.body_producer.length
+        else:
+            content_length = len(self.data)
+        headers = {"Content-Length": content_length,
-        headers = {"Content-Length": len(self.data),
-                   "Content-MD5": calculate_md5(self.data),
                   "Date": self.date}
+        if self.body_producer is None:
+            headers["Content-MD5"] = calculate_md5(self.data)
-
         for key, value in self.metadata.iteritems():
             headers["x-amz-meta-" + key] = value
+        for key, value in self.amz_headers.iteritems():
+            headers["x-amz-" + key] = value
+        # Before we check if the content type is set, let's see if we can set
+        # it by guessing the mimetype.
+        self.set_content_type()
-
         if self.content_type is not None:
             headers["Content-Type"] = self.content_type
-
         if self.creds is not None:
+            signature = self.sign(headers)
-            signature = self.get_signature(headers)
             headers["Authorization"] = "AWS %s:%s" % (
                 self.creds.access_key, signature)
         return headers
 
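A note on the Content-MD5 handling just above, outside the diff itself: the header is only added when the whole body is held in memory, because S3 expects Content-MD5 to be the base64-encoded binary MD5 digest of the payload, which cannot be known up front for a streaming body_producer. A sketch of that calculation, assuming this is roughly what txaws.util.calculate_md5 does:

    import base64
    import hashlib

    def content_md5(data):
        # Assumption: like txaws.util.calculate_md5, return the base64-encoded
        # binary digest, the form S3 expects in the Content-MD5 header.
        return base64.b64encode(hashlib.md5(data).digest())

    print content_md5("hello")  # XUFAKrxLKna5cZ2REBfFkg==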
 
 
-    def get_canonicalized_resource(self):
-        return self.get_path()
-
     def get_canonicalized_amz_headers(self, headers):
+        """
+        Get the headers defined by Amazon S3.
+        """
+        headers = [
+            (name.lower(), value) for name, value in headers.iteritems()
-        result = ""
-        headers = [(name.lower(), value) for name, value in headers.iteritems()
             if name.lower().startswith("x-amz-")]
         headers.sort()
+        # XXX missing spec implementation:
+        # 1) txAWS doesn't currently combine headers with the same name
+        # 2) txAWS doesn't currently unfold long headers
         return "".join("%s:%s\n" % (name, value) for name, value in headers)
 
+    def get_canonicalized_resource(self):
+        """
+        Get an S3 resource path.
+        """
+        path = "/"
+        if self.bucket is not None:
+            path += self.bucket
+        if self.bucket is not None and self.object_name:
+            if not self.object_name.startswith("/"):
+                path += "/"
+            path += self.object_name
+        elif self.bucket is not None and not path.endswith("/"):
+            path += "/"
+        return path
+
+    def sign(self, headers):
+        """Sign this query using its built in credentials."""
+        text = (self.action + "\n" +
-    def get_signature(self, headers):
-        text = (self.verb + "\n" +
                 headers.get("Content-MD5", "") + "\n" +
                 headers.get("Content-Type", "") + "\n" +
                 headers.get("Date", "") + "\n" +
                 self.get_canonicalized_amz_headers(headers) +
                 self.get_canonicalized_resource())
+        return self.creds.sign(text, hash_type="sha1")
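A reading aid, outside the diff itself: the text handed to creds.sign() above is the usual S3 string-to-sign. For a hypothetical PUT of /mybucket/photo.jpg with no x-amz-* headers it would be assembled like this (header values illustrative only):

    string_to_sign = ("PUT\n"                            # self.action
                      "XUFAKrxLKna5cZ2REBfFkg==\n"       # Content-MD5 (made up)
                      "image/jpeg\n"                     # Content-Type
                      "Sun, 22 Nov 2009 02:20:42 GMT\n"  # Date
                                                         # no x-amz-* headers
                      "/mybucket/photo.jpg")             # canonicalized resource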
+
+    def submit(self, url_context=None):
+        """Submit this query.
+
+        @return: A deferred from get_page
+        """
+        if not url_context:
+            url_context = URLContext(
+                self.endpoint, self.bucket, self.object_name)
+        d = self.get_page(
+            url_context.get_url(), method=self.action, postdata=self.data,
+            headers=self.get_headers(), body_producer=self.body_producer,
+            receiver_factory=self.receiver_factory)
+        return d.addErrback(s3_error_wrapper)
-        return self.creds.sign(text)
-
-    def submit(self):
-        return self.get_page(url=self.get_uri(), method=self.verb,
-                             postdata=self.data, headers=self.get_headers())
-
-    def get_page(self, *a, **kw):
-        return getPage(*a, **kw)
-
-
-class S3(object):
-
-    request_factory = S3Request
-
-    def __init__(self, creds, endpoint):
-        self.creds = creds
-        self.endpoint = endpoint
-
-    def make_request(self, *a, **kw):
-        """
-        Create a request with the arguments passed in.
-
-        This uses the request_factory attribute, adding the creds and endpoint
-        to the arguments passed in.
-        """
-        return self.request_factory(creds=self.creds, endpoint=self.endpoint,
-                                    *a, **kw)
-
-    def _parse_bucket_list(self, response):
-        """
-        Parse XML bucket list response.
-        """
-        root = XML(response)
-        for bucket in root.find("Buckets"):
-            timeText = bucket.findtext("CreationDate")
-            yield {
-                "name": bucket.findtext("Name"),
-                "created": Time.fromISO8601TimeAndDate(timeText),
-                }
-
-    def list_buckets(self):
-        """
-        List all buckets.
-
-        Returns a list of all the buckets owned by the authenticated sender of
-        the request.
-        """
-        deferred = self.make_request("GET").submit()
-        deferred.addCallback(self._parse_bucket_list)
-        return deferred
-
-    def create_bucket(self, bucket):
-        """
-        Create a new bucket.
-        """
-        return self.make_request("PUT", bucket).submit()
-
-    def delete_bucket(self, bucket):
-        """
-        Delete a bucket.
-
-        The bucket must be empty before it can be deleted.
-        """
-        return self.make_request("DELETE", bucket).submit()
-
-    def put_object(self, bucket, object_name, data, content_type=None,
-                   metadata={}):
-        """
-        Put an object in a bucket.
-
-        Any existing object of the same name will be replaced.
-        """
-        return self.make_request("PUT", bucket, object_name, data,
-                                 content_type, metadata).submit()
-
-    def get_object(self, bucket, object_name):
-        """
-        Get an object from a bucket.
-        """
-        return self.make_request("GET", bucket, object_name).submit()
-
-    def head_object(self, bucket, object_name):
-        """
-        Retrieve object metadata only.
-
-        This is like get_object, but the object's content is not retrieved.
-        Currently the metadata is not returned to the caller either, so this
-        method is mostly useless, and only provided for completeness.
-        """
-        return self.make_request("HEAD", bucket, object_name).submit()
-
-    def delete_object(self, bucket, object_name):
-        """
-        Delete an object from a bucket.
-
-        Once deleted, there is no method to restore or undelete an object.
-        """
-        return self.make_request("DELETE", bucket, object_name).submit()
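Finally, a minimal usage sketch of the new S3Client, outside the diff itself. The credential class, the import path for this module, and the Bucket attribute names are assumptions based on the surrounding txAWS code; adjust to the actual tree:

    from twisted.internet import reactor
    from txaws.credentials import AWSCredentials   # assumed location
    from txaws.storage.client import S3Client      # the module shown in this diff

    creds = AWSCredentials(access_key="AKID...", secret_key="SECRET...")
    client = S3Client(creds=creds)

    def show(buckets):
        for bucket in buckets:
            # Attribute names assumed from Bucket(name, date_time) above.
            print bucket.name, bucket.creation_date
        reactor.stop()

    def fail(reason):
        reason.printTraceback()
        reactor.stop()

    client.list_buckets().addCallbacks(show, fail)
    reactor.run()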