3
# Copyright 2009 Facebook
5
# Licensed under the Apache License, Version 2.0 (the "License"); you may
6
# not use this file except in compliance with the License. You may obtain
7
# a copy of the License at
9
# http://www.apache.org/licenses/LICENSE-2.0
11
# Unless required by applicable law or agreed to in writing, software
12
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
# License for the specific language governing permissions and limitations
# under the License.
17
"""Implementation of an S3-like storage server based on local files.
19
Useful to test features that will eventually run on S3, or if you want to
20
run something locally that was once running on S3.
22
We don't support all the features of S3, but it does work with the
23
standard S3 client for the most basic semantics. To use the standard
24
S3 client with this module:
26
    c = S3.AWSAuthConnection("", "", server="localhost", port=8888,
                             is_secure=False)
    c.create_bucket("mybucket")
    c.put("mybucket", "mykey", "a value")
    print c.get("mybucket", "mykey").body

"""
46
def start(port, root_directory="/tmp/s3", bucket_depth=0):
    """Starts the mock S3 server on the given port at the given path.

    Builds an S3Application for ``root_directory`` / ``bucket_depth``, wraps
    it in an HTTPServer listening on ``port`` and then starts the shared
    IOLoop instance.
    """
    application = S3Application(root_directory, bucket_depth)
    http_server = httpserver.HTTPServer(application)
    http_server.listen(port)
    ioloop.IOLoop.instance().start()
54
class S3Application(web.Application):
    """Implementation of an S3-like storage server based on local files.

    If bucket depth is given, we break files up into multiple directories
    to prevent hitting file system limits for number of files in each
    directory. 1 means one level of directories, 2 means 2, etc.
    """

    def __init__(self, root_directory, bucket_depth=0):
        """Registers the URL routes and prepares the storage root.

        ``root_directory`` is made absolute and created when it does not
        exist yet; ``bucket_depth`` is stored for the request handlers.
        """
        # NOTE(review): the entry preceding the ObjectHandler route was
        # missing from the reviewed text. The "/" -> RootHandler route is
        # reconstructed (RootHandler is not referenced anywhere else);
        # confirm against the upstream file.
        web.Application.__init__(self, [
            (r"/", RootHandler),
            (r"/([^/]+)/(.+)", ObjectHandler),
            (r"/([^/]+)/", BucketHandler),
        ])
        self.directory = os.path.abspath(root_directory)
        if not os.path.exists(self.directory):
            os.makedirs(self.directory)
        self.bucket_depth = bucket_depth
73
class BaseRequestHandler(web.RequestHandler):
    """Helpers shared by the S3 handlers: XML output and object paths."""

    SUPPORTED_METHODS = ("PUT", "GET", "DELETE")

    def render_xml(self, value):
        """Finishes the request with ``value`` serialized as an XML document.

        ``value`` must be a dict with exactly one key: the name of the root
        element, mapped to that element's content.
        """
        assert isinstance(value, dict) and len(value) == 1
        self.set_header("Content-Type", "application/xml; charset=UTF-8")
        name, content = list(value.items())[0]
        root_tag = escape.utf8(name)
        parts = []
        parts.append('<' + root_tag +
                     ' xmlns="http://doc.s3.amazonaws.com/2006-03-01">')
        self._render_parts(content, parts)
        parts.append('</' + root_tag + '>')
        self.finish('<?xml version="1.0" encoding="UTF-8"?>\n' +
                    ''.join(parts))

    def _render_parts(self, value, parts=None):
        """Appends the XML fragments for ``value`` to ``parts``.

        Strings are escaped, integers are stringified, datetimes are
        formatted as "%Y-%m-%dT%H:%M:%S.000Z", and dicts become one element
        per key (a list value repeats the element once per item).

        Returns the list that was appended to. Raises Exception for any
        other value type.
        """
        # A fresh list per call: a literal [] default would be shared by
        # every call that omits the argument.
        if parts is None:
            parts = []
        if isinstance(value, basestring):
            parts.append(escape.xhtml_escape(value))
        elif isinstance(value, (int, long)):
            parts.append(str(value))
        elif isinstance(value, datetime.datetime):
            parts.append(value.strftime("%Y-%m-%dT%H:%M:%S.000Z"))
        elif isinstance(value, dict):
            for name, subvalue in value.items():
                if not isinstance(subvalue, list):
                    subvalue = [subvalue]
                tag = escape.utf8(name)
                for subsubvalue in subvalue:
                    parts.append('<' + tag + '>')
                    self._render_parts(subsubvalue, parts)
                    parts.append('</' + tag + '>')
        else:
            # Interpolate explicitly; passing the value as a second argument
            # leaves the "%r" placeholder unformatted in the message.
            raise Exception("Unknown S3 value type %r" % (value,))
        return parts

    def _object_path(self, bucket, object_name):
        """Returns the absolute, normalized file path of an object.

        With ``bucket_depth`` below 1 the object lives directly under the
        bucket directory. Otherwise it is nested under ``bucket_depth``
        directories named after growing prefixes (2, 4, ... characters) of
        the MD5 hex digest of ``object_name``.
        """
        if self.application.bucket_depth < 1:
            return os.path.abspath(os.path.join(
                self.application.directory, bucket, object_name))
        digest = hashlib.md5(object_name).hexdigest()
        path = os.path.abspath(os.path.join(
            self.application.directory, bucket))
        for i in range(self.application.bucket_depth):
            path = os.path.join(path, digest[:2 * (i + 1)])
        # Normalize here as well, so ".." segments in object_name cannot
        # get past the prefix checks the handlers run on the result.
        return os.path.abspath(os.path.join(path, object_name))
118
class RootHandler(BaseRequestHandler):
    """Lists the entries of the storage root as buckets."""

    def get(self):
        """Renders a ListAllMyBucketsResult for the storage root."""
        # NOTE(review): the reviewed text had lost this method's header and
        # several statements. The loop, the os.stat() call, the "Name" key
        # and the use of st_ctime are reconstructed from the surviving
        # lines; confirm against the upstream file.
        root = self.application.directory
        buckets = []
        for name in os.listdir(root):
            info = os.stat(os.path.join(root, name))
            buckets.append({
                "Name": name,
                "CreationDate": datetime.datetime.utcfromtimestamp(
                    info.st_ctime),
            })
        self.render_xml({"ListAllMyBucketsResult": {
            "Buckets": {"Bucket": buckets},
        }})
135
class BucketHandler(BaseRequestHandler):
    """Lists, creates and deletes buckets; a bucket is one directory."""

    # NOTE(review): the reviewed text had lost its indentation and a number
    # of statements. Reconstructed from context and to be confirmed against
    # the upstream file: the initial values of object_names / skip /
    # contents / start_pos / truncated, the sort() call, the
    # "if marker" / "if prefix" / "if not terse" guards, both "break"
    # statements, the "Name" / "Prefix" / "Marker" / "MaxKeys" response
    # fields, and the trailing makedirs / rmdir / set_status(204) / finish
    # calls in put() and delete().

    def _bucket_path(self, bucket_name):
        """Returns the absolute directory of ``bucket_name``, or None.

        None is returned when the name does not resolve to a location
        strictly below the storage root (for example "." or "..").
        """
        root = self.application.directory
        path = os.path.abspath(os.path.join(root, bucket_name))
        # Compare against "root + separator": a bare prefix test would also
        # accept the root itself and siblings such as "<root>-other".
        if path.startswith(os.path.join(root, "")):
            return path
        return None

    def get(self, bucket_name):
        """Renders a ListBucketResult for ``bucket_name``.

        Query arguments: ``prefix`` and ``marker`` narrow the listing,
        ``max-keys`` caps the number of entries (default 50000) and a
        non-zero ``terse`` omits the per-object stat information.

        Raises HTTPError 400 for non-integer ``max-keys`` / ``terse`` and
        404 when the bucket does not exist.
        """
        prefix = self.get_argument("prefix", u"")
        marker = self.get_argument("marker", u"")
        try:
            max_keys = int(self.get_argument("max-keys", 50000))
            terse = int(self.get_argument("terse", 0))
        except ValueError:
            raise web.HTTPError(400)
        path = self._bucket_path(bucket_name)
        if path is None or not os.path.isdir(path):
            raise web.HTTPError(404)

        # Number of leading characters to drop from each walked file path:
        # the bucket directory plus its separator, then one hash directory
        # (2, 4, ... characters plus separator) per bucket_depth level.
        skip = len(path) + 1
        for i in range(self.application.bucket_depth):
            skip += 2 * (i + 1) + 1
        object_names = []
        for root, dirs, files in os.walk(path):
            for file_name in files:
                object_names.append(os.path.join(root, file_name)[skip:])
        # bisect below requires sorted input.
        object_names.sort()

        start_pos = 0
        if marker:
            start_pos = bisect.bisect_right(object_names, marker, start_pos)
        if prefix:
            start_pos = bisect.bisect_left(object_names, prefix, start_pos)

        contents = []
        truncated = False
        for object_name in object_names[start_pos:]:
            if not object_name.startswith(prefix):
                break
            if len(contents) >= max_keys:
                truncated = True
                break
            c = {"Key": object_name}
            if not terse:
                info = os.stat(self._object_path(bucket_name, object_name))
                c.update({
                    "LastModified": datetime.datetime.utcfromtimestamp(
                        info.st_mtime),
                    "Size": info.st_size,
                })
            contents.append(c)
            marker = object_name
        self.render_xml({"ListBucketResult": {
            "Name": bucket_name,
            "Prefix": prefix,
            "Marker": marker,
            "MaxKeys": max_keys,
            "IsTruncated": truncated,
            "Contents": contents,
        }})

    def put(self, bucket_name):
        """Creates the bucket directory.

        Raises HTTPError 403 when the name is not a valid location below
        the storage root or the path already exists.
        """
        path = self._bucket_path(bucket_name)
        if path is None or os.path.exists(path):
            raise web.HTTPError(403)
        os.makedirs(path)
        self.finish()

    def delete(self, bucket_name):
        """Removes an empty bucket directory and answers with status 204.

        Raises HTTPError 404 when the bucket does not exist and 403 when it
        still contains entries.
        """
        path = self._bucket_path(bucket_name)
        if path is None or not os.path.isdir(path):
            raise web.HTTPError(404)
        if os.listdir(path):
            raise web.HTTPError(403)
        os.rmdir(path)
        self.set_status(204)
        self.finish()
212
class ObjectHandler(BaseRequestHandler):
213
def get(self, bucket, object_name):
    """Sends the stored bytes of an object.

    Raises HTTPError 404 when the resolved path lies outside the storage
    root or is not a regular file.
    """
    # NOTE(review): the os.stat() call, the use of st_mtime and the
    # try/finally around the read were missing from the reviewed text and
    # are reconstructed; confirm against the upstream file.
    object_name = urllib.unquote(object_name)
    # Normalize before checking, and compare against "root + separator":
    # otherwise ".." segments or a sibling such as "<root>-other" would
    # satisfy a bare prefix test.
    path = os.path.abspath(self._object_path(bucket, object_name))
    root_prefix = os.path.join(self.application.directory, "")
    if not path.startswith(root_prefix) or not os.path.isfile(path):
        raise web.HTTPError(404)
    info = os.stat(path)
    self.set_header("Content-Type", "application/unknown")
    self.set_header("Last-Modified", datetime.datetime.utcfromtimestamp(
        info.st_mtime))
    # Binary mode: objects are arbitrary bytes, not text.
    object_file = open(path, "rb")
    try:
        self.finish(object_file.read())
    finally:
        object_file.close()
229
def put(self, bucket, object_name):
    """Stores the request body as the content of an object.

    Raises HTTPError 404 when the bucket does not exist, and 403 when the
    resolved object path lies outside the bucket or is a directory.
    """
    # NOTE(review): the close() and finish() calls at the end were missing
    # from the reviewed text and are reconstructed; confirm against the
    # upstream file.
    object_name = urllib.unquote(object_name)
    bucket_dir = os.path.abspath(os.path.join(
        self.application.directory, bucket))
    # Prefix checks use "directory + separator" so that a sibling whose
    # name merely starts with the same characters is not accepted.
    root_prefix = os.path.join(self.application.directory, "")
    if not bucket_dir.startswith(root_prefix) or \
       not os.path.isdir(bucket_dir):
        raise web.HTTPError(404)
    # Normalize so ".." segments in object_name cannot leave the bucket.
    path = os.path.abspath(self._object_path(bucket, object_name))
    if not path.startswith(os.path.join(bucket_dir, "")) or \
       os.path.isdir(path):
        raise web.HTTPError(403)
    directory = os.path.dirname(path)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Binary mode keeps the body byte-for-byte; try/finally closes the
    # file even when the write fails.
    object_file = open(path, "wb")
    try:
        object_file.write(self.request.body)
    finally:
        object_file.close()
    self.finish()
247
def delete(self, bucket, object_name):
248
object_name = urllib.unquote(object_name)
249
path = self._object_path(bucket, object_name)
250
if not path.startswith(self.application.directory) or \
251
not os.path.isfile(path):
252
raise web.HTTPError(404)