81
def _extract_headers(header_text, url):
    """Extract the mapping for an rfc2822 header

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    In the case that there are multiple headers inside the file,
    the last one is returned.

    :param header_text: A string of header information.
        This expects that the first line of a header will always be HTTP ...
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    """
    # NOTE(review): reconstructed from a garbled paste; the loop/guard
    # structure was missing — verify against upstream before relying on it.
    first_header = True
    remaining = header_text

    if not remaining:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    while remaining:
        header_file = StringIO(remaining)
        first_line = header_file.readline()
        if not first_line.startswith('HTTP'):
            if first_header: # The first header *must* start with HTTP
                raise errors.InvalidHttpResponse(url,
                    'Opening header line did not start with HTTP: %s'
                    % (first_line,))
            else:
                break # We are done parsing
        first_header = False
        m = mimetools.Message(header_file)

        # mimetools.Message parses the first header up to a blank line
        # So while there is remaining data, it probably means there is
        # another header to be parsed.
        # Get rid of any preceding whitespace, which if it is all whitespace
        # will get rid of everything.
        remaining = header_file.read().lstrip()
    return m
124
81
class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):
125
82
"""Base class for http implementations.
175
132
:param relpath: The relative path to the file
177
134
code, response_file = self._get(relpath, None)
135
# FIXME: some callers want an iterable... One step forward, three steps
136
# backwards :-/ And not only an iterable, but an iterable that can be
137
# seeked backwards, so we will never be able to do that. One such
138
# known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the
139
# time of this writing it's even the only known client -- vila20071203
140
return StringIO(response_file.read())
142
# TODO: Add tests for tail_amount or deprecate it
180
143
def _get(self, relpath, ranges, tail_amount=0):
181
144
"""Get a file, or part of a file.
213
176
# further tries were unsuccessful
214
177
raise exc_info[0], exc_info[1], exc_info[2]
216
def _get_ranges_hinted(self, relpath, ranges):
217
"""Issue a ranged GET request taking server capabilities into account.
219
Depending of the errors returned by the server, we try several GET
220
requests, trying to minimize the data transferred.
222
:param relpath: Path relative to transport base URL
223
:param ranges: None to get the whole file;
224
or a list of _CoalescedOffset to fetch parts of a file.
225
:returns: A file handle containing at least the requested ranges.
232
code, f = self._get(relpath, ranges)
233
except errors.InvalidRange, e:
235
exc_info = sys.exc_info()
236
self._degrade_range_hint(relpath, ranges, exc_info)
240
179
# _coalesce_offsets is a helper for readv, it try to combine ranges without
241
180
# degrading readv performances. _bytes_to_read_before_seek is the value
242
181
# used for the limit parameter and has been tuned for other transports. For
254
193
# By default Apache has a limit of ~400 ranges before replying with a 400
255
194
# Bad Request. So we go underneath that amount to be safe.
256
195
_max_get_ranges = 200
196
# We impose no limit on the range size. But see _pycurl.py for a different
258
200
def _readv(self, relpath, offsets):
259
201
"""Get parts of the file at the given relative path.
274
216
sorted_offsets = sorted(offsets)
275
217
coalesced = self._coalesce_offsets(
276
218
sorted_offsets, limit=self._max_readv_combine,
277
fudge_factor=self._bytes_to_read_before_seek)
219
fudge_factor=self._bytes_to_read_before_seek,
220
max_size=self._get_max_size)
279
222
# Turn it into a list, we will iterate it several times
280
223
coalesced = list(coalesced)
284
227
# Cache the data read, but only until it's been used
286
229
# We will iterate on the data received from the GET requests and
287
# serve the corresponding offsets repecting the initial order. We
230
# serve the corresponding offsets respecting the initial order. We
288
231
# need an offset iterator for that.
289
232
iter_offsets = iter(offsets)
290
233
cur_offset_and_size = iter_offsets.next()
293
for cur_coal, file in self._coalesce_readv(relpath, coalesced):
236
for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
294
237
# Split the received chunk
295
238
for offset, size in cur_coal.ranges:
296
239
start = cur_coal.start + offset
298
data = file.read(size)
241
data = rfile.read(size)
299
242
data_len = len(data)
300
243
if data_len != size:
301
244
raise errors.ShortReadvError(relpath, start, size,
303
data_map[(start, size)] = data
246
if (start, size) == cur_offset_and_size:
247
# The offset requested are sorted as the coalesced
248
# ones, no need to cache. Win !
249
yield cur_offset_and_size[0], data
250
cur_offset_and_size = iter_offsets.next()
252
# Different sorting. We need to cache.
253
data_map[(start, size)] = data
305
255
# Yield everything we can
306
256
while cur_offset_and_size in data_map:
311
261
yield cur_offset_and_size[0], this_data
312
262
cur_offset_and_size = iter_offsets.next()
314
except (errors.ShortReadvError,errors.InvalidRange), e:
264
except (errors.ShortReadvError, errors.InvalidRange,
265
errors.InvalidHttpRange), e:
315
266
self._degrade_range_hint(relpath, coalesced, sys.exc_info())
316
267
# Some offsets may have been already processed, so we retry
317
268
# only the unsuccessful ones.
321
272
def _coalesce_readv(self, relpath, coalesced):
    """Issue several GET requests to satisfy the coalesced offsets"""
    # NOTE(review): the source span interleaved an older xrange-grouping
    # revision with this newer get_and_yield revision; the newer one is
    # kept (it additionally honours _get_max_size).  Verify upstream.

    def get_and_yield(relpath, coalesced):
        # One GET for the given coalesced offsets; yields each coalesced
        # offset paired with the single response file.
        if coalesced:
            # Note that the _get below may raise
            # errors.InvalidHttpRange. It's the caller's responsibility to
            # decide how to retry since it may provide different coalesced
            # offsets.
            code, rfile = self._get(relpath, coalesced)
            for coal in coalesced:
                yield coal, rfile

    if self._range_hint is None:
        # Download whole file
        for c, rfile in get_and_yield(relpath, coalesced):
            yield c, rfile
    else:
        total = len(coalesced)
        if self._range_hint == 'multi':
            max_ranges = self._max_get_ranges
        elif self._range_hint == 'single':
            max_ranges = total
        else:
            raise AssertionError("Unknown _range_hint %r"
                                 % (self._range_hint,))
        # TODO: Some web servers may ignore the range requests and return
        # the whole file, we may want to detect that and avoid further
        # requests.
        # Hint: test_readv_multiple_get_requests will fail once we do that
        cumul = 0
        ranges = []
        for coal in coalesced:
            if ((self._get_max_size > 0
                 and cumul + coal.length > self._get_max_size)
                or len(ranges) >= max_ranges):
                # Get that much and yield
                for c, rfile in get_and_yield(relpath, ranges):
                    yield c, rfile
                # Restart with the current offset
                cumul = coal.length
                ranges = [coal]
            else:
                cumul += coal.length
                ranges.append(coal)
        # Get the rest and yield
        for c, rfile in get_and_yield(relpath, ranges):
            yield c, rfile
343
321
def recommended_page_size(self):
344
322
"""See Transport.recommended_page_size().
352
@deprecated_method(zero_seventeen)
def offsets_to_ranges(offsets):
    """Turn a list of offsets and sizes into a list of byte ranges.

    :param offsets: A list of tuples of (start, size). An empty list
        is allowed.
    :return: a list of inclusive byte ranges (start, end)
        Adjacent ranges will be combined.
    """
    # NOTE(review): reconstructed from a garbled paste; the accumulator
    # initialization and prev_end bookkeeping were missing.  Also note the
    # overlap branch blindly assigns combined[-1][1] = end, which can
    # shrink the previous range if a later offset ends earlier — kept
    # as-is since this method is deprecated; confirm before changing.
    # Make sure we process sorted offsets
    offsets = sorted(offsets)

    prev_end = None
    combined = []

    for start, size in offsets:
        end = start + size - 1
        if prev_end is None:
            combined.append([start, end])
        elif start <= prev_end + 1:
            combined[-1][1] = end
        else:
            combined.append([start, end])
        prev_end = end

    return combined
379
329
def _post(self, body_bytes):
380
330
"""POST body_bytes to .bzr/smart on this transport.
490
440
return self.__class__(self.abspath(offset), self)
492
442
def _attempted_range_header(self, offsets, tail_amount):
493
"""Prepare a HTTP Range header at a level the server should accept"""
443
"""Prepare a HTTP Range header at a level the server should accept.
445
:return: the range header representing offsets/tail_amount or None if
446
no header can be built.
495
449
if self._range_hint == 'multi':
496
450
# Generate the header describing all offsets