~ubuntu-branches/ubuntu/hardy/bzr/hardy-proposed

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Bazaar Package Importer
Author(s): Adeodato Simó
Date: 2007-12-14 13:12:06 UTC
mfrom: (1.1.35 upstream)
Revision ID: james.westby@ubuntu.com-20071214131206-tetsic75bgd968n1

Tags: 1.0-1

http://bugs.debian.org/455592

* Final 1.0 release.
  + Wrong download links in User Guide fixed. (Closes: #455592)
  + Drop now unneeded patch fix_version_strings.diff.

* After some chat with upstream, recommend `bzr upgrade` and then `bzr
  reconcile` in NEWS.Debian, instead of reconcile before upgrade, since
  reconciling in the old format (knits) can be remarkably slower than in
  the new format (packs). (NB: this should only be done with 1.0~rc2 and
  later, due to LP #165290.)

files added:
doc/en/user-guide/adv_merging.txt

index.txt

files removed:
debian/patches

debian/patches/fix_version_strings.diff

debian/patches/series

doc/en/user-reference/index.txt

files modified:
Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.c

bzrlib/_knit_load_data_c.c

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/errors.py

bzrlib/help_topics.py

bzrlib/remote.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

debian/NEWS

debian/changelog

doc/en/mini-tutorial/index.txt

doc/en/user-guide/authentication_conf.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/version_info.txt

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

return url

def _extract_headers(header_text, url):

"""Extract the mapping for an rfc2822 header

This is a helper function for the test suite and for _pycurl.

(urllib already parses the headers for us)

In the case that there are multiple headers inside the file,

the last one is returned.

:param header_text: A string of header information.

This expects that the first line of a header will always be HTTP ...

:param url: The url we are parsing, so we can raise nice errors

:return: mimetools.Message object, which basically acts like a case

insensitive dictionary.

"""

first_header = True

remaining = header_text

if not remaining:

100

raise errors.InvalidHttpResponse(url, 'Empty headers')

101

102

while remaining:

103

header_file = StringIO(remaining)

104

first_line = header_file.readline()

105

if not first_line.startswith('HTTP'):

106

if first_header: # The first header *must* start with HTTP

107

raise errors.InvalidHttpResponse(url,

108

'Opening header line did not start with HTTP: %s'

109

% (first_line,))

110

else:

111

break # We are done parsing

112

first_header = False

113

m = mimetools.Message(header_file)

114

115

# mimetools.Message parses the first header up to a blank line

116

# So while there is remaining data, it probably means there is

117

# another header to be parsed.

118

# Get rid of any preceding whitespace, which if it is all whitespace

119

# will get rid of everything.

120

remaining = header_file.read().lstrip()

121

return m

122

123

124

class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):

125

"""Base class for http implementations.

126

175

132

:param relpath: The relative path to the file

176

133

"""

177

134

code, response_file = self._get(relpath, None)

178

return response_file

135

# FIXME: some callers want an iterable... One step forward, three steps

136

# backwards :-/ And not only an iterable, but an iterable that can be

137

# seeked backwards, so we will never be able to do that. One such

138

# known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the

139

# time of this writing it's even the only known client -- vila20071203

140

return StringIO(response_file.read())

179

141

142

# TODO: Add tests for tail_amount or deprecate it

180

143

def _get(self, relpath, ranges, tail_amount=0):

181

144

"""Get a file, or part of a file.

182

145

213

176

# further tries were unsuccessful

214

177

raise exc_info[0], exc_info[1], exc_info[2]

215

178

216

def _get_ranges_hinted(self, relpath, ranges):

217

"""Issue a ranged GET request taking server capabilities into account.

218

219

Depending on the errors returned by the server, we try several GET

220

requests, trying to minimize the data transferred.

221

222

:param relpath: Path relative to transport base URL

223

:param ranges: None to get the whole file;

224

or a list of _CoalescedOffset to fetch parts of a file.

225

:returns: A file handle containing at least the requested ranges.

226

"""

227

exc_info = None

228

try_again = True

229

while try_again:

230

try_again = False

231

try:

232

code, f = self._get(relpath, ranges)

233

except errors.InvalidRange, e:

234

if exc_info is None:

235

exc_info = sys.exc_info()

236

self._degrade_range_hint(relpath, ranges, exc_info)

237

try_again = True

238

return f

239

240

179

# _coalesce_offsets is a helper for readv, it tries to combine ranges without

241

180

# degrading readv performances. _bytes_to_read_before_seek is the value

242

181

# used for the limit parameter and has been tuned for other transports. For

254

193

# By default Apache has a limit of ~400 ranges before replying with a 400

255

194

# Bad Request. So we go underneath that amount to be safe.

256

195

_max_get_ranges = 200

196

# We impose no limit on the range size. But see _pycurl.py for a different

197

# use.

198

_get_max_size = 0

257

199

258

200

def _readv(self, relpath, offsets):

259

201

"""Get parts of the file at the given relative path.

262

204

:return: A list or generator of (offset, data) tuples

263

205

"""

264

206

265

# offsets may be a genarator, we will iterate it several times, so

207

# offsets may be a generator, we will iterate it several times, so

266

208

# build a list

267

209

offsets = list(offsets)

268

210

274

216

sorted_offsets = sorted(offsets)

275

217

coalesced = self._coalesce_offsets(

276

218

sorted_offsets, limit=self._max_readv_combine,

277

fudge_factor=self._bytes_to_read_before_seek)

219

fudge_factor=self._bytes_to_read_before_seek,

220

max_size=self._get_max_size)

278

221

279

222

# Turn it into a list, we will iterate it several times

280

223

coalesced = list(coalesced)

284

227

# Cache the data read, but only until it's been used

285

228

data_map = {}

286

229

# We will iterate on the data received from the GET requests and

287

# serve the corresponding offsets repecting the initial order. We

230

# serve the corresponding offsets respecting the initial order. We

288

231

# need an offset iterator for that.

289

232

iter_offsets = iter(offsets)

290

233

cur_offset_and_size = iter_offsets.next()

291

234

292

235

try:

293

for cur_coal, file in self._coalesce_readv(relpath, coalesced):

236

for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):

294

237

# Split the received chunk

295

238

for offset, size in cur_coal.ranges:

296

239

start = cur_coal.start + offset

297

file.seek(start, 0)

298

data = file.read(size)

240

rfile.seek(start, 0)

241

data = rfile.read(size)

299

242

data_len = len(data)

300

243

if data_len != size:

301

244

raise errors.ShortReadvError(relpath, start, size,

302

245

actual=data_len)

303

data_map[(start, size)] = data

246

if (start, size) == cur_offset_and_size:

247

# The offset requested are sorted as the coalesced

248

# ones, no need to cache. Win !

249

yield cur_offset_and_size[0], data

250

cur_offset_and_size = iter_offsets.next()

251

else:

252

# Different sorting. We need to cache.

253

data_map[(start, size)] = data

304

254

305

255

# Yield everything we can

306

256

while cur_offset_and_size in data_map:

311

261

yield cur_offset_and_size[0], this_data

312

262

cur_offset_and_size = iter_offsets.next()

313

263

314

except (errors.ShortReadvError,errors.InvalidRange), e:

264

except (errors.ShortReadvError, errors.InvalidRange,

265

errors.InvalidHttpRange), e:

315

266

self._degrade_range_hint(relpath, coalesced, sys.exc_info())

316

267

# Some offsets may have been already processed, so we retry

317

268

# only the unsuccessful ones.

320

271

321

272

def _coalesce_readv(self, relpath, coalesced):

322

273

"""Issue several GET requests to satisfy the coalesced offsets"""

323

total = len(coalesced)

324

if self._range_hint == 'multi':

325

max_ranges = self._max_get_ranges

326

elif self._range_hint == 'single':

327

max_ranges = total

274

275

def get_and_yield(relpath, coalesced):

276

if coalesced:

277

# Note that the _get below may raise

278

# errors.InvalidHttpRange. It's the caller's responsibility to

279

# decide how to retry since it may provide different coalesced

280

# offsets.

281

code, rfile = self._get(relpath, coalesced)

282

for coal in coalesced:

283

yield coal, rfile

284

285

if self._range_hint is None:

286

# Download whole file

287

for c, rfile in get_and_yield(relpath, coalesced):

288

yield c, rfile

328

289

else:

329

# The whole file will be downloaded anyway

330

max_ranges = total

331

# TODO: Some web servers may ignore the range requests and return the

332

# whole file, we may want to detect that and avoid further requests.

333

# Hint: test_readv_multiple_get_requests will fail in that case .

334

for group in xrange(0, len(coalesced), max_ranges):

335

ranges = coalesced[group:group+max_ranges]

336

# Note that the following may raise errors.InvalidRange. It's the

337

# caller responsability to decide how to retry since it may provide

338

# different coalesced offsets.

339

code, file = self._get(relpath, ranges)

340

for range in ranges:

341

yield range, file

290

total = len(coalesced)

291

if self._range_hint == 'multi':

292

max_ranges = self._max_get_ranges

293

elif self._range_hint == 'single':

294

max_ranges = total

295

else:

296

raise AssertionError("Unknown _range_hint %r"

297

% (self._range_hint,))

298

# TODO: Some web servers may ignore the range requests and return

299

# the whole file, we may want to detect that and avoid further

300

# requests.

301

# Hint: test_readv_multiple_get_requests will fail once we do that

302

cumul = 0

303

ranges = []

304

for coal in coalesced:

305

if ((self._get_max_size > 0

306

and cumul + coal.length > self._get_max_size)

307

or len(ranges) >= max_ranges):

308

# Get that much and yield

309

for c, rfile in get_and_yield(relpath, ranges):

310

yield c, rfile

311

# Restart with the current offset

312

ranges = [coal]

313

cumul = coal.length

314

else:

315

ranges.append(coal)

316

cumul += coal.length

317

# Get the rest and yield

318

for c, rfile in get_and_yield(relpath, ranges):

319

yield c, rfile

342

320

343

321

def recommended_page_size(self):

344

322

"""See Transport.recommended_page_size().

348

326

"""

349

327

return 64 * 1024

350

328

351

@staticmethod

352

@deprecated_method(zero_seventeen)

353

def offsets_to_ranges(offsets):

354

"""Turn a list of offsets and sizes into a list of byte ranges.

355

356

:param offsets: A list of tuples of (start, size). An empty list

357

is not accepted.

358

:return: a list of inclusive byte ranges (start, end)

359

Adjacent ranges will be combined.

360

"""

361

# Make sure we process sorted offsets

362

offsets = sorted(offsets)

363

364

prev_end = None

365

combined = []

366

367

for start, size in offsets:

368

end = start + size - 1

369

if prev_end is None:

370

combined.append([start, end])

371

elif start <= prev_end + 1:

372

combined[-1][1] = end

373

else:

374

combined.append([start, end])

375

prev_end = end

376

377

return combined

378

379

329

def _post(self, body_bytes):

380

330

"""POST body_bytes to .bzr/smart on this transport.

381

331

490

440

return self.__class__(self.abspath(offset), self)

491

441

492

442

def _attempted_range_header(self, offsets, tail_amount):

493

"""Prepare a HTTP Range header at a level the server should accept"""

443

"""Prepare a HTTP Range header at a level the server should accept.

444

445

:return: the range header representing offsets/tail_amount or None if

446

no header can be built.

447

"""

494

448

495

449

if self._range_hint == 'multi':

496

450

# Generate the header describing all offsets

Older »