"""
Multi-part parsing for file uploads.

Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
file upload handlers for processing.
"""
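# Usage sketch (illustrative only; assumes ``request`` is a Django HttpRequest
# whose body is multipart/form-data):
#
#     parser = MultiPartParser(request.META, request, request.upload_handlers)
#     post, files = parser.parse()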
import cgi

from django.conf import settings
from django.core.exceptions import SuspiciousOperation
from django.utils.datastructures import MultiValueDict
from django.utils.encoding import force_unicode
from django.utils.text import unescape_entities
from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers

__all__ = ('MultiPartParser', 'MultiPartParserError', 'InputStreamExhausted')
class MultiPartParserError(Exception):
    pass

class InputStreamExhausted(Exception):
    """
    No more reads are allowed from this device.
    """
    pass

RAW = "raw"
FILE = "file"
FIELD = "field"

class MultiPartParser(object):
    """
    An RFC 2388 multipart/form-data parser.

    ``MultiPartParser.parse()`` reads the input stream in ``chunk_size`` chunks
    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
    """
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The standard ``META`` dictionary in Django request objects.
        :input_data:
            The raw post data, as a file-like object.
        :upload_handlers:
            A list of UploadHandler instances that perform operations on the
            uploaded data.
        :encoding:
            The encoding with which to treat the incoming data.
        """
        # Content-Type should contain multipart and the boundary information.
        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        ctypes, opts = parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not cgi.valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)

        # Content-Length should contain the length of the body we are about
        # to receive.
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            # For now set it to 0; we'll try again later on down.
            content_length = 0

        if content_length < 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)
        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        self._chunk_size = min([2**31 - 4] + possible_sizes)
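        # Worked example (illustrative): handlers with chunk sizes of
        # 64 * 2**10 and 2**20 give min([2**31 - 4, 65536, 1048576]) == 65536,
        # i.e. the smallest handler chunk size wins.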

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handlers = upload_handlers

    def parse(self):
        """
        Parse the POST data and break it into a FILES MultiValueDict and a POST
        MultiValueDict.

        Returns a tuple containing the POST and FILES dictionary, respectively.
        """
        # We have to import QueryDict down here to avoid a circular import.
        from django.http import QueryDict

        encoding = self._encoding
        handlers = self._upload_handlers

        # The HTTP spec says that Content-Length >= 0 is valid, so handle
        # content-length == 0 here before continuing.
        if self._content_length == 0:
            return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict()

        limited_input_data = LimitBytes(self._input_data, self._content_length)

        # See if the handler will want to take care of the parsing.
        # This allows overriding everything if somebody wants it.
        for handler in handlers:
            result = handler.handle_raw_input(limited_input_data,
                                              self._meta,
                                              self._content_length,
                                              self._boundary,
                                              encoding)
            if result is not None:
                return result[0], result[1]

        # Create the data structures to be used later.
        self._post = QueryDict('', mutable=True)
        self._files = MultiValueDict()

        # Instantiate the parser and stream:
        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))

        # Whether or not to signal a file-completion at the beginning of the loop.
        old_field_name = None
        counters = [0] * len(handlers)

        try:
            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
                if old_field_name:
                    # We run this at the beginning of the next loop
                    # since we cannot be sure a file is complete until
                    # we hit the next boundary/part of the multipart content.
                    self.handle_file_complete(old_field_name, counters)
                    old_field_name = None

                try:
                    disposition = meta_data['content-disposition'][1]
                    field_name = disposition['name'].strip()
                except (KeyError, IndexError, AttributeError):
                    continue

                transfer_encoding = meta_data.get('content-transfer-encoding')
                field_name = force_unicode(field_name, encoding, errors='replace')

                if item_type == FIELD:
                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        try:
                            data = str(raw_data).decode('base64')
                        except Exception:
                            data = raw_data
                    else:
                        data = field_stream.read()

                    self._post.appendlist(field_name,
                                          force_unicode(data, encoding, errors='replace'))
                elif item_type == FILE:
                    # This is a file, use the handler...
                    file_name = disposition.get('filename')
                    if not file_name:
                        continue
                    file_name = force_unicode(file_name, encoding, errors='replace')
                    file_name = self.IE_sanitize(unescape_entities(file_name))

                    content_type = meta_data.get('content-type', ('',))[0].strip()
                    try:
                        charset = meta_data.get('content-type', (0, {}))[1].get('charset', None)
                    except Exception:
                        charset = None

                    try:
                        content_length = int(meta_data.get('content-length')[0])
                    except (IndexError, TypeError, ValueError):
                        content_length = None

                    counters = [0] * len(handlers)
                    try:
                        for handler in handlers:
                            try:
                                handler.new_file(field_name, file_name,
                                                 content_type, content_length,
                                                 charset)
                            except StopFutureHandlers:
                                break

                        for chunk in field_stream:
                            if transfer_encoding == 'base64':
                                # We only special-case base64 transfer encoding
                                try:
                                    chunk = str(chunk).decode('base64')
                                except Exception as e:
                                    # Since this is only a chunk, any error is an unfixable error.
                                    raise MultiPartParserError("Could not decode base64 data: %r" % e)

                            for i, handler in enumerate(handlers):
                                chunk_length = len(chunk)
                                chunk = handler.receive_data_chunk(chunk,
                                                                   counters[i])
                                counters[i] += chunk_length
                                if chunk is None:
                                    # If the chunk received by the handler is None, then don't continue.
                                    break

                    except SkipFile:
                        # Just use up the rest of this file...
                        exhaust(field_stream)
                    else:
                        # Handle file upload completions on next iteration.
                        old_field_name = field_name
                else:
                    # If this is neither a FIELD nor a FILE, just exhaust the stream.
                    exhaust(stream)
        except StopUpload as e:
            if not e.connection_reset:
                exhaust(limited_input_data)
        else:
            # Make sure that the request data is all fed
            exhaust(limited_input_data)

        # Signal that the upload has completed.
        for handler in handlers:
            retval = handler.upload_complete()
            if retval:
                break

        return self._post, self._files

    def handle_file_complete(self, old_field_name, counters):
        """
        Handle all the signalling that takes place when a file is complete.
        """
        for i, handler in enumerate(self._upload_handlers):
            file_obj = handler.file_complete(counters[i])
            if file_obj:
                # If it returns a file object, then set the files dict.
                self._files.appendlist(force_unicode(old_field_name,
                                                     self._encoding,
                                                     errors='replace'),
                                       file_obj)
                break

    def IE_sanitize(self, filename):
        """Cleanup filename from Internet Explorer full paths."""
        return filename and filename[filename.rfind("\\") + 1:].strip()
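    # Example (illustrative): IE_sanitize(u'C:\\Docs\\photo.jpg') returns
    # u'photo.jpg'; a bare u'photo.jpg' passes through unchanged, since
    # rfind returns -1 and the slice then starts at index 0.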

class LazyStream(object):
    """
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.

    Given a producer object (an iterator that yields bytestrings), the
    LazyStream object will support iteration, reading, and keeping a "look-back"
    variable in case you need to "unget" some bytes.
    """
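    # Behavior sketch (illustrative), with a simple producer:
    #
    #     stream = LazyStream(iter(['abcdef']))
    #     stream.read(3)        # 'abc'; tell() == 3
    #     stream.unget('abc')   # pushed back; tell() rewinds to 0
    #     stream.read()         # 'abcdef'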
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an iterable that returns a string each time it
        is called.
        """
        self._producer = producer
        self._empty = False
        self._leftover = ''
        self.length = length
        self.position = 0
        self._remaining = length
        self._unget_history = []

    def tell(self):
        return self.position

    def read(self, size=None):
        def parts():
            remaining = (size is not None and [size] or [self._remaining])[0]
            # do the whole thing in one shot if no limit was provided.
            if remaining is None:
                yield ''.join(self)
                return

            # otherwise do some bookkeeping to return exactly enough
            # of the stream, stashing any extra content we get from
            # the producer
            while remaining != 0:
                assert remaining > 0, 'remaining bytes to read should never go negative'

                chunk = self.next()

                emitting = chunk[:remaining]
                self.unget(chunk[remaining:])
                remaining -= len(emitting)
                yield emitting

        out = ''.join(parts())
        return out

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        This procedure just returns whatever chunk is conveniently returned
        from the iterator. Useful to avoid unnecessary bookkeeping if
        performance is an issue.
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        else:
            output = self._producer.next()
            self._unget_history = []
        self.position += len(output)
        return output

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty list. Any leftover bytes that have
        already been read will still be reported upon read() and/or next().
        """
        self._producer = []

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        if not bytes:
            return
        self._update_unget_history(len(bytes))
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def _update_unget_history(self, num_bytes):
        """
        Updates the unget history as a sanity check to see if we've pushed
        back the same number of bytes in one chunk. If we keep ungetting the
        same number of bytes many times (here, 50), we're most likely in an
        infinite loop of some sort. This is usually caused by a
        maliciously-malformed MIME request.
        """
        self._unget_history = [num_bytes] + self._unget_history[:49]
        number_equal = len([current_number for current_number in self._unget_history
                            if current_number == num_bytes])

        if number_equal > 40:
            raise SuspiciousOperation(
                "The multipart parser got stuck, which shouldn't happen with"
                " normal uploaded files. Check for malicious upload activity;"
                " if there is none, report this to the Django developers."
            )

class ChunkIter(object):
    """
    An iterable that will yield chunks of data. Given a file-like object as the
    constructor, this object will yield chunks of read operations from that
    object.
    """
    def __init__(self, flo, chunk_size=64 * 1024):
        self.flo = flo
        self.chunk_size = chunk_size

    def next(self):
        try:
            data = self.flo.read(self.chunk_size)
        except InputStreamExhausted:
            raise StopIteration()
        if data:
            return data
        else:
            raise StopIteration()

    def __iter__(self):
        return self

class LimitBytes(object):
    """ Limit bytes for a file object. """
    def __init__(self, fileobject, length):
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read data from the underlying file.
        If you ask for too much or there isn't anything left,
        this will raise an InputStreamExhausted error.
        """
        if self.remaining <= 0:
            raise InputStreamExhausted()
        if num_bytes is None:
            num_bytes = self.remaining
        else:
            num_bytes = min(num_bytes, self.remaining)
        self.remaining -= num_bytes
        return self._file.read(num_bytes)
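    # Behavior sketch (illustrative): LimitBytes(f, 4) caps reads at 4 bytes
    # in total; once self.remaining hits 0, the next read() raises
    # InputStreamExhausted, which ChunkIter translates into StopIteration.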

class InterBoundaryIter(object):
    """
    A Producer that will iterate over boundaries.
    """
    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary

    def __iter__(self):
        return self

    def next(self):
        try:
            return LazyStream(BoundaryIter(self._stream, self._boundary))
        except InputStreamExhausted:
            raise StopIteration()

class BoundaryIter(object):
    """
    A Producer that is sensitive to boundaries.

    Will happily yield bytes until a boundary is found. Will yield the bytes
    before the boundary, throw away the boundary bytes themselves, and push the
    post-boundary bytes back on the stream.

    Future calls to .next() after locating the boundary will raise a
    StopIteration exception.
    """

    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary
        self._done = False
        # rollback an additional six bytes because the format is like
        # this: CRLF<boundary>[--CRLF]
        self._rollback = len(boundary) + 6

        # Try to use mx fast string search if available. Otherwise
        # use Python find. Wrap the latter for consistency.
        unused_char = self._stream.read(1)
        if not unused_char:
            raise InputStreamExhausted()
        self._stream.unget(unused_char)
        try:
            from mx.TextTools import FS
            self._fs = FS(boundary).find
        except ImportError:
            self._fs = lambda data: data.find(boundary)

    def __iter__(self):
        return self

    def next(self):
        if self._done:
            raise StopIteration()

        stream = self._stream
        rollback = self._rollback

        bytes_read = 0
        chunks = []
        for bytes in stream:
            bytes_read += len(bytes)
            chunks.append(bytes)
            if bytes_read > rollback:
                break
            if not bytes:
                break
        else:
            self._done = True

        if not chunks:
            raise StopIteration()

        chunk = ''.join(chunks)
        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)

        if boundary:
            end, next = boundary
            stream.unget(chunk[next:])
            self._done = True
            return chunk[:end]
        else:
            # make sure we don't treat a partial boundary (and
            # its separators) as data
            if not chunk[:-rollback]:  # and len(chunk) >= (len(self._boundary) + 6):
                # There's nothing left, we should just return and mark as done.
                self._done = True
                return chunk
            else:
                stream.unget(chunk[-rollback:])
                return chunk[:-rollback]

    def _find_boundary(self, data, eof=False):
        """
        Finds a multipart boundary in data.

        Should no boundary exist in the data, None is returned. Otherwise
        a tuple containing the indices of the following are returned:

         * the end of current encapsulation
         * the start of the next encapsulation
        """
        index = self._fs(data)
        if index < 0:
            return None
        else:
            end = index
            next = index + len(self._boundary)
            # backup over CRLF
            if data[max(0, end - 1)] == '\n':
                end -= 1
            if data[max(0, end - 1)] == '\r':
                end -= 1
            return end, next

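    # Worked example (illustrative): with boundary '--abc' and
    # data = 'payload\r\n--abc\r\n', _fs finds index 9, so next = 14 and
    # end backs up over the CRLF to 7; the caller then yields data[:7]
    # ('payload') and ungets everything from index 14 onward.
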
def exhaust(stream_or_iterable):
    """
    Completely exhausts an iterator or stream.

    Raise a MultiPartParserError if the argument is not a stream or an iterable.
    """
    iterator = None
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        iterator = ChunkIter(stream_or_iterable, 16384)

    if iterator is None:
        raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')

    # Iterate until the iterator is exhausted.
    for __ in iterator:
        pass

def parse_boundary_stream(stream, max_header_size):
    """
    Parses one and exactly one stream that encapsulates a boundary.
    """
    # Stream at beginning of header, look for end of header
    # and parse it if found. The header must fit within one
    # chunk.
    chunk = stream.read(max_header_size)
    # 'find' returns the top of these four bytes, so we'll
    # need to munch them later to prevent them from polluting
    # the payload.
    header_end = chunk.find('\r\n\r\n')

    def _parse_header(line):
        main_value_pair, params = parse_header(line)
        try:
            name, value = main_value_pair.split(':', 1)
        except ValueError:
            raise ValueError("Invalid header: %r" % line)
        return name, (value, params)

    if header_end == -1:
        # we find no header, so we just mark this fact and pass on
        # the stream verbatim
        stream.unget(chunk)
        return (RAW, {}, stream)

    header = chunk[:header_end]

    # here we place any excess chunk back onto the stream, as
    # well as throwing away the CRLFCRLF bytes from above.
    stream.unget(chunk[header_end + 4:])

    TYPE = RAW
    outdict = {}

    # Eliminate blank lines
    for line in header.split('\r\n'):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
            name, (value, params) = _parse_header(line)
        except ValueError:
            continue

        if name == 'content-disposition':
            TYPE = FIELD
            if params.get('filename'):
                TYPE = FILE

        outdict[name] = value, params

    if TYPE == RAW:
        stream.unget(chunk)

    return (TYPE, outdict, stream)

class Parser(object):
    def __init__(self, stream, boundary):
        self._stream = stream
        self._separator = '--' + boundary

    def __iter__(self):
        boundarystream = InterBoundaryIter(self._stream, self._separator)
        for sub_stream in boundarystream:
            # Iterate over each part
            yield parse_boundary_stream(sub_stream, 1024)
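
# Illustrative shape of what Parser yields for a well-formed body: each item
# is an (item_type, meta_data, field_stream) tuple, along the lines of
#
#     (FIELD, {'content-disposition': ('form-data', {'name': 'title'})}, stream)
#     (FILE, {'content-disposition': ('form-data',
#             {'name': 'doc', 'filename': 'a.txt'})}, stream)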

def parse_header(line):
    """ Parse the header into a key-value. """
    plist = _parse_header_params(';' + line)
    key = plist.pop(0).lower()
    pdict = {}
    for p in plist:
        i = p.find('=')
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i+1:].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace('\\\\', '\\').replace('\\"', '"')
            pdict[name] = value
    return key, pdict
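
# Example (illustrative):
#
#     parse_header('form-data; name="upload"; filename="a.txt"')
#
# returns ('form-data', {'name': 'upload', 'filename': 'a.txt'}).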

def _parse_header_params(s):
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        while end > 0 and s.count('"', 0, end) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        plist.append(f.strip())
        s = s[end:]
    return plist
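
# Example (illustrative): the inner while loop skips semicolons inside quoted
# values, so
#
#     _parse_header_params(';a; b="x;y"; c=1')
#
# returns ['a', 'b="x;y"', 'c=1'] instead of splitting on the quoted ';'.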