~gnome-el-l10n/gnome.gr-website/gnomegr-venus

Viewing changes to planet/vendor/html5lib/tokenizer.py

Committer: Sam Ruby
Date: 2007-08-12 19:08:29 UTC
Revision ID: rubys@intertwingly.net-20070812190829-8ypg0qvs2rsohcim

Upgrade to the latest html5lib
Fixes the following error:
http://lists.planetplanet.org/archives/devel/2007-August/001638.html

files modified:
planet/vendor/html5lib/filters/lint.py

planet/vendor/html5lib/filters/whitespace.py

planet/vendor/html5lib/html5parser.py

planet/vendor/html5lib/inputstream.py

planet/vendor/html5lib/liberalxmlparser.py

planet/vendor/html5lib/sanitizer.py

planet/vendor/html5lib/serializer/htmlserializer.py

planet/vendor/html5lib/tokenizer.py

planet/vendor/html5lib/treebuilders/__init__.py

planet/vendor/html5lib/treebuilders/_base.py

planet/vendor/html5lib/treebuilders/dom.py

planet/vendor/html5lib/treebuilders/etree.py

planet/vendor/html5lib/treebuilders/simpletree.py

planet/vendor/html5lib/treebuilders/soup.py

planet/vendor/html5lib/treewalkers/__init__.py

planet/vendor/html5lib/treewalkers/_base.py

planet/vendor/html5lib/treewalkers/dom.py

planet/vendor/html5lib/treewalkers/genshistream.py

planet/vendor/html5lib/treewalkers/simpletree.py

Show diffs side-by-side

added added

removed removed

planet/vendor/html5lib/tokenizer.py

# XXX need to fix documentation

def __init__(self, stream, encoding=None, parseMeta=True):

def __init__(self, stream, encoding=None, parseMeta=True,

lowercaseElementName=True, lowercaseAttrName=True,):

self.stream = HTMLInputStream(stream, encoding, parseMeta)

#Perform case conversions?

self.lowercaseElementName = lowercaseElementName

self.lowercaseAttrName = lowercaseAttrName

self.states = {

"data":self.dataState,

"entityData":self.entityDataState,

111

116

self.currentToken["type"] = "EmptyTag"

112

117

else:

113

118

self.tokenQueue.append({"type": "ParseError", "data":

114

_("Solidus (/) incorrectly placed in tag.")})

119

_(u"Solidus (/) incorrectly placed in tag.")})

115

120

116

121

# The character we just consumed needs to be put back on the stack so it

117

122

# doesn't get lost...

146

151

147

152

if charAsInt == 13:

148

153

self.tokenQueue.append({"type": "ParseError", "data":

149

_("Incorrect CR newline entity. Replaced with LF.")})

154

_(u"Incorrect CR newline entity. Replaced with LF.")})

150

155

charAsInt = 10

151

156

elif 127 < charAsInt < 160:

152

157

# If the integer is between 127 and 160 (so 128 and bigger and 159

153

158

# and smaller) we need to do the "windows trick".

154

159

self.tokenQueue.append({"type": "ParseError", "data":

155

_("Entity used with illegal number (windows-1252 reference).")})

160

_(u"Entity used with illegal number (windows-1252 reference).")})

156

161

157

162

charAsInt = entitiesWindows1252[charAsInt - 128]

158

163

168

173

char = eval("u'\\U%08x'" % charAsInt)

169

174

except:

170

175

self.tokenQueue.append({"type": "ParseError", "data":

171

_("Numeric entity couldn't be converted to character (codepoint: U+%08x).") % charAsInt})

176

_(u"Numeric entity couldn't be converted to character (codepoint: U+%08x).") % charAsInt})

172

177

else:

173

178

char = u"\uFFFD"

174

179

self.tokenQueue.append({"type": "ParseError", "data":

175

_("Numeric entity represents an illegal codepoint: U+%08x.") % charAsInt})

180

_(u"Numeric entity represents an illegal codepoint: U+%08x.") % charAsInt})

176

181

177

182

# Discard the ; if present. Otherwise, put it back on the queue and

178

183

# invoke parseError on parser.

179

184

if c != u";":

180

185

self.tokenQueue.append({"type": "ParseError", "data":

181

_("Numeric entity didn't end with ';'.")})

186

_(u"Numeric entity didn't end with ';'.")})

182

187

self.stream.unget(c)

183

188

184

189

return char

191

196

elif charStack[0] == u"#":

192

197

# We might have a number entity here.

193

198

charStack.extend([self.stream.char(), self.stream.char()])

194

if EOF in charStack:

199

if EOF in charStack[:2]:

195

200

# If we reach the end of the file put everything up to EOF

196

201

# back in the queue

197

202

charStack = charStack[:charStack.index(EOF)]

198

203

self.stream.unget(charStack)

199

204

self.tokenQueue.append({"type": "ParseError", "data":

200

_("Numeric entity expected. Got end of file instead.")})

205

_(u"Numeric entity expected. Got end of file instead.")})

201

206

else:

202

207

if charStack[1].lower() == u"x" \

203

208

and charStack[2] in hexDigits:

212

217

# No number entity detected.

213

218

self.stream.unget(charStack)

214

219

self.tokenQueue.append({"type": "ParseError", "data":

215

_("Numeric entity expected but none found.")})

220

_(u"Numeric entity expected but none found.")})

216

221

else:

217

222

# At this point in the process we might have a named entity. Entities

218

223

# are stored in the global variable "entities".

244

249

if entityName is not None:

245

250

if entityName[-1] != ";":

246

251

self.tokenQueue.append({"type": "ParseError", "data":

247

_("Named entity didn't end with ';'.")})

252

_(u"Named entity didn't end with ';'.")})

248

253

if entityName[-1] != ";" and fromAttribute and \

249

254

(charStack[entityLength] in asciiLetters

250

255

or charStack[entityLength] in digits):

254

259

self.stream.unget(charStack[entityLength:])

255

260

else:

256

261

self.tokenQueue.append({"type": "ParseError", "data":

257

_("Named entity expected. Got none.")})

262

_(u"Named entity expected. Got none.")})

258

263

self.stream.unget(charStack)

259

264

return char

260

265

272

277

the state to "data" because that's what's needed after a token has been

273

278

emitted.

274

279

"""

275

280

token = self.currentToken

276

281

# Add token to the queue to be yielded

277

self.tokenQueue.append(self.currentToken)

282

if (token["type"] in ("StartTag", "EndTag", "EmptyTag")):

283

if self.lowercaseElementName:

284

token["name"] = token["name"].translate(asciiUpper2Lower)

285

if token["type"] == "EndTag" and token["data"]:

286

self.tokenQueue.append({"type":"ParseError",

287

"data":_(u"End tag contains unexpected attributes.")})

288

self.tokenQueue.append(token)

278

289

self.state = self.states["data"]

279

290

280

291

286

297

287

298

def dataState(self):

288

299

data = self.stream.char()

300

301

# Keep a charbuffer to handle the escapeFlag

289

302

if self.contentModelFlag in\

290

303

(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]):

291

304

if len(self.lastFourChars) == 4:

292

305

self.lastFourChars.pop(0)

293

306

self.lastFourChars.append(data)

307

308

# The rest of the logic

294

309

if data == "&" and self.contentModelFlag in\

295

(contentModelFlags["PCDATA"], contentModelFlags["RCDATA"]):

310

(contentModelFlags["PCDATA"], contentModelFlags["RCDATA"]) and not\

311

self.escapeFlag:

296

312

self.state = self.states["entityData"]

297

313

elif data == "-" and self.contentModelFlag in\

298

(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]) and\

299

self.escapeFlag == False and\

300

"".join(self.lastFourChars) == "<!--":

314

(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]) and not\

315

self.escapeFlag and "".join(self.lastFourChars) == "<!--":

301

316

self.escapeFlag = True

302

317

self.tokenQueue.append({"type": "Characters", "data":data})

303

318

elif data == "<" and (self.contentModelFlag ==\

307

322

self.state = self.states["tagOpen"]

308

323

elif data == ">" and self.contentModelFlag in\

309

324

(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]) and\

310

self.escapeFlag == True and "".join(self.lastFourChars)[1:] == "-->":

325

self.escapeFlag and "".join(self.lastFourChars)[1:] == "-->":

311

326

self.escapeFlag = False

312

327

self.tokenQueue.append({"type": "Characters", "data":data})

313

328

elif data == EOF:

317

332

# Directly after emitting a token you switch back to the "data

318

333

# state". At that point spaceCharacters are important so they are

319

334

# emitted separately.

320

# XXX need to check if we don't need a special "spaces" flag on

321

# characters.

322

335

self.tokenQueue.append({"type": "SpaceCharacters", "data":

323

336

data + self.stream.charsUntil(spaceCharacters, True)})

324

337

else:

350

363

# XXX In theory it could be something besides a tag name. But

351

364

# do we really care?

352

365

self.tokenQueue.append({"type": "ParseError", "data":

353

_("Expected tag name. Got '>' instead.")})

366

_(u"Expected tag name. Got '>' instead.")})

354

367

self.tokenQueue.append({"type": "Characters", "data": u"<>"})

355

368

self.state = self.states["data"]

356

369

elif data == u"?":

357

370

# XXX In theory it could be something besides a tag name. But

358

371

# do we really care?

359

372

self.tokenQueue.append({"type": "ParseError", "data":

360

_("Expected tag name. Got '?' instead (HTML doesn't "

373

_(u"Expected tag name. Got '?' instead (HTML doesn't "

361

374

"support processing instructions).")})

362

375

self.stream.unget(data)

363

376

self.state = self.states["bogusComment"]

364

377

else:

365

378

# XXX

366

379

self.tokenQueue.append({"type": "ParseError", "data":

367

_("Expected tag name. Got something else instead")})

380

_(u"Expected tag name. Got something else instead")})

368

381

self.tokenQueue.append({"type": "Characters", "data": u"<"})

369

382

self.stream.unget(data)

370

383

self.state = self.states["data"]

423

436

self.state = self.states["tagName"]

424

437

elif data == u">":

425

438

self.tokenQueue.append({"type": "ParseError", "data":

426

_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})

439

_(u"Expected closing tag. Got '>' instead. Ignoring '</>'.")})

427

440

self.state = self.states["data"]

428

441

elif data == EOF:

429

442

self.tokenQueue.append({"type": "ParseError", "data":

430

_("Expected closing tag. Unexpected end of file.")})

443

_(u"Expected closing tag. Unexpected end of file.")})

431

444

self.tokenQueue.append({"type": "Characters", "data": u"</"})

432

445

self.state = self.states["data"]

433

446

else:

434

447

# XXX data can be _'_...

435

448

self.tokenQueue.append({"type": "ParseError", "data":

436

_("Expected closing tag. Unexpected character '" + data + "' found.")})

449

_(u"Expected closing tag. Unexpected character '%s' found.") % (data,)})

437

450

self.stream.unget(data)

438

451

self.state = self.states["bogusComment"]

439

452

return True

449

462

self.emitCurrentToken()

450

463

elif data == EOF:

451

464

self.tokenQueue.append({"type": "ParseError", "data":

452

_("Unexpected end of file in the tag name.")})

465

_(u"Unexpected end of file in the tag name.")})

453

466

self.emitCurrentToken()

454

467

elif data == u"/":

455

468

self.processSolidusInTag()

471

484

self.processSolidusInTag()

472

485

elif data == EOF:

473

486

self.tokenQueue.append({"type": "ParseError", "data":

474

_("Unexpected end of file. Expected attribute name instead.")})

487

_(u"Unexpected end of file. Expected attribute name instead.")})

475

488

self.emitCurrentToken()

476

489

else:

477

490

self.currentToken["data"].append([data, ""])

481

494

def attributeNameState(self):

482

495

data = self.stream.char()

483

496

leavingThisState = True

497

emitToken = False

484

498

if data == u"=":

485

499

self.state = self.states["beforeAttributeValue"]

486

500

elif data in asciiLetters:

491

505

# XXX If we emit here the attributes are converted to a dict

492

506

# without being checked and when the code below runs we error

493

507

# because data is a dict not a list

494

pass

508

emitToken = True

495

509

elif data in spaceCharacters:

496

510

self.state = self.states["afterAttributeName"]

497

511

elif data == u"/":

499

513

self.state = self.states["beforeAttributeName"]

500

514

elif data == EOF:

501

515

self.tokenQueue.append({"type": "ParseError", "data":

502

_("Unexpected end of file in attribute name.")})

503

self.emitCurrentToken()

504

leavingThisState = False

516

_(u"Unexpected end of file in attribute name.")})

517

self.state = self.states["data"]

518

emitToken = True

505

519

else:

506

520

self.currentToken["data"][-1][0] += data

507

521

leavingThisState = False

510

524

# Attributes are not dropped at this stage. That happens when the

511

525

# start tag token is emitted so values can still be safely appended

512

526

# to attributes, but we do want to report the parse error in time.

527

if self.lowercaseAttrName:

528

self.currentToken["data"][-1][0] = (

529

self.currentToken["data"][-1][0].translate(asciiUpper2Lower))

513

530

for name, value in self.currentToken["data"][:-1]:

514

531

if self.currentToken["data"][-1][0] == name:

515

532

self.tokenQueue.append({"type": "ParseError", "data":

516

_("Dropped duplicate attribute on tag.")})

533

_(u"Dropped duplicate attribute on tag.")})

534

break

517

535

# XXX Fix for above XXX

518

if data == u">":

536

if emitToken:

519

537

self.emitCurrentToken()

520

538

return True

521

539

535

553

self.state = self.states["beforeAttributeName"]

536

554

elif data == EOF:

537

555

self.tokenQueue.append({"type": "ParseError", "data":

538

_("Unexpected end of file. Expected = or end of tag.")})

556

_(u"Unexpected end of file. Expected = or end of tag.")})

539

557

self.emitCurrentToken()

540

558

else:

541

559

self.currentToken["data"].append([data, ""])

557

575

self.emitCurrentToken()

558

576

elif data == EOF:

559

577

self.tokenQueue.append({"type": "ParseError", "data":

560

_("Unexpected end of file. Expected attribute value.")})

578

_(u"Unexpected end of file. Expected attribute value.")})

561

579

self.emitCurrentToken()

562

580

else:

563

581

self.currentToken["data"][-1][1] += data

572

590

self.processEntityInAttribute()

573

591

elif data == EOF:

574

592

self.tokenQueue.append({"type": "ParseError", "data":

575

_("Unexpected end of file in attribute value (\").")})

593

_(u"Unexpected end of file in attribute value (\").")})

576

594

self.emitCurrentToken()

577

595

else:

578

596

self.currentToken["data"][-1][1] += data +\

587

605

self.processEntityInAttribute()

588

606

elif data == EOF:

589

607

self.tokenQueue.append({"type": "ParseError", "data":

590

_("Unexpected end of file in attribute value (').")})

608

_(u"Unexpected end of file in attribute value (').")})

591

609

self.emitCurrentToken()

592

610

else:

593

611

self.currentToken["data"][-1][1] += data +\

604

622

self.emitCurrentToken()

605

623

elif data == EOF:

606

624

self.tokenQueue.append({"type": "ParseError", "data":

607

_("Unexpected end of file in attribute value.")})

625

_(u"Unexpected end of file in attribute value.")})

608

626

self.emitCurrentToken()

609

627

else:

610

628

self.currentToken["data"][-1][1] += data + self.stream.charsUntil( \

627

645

def markupDeclarationOpenState(self):

628

646

charStack = [self.stream.char(), self.stream.char()]

629

647

if charStack == [u"-", u"-"]:

630

self.currentToken = {"type": "Comment", "data": ""}

648

self.currentToken = {"type": "Comment", "data": u""}

631

649

self.state = self.states["commentStart"]

632

650

else:

633

651

for x in xrange(5):

635

653

# Put in explicit EOF check

636

654

if (not EOF in charStack and

637

655

"".join(charStack).upper() == u"DOCTYPE"):

638

self.currentToken = {"type":"Doctype", "name":"",

656

self.currentToken = {"type":"Doctype", "name":u"",

639

657

"publicId":None, "systemId":None, "correct":True}

640

658

self.state = self.states["doctype"]

641

659

else:

642

660

self.tokenQueue.append({"type": "ParseError", "data":

643

_("Expected '--' or 'DOCTYPE'. Not found.")})

661

_(u"Expected '--' or 'DOCTYPE'. Not found.")})

644

662

self.stream.unget(charStack)

645

663

self.state = self.states["bogusComment"]

646

664

return True

651

669

self.state = self.states["commentStartDash"]

652

670

elif data == ">":

653

671

self.tokenQueue.append({"type": "ParseError", "data":

654

_("Incorrect comment.")})

672

_(u"Incorrect comment.")})

655

673

self.tokenQueue.append(self.currentToken)

656

674

self.state = self.states["data"]

657

675

elif data == EOF:

658

676

self.tokenQueue.append({"type": "ParseError", "data":

659

_("Unexpected end of file in comment.")})

677

_(u"Unexpected end of file in comment.")})

660

678

self.tokenQueue.append(self.currentToken)

661

679

self.state = self.states["data"]

662

680

else:

670

688

self.state = self.states["commentEnd"]

671

689

elif data == ">":

672

690

self.tokenQueue.append({"type": "ParseError", "data":

673

_("Incorrect comment.")})

691

_(u"Incorrect comment.")})

674

692

self.tokenQueue.append(self.currentToken)

675

693

self.state = self.states["data"]

676

694

elif data == EOF:

677

695

self.tokenQueue.append({"type": "ParseError", "data":

678

_("Unexpected end of file in comment.")})

696

_(u"Unexpected end of file in comment.")})

679

697

self.tokenQueue.append(self.currentToken)

680

698

self.state = self.states["data"]

681

699

else:

682

self.currentToken["data"] += data + self.stream.charsUntil(u"-")

700

self.currentToken["data"] += "-" + data + self.stream.charsUntil(u"-")

683

701

self.state = self.states["comment"]

684

702

return True

685

703

690

708

self.state = self.states["commentEndDash"]

691

709

elif data == EOF:

692

710

self.tokenQueue.append({"type": "ParseError", "data":

693

_("Unexpected end of file in comment.")})

711

_(u"Unexpected end of file in comment.")})

694

712

self.tokenQueue.append(self.currentToken)

695

713

self.state = self.states["data"]

696

714

else:

703

721

self.state = self.states["commentEnd"]

704

722

elif data == EOF:

705

723

self.tokenQueue.append({"type": "ParseError", "data":

706

_("Unexpected end of file in comment (-)")})

724

_(u"Unexpected end of file in comment (-)")})

707

725

self.tokenQueue.append(self.currentToken)

708

726

self.state = self.states["data"]

709

727

else:

722

740

self.state = self.states["data"]

723

741

elif data == u"-":

724

742

self.tokenQueue.append({"type": "ParseError", "data":

725

_("Unexpected '-' after '--' found in comment.")})

743

_(u"Unexpected '-' after '--' found in comment.")})

726

744

self.currentToken["data"] += data

727

745

elif data == EOF:

728

746

self.tokenQueue.append({"type": "ParseError", "data":

729

_("Unexpected end of file in comment (--).")})

747

_(u"Unexpected end of file in comment (--).")})

730

748

self.tokenQueue.append(self.currentToken)

731

749

self.state = self.states["data"]

732

750

else:

733

751

# XXX

734

752

self.tokenQueue.append({"type": "ParseError", "data":

735

_("Unexpected character in comment found.")})

753

_(u"Unexpected character in comment found.")})

736

754

self.currentToken["data"] += u"--" + data

737

755

self.state = self.states["comment"]

738

756

return True

743

761

self.state = self.states["beforeDoctypeName"]

744

762

else:

745

763

self.tokenQueue.append({"type": "ParseError", "data":

746

_("No space after literal string 'DOCTYPE'.")})

764

_(u"No space after literal string 'DOCTYPE'.")})

747

765

self.stream.unget(data)

748

766

self.state = self.states["beforeDoctypeName"]

749

767

return True

754

772

pass

755

773

elif data == u">":

756

774

self.tokenQueue.append({"type": "ParseError", "data":

757

_("Unexpected > character. Expected DOCTYPE name.")})

775

_(u"Unexpected > character. Expected DOCTYPE name.")})

758

776

self.currentToken["correct"] = False

759

777

self.tokenQueue.append(self.currentToken)

760

778

self.state = self.states["data"]

761

779

elif data == EOF:

762

780

self.tokenQueue.append({"type": "ParseError", "data":

763

_("Unexpected end of file. Expected DOCTYPE name.")})

781

_(u"Unexpected end of file. Expected DOCTYPE name.")})

764

782

self.currentToken["correct"] = False

765

783

self.tokenQueue.append(self.currentToken)

766

784

self.state = self.states["data"]

778

796

self.state = self.states["data"]

779

797

elif data == EOF:

780

798

self.tokenQueue.append({"type": "ParseError", "data":

781

_("Unexpected end of file in DOCTYPE name.")})

799

_(u"Unexpected end of file in DOCTYPE name.")})

782

800

self.currentToken["correct"] = False

783

801

self.tokenQueue.append(self.currentToken)

784

802

self.state = self.states["data"]

797

815

self.currentToken["correct"] = False

798

816

self.stream.unget(data)

799

817

self.tokenQueue.append({"type": "ParseError", "data":

800

_("Unexpected end of file in DOCTYPE.")})

818

_(u"Unexpected end of file in DOCTYPE.")})

801

819

self.tokenQueue.append(self.currentToken)

802

820

self.state = self.states["data"]

803

821

else:

813

831

else:

814

832

self.stream.unget(charStack)

815

833

self.tokenQueue.append({"type": "ParseError", "data":

816

_("Expected space or '>'. Got '" + data + "'")})

834

_(u"Expected space or '>'. Got '%s'") % (data,)})

817

835

self.state = self.states["bogusDoctype"]

818

836

return True

819

837

822

840

if data in spaceCharacters:

823

841

pass

824

842

elif data == "\"":

825

self.currentToken["publicId"] = ""

843

self.currentToken["publicId"] = u""

826

844

self.state = self.states["doctypePublicIdentifierDoubleQuoted"]

827

845

elif data == "'":

828

self.currentToken["publicId"] = ""

846

self.currentToken["publicId"] = u""

829

847

self.state = self.states["doctypePublicIdentifierSingleQuoted"]

830

848

elif data == ">":

831

849

self.tokenQueue.append({"type": "ParseError", "data":

832

_("Unexpected end of DOCTYPE.")})

850

_(u"Unexpected end of DOCTYPE.")})

833

851

self.currentToken["correct"] = False

834

852

self.tokenQueue.append(self.currentToken)

835

853

self.state = self.states["data"]

836

854

elif data == EOF:

837

855

self.tokenQueue.append({"type": "ParseError", "data":

838

_("Unexpected end of file in DOCTYPE.")})

856

_(u"Unexpected end of file in DOCTYPE.")})

839

857

self.currentToken["correct"] = False

840

858

self.tokenQueue.append(self.currentToken)

841

859

self.state = self.states["data"]

842

860

else:

843

861

self.tokenQueue.append({"type": "ParseError", "data":

844

_("Unexpected character in DOCTYPE.")})

862

_(u"Unexpected character in DOCTYPE.")})

845

863

self.state = self.states["bogusDoctype"]

846

864

return True

847

865

851

869

self.state = self.states["afterDoctypePublicIdentifier"]

852

870

elif data == EOF:

853

871

self.tokenQueue.append({"type": "ParseError", "data":

854

_("Unexpected end of file in DOCTYPE.")})

872

_(u"Unexpected end of file in DOCTYPE.")})

855

873

self.currentToken["correct"] = False

856

874

self.tokenQueue.append(self.currentToken)

857

875

self.state = self.states["data"]

865

883

self.state = self.states["afterDoctypePublicIdentifier"]

866

884

elif data == EOF:

867

885

self.tokenQueue.append({"type": "ParseError", "data":

868

_("Unexpected end of file in DOCTYPE.")})

886

_(u"Unexpected end of file in DOCTYPE.")})

869

887

self.currentToken["correct"] = False

870

888

self.tokenQueue.append(self.currentToken)

871

889

self.state = self.states["data"]

878

896

if data in spaceCharacters:

879

897

pass

880

898

elif data == "\"":

881

self.currentToken["systemId"] = ""

899

self.currentToken["systemId"] = u""

882

900

self.state = self.states["doctypeSystemIdentifierDoubleQuoted"]

883

901

elif data == "'":

884

self.currentToken["systemId"] = ""

902

self.currentToken["systemId"] = u""

885

903

self.state = self.states["doctypeSystemIdentifierSingleQuoted"]

886

904

elif data == ">":

887

905

self.tokenQueue.append(self.currentToken)

888

906

self.state = self.states["data"]

889

907

elif data == EOF:

890

908

self.tokenQueue.append({"type": "ParseError", "data":

891

_("Unexpected end of file in DOCTYPE.")})

909

_(u"Unexpected end of file in DOCTYPE.")})

892

910

self.currentToken["correct"] = False

893

911

self.tokenQueue.append(self.currentToken)

894

912

self.state = self.states["data"]

895

913

else:

896

914

self.tokenQueue.append({"type": "ParseError", "data":

897

_("Unexpected character in DOCTYPE.")})

915

_(u"Unexpected character in DOCTYPE.")})

898

916

self.state = self.states["bogusDoctype"]

899

917

return True

900

918

903

921

if data in spaceCharacters:

904

922

pass

905

923

elif data == "\"":

906

self.currentToken["systemId"] = ""

924

self.currentToken["systemId"] = u""

907

925

self.state = self.states["doctypeSystemIdentifierDoubleQuoted"]

908

926

elif data == "'":

909

self.currentToken["systemId"] = ""

927

self.currentToken["systemId"] = u""

910

928

self.state = self.states["doctypeSystemIdentifierSingleQuoted"]

911

929

elif data == ">":

912

930

self.tokenQueue.append({"type": "ParseError", "data":

913

_("Unexpected character in DOCTYPE.")})

931

_(u"Unexpected character in DOCTYPE.")})

914

932

self.currentToken["correct"] = False

915

933

self.tokenQueue.append(self.currentToken)

916

934

self.state = self.states["data"]

917

935

elif data == EOF:

918

936

self.tokenQueue.append({"type": "ParseError", "data":

919

_("Unexpected end of file in DOCTYPE.")})

937

_(u"Unexpected end of file in DOCTYPE.")})

920

938

self.currentToken["correct"] = False

921

939

self.tokenQueue.append(self.currentToken)

922

940

self.state = self.states["data"]

923

941

else:

924

942

self.tokenQueue.append({"type": "ParseError", "data":

925

_("Unexpected character in DOCTYPE.")})

943

_(u"Unexpected character in DOCTYPE.")})

926

944

self.state = self.states["bogusDoctype"]

927

945

return True

928

946

932

950

self.state = self.states["afterDoctypeSystemIdentifier"]

933

951

elif data == EOF:

934

952

self.tokenQueue.append({"type": "ParseError", "data":

935

_("Unexpected end of file in DOCTYPE.")})

953

_(u"Unexpected end of file in DOCTYPE.")})

936

954

self.currentToken["correct"] = False

937

955

self.tokenQueue.append(self.currentToken)

938

956

self.state = self.states["data"]

946

964

self.state = self.states["afterDoctypeSystemIdentifier"]

947

965

elif data == EOF:

948

966

self.tokenQueue.append({"type": "ParseError", "data":

949

_("Unexpected end of file in DOCTYPE.")})

967

_(u"Unexpected end of file in DOCTYPE.")})

950

968

self.currentToken["correct"] = False

951

969

self.tokenQueue.append(self.currentToken)

952

970

self.state = self.states["data"]

963

981

self.state = self.states["data"]

964

982

elif data == EOF:

965

983

self.tokenQueue.append({"type": "ParseError", "data":

966

_("Unexpected end of file in DOCTYPE.")})

984

_(u"Unexpected end of file in DOCTYPE.")})

967

985

self.currentToken["correct"] = False

968

986

self.tokenQueue.append(self.currentToken)

969

987

self.state = self.states["data"]

970

988

else:

971

989

self.tokenQueue.append({"type": "ParseError", "data":

972

_("Unexpected character in DOCTYPE.")})

990

_(u"Unexpected character in DOCTYPE.")})

973

991

self.state = self.states["bogusDoctype"]

974

992

return True

975

993

983

1001

# XXX EMIT

984

1002

self.stream.unget(data)

985

1003

self.tokenQueue.append({"type": "ParseError", "data":

986

_("Unexpected end of file in bogus doctype.")})

1004

_(u"Unexpected end of file in bogus doctype.")})

987

1005

self.tokenQueue.append(self.currentToken)

988

1006

self.state = self.states["data"]

989

1007

else:

Older »