3
3
(C) 1997 Torben Weis (weis@kde.org)
4
4
(C) 1999,2001 Lars Knoll (knoll@kde.org)
5
5
(C) 2000,2001 Dirk Mueller (mueller@kde.org)
6
Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
6
Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
7
Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
8
9
This library is free software; you can redistribute it and/or
9
10
modify it under the terms of the GNU Library General Public
45
47
#include "HTMLIsIndexElement.h"
46
48
#include "HTMLMapElement.h"
47
49
#include "HTMLNames.h"
50
#include "HTMLParserQuirks.h"
48
51
#include "HTMLTableCellElement.h"
49
52
#include "HTMLTableRowElement.h"
50
53
#include "HTMLTableSectionElement.h"
51
54
#include "HTMLTokenizer.h"
52
55
#include "LocalizedStrings.h"
53
57
#include "Settings.h"
55
59
#include <wtf/StdLibExtras.h>
57
61
namespace WebCore {
59
63
using namespace HTMLNames;
61
65
static const unsigned cMaxRedundantTagDepth = 20;
62
66
static const unsigned cResidualStyleMaxDepth = 200;
68
static const int minBlockLevelTagPriority = 3;
70
// A cap on the number of tags with priority minBlockLevelTagPriority or higher
71
// allowed in m_blockStack. The cap is enforced by adding such new elements as
72
// siblings instead of children once it is reached.
73
static const size_t cMaxBlockDepth = 4096;
64
75
struct HTMLStackElem : Noncopyable {
65
76
HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
115
126
HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
118
, didRefCurrent(false)
129
, m_didRefCurrent(false)
120
132
, m_hasPElementInScope(NotInScope)
123
, haveFrameSet(false)
134
, m_haveContent(false)
135
, m_haveFrameSet(false)
124
136
, m_isParsingFragment(false)
125
137
, m_reportErrors(reportErrors)
126
138
, m_handlingResidualStyleAcrossBlocks(false)
127
, inStrayTableContent(0)
139
, m_inStrayTableContent(0)
140
, m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0)
131
144
HTMLParser::HTMLParser(DocumentFragment* frag)
132
: document(frag->document())
134
, didRefCurrent(true)
145
: m_document(frag->document())
147
, m_didRefCurrent(true)
136
150
, m_hasPElementInScope(NotInScope)
139
, haveFrameSet(false)
152
, m_haveContent(false)
153
, m_haveFrameSet(false)
140
154
, m_isParsingFragment(true)
141
155
, m_reportErrors(false)
142
156
, m_handlingResidualStyleAcrossBlocks(false)
143
, inStrayTableContent(0)
157
, m_inStrayTableContent(0)
158
, m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0)
149
164
HTMLParser::~HTMLParser()
156
171
void HTMLParser::reset()
158
173
ASSERT(!m_isParsingFragment);
160
setCurrent(document);
175
setCurrent(m_document);
165
haveFrameSet = false;
167
inStrayTableContent = 0;
180
m_haveFrameSet = false;
181
m_haveContent = false;
182
m_inStrayTableContent = 0;
169
184
m_currentFormElement = 0;
170
185
m_currentMapElement = 0;
172
187
m_isindexElement = 0;
174
189
m_skipModeTag = nullAtom;
192
m_parserQuirks->reset();
177
195
void HTMLParser::setCurrent(Node* newCurrent)
179
bool didRefNewCurrent = newCurrent && newCurrent != document;
197
bool didRefNewCurrent = newCurrent && newCurrent != m_document;
180
198
if (didRefNewCurrent)
181
199
newCurrent->ref();
184
current = newCurrent;
185
didRefCurrent = didRefNewCurrent;
202
m_current = newCurrent;
203
m_didRefCurrent = didRefNewCurrent;
188
206
PassRefPtr<Node> HTMLParser::parseToken(Token* t)
191
209
if (!t->beginTag && t->tagName == m_skipModeTag)
192
210
// Found the end tag for the current skip mode, so we're done skipping.
193
211
m_skipModeTag = nullAtom;
194
else if (current->localName() == t->tagName)
212
else if (m_current->localName() == t->tagName)
195
213
// Do not skip </iframe>.
196
214
// FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
202
220
// Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
203
if (t->isCloseTag(brTag) && document->inCompatMode()) {
221
if (t->isCloseTag(brTag) && m_document->inCompatMode()) {
204
222
reportError(MalformedBRError);
205
223
t->beginTag = true;
213
231
// Ignore spaces, if we're not inside a paragraph or other inline code.
214
232
// Do not alter the text if it is part of a scriptTag.
215
if (t->tagName == textAtom && t->text && current->localName() != scriptTag) {
216
if (inBody && !skipMode() && current->localName() != styleTag &&
217
current->localName() != titleTag && !t->text->containsOnlyWhitespace())
233
if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) {
234
if (m_inBody && !skipMode() && m_current->localName() != styleTag &&
235
m_current->localName() != titleTag && !t->text->containsOnlyWhitespace())
236
m_haveContent = true;
221
239
String text = t->text.get();
222
240
unsigned charsLeft = text.length();
223
241
while (charsLeft) {
224
242
// split large blocks of text to nodes of manageable size
225
n = Text::createWithLengthLimit(document, text, charsLeft);
243
n = Text::createWithLengthLimit(m_document, text, charsLeft);
226
244
if (!insertNode(n.get(), t->selfClosingTag))
278
296
void HTMLParser::parseDoctypeToken(DoctypeToken* t)
280
298
// Ignore any doctype after the first. Ignore doctypes in fragments.
281
if (document->doctype() || m_isParsingFragment || current != document)
299
if (m_document->doctype() || m_isParsingFragment || m_current != m_document)
284
302
// Make a new doctype node and set it as our doctype.
285
document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
303
m_document->addChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
288
static bool isTableSection(Node* n)
306
static bool isTableSection(const Node* n)
290
308
return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
293
static bool isTablePart(Node* n)
311
static bool isTablePart(const Node* n)
295
313
return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
296
314
isTableSection(n);
299
static bool isTableRelated(Node* n)
317
static bool isTableRelated(const Node* n)
301
319
return n->hasTagName(tableTag) || isTablePart(n);
316
334
// <table> is never allowed inside stray table content. Always pop out of the stray table content
317
335
// and close up the first table, and then start the second table as a sibling.
318
if (inStrayTableContent && localName == tableTag)
336
if (m_inStrayTableContent && localName == tableTag)
319
337
popBlock(tableTag);
339
if (tagPriority >= minBlockLevelTagPriority) {
340
while (m_blocksInStack >= cMaxBlockDepth)
341
popBlock(m_blockStack->tagName);
344
if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n))
321
347
// let's be stupid and just try to insert it.
322
348
// this should work if the document is well-formed
323
Node* newNode = current->addChild(n);
349
Node* newNode = m_current->addChild(n);
325
351
return handleError(n, flat, localName, tagPriority); // Try to handle the error.
327
353
// don't push elements without end tags (e.g., <img>) on the stack
328
bool parentAttached = current->attached();
354
bool parentAttached = m_current->attached();
329
355
if (tagPriority > 0 && !flat) {
330
if (newNode == current) {
356
if (newNode == m_current) {
331
357
// This case should only be hit when a demoted <form> is placed inside a table.
332
358
ASSERT(localName == formTag);
333
reportError(FormInsideTablePartError, &current->localName());
359
reportError(FormInsideTablePartError, &m_current->localName());
360
HTMLFormElement* form = static_cast<HTMLFormElement*>(n);
361
form->setDemoted(true);
335
363
// The pushBlock function transfers ownership of current to the block stack
336
// so we're guaranteed that didRefCurrent is false. The code below is an
364
// so we're guaranteed that m_didRefCurrent is false. The code below is an
337
365
// optimized version of setCurrent that takes advantage of that fact and also
338
366
// assumes that newNode is neither 0 nor a pointer to the document.
339
367
pushBlock(localName, tagPriority);
340
368
newNode->beginParsingChildren();
341
ASSERT(!didRefCurrent);
369
ASSERT(!m_didRefCurrent);
344
didRefCurrent = true;
372
m_didRefCurrent = true;
346
374
if (parentAttached && !n->attached() && !m_isParsingFragment)
364
395
if (n->isHTMLElement()) {
365
396
HTMLElement* h = static_cast<HTMLElement*>(n);
366
397
if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
367
if (inStrayTableContent && !isTableRelated(current)) {
368
reportError(MisplacedTablePartError, &localName, &current->localName());
398
if (m_inStrayTableContent && !isTableRelated(m_current)) {
399
reportError(MisplacedTablePartError, &localName, &m_current->localName());
369
400
// pop out to the nearest enclosing table-related tag.
370
while (blockStack && !isTableRelated(current))
401
while (m_blockStack && !isTableRelated(m_current))
372
403
return insertNode(n);
374
405
} else if (h->hasLocalName(headTag)) {
375
if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
406
if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) {
376
407
reportError(MisplacedHeadError);
379
410
} else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
380
411
bool createdHead = false;
383
414
createdHead = true;
386
417
if (!createdHead)
387
reportError(MisplacedHeadContentError, &localName, &current->localName());
388
if (head->addChild(n)) {
418
reportError(MisplacedHeadContentError, &localName, &m_current->localName());
419
if (m_head->addChild(n)) {
389
420
if (!n->attached() && !m_isParsingFragment)
395
426
} else if (h->hasLocalName(htmlTag)) {
396
if (!current->isDocumentNode() ) {
397
if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) {
427
if (!m_current->isDocumentNode() ) {
428
if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag)) {
398
429
reportError(RedundantHTMLBodyError, &localName);
399
430
// we have another <HTML> element.... apply attributes to existing one
400
431
// make sure we don't overwrite already existing attributes
401
NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
402
Element* existingHTML = static_cast<Element*>(document->documentElement());
403
NamedAttrMap* bmap = existingHTML->attributes(false);
432
NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
433
Element* existingHTML = static_cast<Element*>(m_document->documentElement());
434
NamedNodeMap* bmap = existingHTML->attributes(false);
404
435
for (unsigned l = 0; map && l < map->length(); ++l) {
405
436
Attribute* it = map->attributeItem(l);
406
437
if (!bmap->getAttributeItem(it->name()))
412
} else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
443
} else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) {
413
444
bool createdHead = false;
416
447
createdHead = true;
419
Node* newNode = head->addChild(n);
450
Node* newNode = m_head->addChild(n);
421
452
setSkipMode(h->tagQName());
425
456
if (!createdHead)
426
reportError(MisplacedHeadContentError, &localName, &current->localName());
457
reportError(MisplacedHeadContentError, &localName, &m_current->localName());
428
459
pushBlock(localName, tagPriority);
429
460
newNode->beginParsingChildren();
436
467
setSkipMode(h->tagQName());
439
470
} else if (h->hasLocalName(bodyTag)) {
440
if (inBody && document->body()) {
471
if (m_inBody && m_document->body()) {
441
472
// we have another <BODY> element.... apply attributes to existing one
442
473
// make sure we don't overwrite already existing attributes
443
474
// some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
444
475
reportError(RedundantHTMLBodyError, &localName);
445
NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
446
Element* existingBody = document->body();
447
NamedAttrMap* bmap = existingBody->attributes(false);
476
NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
477
Element* existingBody = m_document->body();
478
NamedNodeMap* bmap = existingBody->attributes(false);
448
479
for (unsigned l = 0; map && l < map->length(); ++l) {
449
480
Attribute* it = map->attributeItem(l);
450
481
if (!bmap->getAttributeItem(it->name()))
455
else if (!current->isDocumentNode())
486
else if (!m_current->isDocumentNode())
457
488
} else if (h->hasLocalName(areaTag)) {
458
489
if (m_currentMapElement) {
459
reportError(MisplacedAreaError, &current->localName());
490
reportError(MisplacedAreaError, &m_current->localName());
460
491
m_currentMapElement->addChild(n);
461
492
if (!n->attached() && !m_isParsingFragment)
467
498
} else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
468
if (isTableRelated(current)) {
469
while (blockStack && isTablePart(current))
499
if (isTableRelated(m_current)) {
500
while (m_blockStack && isTablePart(m_current))
471
502
return insertNode(n);
474
} else if (n->isCommentNode() && !head)
505
} else if (n->isCommentNode() && !m_head)
477
508
// 2. Next we examine our currently active element to do some further error handling.
478
if (current->isHTMLElement()) {
479
HTMLElement* h = static_cast<HTMLElement*>(current);
509
if (m_current->isHTMLElement()) {
510
HTMLElement* h = static_cast<HTMLElement*>(m_current);
480
511
const AtomicString& currentTagName = h->localName();
481
512
if (h->hasLocalName(htmlTag)) {
482
513
HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
485
516
elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
486
517
elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
487
518
elt->hasLocalName(baseTag))) {
489
head = new HTMLHeadElement(headTag, document);
490
insertNode(head.get());
520
m_head = new HTMLHeadElement(headTag, m_document);
521
insertNode(m_head.get());
496
527
if (t->containsOnlyWhitespace())
500
e = new HTMLBodyElement(bodyTag, document);
530
if (!m_haveFrameSet) {
531
// Ensure that head exists.
532
// But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
533
if (shouldCreateImplicitHead(m_document))
537
e = new HTMLBodyElement(bodyTag, m_document);
511
548
// This means the body starts here...
549
if (!m_haveFrameSet) {
550
ASSERT(currentTagName == headTag);
513
551
popBlock(currentTagName);
514
e = new HTMLBodyElement(bodyTag, document);
552
e = new HTMLBodyElement(bodyTag, m_document);
571
609
pushBlock(localName, tagPriority);
572
610
n->beginParsingChildren();
574
inStrayTableContent++;
575
blockStack->strayTableContent = true;
612
m_inStrayTableContent++;
613
m_blockStack->strayTableContent = true;
582
if (current->hasTagName(trTag)) {
620
if (m_current->hasTagName(trTag)) {
583
621
reportError(TablePartRequiredError, &localName, &tdTag.localName());
584
e = new HTMLTableCellElement(tdTag, document);
585
} else if (current->hasTagName(tableTag)) {
622
e = new HTMLTableCellElement(tdTag, m_document);
623
} else if (m_current->hasTagName(tableTag)) {
586
624
// Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
587
625
// and it isn't really a parse error per se.
588
e = new HTMLTableSectionElement(tbodyTag, document);
626
e = new HTMLTableSectionElement(tbodyTag, m_document);
590
628
reportError(TablePartRequiredError, &localName, &trTag.localName());
591
e = new HTMLTableRowElement(trTag, document);
629
e = new HTMLTableRowElement(trTag, m_document);
622
660
popBlock(currentTagName);
624
662
} else if (!h->hasLocalName(bodyTag)) {
625
if (isInline(current)) {
663
if (isInline(m_current)) {
626
664
popInlineBlocks();
630
} else if (current->isDocumentNode()) {
668
} else if (m_current->isDocumentNode()) {
631
669
if (n->isTextNode()) {
632
670
Text* t = static_cast<Text*>(n);
633
671
if (t->containsOnlyWhitespace())
637
if (!document->documentElement()) {
638
e = new HTMLHtmlElement(htmlTag, document);
675
if (!m_document->documentElement()) {
676
e = new HTMLHtmlElement(htmlTag, m_document);
644
682
// 3. If we couldn't handle the error, just return false and attempt to error-correct again.
646
reportError(IgnoredContentError, &localName, &current->localName());
684
reportError(IgnoredContentError, &localName, &m_current->localName());
649
687
return insertNode(n);
655
693
bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
657
result = new Text(document, t->text.get());
695
result = Text::create(m_document, t->text.get());
661
699
bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
663
result = new Comment(document, t->text.get());
701
result = Comment::create(m_document, t->text.get());
667
705
bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
669
if (!head || current->localName() == htmlTag) {
670
head = new HTMLHeadElement(headTag, document);
707
if (!m_head || m_current->localName() == htmlTag) {
708
m_head = new HTMLHeadElement(headTag, m_document);
673
711
reportError(MisplacedHeadError);
677
715
bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
679
717
// body no longer allowed if we have a frameset
721
// Ensure that head exists (unless parsing a fragment).
722
// But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
723
if (!m_isParsingFragment && shouldCreateImplicitHead(m_document))
682
726
popBlock(headTag);
687
731
bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
689
733
popBlock(headTag);
690
if (inBody && !haveFrameSet && !haveContent) {
734
if (m_inBody && !m_haveFrameSet && !m_haveContent) {
691
735
popBlock(bodyTag);
692
736
// ### actually for IE document.body returns the now hidden "body" element
693
737
// we can't implement that behaviour now because it could cause too many
694
738
// regressions and the headaches are not worth the work as long as there is
695
739
// no site actually relying on that detail (Dirk)
696
if (document->body())
697
document->body()->setAttribute(styleAttr, "display:none");
740
if (m_document->body())
741
m_document->body()->setAttribute(styleAttr, "display:none");
700
if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
744
if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag)
746
m_haveFrameSet = true;
709
753
// Only create a new form if we're not already inside one.
710
754
// This is consistent with other browsers' behavior.
711
755
if (!m_currentFormElement) {
712
m_currentFormElement = new HTMLFormElement(formTag, document);
756
m_currentFormElement = new HTMLFormElement(formTag, m_document);
713
757
result = m_currentFormElement;
714
758
pCloserCreateErrorCheck(t, result);
719
763
bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
721
765
RefPtr<Node> n = handleIsindex(t);
723
767
m_isindexElement = n.release();
725
769
t->selfClosingTag = true;
796
bool HTMLParser::rpCreateErrorCheck(Token*, RefPtr<Node>&)
803
bool HTMLParser::rtCreateErrorCheck(Token*, RefPtr<Node>&)
752
810
bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
754
812
popBlock(t->tagName);
797
855
bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
799
857
if (!m_isParsingFragment) {
800
Settings* settings = document->settings();
858
Settings* settings = m_document->settings();
801
859
if (settings && settings->isJavaScriptEnabled())
802
860
setSkipMode(noscriptTag);
814
872
bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
816
if (document->inCompatMode())
874
if (m_document->inCompatMode())
818
876
if (hasPElementInScope())
823
881
bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
825
m_currentMapElement = new HTMLMapElement(mapTag, document);
883
m_currentMapElement = new HTMLMapElement(mapTag, m_document);
826
884
result = m_currentMapElement;
863
921
gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
864
922
gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
865
923
gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
924
gFunctionMap.set(navTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
866
925
gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
867
926
gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
868
927
gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
869
929
gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
870
931
gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
871
932
gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
872
933
gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
873
934
gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
935
gFunctionMap.set(rpTag.localName().impl(), &HTMLParser::rpCreateErrorCheck);
936
gFunctionMap.set(rtTag.localName().impl(), &HTMLParser::rtCreateErrorCheck);
874
937
gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
875
938
gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
876
939
gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
893
956
if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
894
957
proceed = (this->*errorCheckFunc)(t, result);
896
result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), document, m_currentFormElement.get());
959
result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get());
897
960
return result.release();
903
966
// about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
904
967
// nested tags of the same type before just ignoring them altogether.
906
for (HTMLStackElem* curr = blockStack;
969
for (HTMLStackElem* curr = m_blockStack;
907
970
i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
908
971
curr = curr->next, i++) { }
909
972
return i != cMaxRedundantTagDepth;
926
989
else if (t->tagName == pTag)
927
990
checkForCloseTagErrors = false;
929
HTMLStackElem* oldElem = blockStack;
992
HTMLStackElem* oldElem = m_blockStack;
930
993
popBlock(t->tagName, checkForCloseTagErrors);
931
if (oldElem == blockStack && t->tagName == pTag) {
994
if (oldElem == m_blockStack && t->tagName == pTag) {
932
995
// We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
933
996
// this as a valid break, i.e., <p></p>. So go ahead and make the empty
972
1035
e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
973
1036
e->hasLocalName(noembedTag))
1038
#if !ENABLE(XHTMLMP)
975
1039
if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
976
Settings* settings = document->settings();
1040
Settings* settings = m_document->settings();
977
1041
if (settings && settings->isJavaScriptEnabled())
1028
1093
unaffectedTags.add(optgroupTag.localName().impl());
1029
1094
unaffectedTags.add(selectTag.localName().impl());
1030
1095
unaffectedTags.add(objectTag.localName().impl());
1096
unaffectedTags.add(datagridTag.localName().impl());
1097
unaffectedTags.add(datalistTag.localName().impl());
1033
1100
return !unaffectedTags.contains(tagName.impl());
1044
1111
// Find the outermost element that crosses over to a higher level. If there exists another higher-level
1045
1112
// element, we will do another pass, until we have corrected the innermost one.
1046
1113
ExceptionCode ec = 0;
1047
HTMLStackElem* curr = blockStack;
1114
HTMLStackElem* curr = m_blockStack;
1048
1115
HTMLStackElem* prev = 0;
1049
1116
HTMLStackElem* prevMaxElem = 0;
1070
1137
Node* residualElem = prev->node;
1071
Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
1138
Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current;
1072
1139
Node* parentElem = elem->node;
1074
1141
// Check to see if the reparenting that is going to occur is allowed according to the DOM.
1217
1284
// <table><b><i><form></b></form></i></table>
1218
1285
// Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
1219
1286
if (strayTableContent)
1220
inStrayTableContent--;
1287
m_inStrayTableContent--;
1222
1289
// Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1223
1290
// In the above example, Goo should stay italic.
1224
1291
// We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
1226
HTMLStackElem* curr = blockStack;
1293
HTMLStackElem* curr = m_blockStack;
1227
1294
HTMLStackElem* residualStyleStack = 0;
1228
1295
unsigned stackDepth = 1;
1229
1296
unsigned redundantStyleCount = 0;
1273
1340
if (malformedTableParent)
1274
1341
malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
1276
current->appendChild(newNode, ec);
1343
m_current->appendChild(newNode, ec);
1277
1344
// FIXME: Is it really OK to ignore the exceptions here?
1279
1346
// Now push a new stack element for this node we just created.
1283
1350
// Set our strayTableContent boolean if needed, so that the reopened tag also knows
1284
1351
// that it is inside a malformed table.
1285
blockStack->strayTableContent = malformedTableParent != 0;
1286
if (blockStack->strayTableContent)
1287
inStrayTableContent++;
1352
m_blockStack->strayTableContent = malformedTableParent != 0;
1353
if (m_blockStack->strayTableContent)
1354
m_inStrayTableContent++;
1289
1356
// Clear our malformed table parent variable.
1290
1357
malformedTableParent = 0;
1303
1370
void HTMLParser::pushBlock(const AtomicString& tagName, int level)
1305
blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
1306
didRefCurrent = false;
1372
m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack);
1373
if (level >= minBlockLevelTagPriority)
1375
m_didRefCurrent = false;
1307
1376
if (tagName == pTag)
1308
1377
m_hasPElementInScope = InScope;
1309
1378
else if (isScopingTag(tagName))
1313
1382
void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
1315
HTMLStackElem* elem = blockStack;
1384
HTMLStackElem* elem = m_blockStack;
1386
if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName))
1317
1389
int maxLevel = 0;
1319
1391
while (elem && (elem->tagName != tagName)) {
1340
1412
HTMLStackElem* residualStyleStack = 0;
1341
1413
Node* malformedTableParent = 0;
1415
elem = m_blockStack;
1344
1416
unsigned stackDepth = 1;
1345
1417
unsigned redundantStyleCount = 0;
1347
1419
if (elem->tagName == tagName) {
1348
int strayTable = inStrayTableContent;
1420
int strayTable = m_inStrayTableContent;
1353
1425
// explicit <tbody> or <tr>.
1354
1426
// If we end up needing to reopen residual style tags, the root of the reopened chain
1355
1427
// must also know that it is the root of malformed content inside a <tbody>/<tr>.
1356
if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1357
Node* curr = current;
1428
if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) {
1429
Node* curr = m_current;
1358
1430
while (curr && !curr->hasTagName(tableTag))
1359
1431
curr = curr->parentNode();
1360
1432
malformedTableParent = curr ? curr->parentNode() : 0;
1399
1471
inline HTMLStackElem* HTMLParser::popOneBlockCommon()
1401
HTMLStackElem* elem = blockStack;
1473
HTMLStackElem* elem = m_blockStack;
1403
1475
// Form elements restore their state during the parsing process.
1404
1476
// Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
1405
if (current && elem->node != current)
1406
current->finishParsingChildren();
1477
if (m_current && elem->node != m_current)
1478
m_current->finishParsingChildren();
1408
blockStack = elem->next;
1409
current = elem->node;
1410
didRefCurrent = elem->didRefNode;
1480
if (m_blockStack->level >= minBlockLevelTagPriority) {
1481
ASSERT(m_blocksInStack > 0);
1484
m_blockStack = elem->next;
1485
m_current = elem->node;
1486
m_didRefCurrent = elem->didRefNode;
1412
1488
if (elem->strayTableContent)
1413
inStrayTableContent--;
1489
m_inStrayTableContent--;
1415
1491
if (elem->tagName == pTag)
1416
1492
m_hasPElementInScope = NotInScope;
1423
1499
void HTMLParser::popOneBlock()
1425
1501
// Store the current node before popOneBlockCommon overwrites it.
1426
Node* lastCurrent = current;
1427
bool didRefLastCurrent = didRefCurrent;
1502
Node* lastCurrent = m_current;
1503
bool didRefLastCurrent = m_didRefCurrent;
1429
1505
delete popOneBlockCommon();
1438
1514
// See the two callers for details.
1440
1516
// Store the current node before popOneBlockCommon overwrites it.
1441
Node* lastCurrent = current;
1442
bool didRefLastCurrent = didRefCurrent;
1517
Node* lastCurrent = m_current;
1518
bool didRefLastCurrent = m_didRefCurrent;
1444
1520
// Pop the block, but don't deref the current node as popOneBlock does because
1445
1521
// we'll be using the pointer in the new stack element.
1448
1524
// Transfer the current node into the stack element.
1449
1525
// No need to deref the old elem->node because popOneBlockCommon transferred
1450
// it into the current/didRefCurrent fields.
1526
// it into the m_current/m_didRefCurrent fields.
1451
1527
elem->node = lastCurrent;
1452
1528
elem->didRefNode = didRefLastCurrent;
1453
1529
elem->next = head;
1457
1533
void HTMLParser::checkIfHasPElementInScope()
1459
1535
m_hasPElementInScope = NotInScope;
1460
HTMLStackElem* elem = blockStack;
1536
HTMLStackElem* elem = m_blockStack;
1462
1538
const AtomicString& tagName = elem->tagName;
1463
1539
if (tagName == pTag) {
1472
1548
void HTMLParser::popInlineBlocks()
1474
while (blockStack && isInline(current))
1550
while (m_blockStack && isInline(m_current))
1478
1554
void HTMLParser::freeBlock()
1556
while (m_blockStack)
1558
ASSERT(!m_blocksInStack);
1484
1561
void HTMLParser::createHead()
1486
if (head || !document->documentElement())
1489
head = new HTMLHeadElement(headTag, document);
1490
HTMLElement* body = document->body();
1566
if (!m_document->documentElement()) {
1567
insertNode(new HTMLHtmlElement(htmlTag, m_document));
1568
ASSERT(m_document->documentElement());
1571
m_head = new HTMLHeadElement(headTag, m_document);
1572
HTMLElement* body = m_document->body();
1491
1573
ExceptionCode ec = 0;
1492
document->documentElement()->insertBefore(head.get(), body, ec);
1574
m_document->documentElement()->insertBefore(m_head.get(), body, ec);
1496
1578
// If the body does not exist yet, then the <head> should be pushed as the current block.
1497
if (head && !body) {
1498
pushBlock(head->localName(), head->tagPriority());
1499
setCurrent(head.get());
1579
if (m_head && !body) {
1580
pushBlock(m_head->localName(), m_head->tagPriority());
1581
setCurrent(m_head.get());
1503
1585
PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
1505
RefPtr<Node> n = new HTMLDivElement(divTag, document);
1587
RefPtr<Node> n = new HTMLDivElement(divTag, m_document);
1507
1589
NamedMappedAttrMap* attrs = t->attrs.get();
1509
RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, document, m_currentFormElement.get());
1591
RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, m_document, m_currentFormElement.get());
1510
1592
isIndex->setAttributeMap(attrs);
1511
1593
isIndex->setAttribute(typeAttr, "khtml_isindex");
1520
n->addChild(new HTMLHRElement(hrTag, document));
1521
n->addChild(new Text(document, text));
1602
n->addChild(new HTMLHRElement(hrTag, m_document));
1603
n->addChild(Text::create(m_document, text));
1522
1604
n->addChild(isIndex.release());
1523
n->addChild(new HTMLHRElement(hrTag, document));
1605
n->addChild(new HTMLHRElement(hrTag, m_document));
1525
1607
return n.release();
1528
1610
void HTMLParser::startBody()
1535
1617
if (m_isindexElement) {
1536
1618
insertNode(m_isindexElement.get(), true /* don't descend into this node */);
1541
1623
void HTMLParser::finished()
1543
1625
// In the case of a completely empty document, here's the place to create the HTML element.
1544
if (current && current->isDocumentNode() && !document->documentElement())
1545
insertNode(new HTMLHtmlElement(htmlTag, document));
1626
if (m_current && m_current->isDocumentNode() && !m_document->documentElement())
1627
insertNode(new HTMLHtmlElement(htmlTag, m_document));
1547
1629
// This ensures that "current" is not left pointing to a node when the document is destroyed.
1551
1633
// Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
1552
1634
if (!m_isParsingFragment)
1553
document->finishedParsing();
1635
m_document->finishedParsing();
1556
1638
void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
1558
Frame* frame = document->frame();
1640
Frame* frame = m_document->frame();
1562
HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
1644
HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(m_document->tokenizer());
1563
1645
int lineNumber = htmlTokenizer->lineNumber() + 1;
1565
1647
AtomicString tag1;
1592
1674
message.replace("%tag1", tag1);
1593
1675
message.replace("%tag2", tag2);
1595
frame->domWindow()->console()->addMessage(HTMLMessageSource,
1677
frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType,
1596
1678
isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
1597
message, lineNumber, document->url().string());
1679
message, lineNumber, m_document->url().string());
1682
#ifdef BUILDING_ON_LEOPARD
1683
bool shouldCreateImplicitHead(Document* document)
1687
Settings* settings = document->page() ? document->page()->settings() : 0;
1688
return settings ? !settings->needsLeopardMailQuirks() : true;
1690
#elif defined(BUILDING_ON_TIGER)
1691
bool shouldCreateImplicitHead(Document* document)
1695
Settings* settings = document->page() ? document->page()->settings() : 0;
1696
return settings ? !settings->needsTigerMailQuirks() : true;