1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=2 sw=2 et tw=80: */
3
/* ***** BEGIN LICENSE BLOCK *****
4
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
* The contents of this file are subject to the Mozilla Public License Version
7
* 1.1 (the "License"); you may not use this file except in compliance with
8
* the License. You may obtain a copy of the License at
9
* http://www.mozilla.org/MPL/
11
* Software distributed under the License is distributed on an "AS IS" basis,
12
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
* for the specific language governing rights and limitations under the
16
* The Original Code is mozilla.org code.
18
* The Initial Developer of the Original Code is
19
* Netscape Communications Corporation.
20
* Portions created by the Initial Developer are Copyright (C) 1998
21
* the Initial Developer. All Rights Reserved.
24
* Ryan Jones <sciguyryan@gmail.com>
26
* Alternatively, the contents of this file may be used under the terms of
27
* either of the GNU General Public License Version 2 or later (the "GPL"),
28
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29
* in which case the provisions of the GPL or the LGPL are applicable instead
30
* of those above. If you wish to allow use of your version of this file only
31
* under the terms of either the GPL or the LGPL, and not to allow others to
32
* use your version of this file under the terms of the MPL, indicate your
33
* decision by deleting the provisions above and replace them with the notice
34
* and other provisions required by the GPL or the LGPL. If you do not delete
35
* the provisions above, a recipient may use your version of this file under
36
* the terms of any one of the MPL, the GPL or the LGPL.
38
* ***** END LICENSE BLOCK ***** */
41
* nsIContentSerializer implementation that can be used with an
42
* nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
43
* string that could be parsed into more or less the original DOM.
46
#include "nsHTMLContentSerializer.h"
48
#include "nsIDOMElement.h"
49
#include "nsIDOMText.h"
50
#include "nsIContent.h"
51
#include "nsIDocument.h"
52
#include "nsIDOMDocument.h"
53
#include "nsINameSpaceManager.h"
55
#include "nsUnicharUtils.h"
56
#include "nsXPIDLString.h"
57
#include "nsIServiceManager.h"
58
#include "nsIDocumentEncoder.h"
59
#include "nsGkAtoms.h"
61
#include "nsNetUtil.h"
63
#include "nsITextToSubURI.h"
65
#include "nsIParserService.h"
66
#include "nsContentUtils.h"
67
#include "nsLWBrkCIID.h"
68
#include "nsIScriptElement.h"
69
#include "nsAttrName.h"
71
// String fragments used when writing markup. kIndentStr is appended once per
// indentation level by StartIndentation and EndIndentation; kLessThan,
// kGreaterThan and kEndTag delimit the start and end tags.
#define kIndentStr NS_LITERAL_STRING(" ")
72
#define kLessThan NS_LITERAL_STRING("<")
73
#define kGreaterThan NS_LITERAL_STRING(">")
74
#define kEndTag NS_LITERAL_STRING("</")
76
// Attribute-name stem compared (after a leading '_' or '-') in
// SerializeAttributes to filter editor-internal attributes.
static const char kMozStr[] = "moz";
78
// Line length threshold used by HasLongLines to decide that a text node
// should be rewrapped.
static const PRInt32 kLongLineLen = 128;
80
// Factory for the HTML content serializer: allocates a new
// nsHTMLContentSerializer and returns it through aSerializer by way of
// CallQueryInterface. NS_ERROR_OUT_OF_MEMORY is the failure result; the
// condition guarding that return is not visible in this extract.
nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
82
nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
84
return NS_ERROR_OUT_OF_MEMORY;
87
return CallQueryInterface(it, aSerializer);
90
// Constructor. The visible initializers start the serializer with
// line-break-sequence skipping off, whole-document mode off, and
// mNeedLineBreaker set (AppendText tests and clears that flag on first use).
// Other member initializers may exist on lines not visible in this extract.
nsHTMLContentSerializer::nsHTMLContentSerializer()
95
mMayIgnoreLineBreakSequence(PR_FALSE),
96
mIsWholeDocument(PR_FALSE),
98
mNeedLineBreaker(PR_TRUE)
102
// Destructor. Asserts that every olState pushed for an OL element has been
// popped again, then walks whatever is still left on mOLStateStack.
// NOTE(review): the loop removes the element at index i and then increments
// i. Assuming RemoveElementAt shifts later entries down, the entry that moves
// into slot i is skipped, so with more than one leftover entry only every
// other one is visited. Popping from the back, as AppendElementEnd does,
// would visit all of them. Confirm and fix against the complete source.
nsHTMLContentSerializer::~nsHTMLContentSerializer()
104
NS_ASSERTION(mOLStateStack.Count() == 0, "Expected OL State stack to be empty");
105
if (mOLStateStack.Count() > 0){
106
for (PRInt32 i = 0; i < mOLStateStack.Count(); i++){
107
olState* state = (olState*)mOLStateStack[i];
109
mOLStateStack.RemoveElementAt(i);
115
// Configures the serializer for one serialization run.
//   aFlags           - nsIDocumentEncoder output flags; the code below reads
//                      them through mFlags (the assignment is not visible in
//                      this extract).
//   aWrapColumn      - stored as mMaxColumn, the column at which text wraps.
//   aCharSet         - document charset (its use is not visible here).
//   aIsCopying       - stored as mIsCopying.
//   aIsWholeDocument - stored as mIsWholeDocument.
// Also derives mDoFormat and mBodyOnly from the flags, selects the line break
// string, and creates the entity converter when W3C entities are requested.
nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
116
const char* aCharSet, PRBool aIsCopying,
117
PRBool aIsWholeDocument)
124
mMaxColumn = aWrapColumn;
127
mIsWholeDocument = aIsWholeDocument;
128
mIsCopying = aIsCopying;
129
mIsFirstChildOfOL = PR_FALSE;
130
mDoFormat = (mFlags & nsIDocumentEncoder::OutputFormatted) ? PR_TRUE
132
mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE
134
// Set the line break character:
135
// CR and LF flags together select CRLF; a single flag selects that
// character alone; otherwise the platform default NS_LINEBREAK is used.
if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
136
&& (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
137
mLineBreak.AssignLiteral("\r\n");
139
else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac
140
mLineBreak.AssignLiteral("\r");
142
else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM
143
mLineBreak.AssignLiteral("\n");
146
mLineBreak.AssignLiteral(NS_LINEBREAK); // Platform/default
153
// set up entity converter if we are going to need it
154
if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
155
mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
162
// Serializes the character data of a text node into aStr.
//   aText        - the text node; must not be null (NS_ENSURE_ARG).
//   aStartOffset - start of the range to serialize, passed to AppendTextData.
//   aEndOffset   - end of the range to serialize, passed to AppendTextData.
// After fetching the data, one of four output strategies is chosen:
//   1. line-feed conversion only (first branch; its condition is not visible
//      in this extract),
//   2. raw output when OutputRaw is set,
//   3. unformatted output, rewrapped only if HasLongLines reports long lines,
//   4. wrapped output otherwise.
// Returns NS_ERROR_FAILURE on the failure path after AppendTextData.
nsHTMLContentSerializer::AppendText(nsIDOMText* aText,
163
PRInt32 aStartOffset,
167
NS_ENSURE_ARG(aText);
169
// One-time setup on the first text node seen by this serializer.
if (mNeedLineBreaker) {
170
mNeedLineBreaker = PR_FALSE;
172
nsCOMPtr<nsIDOMDocument> domDoc;
173
aText->GetOwnerDocument(getter_AddRefs(domDoc));
174
nsCOMPtr<nsIDocument> document = do_QueryInterface(domDoc);
180
rv = AppendTextData((nsIDOMNode*)aText, aStartOffset,
181
aEndOffset, data, PR_TRUE, PR_FALSE);
183
return NS_ERROR_FAILURE;
186
AppendToStringConvertLF(data, aStr);
188
else if (mFlags & nsIDocumentEncoder::OutputRaw) {
189
PRInt32 lastNewlineOffset = data.RFindChar('\n');
190
AppendToString(data, aStr);
191
// NOTE(review): Length() minus the newline offset also counts the newline
// position itself, so this is one more than the number of characters that
// follow the last newline. Confirm that is the intended column value.
if (lastNewlineOffset != kNotFound)
192
mColPos = data.Length() - lastNewlineOffset;
194
else if (!mDoFormat) {
195
PRInt32 lastNewlineOffset = kNotFound;
196
PRBool hasLongLines = HasLongLines(data, lastNewlineOffset);
198
// We have long lines, rewrap
199
AppendToStringWrapped(data, aStr, PR_FALSE);
200
if (lastNewlineOffset != kNotFound)
201
mColPos = data.Length() - lastNewlineOffset;
204
AppendToStringConvertLF(data, aStr);
208
AppendToStringWrapped(data, aStr, PR_FALSE);
214
// Helper for AppendToStringWrapped: consumes one run of whitespace.
//   aPos           - in/out cursor; on return points at the first char that
//                    was not handled here.
//   aEnd           - end of the input.
//   aSequenceStart - start of the current sequence as set by the caller.
//   aMayIgnoreStartOfLineWhitespaceSequence - in/out flag; cleared here when
//                    it causes the run to be dropped.
//   aOutputStr     - receives any line break written for the run.
// Depending on state the run is dropped, kept pending, or written as a line
// break; see the branch comments below.
void nsHTMLContentSerializer::AppendWrapped_WhitespaceSequence(
215
nsASingleFragmentString::const_char_iterator &aPos,
216
const nsASingleFragmentString::const_char_iterator aEnd,
217
const nsASingleFragmentString::const_char_iterator aSequenceStart,
218
PRBool &aMayIgnoreStartOfLineWhitespaceSequence,
219
nsAString &aOutputStr)
221
// Handle the complete sequence of whitespace.
222
// Continue to iterate until we find the first non-whitespace char.
223
// Updates "aPos" to point to the first unhandled char.
224
// Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag,
225
// as well as the other "global" state flags.
227
PRBool sawBlankOrTab = PR_FALSE;
228
PRBool leaveLoop = PR_FALSE;
234
sawBlankOrTab = PR_TRUE;
238
// do not increase mColPos,
239
// because we will reduce the whitespace to a single char
245
} while (!leaveLoop && aPos < aEnd);
248
// if we had previously been asked to add space,
249
// our situation has not changed
251
else if (!sawBlankOrTab && mMayIgnoreLineBreakSequence) {
253
mMayIgnoreLineBreakSequence = PR_FALSE;
255
else if (aMayIgnoreStartOfLineWhitespaceSequence) {
257
aMayIgnoreStartOfLineWhitespaceSequence = PR_FALSE;
261
if (mColPos + 1 >= mMaxColumn) {
262
// not much sense in delaying, we only have one slot left,
263
// let's write a break now
264
aOutputStr.Append(mLineBreak);
268
// do not write out yet, we may write out either a space or a linebreak
269
// let's delay writing it out until we know more
272
++mColPos; // eat a slot of available space
276
// Asian text usually does not contain spaces, therefore we should not
277
// transform a linebreak into a space.
278
// Since we only saw linebreaks, but no spaces or tabs,
279
// let's write a linebreak now.
280
aOutputStr.Append(mLineBreak);
281
mMayIgnoreLineBreakSequence = PR_TRUE;
287
// Helper for AppendToStringWrapped: consumes one run of non-whitespace and
// writes it to aOutputStr, wrapping at mMaxColumn where possible.
//   aPos           - in/out cursor; on return points at the first char that
//                    was not handled here.
//   aEnd           - end of the input.
//   aSequenceStart - start of the run being handled.
//   aMayIgnoreStartOfLineWhitespaceSequence - in/out flag; cleared on entry
//                    and set again after a wrap is written.
//   aOutputStr     - receives the text, pending space and line breaks.
// Strategy when the run does not fit on the current line: first try to break
// the line in front of the run and retry; otherwise ask the line breaker for
// a wrap position; as a last resort emit everything up to the next
// whitespace.
void nsHTMLContentSerializer::AppendWrapped_NonWhitespaceSequence(
288
nsASingleFragmentString::const_char_iterator &aPos,
289
const nsASingleFragmentString::const_char_iterator aEnd,
290
const nsASingleFragmentString::const_char_iterator aSequenceStart,
291
PRBool &aMayIgnoreStartOfLineWhitespaceSequence,
292
nsAString& aOutputStr)
294
mMayIgnoreLineBreakSequence = PR_FALSE;
295
aMayIgnoreStartOfLineWhitespaceSequence = PR_FALSE;
297
// Handle the complete sequence of non-whitespace in this block
298
// Iterate until we find the first whitespace char or an aEnd condition
299
// Updates "aPos" to point to the first unhandled char.
300
// Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag,
301
// as well as the other "global" state flags.
303
PRBool thisSequenceStartsAtBeginningOfLine = !mColPos;
304
PRBool onceAgainBecauseWeAddedBreakInFront;
305
PRBool foundWhitespaceInLoop;
308
onceAgainBecauseWeAddedBreakInFront = PR_FALSE;
309
foundWhitespaceInLoop = PR_FALSE;
312
if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
313
foundWhitespaceInLoop = PR_TRUE;
319
} while (mColPos < mMaxColumn && aPos < aEnd);
321
if (aPos == aEnd || foundWhitespaceInLoop) {
322
// there is enough room for the complete block we found
325
aOutputStr.Append(PRUnichar(' '));
326
mAddSpace = PR_FALSE;
329
aOutputStr.Append(aSequenceStart, aPos - aSequenceStart);
330
// We have not yet reached the max column, we will continue to
331
// fill the current line in the next outer loop iteration.
333
else { // mColPos == mMaxColumn
334
if (!thisSequenceStartsAtBeginningOfLine && mAddSpace) {
335
// We can avoid wrapping inside the run: break the line in front of it,
// rewind aPos to the start of the run and go around the outer loop again.
337
aOutputStr.Append(mLineBreak);
338
mAddSpace = PR_FALSE;
339
aPos = aSequenceStart;
341
thisSequenceStartsAtBeginningOfLine = PR_TRUE;
342
onceAgainBecauseWeAddedBreakInFront = PR_TRUE;
347
PRBool foundWrapPosition = PR_FALSE;
348
nsILineBreaker *lineBreaker = nsContentUtils::LineBreaker();
350
PRInt32 wrapPosition;
352
// Look for a break opportunity before the current position first ...
wrapPosition = lineBreaker->Prev(aSequenceStart,
353
(aEnd - aSequenceStart),
354
(aPos - aSequenceStart) + 1);
355
if (wrapPosition != NS_LINEBREAKER_NEED_MORE_TEXT) {
356
foundWrapPosition = PR_TRUE;
359
// ... and after it if that search did not succeed.
wrapPosition = lineBreaker->Next(aSequenceStart,
360
(aEnd - aSequenceStart),
361
(aPos - aSequenceStart));
362
if (wrapPosition != NS_LINEBREAKER_NEED_MORE_TEXT) {
363
foundWrapPosition = PR_TRUE;
367
if (foundWrapPosition) {
369
aOutputStr.Append(PRUnichar(' '));
370
mAddSpace = PR_FALSE;
373
aOutputStr.Append(aSequenceStart, wrapPosition);
374
aOutputStr.Append(mLineBreak);
375
aPos = aSequenceStart + wrapPosition;
377
aMayIgnoreStartOfLineWhitespaceSequence = PR_TRUE;
378
mMayIgnoreLineBreakSequence = PR_TRUE;
381
// try some simple fallback logic
382
// go forward up to the next whitespace position,
383
// in the worst case this will be all the rest of the data
386
if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
392
} while (aPos < aEnd);
395
aOutputStr.Append(PRUnichar(' '));
396
mAddSpace = PR_FALSE;
399
aOutputStr.Append(aSequenceStart, aPos - aSequenceStart);
403
} while (onceAgainBecauseWeAddedBreakInFront);
407
// Appends aStr to aOutputStr with line wrapping. The input is split into
// alternating whitespace and non-whitespace runs, each handed to the matching
// AppendWrapped_* helper, which advances pos.
//   aStr               - text to append.
//   aOutputStr         - destination string.
//   aTranslateEntities - NOTE(review): not referenced in the lines visible in
//                        this extract; confirm whether it is used at all.
nsHTMLContentSerializer::AppendToStringWrapped(const nsASingleFragmentString& aStr,
408
nsAString& aOutputStr,
409
PRBool aTranslateEntities)
411
nsASingleFragmentString::const_char_iterator pos, end, sequenceStart;
413
aStr.BeginReading(pos);
414
aStr.EndReading(end);
416
// if the current line already has text on it, such as a tag,
417
// leading whitespace is significant
419
PRBool mayIgnoreStartOfLineWhitespaceSequence = !mColPos;
424
// if beginning of a whitespace sequence
425
if (*pos == ' ' || *pos == '\n' || *pos == '\t') {
426
AppendWrapped_WhitespaceSequence(pos, end, sequenceStart,
427
mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
429
else { // any other non-whitespace char
430
AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart,
431
mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
437
nsHTMLContentSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
444
// Decides whether an attribute value holds script.
//   aAttrNameAtom - attribute name.
//   aValueString  - attribute value.
// For href and src, the text before the first ':' is taken as the scheme,
// stripped of whitespace and compared case-insensitively with the literal
// javascript. All other attributes are delegated to
// nsContentUtils::IsEventAttributeName. The actual return statements for the
// href/src branch are not visible in this extract.
nsHTMLContentSerializer::IsJavaScript(nsIAtom* aAttrNameAtom, const nsAString& aValueString)
446
if (aAttrNameAtom == nsGkAtoms::href ||
447
aAttrNameAtom == nsGkAtoms::src) {
448
static const char kJavaScript[] = "javascript";
449
PRInt32 pos = aValueString.FindChar(':');
450
if (pos < (PRInt32)(sizeof kJavaScript - 1))
452
nsAutoString scheme(Substring(aValueString, 0, pos));
453
scheme.StripWhitespace();
454
if ((scheme.Length() == (sizeof kJavaScript - 1)) &&
455
scheme.EqualsIgnoreCase(kJavaScript))
461
return nsContentUtils::IsEventAttributeName(aAttrNameAtom,
466
// Produces an escaped copy of aURI in aEscapedURI.
//   aURI        - the URI text to escape.
//   aEscapedURI - output; truncated first, then filled piece by piece.
// The URI is split at the reserved characters listed in the FindCharInSet
// call. Each piece between them is escaped and each reserved character is
// copied through unchanged. A piece is escaped through nsITextToSubURI using
// mCharset when that service was obtained and the piece is not ASCII,
// otherwise through nsEscape with url_Path on its UTF-8 form. javascript
// URIs take the early branch at the top (its body is not visible in this
// extract). Failures from the charset service are propagated with
// NS_ENSURE_SUCCESS.
nsHTMLContentSerializer::EscapeURI(const nsAString& aURI, nsAString& aEscapedURI)
468
// URL escape %xx cannot be used in JS.
469
// No escaping if the scheme is 'javascript'.
470
if (IsJavaScript(nsGkAtoms::href, aURI)) {
475
// nsITextToSubURI does charset convert plus uri escape
476
// This is needed to convert to a document charset which is needed to support existing browsers.
477
// But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
478
// See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
479
nsCOMPtr<nsITextToSubURI> textToSubURI;
480
nsAutoString uri(aURI); // in order to use FindCharInSet()
484
// The conversion service is only fetched when it can be needed: a charset
// is known and the URI contains non-ASCII characters.
if (!mCharset.IsEmpty() && !IsASCII(uri)) {
485
textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
486
NS_ENSURE_SUCCESS(rv, rv);
492
nsXPIDLCString escapedURI;
493
aEscapedURI.Truncate(0);
495
// Loop and escape parts by avoiding escaping reserved characters (and '%', '#' ).
496
while ((end = uri.FindCharInSet("%#;/?:@&=+$,", start)) != -1) {
497
part = Substring(aURI, start, (end-start));
498
if (textToSubURI && !IsASCII(part)) {
499
rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
500
NS_ENSURE_SUCCESS(rv, rv);
503
escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
505
AppendASCIItoUTF16(escapedURI, aEscapedURI);
507
// Append a reserved character without escaping.
508
part = Substring(aURI, end, 1);
509
aEscapedURI.Append(part);
513
if (start < (PRInt32) aURI.Length()) {
514
// Escape the remaining part.
515
part = Substring(aURI, start, aURI.Length()-start);
517
rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
518
NS_ENSURE_SUCCESS(rv, rv);
521
escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
523
AppendASCIItoUTF16(escapedURI, aEscapedURI);
530
// Writes the attributes of aContent to aStr.
//   aContent - element whose attributes are serialized.
//   aTagName - tag of that element; selects tag-specific handling.
//   aStr     - output string.
// Per attribute, as far as visible here: editor-internal names and values
// with the _moz or -moz stem are filtered, the LI value attribute is left to
// SerializeLIValueAttribute while copying, href and src may be made absolute
// and are URI-escaped unless they hold script, and the content attribute of a
// content-type meta element is rewritten to use mCharset in whole-document
// mode. The attribute is finally written with SerializeAttr.
// NOTE(review): the loop starts with index equal to count and the decrement
// is not visible in this extract; it has to run before GetAttrNameAt or the
// first lookup is out of range.
// NOTE(review): the result of GetAttrNameAt is dereferenced without a null
// check in the visible lines.
nsHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
535
PRUint32 index, count;
536
nsAutoString nameStr, valueStr;
538
count = aContent->GetAttrCount();
540
NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
542
// Loop backward over the attributes, since the order they are stored in is
543
// the opposite of the order they were parsed in (see bug 213347 for reason).
544
// index is unsigned, hence index >= 0 is always true.
545
for (index = count; index > 0; ) {
547
const nsAttrName* name = aContent->GetAttrNameAt(index);
548
PRInt32 namespaceID = name->NamespaceID();
549
nsIAtom* attrName = name->LocalName();
551
// Filter out any attribute starting with [-|_]moz
552
const char* sharedName;
553
attrName->GetUTF8String(&sharedName);
554
if ((('_' == *sharedName) || ('-' == *sharedName)) &&
555
!nsCRT::strncmp(sharedName+1, kMozStr, PRUint32(sizeof(kMozStr)-1))) {
558
aContent->GetAttr(namespaceID, attrName, valueStr);
561
// Filter out special case of <br type="_moz"> or <br _moz*>,
562
// used by the editor. Bug 16988. Yuck.
564
if (aTagName == nsGkAtoms::br && attrName == nsGkAtoms::type &&
565
StringBeginsWith(valueStr, _mozStr)) {
569
if (mIsCopying && mIsFirstChildOfOL && (aTagName == nsGkAtoms::li) &&
570
(attrName == nsGkAtoms::value)){
571
// This is handled separately in SerializeLIValueAttribute()
574
PRBool isJS = IsJavaScript(attrName, valueStr);
576
if (((attrName == nsGkAtoms::href) ||
577
(attrName == nsGkAtoms::src))) {
578
// Make all links absolute when converting only the selection:
579
if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
580
// Would be nice to handle OBJECT and APPLET tags,
581
// but that gets more complicated since we have to
582
// search the tag list for CODEBASE as well.
583
// For now, just leave them relative.
584
nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
587
rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
588
if (NS_SUCCEEDED(rv)) {
593
// Need to escape URI.
594
nsAutoString tempURI(valueStr);
595
if (!isJS && NS_FAILED(EscapeURI(tempURI, valueStr)))
599
if (mIsWholeDocument && aTagName == nsGkAtoms::meta &&
600
attrName == nsGkAtoms::content) {
601
// If we're serializing a <meta http-equiv="content-type">,
602
// use the proper value, rather than what's in the document.
604
aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
605
if (header.LowerCaseEqualsLiteral("content-type")) {
606
valueStr = NS_LITERAL_STRING("text/html; charset=") +
607
NS_ConvertASCIItoUTF16(mCharset);
611
attrName->ToString(nameStr);
613
/*If we already crossed the MaxColumn limit or
614
* if this attr name-value pair(including a space,=,opening and closing quotes) is greater than MaxColumn limit
615
* then start the attribute from a new line.
619
&& (mColPos >= mMaxColumn
620
|| ((PRInt32)(mColPos + nameStr.Length() +
621
valueStr.Length() + 4) > mMaxColumn))) {
622
aStr.Append(mLineBreak);
626
// Expand shorthand attribute.
627
if (IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
630
SerializeAttr(EmptyString(), nameStr, valueStr, aStr, !isJS);
635
// Writes the start tag of an element to aStr.
//   aElement         - element being serialized; must not be null.
//   aOriginalElement - passed to IsFirstChildOfOL for LI elements while
//                      copying.
//   aStr             - output string.
// Returns NS_ERROR_FAILURE when aElement does not implement nsIContent.
// Visible steps, in order: a BR inside PRE may be written as a plain line
// break; line break or pending space before the tag; indentation; the tag
// name; OL/LI bookkeeping while copying; attributes; the closing bracket; an
// optional line break after the tag; and, for HEAD in whole-document mode, a
// content-type META element using mCharset (the hasMeta test guarding that
// insertion is not visible in this extract).
nsHTMLContentSerializer::AppendElementStart(nsIDOMElement *aElement,
636
nsIDOMElement *aOriginalElement,
639
NS_ENSURE_ARG(aElement);
641
nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
642
if (!content) return NS_ERROR_FAILURE;
644
// The _moz_dirty attribute is emitted by the editor to
645
// indicate that this element should be pretty printed
646
// even if we're not in pretty printing mode
647
PRBool hasDirtyAttr = content->HasAttr(kNameSpaceID_None,
648
nsGkAtoms::mozdirty);
650
nsIAtom *name = content->Tag();
652
if (name == nsGkAtoms::br && mPreLevel > 0
653
&& (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre)) {
654
AppendToString(mLineBreak, aStr);
655
mMayIgnoreLineBreakSequence = PR_TRUE;
660
if (name == nsGkAtoms::body) {
664
if (LineBreakBeforeOpen(name, hasDirtyAttr)) {
665
AppendToString(mLineBreak, aStr);
666
mMayIgnoreLineBreakSequence = PR_TRUE;
668
mAddSpace = PR_FALSE;
670
else if (mAddSpace) {
671
AppendToString(PRUnichar(' '), aStr);
672
mAddSpace = PR_FALSE;
675
MaybeAddNewline(aStr);
677
// Always reset to avoid false newlines in case MaybeAddNewline wasn't
679
mAddNewline = PR_FALSE;
681
StartIndentation(name, hasDirtyAttr, aStr);
683
if (name == nsGkAtoms::pre ||
684
name == nsGkAtoms::script ||
685
name == nsGkAtoms::style) {
689
AppendToString(kLessThan, aStr);
691
nsAutoString nameStr;
692
name->ToString(nameStr);
693
AppendToString(nameStr.get(), -1, aStr);
695
// Need to keep track of OL and LI elements in order to get ordinal number
697
if (mIsCopying && name == nsGkAtoms::ol){
698
// We are copying and current node is an OL;
699
// Store its start attribute value in olState->startVal.
701
PRInt32 startAttrVal = 0;
702
aElement->GetAttribute(NS_LITERAL_STRING("start"), start);
703
if (!start.IsEmpty()){
705
startAttrVal = start.ToInteger(&rv);
706
//If OL has "start" attribute, first LI element has to start with that value
707
//Therefore subtracting 1 as all the LI elements are incrementing it before using it;
708
//In failure of ToInteger(), default StartAttrValue to 0.
709
if (NS_SUCCEEDED(rv))
714
// The state is pushed here and popped again in AppendElementEnd.
olState* state = new olState(startAttrVal, PR_TRUE);
716
mOLStateStack.AppendElement(state);
719
if (mIsCopying && name == nsGkAtoms::li) {
720
mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
721
if (mIsFirstChildOfOL){
722
// If OL is parent of this LI, serialize attributes in different manner.
723
SerializeLIValueAttribute(aElement, aStr);
727
// Even LI passed above have to go through this
728
// for serializing attributes other than "value".
729
SerializeAttributes(content, name, aStr);
731
AppendToString(kGreaterThan, aStr);
733
if (LineBreakAfterOpen(name, hasDirtyAttr)) {
734
AppendToString(mLineBreak, aStr);
735
mMayIgnoreLineBreakSequence = PR_TRUE;
739
if (name == nsGkAtoms::script ||
740
name == nsGkAtoms::style ||
741
name == nsGkAtoms::noscript ||
742
name == nsGkAtoms::noframes) {
746
if (mIsWholeDocument && name == nsGkAtoms::head) {
747
// Check if there already are any content-type meta children.
748
// If there are, they will be modified to use the correct charset.
749
// If there aren't, we'll insert one here.
750
PRBool hasMeta = PR_FALSE;
751
PRUint32 i, childCount = content->GetChildCount();
752
for (i = 0; i < childCount; ++i) {
753
nsIContent* child = content->GetChildAt(i);
754
if (child->IsNodeOfType(nsINode::eHTML) &&
755
child->Tag() == nsGkAtoms::meta &&
756
child->HasAttr(kNameSpaceID_None, nsGkAtoms::content)) {
758
child->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
760
// NOTE(review): debugging output written to stdout. Any preprocessor guard
// around it is not visible in this extract; confirm it is compiled only in
// debug builds.
printf("Header value = '%s'\n", NS_ConvertUTF16toUTF8(header).get());
762
if (header.LowerCaseEqualsLiteral("content-type")) {
770
AppendToString(mLineBreak, aStr);
771
AppendToString(NS_LITERAL_STRING("<meta http-equiv=\"content-type\""),
773
AppendToString(NS_LITERAL_STRING(" content=\"text/html; charset="), aStr);
774
AppendToString(NS_ConvertASCIItoUTF16(mCharset), aStr);
775
AppendToString(NS_LITERAL_STRING("\">"), aStr);
783
// Writes the end tag of an element to aStr.
//   aElement - element being closed; must not be null.
//   aStr     - output string.
// Returns NS_ERROR_FAILURE when aElement does not implement nsIContent, and
// NS_OK without output for tags the parser service reports as non-containers.
// Visible steps, in order: special handling of malformed SCRIPT elements;
// popping the olState of an OL while copying; the container check; line break
// or pending space before the tag; indentation; the end tag itself; an
// optional line break after it; and MaybeFlagNewline.
nsHTMLContentSerializer::AppendElementEnd(nsIDOMElement *aElement,
786
NS_ENSURE_ARG(aElement);
788
nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
789
if (!content) return NS_ERROR_FAILURE;
791
PRBool hasDirtyAttr = content->HasAttr(kNameSpaceID_None,
792
nsGkAtoms::mozdirty);
794
nsIAtom *name = content->Tag();
796
if (name == nsGkAtoms::script) {
797
nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
799
if (script && script->IsMalformed()) {
800
// We're looking at a malformed script tag. This means that the end tag
801
// was missing in the source. Imitate that here by not serializing the end
807
if (name == nsGkAtoms::pre ||
808
name == nsGkAtoms::script ||
809
name == nsGkAtoms::style) {
813
if (mIsCopying && (name == nsGkAtoms::ol)){
814
NS_ASSERTION((mOLStateStack.Count() > 0), "Cannot have an empty OL Stack");
815
/* Though at this point we must always have a state to be deleted as all
816
the OL opening tags are supposed to push an olState object to the stack*/
817
if (mOLStateStack.Count() > 0) {
818
olState* state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count() -1);
819
mOLStateStack.RemoveElementAt(mOLStateStack.Count() -1);
824
nsIParserService* parserService = nsContentUtils::GetParserService();
826
// STYLE is excluded from the container check and always gets an end tag.
if (parserService && (name != nsGkAtoms::style)) {
829
parserService->IsContainer(parserService->HTMLAtomTagToId(name),
831
if (!isContainer) return NS_OK;
834
if (LineBreakBeforeClose(name, hasDirtyAttr)) {
835
AppendToString(mLineBreak, aStr);
836
mMayIgnoreLineBreakSequence = PR_TRUE;
838
mAddSpace = PR_FALSE;
840
else if (mAddSpace) {
841
AppendToString(PRUnichar(' '), aStr);
842
mAddSpace = PR_FALSE;
845
EndIndentation(name, hasDirtyAttr, aStr);
847
nsAutoString nameStr;
848
name->ToString(nameStr);
850
AppendToString(kEndTag, aStr);
851
AppendToString(nameStr.get(), -1, aStr);
852
AppendToString(kGreaterThan, aStr);
854
if (LineBreakAfterClose(name, hasDirtyAttr)) {
855
AppendToString(mLineBreak, aStr);
856
mMayIgnoreLineBreakSequence = PR_TRUE;
860
MaybeFlagNewline(aElement);
869
// Appends a raw UTF-16 buffer to aOutputStr.
//   aStr       - buffer to append.
//   aLength    - number of characters, or -1 to measure aStr with
//                nsCRT::strlen.
//   aOutputStr - destination string.
// The first test singles out body-only mode while outside the body; what
// that branch does is not visible in this extract.
nsHTMLContentSerializer::AppendToString(const PRUnichar* aStr,
871
nsAString& aOutputStr)
873
if (mBodyOnly && !mInBody) {
877
PRInt32 length = (aLength == -1) ? nsCRT::strlen(aStr) : aLength;
881
aOutputStr.Append(aStr, length);
885
// Appends a single UTF-16 character to aOutputStr.
//   aChar      - character to append.
//   aOutputStr - destination string.
// The first test singles out body-only mode while outside the body; what
// that branch does is not visible in this extract.
nsHTMLContentSerializer::AppendToString(const PRUnichar aChar,
886
nsAString& aOutputStr)
888
if (mBodyOnly && !mInBody) {
894
aOutputStr.Append(aChar);
897
// Entity lookup data used by the nsAString overload of AppendToString.
// Code point 160 is always written with the entity name nbsp.
static const PRUint16 kValNBSP = 160;
898
static const char kEntityNBSP[] = "nbsp";
900
// Highest character code looked up in the tables below (62 is the
// greater-than sign).
static const PRUint16 kGTVal = 62;
901
// Entity names for text content, indexed by character code with ten entries
// per row; an empty string means the character is written unchanged.
// Index 38 (ampersand) maps to amp.
static const char* kEntities[] = {
902
"", "", "", "", "", "", "", "", "", "",
903
"", "", "", "", "", "", "", "", "", "",
904
"", "", "", "", "", "", "", "", "", "",
905
"", "", "", "", "", "", "", "", "amp", "",
906
"", "", "", "", "", "", "", "", "", "",
907
"", "", "", "", "", "", "", "", "", "",
911
// Entity names for attribute values, laid out like kEntities. In addition to
// the ampersand, index 34 (double quote) maps to quot.
static const char* kAttrEntities[] = {
912
"", "", "", "", "", "", "", "", "", "",
913
"", "", "", "", "", "", "", "", "", "",
914
"", "", "", "", "", "", "", "", "", "",
915
"", "", "", "", "quot", "", "", "", "amp", "",
916
"", "", "", "", "", "", "", "", "", "",
917
"", "", "", "", "", "", "", "", "", "",
922
// Appends aStr to aOutputStr, optionally replacing characters by entities.
//   aStr               - text to append.
//   aOutputStr         - destination string.
//   aTranslateEntities - when true (and not inside CDATA) characters may be
//                        replaced by entity references.
//   aIncrColumn        - forwarded to the nsXMLContentSerializer version;
//                        its local use is not visible in this extract.
// With any of the entity-encoding output flags set, the text is scanned
// fragment by fragment. A character is replaced when it is NBSP, when it has
// an entry in the attribute or text entity table, when the parser service
// knows an entity for it (Latin1 or HTML entity flags), or when the entity
// converter does (W3C entity flag). Text between replacements is copied
// through unchanged. Without those flags the nsXMLContentSerializer
// implementation is used, and without translation the string is appended
// as is.
nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
923
nsAString& aOutputStr,
924
PRBool aTranslateEntities,
927
if (mBodyOnly && !mInBody) {
932
mColPos += aStr.Length();
935
if (aTranslateEntities && !mInCDATA) {
936
if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities |
937
nsIDocumentEncoder::OutputEncodeLatin1Entities |
938
nsIDocumentEncoder::OutputEncodeHTMLEntities |
939
nsIDocumentEncoder::OutputEncodeW3CEntities)) {
940
nsIParserService* parserService = nsContentUtils::GetParserService();
942
if (!parserService) {
943
NS_ERROR("Can't get parser service");
947
nsReadingIterator<PRUnichar> done_reading;
948
aStr.EndReading(done_reading);
950
// for each chunk of |aString|...
951
PRUint32 advanceLength = 0;
952
nsReadingIterator<PRUnichar> iter;
954
// Attribute values use the table that also escapes the double quote.
const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
956
for (aStr.BeginReading(iter);
957
iter != done_reading;
958
iter.advance(PRInt32(advanceLength))) {
959
PRUint32 fragmentLength = iter.size_forward();
960
PRUint32 lengthReplaced = 0; // the number of UTF-16 code units
961
// replaced by a particular entity
962
const PRUnichar* c = iter.get();
963
const PRUnichar* fragmentStart = c;
964
const PRUnichar* fragmentEnd = c + fragmentLength;
965
const char* entityText = nsnull;
966
nsCAutoString entityReplacement;
967
char* fullEntityText = nsnull;
970
// for each character in this chunk, check if it
971
// needs to be replaced
972
for (; c < fragmentEnd; c++, advanceLength++) {
974
if (val == kValNBSP) {
975
entityText = kEntityNBSP;
978
else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
979
entityText = entityTable[val];
981
} else if (val > 127 &&
983
mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
984
mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
985
parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
987
if (!entityReplacement.IsEmpty()) {
988
entityText = entityReplacement.get();
992
else if (val > 127 &&
993
mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
995
// A surrogate pair is combined into one UTF-32 value before conversion;
// note that c is advanced past the low surrogate here.
if (NS_IS_HIGH_SURROGATE(val) &&
996
c + 1 < fragmentEnd &&
997
NS_IS_LOW_SURROGATE(*(c + 1))) {
998
PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
999
if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
1000
nsIEntityConverter::entityW3C, &fullEntityText))) {
1008
else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
1009
nsIEntityConverter::entityW3C,
1010
&fullEntityText))) {
1017
// Copy the unmodified text that precedes the replaced character.
aOutputStr.Append(fragmentStart, advanceLength);
1019
aOutputStr.Append(PRUnichar('&'));
1020
AppendASCIItoUTF16(entityText, aOutputStr);
1021
aOutputStr.Append(PRUnichar(';'));
1024
// if it comes from nsIEntityConverter, it already has '&' and ';'
1025
else if (fullEntityText) {
1026
AppendASCIItoUTF16(fullEntityText, aOutputStr);
1027
nsMemory::Free(fullEntityText);
1028
advanceLength += lengthReplaced;
1032
nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn);
1038
aOutputStr.Append(aStr);
1042
// Appends aStr to aOutputStr, writing mLineBreak in place of every line feed.
//   aStr       - text to append.
//   aOutputStr - destination string.
// The text is processed one line at a time: each segment up to the next line
// feed goes through AppendToString, followed by mLineBreak. The final segment
// without a line feed is appended on its own.
nsHTMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr,
1043
nsAString& aOutputStr)
1045
// Convert line-endings to mLineBreak
1047
PRUint32 theLen = aStr.Length();
1048
while (start < theLen) {
1049
PRInt32 eol = aStr.FindChar('\n', start);
1050
if (eol == kNotFound) {
1051
nsDependentSubstring dataSubstring(aStr, start, theLen - start);
1052
AppendToString(dataSubstring, aOutputStr);
1056
nsDependentSubstring dataSubstring(aStr, start, eol - start);
1057
AppendToString(dataSubstring, aOutputStr);
1058
AppendToString(mLineBreak, aOutputStr);
1060
if (start == theLen)
1067
// Decides whether a line break goes in front of the start tag of aName.
//   aName         - tag being opened.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
// Three cases are visible: a guard for no formatting, inside PRE, already at
// column 0, or raw output; a fixed list of tags; and a fallback that asks the
// parser service whether the tag is a block element. The return statements
// are not visible in this extract. Presumably the guard yields false and the
// list yields true; confirm against the complete source.
nsHTMLContentSerializer::LineBreakBeforeOpen(nsIAtom* aName,
1068
PRBool aHasDirtyAttr)
1070
if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos ||
1071
(mFlags & nsIDocumentEncoder::OutputRaw)) {
1075
if (aName == nsGkAtoms::title ||
1076
aName == nsGkAtoms::meta ||
1077
aName == nsGkAtoms::link ||
1078
aName == nsGkAtoms::style ||
1079
aName == nsGkAtoms::select ||
1080
aName == nsGkAtoms::option ||
1081
aName == nsGkAtoms::script ||
1082
aName == nsGkAtoms::html) {
1086
nsIParserService* parserService = nsContentUtils::GetParserService();
1088
if (parserService) {
1090
parserService->IsBlock(parserService->HTMLAtomTagToId(aName), res);
1099
// Decides whether a line break follows the start tag of aName.
//   aName         - tag being opened.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
// Two cases are visible: a guard for no formatting, inside PRE, or raw
// output; and a fixed list of tags. The return statements are not visible in
// this extract. Presumably the guard yields false and the list yields true;
// confirm against the complete source.
nsHTMLContentSerializer::LineBreakAfterOpen(nsIAtom* aName,
1100
PRBool aHasDirtyAttr)
1102
if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel ||
1103
(mFlags & nsIDocumentEncoder::OutputRaw)) {
1107
if ((aName == nsGkAtoms::html) ||
1108
(aName == nsGkAtoms::head) ||
1109
(aName == nsGkAtoms::body) ||
1110
(aName == nsGkAtoms::ul) ||
1111
(aName == nsGkAtoms::ol) ||
1112
(aName == nsGkAtoms::dl) ||
1113
(aName == nsGkAtoms::table) ||
1114
(aName == nsGkAtoms::tbody) ||
1115
(aName == nsGkAtoms::tr) ||
1116
(aName == nsGkAtoms::br) ||
1117
(aName == nsGkAtoms::meta) ||
1118
(aName == nsGkAtoms::link) ||
1119
(aName == nsGkAtoms::script) ||
1120
(aName == nsGkAtoms::select) ||
1121
(aName == nsGkAtoms::map) ||
1122
(aName == nsGkAtoms::area) ||
1123
(aName == nsGkAtoms::style)) {
1131
// Decides whether a line break goes in front of the end tag of aName.
//   aName         - tag being closed.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
// Two cases are visible: a guard for no formatting, inside PRE, already at
// column 0, or raw output; and a fixed list of tags. The return statements
// are not visible in this extract. Presumably the guard yields false and the
// list yields true; confirm against the complete source.
nsHTMLContentSerializer::LineBreakBeforeClose(nsIAtom* aName,
1132
PRBool aHasDirtyAttr)
1134
if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos ||
1135
(mFlags & nsIDocumentEncoder::OutputRaw)) {
1139
if ((aName == nsGkAtoms::html) ||
1140
(aName == nsGkAtoms::head) ||
1141
(aName == nsGkAtoms::body) ||
1142
(aName == nsGkAtoms::ul) ||
1143
(aName == nsGkAtoms::ol) ||
1144
(aName == nsGkAtoms::dl) ||
1145
(aName == nsGkAtoms::select) ||
1146
(aName == nsGkAtoms::table) ||
1147
(aName == nsGkAtoms::tbody)) {
1155
// Decides whether a line break follows the end tag of aName.
//   aName         - tag being closed.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
// Three cases are visible: a guard for no formatting, inside PRE, or raw
// output; a fixed list of tags; and a fallback that asks the parser service
// whether the tag is a block element. The return statements are not visible
// in this extract. Presumably the guard yields false and the list yields
// true; confirm against the complete source.
nsHTMLContentSerializer::LineBreakAfterClose(nsIAtom* aName,
1156
PRBool aHasDirtyAttr)
1158
if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel ||
1159
(mFlags & nsIDocumentEncoder::OutputRaw)) {
1163
if ((aName == nsGkAtoms::html) ||
1164
(aName == nsGkAtoms::head) ||
1165
(aName == nsGkAtoms::body) ||
1166
(aName == nsGkAtoms::tr) ||
1167
(aName == nsGkAtoms::th) ||
1168
(aName == nsGkAtoms::td) ||
1169
(aName == nsGkAtoms::pre) ||
1170
(aName == nsGkAtoms::title) ||
1171
(aName == nsGkAtoms::li) ||
1172
(aName == nsGkAtoms::dt) ||
1173
(aName == nsGkAtoms::dd) ||
1174
(aName == nsGkAtoms::blockquote) ||
1175
(aName == nsGkAtoms::select) ||
1176
(aName == nsGkAtoms::option) ||
1177
(aName == nsGkAtoms::p) ||
1178
(aName == nsGkAtoms::map) ||
1179
(aName == nsGkAtoms::div)) {
1183
nsIParserService* parserService = nsContentUtils::GetParserService();
1185
if (parserService) {
1187
parserService->IsBlock(parserService->HTMLAtomTagToId(aName), res);
1196
// Writes the indentation in front of a start tag and handles the nesting
// tags listed below.
//   aName         - tag being opened.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
//   aStr          - output string.
// Indentation is only written when formatting applies, we are not inside
// PRE, and the current column is 0; kIndentStr is then appended mIndent
// times. The statement executed for the listed tags is not visible in this
// extract (presumably it increases mIndent; confirm).
nsHTMLContentSerializer::StartIndentation(nsIAtom* aName,
1197
PRBool aHasDirtyAttr,
1200
if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) {
1201
for (PRInt32 i = mIndent; --i >= 0; ) {
1202
AppendToString(kIndentStr, aStr);
1206
if ((aName == nsGkAtoms::head) ||
1207
(aName == nsGkAtoms::table) ||
1208
(aName == nsGkAtoms::tr) ||
1209
(aName == nsGkAtoms::ul) ||
1210
(aName == nsGkAtoms::ol) ||
1211
(aName == nsGkAtoms::dl) ||
1212
(aName == nsGkAtoms::tbody) ||
1213
(aName == nsGkAtoms::form) ||
1214
(aName == nsGkAtoms::frameset) ||
1215
(aName == nsGkAtoms::blockquote) ||
1216
(aName == nsGkAtoms::li) ||
1217
(aName == nsGkAtoms::dt) ||
1218
(aName == nsGkAtoms::dd)) {
1224
// Counterpart of StartIndentation for end tags: handles the same set of
// nesting tags first, then writes the indentation in front of the end tag.
//   aName         - tag being closed.
//   aHasDirtyAttr - element carries the editor dirty attribute, which enables
//                   formatting even when mDoFormat is off.
//   aStr          - output string.
// The statement executed for the listed tags is not visible in this extract
// (presumably it decreases mIndent; confirm). Indentation is only written
// when formatting applies, we are not inside PRE, and the current column
// is 0.
nsHTMLContentSerializer::EndIndentation(nsIAtom* aName,
1225
PRBool aHasDirtyAttr,
1228
if ((aName == nsGkAtoms::head) ||
1229
(aName == nsGkAtoms::table) ||
1230
(aName == nsGkAtoms::tr) ||
1231
(aName == nsGkAtoms::ul) ||
1232
(aName == nsGkAtoms::ol) ||
1233
(aName == nsGkAtoms::dl) ||
1234
(aName == nsGkAtoms::li) ||
1235
(aName == nsGkAtoms::tbody) ||
1236
(aName == nsGkAtoms::form) ||
1237
(aName == nsGkAtoms::blockquote) ||
1238
(aName == nsGkAtoms::dt) ||
1239
(aName == nsGkAtoms::dd) ||
1240
(aName == nsGkAtoms::frameset)) {
1244
if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) {
1245
for (PRInt32 i = mIndent; --i >= 0; ) {
1246
AppendToString(kIndentStr, aStr);
1251
// See if the string has any lines longer than kLongLineLen:
1252
// if so, we presume formatting is wonky (e.g. the node has been edited)
1253
// and we'd better rewrap the whole text node.
1255
//
// text                - string that is scanned for newline characters.
// aLastNewlineOffset  - out parameter; reset to kNotFound on entry and
//                       assigned a newline offset found during the scan.
nsHTMLContentSerializer::HasLongLines(const nsString& text, PRInt32& aLastNewlineOffset)
1258
PRUint32 theLen=text.Length();
1259
// Result flag; starts out false. The code that sets and returns it is not
// visible in this excerpt.
PRBool rv = PR_FALSE;
1260
aLastNewlineOffset = kNotFound;
1261
// Scan the string one line at a time. The declaration of start and the
// statement that advances it are not visible in this excerpt.
for (start = 0; start < theLen; )
1263
PRInt32 eol = text.FindChar('\n', start);
1265
// NOTE(review): the guard for this assignment is not visible here;
// presumably it runs when no newline was found, so that the rest of the
// string counts as the final line -- confirm.
eol = text.Length();
1268
// NOTE(review): guard not visible here; presumably the branch taken when a
// newline was found -- confirm.
aLastNewlineOffset = eol;
1270
// Length of the current line (excluding the newline) against the limit.
if (PRInt32(eol - start) > kLongLineLen)
1278
// Serializes a "value" attribute for the LI element aElement so that its
// list number survives when preceding items are not part of the output.
//
// The function takes the olState on top of mOLStateStack (or a local default
// when the stack is empty), walks from aElement back through its previous
// siblings looking for an LI with a non-empty "value" attribute, and then
// decides from offset and found what to pass to SerializeAttr.
//
// aElement - the LI element being serialized.
// aStr     - output string handed to SerializeAttr (its declaration is not
//            visible in this excerpt).
//
// NOTE(review): the declarations of offset and rv, and the code that updates
// offset and found, are not visible in this excerpt.
nsHTMLContentSerializer::SerializeLIValueAttribute(nsIDOMElement* aElement,
1281
// We are copying and we are at the "first" LI node of OL in selected range.
1282
// It may not be the first LI child of OL but it's first in the selected range.
1283
// Note that we get into this condition only once per OL.
1284
PRBool found = PR_FALSE;
1285
nsCOMPtr<nsIDOMNode> currNode = do_QueryInterface(aElement);
1286
nsAutoString valueStr;
1288
// Fallback state, constructed with (0, PR_FALSE), used when the stack
// provides none.
olState defaultOLState(0, PR_FALSE);
1289
olState* state = nsnull;
1290
// Use the entry on top of mOLStateStack when the stack is not empty.
if (mOLStateStack.Count() > 0)
1291
state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count()-1);
1292
/* Though we should never reach to a "state" as null or mOLStateStack.Count() == 0
1293
at this point as all LI are supposed to be inside some OL and OL tag should have
1294
pushed a state to the olStateStack.*/
1295
// NOTE(review): state is only assigned when Count() > 0, so the second half
// of this test looks redundant with the null check.
if (!state || mOLStateStack.Count() == 0)
1296
state = &defaultOLState;
1297
PRInt32 startVal = state->startVal;
1298
// Clear the first-list-item flag on the state that was selected above.
state->isFirstListItem = PR_FALSE;
1299
// Traverse previous siblings until we find one with "value" attribute.
1300
// offset keeps track of how many previous siblings we had to traverse.
1301
while (currNode && !found) {
1302
nsCOMPtr<nsIDOMElement> currElement = do_QueryInterface(currNode);
1303
// currElement may be null if it were a text node.
1305
// NOTE(review): the null check on currElement that the comment above refers
// to is not visible in this excerpt.
nsAutoString tagName;
1306
currElement->GetTagName(tagName);
1307
// Only LI elements are inspected; the tag name is compared ignoring case.
if (tagName.LowerCaseEqualsLiteral("li")) {
1308
currElement->GetAttribute(NS_LITERAL_STRING("value"), valueStr);
1309
// NOTE(review): the statements controlled by this test are not visible
// here; presumably an empty value counts one more sibling in offset and a
// non-empty value sets found -- confirm.
if (valueStr.IsEmpty())
1314
// Numbering continues from the value found on this LI.
startVal = valueStr.ToInteger(&rv);
1318
// tmp receives the previous sibling; presumably currNode is then replaced
// by tmp to continue the walk (not visible in this excerpt).
nsCOMPtr<nsIDOMNode> tmp;
1319
currNode->GetPreviousSibling(getter_AddRefs(tmp));
1322
// If LI was not having "value", Set the "value" attribute for it.
1323
// Note that We are at the first LI in the selected range of OL.
1324
if (offset == 0 && found) {
1325
// offset = 0 => LI itself has the value attribute and we did not need to traverse back.
1326
// Just serialize value attribute like other tags.
1327
SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, PR_FALSE);
1329
else if (offset == 1 && !found) {
1330
/*(offset = 1 && !found) means either LI is the first child node of OL
1331
and LI is not having "value" attribute.
1332
In that case we would not like to set "value" attribute to reduce the changes.
1336
else if (offset > 0) {
1337
// Set value attribute.
1338
// NOTE(review): this declaration reuses the name valueStr declared at the
// top of the function and hides it inside this branch.
nsAutoString valueStr;
1340
//As serializer needs to use this valueAttr we are creating here,
1341
// The serialized number is the starting value plus the sibling count.
valueStr.AppendInt(startVal + offset);
1342
SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, PR_FALSE);
1347
nsHTMLContentSerializer::IsFirstChildOfOL(nsIDOMElement* aElement){
1348
nsCOMPtr<nsIDOMNode> node = do_QueryInterface(aElement);
1349
nsAutoString parentName;
1351
nsCOMPtr<nsIDOMNode> parentNode;
1352
node->GetParentNode(getter_AddRefs(parentNode));
1354
parentNode->GetNodeName(parentName);
1359
if (parentName.LowerCaseEqualsLiteral("ol")) {
1360
olState defaultOLState(0, PR_FALSE);
1361
olState* state = nsnull;
1362
if (mOLStateStack.Count() > 0)
1363
state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count()-1);
1364
/* Though we should never reach to a "state" as null at this point as
1365
all LI are supposed to be inside some OL and OL tag should have pushed
1366
a state to the mOLStateStack.*/
1368
state = &defaultOLState;
1370
if (state->isFirstListItem)