1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Netscape Public License
6
* Version 1.1 (the "License"); you may not use this file except in
7
* compliance with the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/NPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is mozilla.org code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
23
* Daniel Bratell <bratell@lysator.liu.se>
24
* Ben Bucksch <mozilla@bucksch.org>
27
* Alternatively, the contents of this file may be used under the terms of
28
* either the GNU General Public License Version 2 or later (the "GPL"), or
29
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
* in which case the provisions of the GPL or the LGPL are applicable instead
31
* of those above. If you wish to allow use of your version of this file only
32
* under the terms of either the GPL or the LGPL, and not to allow others to
33
* use your version of this file under the terms of the NPL, indicate your
34
* decision by deleting the provisions above and replace them with the notice
35
* and other provisions required by the GPL or the LGPL. If you do not delete
36
* the provisions above, a recipient may use your version of this file under
37
* the terms of any one of the NPL, the GPL or the LGPL.
39
* ***** END LICENSE BLOCK ***** */
41
#include "nsPlainTextSerializer.h"
42
#include "nsILineBreakerFactory.h"
43
#include "nsLWBrkCIID.h"
44
#include "nsIPrefBranch.h"
45
#include "nsIPrefService.h"
46
#include "nsIServiceManager.h"
47
#include "nsHTMLAtoms.h"
48
#include "nsIDOMText.h"
49
#include "nsIDOMCDATASection.h"
50
#include "nsIDOMElement.h"
51
#include "nsINameSpaceManager.h"
52
#include "nsITextContent.h"
53
#include "nsTextFragment.h"
54
#include "nsContentUtils.h"
55
#include "nsReadableUtils.h"
56
#include "nsUnicharUtils.h"
58
#include "nsIParserService.h"
59
#include "nsIDOMHTMLDocument.h"
60
#include "nsIDOMHTMLElement.h"
62
static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
64
#define PREF_STRUCTS "converter.html2txt.structs"
65
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
67
static const PRInt32 kTabSize=4;
68
static const PRInt32 kOLNumberWidth = 3;
69
static const PRInt32 kIndentSizeHeaders = 2; /* Indention of h1, if
70
mHeaderStrategy = 1 or = 2.
71
Indention of other headers
74
static const PRInt32 kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1,
75
indent h(x+1) this many
76
columns more than h(x) */
77
static const PRInt32 kIndentSizeList = (kTabSize > kOLNumberWidth+3) ? kTabSize: kOLNumberWidth+3;
78
// Indention of non-first lines of ul and ol
79
static const PRInt32 kIndentSizeDD = kTabSize; // Indention of <dd>
81
static PRInt32 HeaderLevel(eHTMLTags aTag);
82
static PRInt32 GetUnicharWidth(PRUnichar ucs);
83
static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n);
85
// Someday may want to make this non-const:
86
static const PRUint32 TagStackSize = 500;
87
static const PRUint32 OLStackSize = 100;
89
// Factory for the plain-text serializer: allocates an nsPlainTextSerializer
// and returns it through aSerializer as an nsIContentSerializer.
nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
91
nsPlainTextSerializer* it = new nsPlainTextSerializer();
93
// Allocation-failure result (presumably guarded by a null check on |it|).
return NS_ERROR_OUT_OF_MEMORY;
96
// CallQueryInterface supplies the nsIContentSerializer pointer (and result).
return CallQueryInterface(it, aSerializer);
99
// Constructor: puts the serializer into its initial state (no output target,
// at the first column, default wrap width) and allocates the tag and
// ordered-list stacks.
nsPlainTextSerializer::nsPlainTextSerializer()
100
: kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
103
// No output string yet; the Append*/Flush entry points set it per call.
mOutputString = nsnull;
105
mAtFirstColumn = PR_TRUE;
108
mStructs = PR_TRUE; // will be read from prefs later
109
mHeaderStrategy = 1 /*indent increasingly*/; // ditto
110
mQuotesPreformatted = PR_FALSE; // ditto
111
mDontWrapAnyQuotes = PR_FALSE; // ditto
113
// Clear all seven header counters (indices 0..6).
for (PRInt32 i = 0; i <= 6; i++) {
114
mHeaderCounter[i] = 0;
118
mWrapColumn = 72; // XXX magic number, we expect someone to reset this
119
mCurrentLineWidth = 0;
122
mEmptyLines = 1; // The start of the document is an "empty line" in itself,
123
// Start as if whitespace was just seen.
mInWhitespace = PR_TRUE;
124
mPreFormatted = PR_FALSE;
125
mStartedOutput = PR_FALSE;
127
// initialize the tag stack to zero:
128
// NOTE(review): the result of new[] is not null-checked in the visible code.
mTagStack = new nsHTMLTag[TagStackSize];
130
// kNotFound here means "not currently ignoring any subtree".
mIgnoreAboveIndex = (PRUint32)kNotFound;
132
// initialize the OL stack, where numbers for ordered lists are kept:
133
mOLStack = new PRInt32[OLStackSize];
139
// Destructor.
// NOTE(review): the constructor allocates mTagStack and mOLStack with new[];
// confirm that the body releases both with delete[].
nsPlainTextSerializer::~nsPlainTextSerializer()
145
// nsISupports boilerplate for nsPlainTextSerializer. nsIContentSerializer is
// the first of the four interfaces the macro lists.
NS_IMPL_ISUPPORTS4(nsPlainTextSerializer,
146
nsIContentSerializer,
153
// Configures the serializer for one run.
//   aFlags      - nsIDocumentEncoder::Output* flags.
//   aWrapColumn - wrap column; stored in mWrapColumn.
//   aCharSet, aIsCopying - not referenced by the statements below.
// Sanity-checks the flag combination, requires the parser service, obtains a
// line breaker, picks the line-break sequence, and reads formatting
// preferences from the pref service.
nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
154
const char* aCharSet, PRBool aIsCopying)
157
// Check if the major control flags are set correctly.
158
if(aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
159
NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
160
"If you want format=flowed, you must combine it with "
161
"nsIDocumentEncoder::OutputFormatted");
164
if(aFlags & nsIDocumentEncoder::OutputFormatted) {
165
NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
166
"Can't do formatted and preformatted output at the same time!");
170
// DoAddLeaf uses the parser service for entities; fail early without it.
NS_ENSURE_TRUE(nsContentUtils::GetParserServiceWeakRef(),
171
NS_ERROR_UNEXPECTED);
176
mWrapColumn = aWrapColumn;
178
// Only create a linebreaker if we will handle wrapping.
180
nsCOMPtr<nsILineBreakerFactory> lf(do_GetService(kLWBrkCID, &rv));
181
if (NS_SUCCEEDED(rv)) {
183
rv = lf->GetBreaker(lbarg, getter_AddRefs(mLineBreaker));
184
if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
188
// Set the line break character:
189
// Both CR and LF flags set -> CRLF; one flag -> that character; otherwise
// the platform's NS_LINEBREAK.
if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
190
&& (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
192
mLineBreak.Assign(NS_LITERAL_STRING("\r\n"));
194
else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
196
mLineBreak.Assign(PRUnichar('\r'));
198
else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
200
mLineBreak.Assign(PRUnichar('\n'));
204
mLineBreak.AssignWithConversion(NS_LINEBREAK);
207
mLineBreakDue = PR_FALSE;
210
nsCOMPtr<nsIPrefBranch> prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID));
212
// NOTE(review): prefBranch is dereferenced below; confirm that the failure
// path that issues this warning also returns.
NS_WARNING("Could not get a pref branch!");
216
// Scratch value for the boolean prefs read below. The Get*Pref results are
// not checked, so a failed read leaves whatever value was there before.
PRBool tempBool = PR_FALSE;
217
if (mFlags & nsIDocumentEncoder::OutputFormatted) {
218
// Get some prefs that control how we do formatted output
219
prefBranch->GetBoolPref(PREF_STRUCTS, &tempBool);
221
prefBranch->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy);
222
// The quotesPreformatted pref is a temporary measure. See bug 69638.
223
prefBranch->GetBoolPref("editor.quotesPreformatted", &tempBool);
224
mQuotesPreformatted = tempBool;
225
// DontWrapAnyQuotes is set according to whether plaintext mail
226
// is wrapping to window width -- see bug 134439.
227
// We'll only want this if we're wrapping and formatted.
228
if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
229
prefBranch->GetBoolPref("mail.compose.wrap_to_window_width", &tempBool);
230
mDontWrapAnyQuotes = tempBool;
234
// XXX We should let the caller pass this in.
235
prefBranch->GetBoolPref("browser.frames.enabled", &tempBool);
237
// The pref value selects whether OutputNoFramesContent is cleared or set.
mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
240
mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
247
// Reads the top entry of a boolean stack kept in an nsVoidArray, where each
// boolean is stored as a void* element. Does not modify the stack.
nsPlainTextSerializer::GetLastBool(const nsVoidArray& aStack)
249
PRUint32 size = aStack.Count();
253
// Any element that is not the PR_FALSE pointer value counts as true.
return (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE));
257
// Overwrites the top entry of a boolean stack with aValue. An NS_ERROR is
// raised for the case where there is no top entry.
nsPlainTextSerializer::SetLastBool(nsVoidArray& aStack, PRBool aValue)
259
PRUint32 size = aStack.Count();
261
// Replace in place; the stack depth is unchanged.
aStack.ReplaceElementAt(NS_REINTERPRET_CAST(void*, aValue), size-1);
264
NS_ERROR("There is no \"Last\" value");
269
// Pushes aValue onto a boolean stack; the value is stored as a void* element
// at the end of the nsVoidArray.
nsPlainTextSerializer::PushBool(nsVoidArray& aStack, PRBool aValue)
271
aStack.AppendElement(NS_REINTERPRET_CAST(void*, aValue));
275
// Removes the top entry of a boolean stack and yields its value.
// returnValue starts as PR_FALSE, which is what remains if nothing is popped.
nsPlainTextSerializer::PopBool(nsVoidArray& aStack)
277
PRBool returnValue = PR_FALSE;
278
PRUint32 size = aStack.Count();
280
// Decode the stored void* back into a boolean, then drop the element.
returnValue = (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE));
281
aStack.RemoveElementAt(size-1);
287
// Content-sink style initialisation: runs Init() with the given flags and
// wrap column (no charset, not copying) and remembers aOutString as the
// output target. Propagates Init()'s failure code.
nsPlainTextSerializer::Initialize(nsAString* aOutString,
288
PRUint32 aFlags, PRUint32 aWrapCol)
290
nsresult rv = Init(aFlags, aWrapCol, nsnull, PR_FALSE);
291
NS_ENSURE_SUCCESS(rv, rv);
293
// XXX This is wrong. It violates XPCOM string ownership rules.
294
// We're only getting away with this because instances of this
295
// class are restricted to single function scope.
296
// Only the pointer is stored; the caller's string must outlive this object.
mOutputString = aOutString;
302
// Serializes the characters [aStartOffset, aEndOffset) of a DOM text node
// into aStr. An aEndOffset of -1 means "to the end of the text fragment".
// The text is split at CR/LF characters: each run is passed to DoAddLeaf as
// eHTMLTag_text and each break as eHTMLTag_newline.
// Returns NS_ERROR_INVALID_ARG for a negative start offset and
// NS_ERROR_FAILURE when aText is not an nsITextContent.
nsPlainTextSerializer::AppendText(nsIDOMText* aText,
303
PRInt32 aStartOffset,
307
// A value other than kNotFound means we are inside a subtree whose content
// is being ignored (set in DoOpenContainer).
if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
311
NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
312
if ( aStartOffset < 0 )
313
return NS_ERROR_INVALID_ARG;
315
NS_ENSURE_ARG(aText);
319
nsAutoString textstr;
321
nsCOMPtr<nsITextContent> content = do_QueryInterface(aText);
322
if (!content) return NS_ERROR_FAILURE;
324
const nsTextFragment* frag;
325
// NOTE(review): frag is dereferenced below without a visible null check.
content->GetText(&frag);
328
PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset;
329
NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
331
length = endoffset - aStartOffset;
337
// The fragment is copied from either its 2-byte or its 1-byte buffer; the
// 1-byte form is widened with AssignWithConversion.
textstr.Assign(frag->Get2b() + aStartOffset, length);
340
textstr.AssignWithConversion(frag->Get1b()+aStartOffset, length);
344
// Output goes to the caller's string for the duration of this call.
mOutputString = &aStr;
346
// We have to split the string across newlines
347
// to match parser behavior
349
PRInt32 offset = textstr.FindCharInSet("\n\r");
350
while (offset != kNotFound) {
354
rv = DoAddLeaf(nsnull,
356
Substring(textstr, start, offset-start));
357
if (NS_FAILED(rv)) break;
361
// The break is emitted as the configured mLineBreak, not as the source
// character that was found.
rv = DoAddLeaf(nsnull, eHTMLTag_newline, mLineBreak);
362
if (NS_FAILED(rv)) break;
365
offset = textstr.FindCharInSet("\n\r", start);
368
// Consume the last bit of the string if there's any left
369
if (NS_SUCCEEDED(rv) && start < length) {
371
rv = DoAddLeaf(nsnull,
373
Substring(textstr, start, length-start));
376
rv = DoAddLeaf(nsnull, eHTMLTag_text, textstr);
380
// Drop the borrowed output pointer again.
mOutputString = nsnull;
386
// CDATA sections are serialized exactly like text nodes: forwards to
// AppendText with the same offsets and output string.
nsPlainTextSerializer::AppendCDATASection(nsIDOMCDATASection* aCDATASection,
387
PRInt32 aStartOffset,
391
return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
395
// Serializes the start of a DOM element into aStr. The element's tag id is
// looked up with GetIdForContent; container tags go through DoOpenContainer
// and other tags through DoAddLeaf with an empty text. Also tracks entry
// into <head> via mInHead.
// Returns NS_ERROR_FAILURE if aElement cannot be QI'd into mContent.
nsPlainTextSerializer::AppendElementStart(nsIDOMElement *aElement,
399
NS_ENSURE_ARG(aElement);
401
// mContent is what the Do* helpers consult when no parser node is passed.
mContent = do_QueryInterface(aElement);
402
if (!mContent) return NS_ERROR_FAILURE;
405
PRInt32 id = GetIdForContent(mContent);
407
PRBool isContainer = IsContainer(id);
409
mOutputString = &aStr;
412
rv = DoOpenContainer(nsnull, id);
416
rv = DoAddLeaf(nsnull, id, empty);
420
mOutputString = nsnull;
422
if (!mInHead && id == eHTMLTag_head)
429
// Serializes the end of a DOM element into aStr: looks up the tag id and
// calls DoCloseContainer for it. Also tracks leaving <head> via mInHead.
// Returns NS_ERROR_FAILURE if aElement cannot be QI'd into mContent.
nsPlainTextSerializer::AppendElementEnd(nsIDOMElement *aElement,
432
NS_ENSURE_ARG(aElement);
434
mContent = do_QueryInterface(aElement);
435
if (!mContent) return NS_ERROR_FAILURE;
438
PRInt32 id = GetIdForContent(mContent);
440
PRBool isContainer = IsContainer(id);
442
mOutputString = &aStr;
446
rv = DoCloseContainer(id);
450
mOutputString = nsnull;
452
if (mInHead && id == eHTMLTag_head)
459
// Targets aStr as the output string for the duration of the call and clears
// the pointer again afterwards.
nsPlainTextSerializer::Flush(nsAString& aStr)
461
mOutputString = &aStr;
463
mOutputString = nsnull;
468
// Called at the start of a document. Only does work when built with
// MOZ_STANDALONE_COMPOSER: if the document's <body> carries
// _moz_sourceview="true" (compared case-insensitively), the serializer is
// switched to raw output.
nsPlainTextSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
471
#ifdef MOZ_STANDALONE_COMPOSER
472
NS_ENSURE_ARG(aDocument);
474
// NOTE(review): htmldoc is dereferenced below; confirm it is null-checked
// for documents that are not HTML documents.
nsCOMPtr<nsIDOMHTMLDocument> htmldoc = do_QueryInterface(aDocument);
475
nsCOMPtr<nsIDOMElement> bodyElt;
479
nsCOMPtr<nsIDOMHTMLElement> bodyElement;
480
res = htmldoc->GetBody(getter_AddRefs(bodyElement));
481
if (NS_FAILED(res) || !bodyElement)
484
bodyElt = do_QueryInterface(bodyElement);
487
nsAutoString sourceViewAttr;
488
res = bodyElt->GetAttribute(NS_LITERAL_STRING("_moz_sourceview"), sourceViewAttr);
492
if (sourceViewAttr.Equals(NS_LITERAL_STRING("true"), nsCaseInsensitiveStringComparator()))
493
mFlags |= nsIDocumentEncoder::OutputRaw;
499
// Content-sink entry point for an opening container tag: forwards the parser
// node and its node type to DoOpenContainer.
nsPlainTextSerializer::OpenContainer(const nsIParserNode& aNode)
501
PRInt32 type = aNode.GetNodeType();
503
return DoOpenContainer(&aNode, type);
507
// Content-sink entry point for a closing container tag: forwards to
// DoCloseContainer.
nsPlainTextSerializer::CloseContainer(const nsHTMLTag aTag)
509
return DoCloseContainer(aTag);
513
// Content-sink entry point for a node inside <head>: the node is handed to
// AddLeaf.
nsPlainTextSerializer::AddHeadContent(const nsIParserNode& aNode)
516
nsresult rv = AddLeaf(aNode);
522
// Content-sink entry point for a leaf node. Text, whitespace and newline
// nodes have their text newline-normalized into a temporary string before
// being passed to DoAddLeaf; all other leaf types are passed with the node's
// text unchanged.
nsPlainTextSerializer::AddLeaf(const nsIParserNode& aNode)
524
// A value other than kNotFound means we are inside an ignored subtree.
if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
528
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
529
const nsAString& text = aNode.GetText();
531
if ((type == eHTMLTag_text) ||
532
(type == eHTMLTag_whitespace) ||
533
(type == eHTMLTag_newline)) {
534
// Copy the text out, stripping out CRs
537
// Reserve room for the whole input; the copy reports how much it wrote and
// the string is then sized to exactly that length.
str.SetCapacity(text.Length());
538
nsReadingIterator<PRUnichar> srcStart, srcEnd;
539
length = nsContentUtils::CopyNewlineNormalizedUnicodeTo(text.BeginReading(srcStart), text.EndReading(srcEnd), str);
540
str.SetLength(length);
541
return DoAddLeaf(&aNode, type, str);
544
return DoAddLeaf(&aNode, type, text);
549
// The following HTML content-sink callbacks are thin wrappers: each Open*
// forwards the node to OpenContainer and each Close* calls CloseContainer
// with the matching tag constant.

// <html> opened.
nsPlainTextSerializer::OpenHTML(const nsIParserNode& aNode)
551
return OpenContainer(aNode);
555
// <html> closed.
nsPlainTextSerializer::CloseHTML()
557
return CloseContainer(eHTMLTag_html);
561
// <head> opened.
nsPlainTextSerializer::OpenHead(const nsIParserNode& aNode)
568
// <head> closed.
nsPlainTextSerializer::CloseHead()
575
// <body> opened.
nsPlainTextSerializer::OpenBody(const nsIParserNode& aNode)
577
return OpenContainer(aNode);
581
// <body> closed.
nsPlainTextSerializer::CloseBody()
583
return CloseContainer(eHTMLTag_body);
587
// <form> opened.
nsPlainTextSerializer::OpenForm(const nsIParserNode& aNode)
589
return OpenContainer(aNode);
593
// <form> closed.
nsPlainTextSerializer::CloseForm()
595
return CloseContainer(eHTMLTag_form);
599
// <map> opened.
nsPlainTextSerializer::OpenMap(const nsIParserNode& aNode)
601
return OpenContainer(aNode);
605
// <map> closed.
nsPlainTextSerializer::CloseMap()
607
return CloseContainer(eHTMLTag_map);
611
// <frameset> opened.
nsPlainTextSerializer::OpenFrameset(const nsIParserNode& aNode)
613
return OpenContainer(aNode);
617
// <frameset> closed.
nsPlainTextSerializer::CloseFrameset()
619
return CloseContainer(eHTMLTag_frameset);
623
// Reports through *aReturn whether a tag is enabled for this serializer.
// <script> is enabled unless OutputNoScriptContent is set, and <frameset> is
// enabled unless OutputNoFramesContent is set.
nsPlainTextSerializer::IsEnabled(PRInt32 aTag, PRBool* aReturn)
625
nsHTMLTag theHTMLTag = nsHTMLTag(aTag);
627
if (theHTMLTag == eHTMLTag_script) {
628
*aReturn = !(mFlags & nsIDocumentEncoder::OutputNoScriptContent);
630
else if (theHTMLTag == eHTMLTag_frameset) {
631
*aReturn = !(mFlags & nsIDocumentEncoder::OutputNoFramesContent);
641
* aNode may be null when we're working with the DOM, but then mContent is
645
// Handles the opening of container element aTag.
//   aNode - parser node for the element; may be null when serializing from
//           the DOM.
//   aTag  - the element's tag id (an eHTMLTags value).
// Records the tag on mTagStack, may start ignoring a subtree, detects
// preformatted <body> styles, and applies per-tag vertical spacing,
// indentation and list/table bookkeeping. When OutputFormatted is set it also
// emits plain-text markup for headers and inline structure tags.
nsPlainTextSerializer::DoOpenContainer(const nsIParserNode* aNode, PRInt32 aTag)
647
if (mFlags & nsIDocumentEncoder::OutputRaw) {
648
// Raw means raw. Don't even think about doing anything fancy
649
// here like indenting, adding line breaks or any other
650
// characters such as list item bullets, quote characters
651
// around <q>, etc. I mean it! Don't make me smack you!
656
eHTMLTags type = (eHTMLTags)aTag;
658
// Tags are recorded only while the fixed-size stack has room.
if (mTagStackIndex < TagStackSize) {
659
mTagStack[mTagStackIndex++] = type;
662
// A value other than kNotFound means an enclosing tag started an ignored
// subtree.
if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
667
EnsureVerticalSpace(mFloatingLines);
669
// Check if this tag's content that should not be output
670
if ((type == eHTMLTag_noscript &&
671
!(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
672
((type == eHTMLTag_iframe || type == eHTMLTag_noframes) &&
673
!(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
674
// Ignore everything that follows the current tag in
675
// question until a matching end tag is encountered.
676
// Index of the entry that was just pushed for this tag.
mIgnoreAboveIndex = mTagStackIndex - 1;
680
if (type == eHTMLTag_body) {
681
// Try to figure out here whether we have a
682
// preformatted style attribute.
684
// Trigger on the presence of a "-moz-pre-wrap" in the
685
// style attribute. That's a very simplistic way to do
686
// it, but better than nothing.
687
// Also set mWrapColumn to the value given there
688
// (which arguably we should only do if told to do so).
691
if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::style, style)) &&
692
(kNotFound != (whitespace = style.Find("white-space:")))) {
694
if (kNotFound != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) {
695
#ifdef DEBUG_preformatted
696
printf("Set mPreFormatted based on style moz-pre-wrap\n");
698
mPreFormatted = PR_TRUE;
699
PRInt32 widthOffset = style.Find("width:");
700
if (widthOffset >= 0) {
701
// We have to search for the ch before the semicolon,
702
// not for the semicolon itself, because nsString::ToInteger()
703
// considers 'c' to be a valid numeric char (even if radix=10)
704
// but then gets confused if it sees it next to the number
705
// when the radix specified was 10, and returns an error code.
706
// The 6 used below is the length of the literal "width:".
PRInt32 semiOffset = style.Find("ch", widthOffset+6);
707
PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
708
: style.Length() - widthOffset);
709
nsAutoString widthstr;
710
style.Mid(widthstr, widthOffset+6, length);
712
PRInt32 col = widthstr.ToInteger(&err);
714
if (NS_SUCCEEDED(err)) {
715
mWrapColumn = (PRUint32)col;
716
#ifdef DEBUG_preformatted
717
printf("Set wrap column to %d based on style\n", mWrapColumn);
722
else if (kNotFound != style.Find("pre", PR_TRUE, whitespace)) {
723
#ifdef DEBUG_preformatted
724
printf("Set mPreFormatted based on style pre\n");
726
mPreFormatted = PR_TRUE;
731
mPreFormatted = PR_FALSE;
741
// Per-tag handling that applies to both formatted and unformatted output.
if (type == eHTMLTag_p || type == eHTMLTag_pre) {
742
EnsureVerticalSpace(1); // Should this be 0 in unformatted case?
744
else if (type == eHTMLTag_tr) {
745
// New row: no cell has been written for it yet.
PushBool(mHasWrittenCellsForRow, PR_FALSE);
747
else if (type == eHTMLTag_td || type == eHTMLTag_th) {
748
// We must make sure that the content of two table cells get a
749
// space between them.
751
// To make the separation between cells most obvious and
752
// importable, we use a TAB.
753
if (GetLastBool(mHasWrittenCellsForRow)) {
754
// Bypass |Write| so that the TAB isn't compressed away.
755
AddToLine(NS_LITERAL_STRING("\t").get(), 1);
756
mInWhitespace = PR_TRUE;
758
else if (mHasWrittenCellsForRow.Count() == 0) {
759
// We don't always see a <tr> (nor a <table>) before the <td> if we're
760
// copying part of a table
761
PushBool(mHasWrittenCellsForRow, PR_TRUE); // will never be popped
764
SetLastBool(mHasWrittenCellsForRow, PR_TRUE);
767
else if (type == eHTMLTag_ul) {
768
// Indent here to support nested lists, which aren't included in li :-(
769
// One empty line before an outermost list (no UL/OL currently open),
// none before a nested one.
EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
770
// Must end the current line before we change indention
771
mIndent += kIndentSizeList;
774
else if (type == eHTMLTag_ol) {
775
EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
776
// Must end the current line before we change indention
777
// The list's first number is pushed only while the OL stack has room;
// it defaults to 1 and may be taken from the start attribute.
if (mOLStackIndex < OLStackSize) {
778
nsAutoString startAttr;
779
PRInt32 startVal = 1;
780
if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::start, startAttr))){
782
startVal = startAttr.ToInteger(&rv);
786
mOLStack[mOLStackIndex++] = startVal;
788
mIndent += kIndentSizeList; // see ul
790
else if (type == eHTMLTag_li) {
791
if (mTagStackIndex > 1 && IsInOL()) {
792
if (mOLStackIndex > 0) {
793
nsAutoString valueAttr;
794
// A value attribute on the item overrides the list's running number.
if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::value, valueAttr))){
796
PRInt32 valueAttrVal = valueAttr.ToInteger(&rv);
797
if (NS_SUCCEEDED(rv))
798
mOLStack[mOLStackIndex-1] = valueAttrVal;
800
// This is what nsBulletFrame does for OLs:
801
// Append the current number, then advance it for the next item.
mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
804
mInIndentString.Append(PRUnichar('#'));
807
mInIndentString.Append(PRUnichar('.'));
811
// Bullet glyphs for unordered lists, cycled by (mULCount - 1) % 4.
static char bulletCharArray[] = "*o+#";
812
NS_ASSERTION(mULCount > 0, "mULCount should be greater than 0 here");
813
char bulletChar = bulletCharArray[(mULCount - 1) % 4];
814
mInIndentString.Append(PRUnichar(bulletChar));
817
mInIndentString.Append(PRUnichar(' '));
819
else if (type == eHTMLTag_dl) {
820
EnsureVerticalSpace(1);
822
else if (type == eHTMLTag_dt) {
823
EnsureVerticalSpace(0);
825
else if (type == eHTMLTag_dd) {
826
EnsureVerticalSpace(0);
827
mIndent += kIndentSizeDD;
829
else if (type == eHTMLTag_span) {
832
else if (type == eHTMLTag_blockquote) {
833
EnsureVerticalSpace(1);
836
nsresult rv = GetAttributeValue(aNode, nsHTMLAtoms::type, value);
838
// type="cite" (compared ignoring case) marks a mail-style quote block.
PRBool isInCiteBlockquote =
839
NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
841
// Remembered so that DoCloseContainer can undo the matching change.
PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
842
if (isInCiteBlockquote) {
846
mIndent += kTabSize; // Check for some maximum value?
850
// Else make sure we'll separate block level tags,
851
// even if we're about to leave, before doing any other formatting.
852
else if (IsBlockLevel(aTag)) {
853
EnsureVerticalSpace(0);
856
//////////////////////////////////////////////////////////////
857
if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
860
//////////////////////////////////////////////////////////////
861
// The rest of this routine is formatted output stuff,
862
// which we should skip if we're not formatted:
863
//////////////////////////////////////////////////////////////
866
// Remembered on a stack so that DoCloseContainer sees the same answer.
PRBool currentNodeIsConverted = IsCurrentNodeConverted(aNode);
867
PushBool(mCurrentNodeIsConverted, currentNodeIsConverted);
869
if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
870
type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
871
type == eHTMLTag_h5 || type == eHTMLTag_h6)
873
EnsureVerticalSpace(2);
874
if (mHeaderStrategy == 2) { // numbered
875
mIndent += kIndentSizeHeaders;
877
PRInt32 level = HeaderLevel(type);
878
// Increase counter for current level
879
mHeaderCounter[level]++;
880
// Reset all lower levels
883
for (i = level + 1; i <= 6; i++) {
884
mHeaderCounter[i] = 0;
889
// Build the dotted numbering prefix from the counters of levels 1..level.
for (i = 1; i <= level; i++) {
890
leadup.AppendInt(mHeaderCounter[i]);
891
leadup.Append(PRUnichar('.'));
893
leadup.Append(PRUnichar(' '));
896
else if (mHeaderStrategy == 1) { // indent increasingly
897
mIndent += kIndentSizeHeaders;
898
for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
899
// for h(x), run x-1 times
900
mIndent += kIndentIncrementHeaders;
904
else if (type == eHTMLTag_a && !currentNodeIsConverted) {
906
if (NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::href, url))
911
// Inline structure tags are rendered with ASCII markers; all but <q>
// require mStructs and a node that is not already converted.
else if (type == eHTMLTag_q) {
912
Write(NS_LITERAL_STRING("\""));
914
else if (type == eHTMLTag_sup && mStructs && !currentNodeIsConverted) {
915
Write(NS_LITERAL_STRING("^"));
917
else if (type == eHTMLTag_sub && mStructs && !currentNodeIsConverted) {
918
Write(NS_LITERAL_STRING("_"));
920
else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
921
Write(NS_LITERAL_STRING("|"));
923
else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
924
&& mStructs && !currentNodeIsConverted) {
925
Write(NS_LITERAL_STRING("*"));
927
else if ((type == eHTMLTag_em || type == eHTMLTag_i)
928
&& mStructs && !currentNodeIsConverted) {
929
Write(NS_LITERAL_STRING("/"));
931
else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
932
Write(NS_LITERAL_STRING("_"));
939
// Handles the closing of container element aTag.
// Ends an ignored subtree when its own close tag arrives, undoes the
// indentation and stack changes made in DoOpenContainer, and schedules line
// breaks via mLineBreakDue. When OutputFormatted is set it also closes the
// plain-text markup for headers, links and inline structure tags.
nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag)
941
eHTMLTags type = (eHTMLTags)aTag;
942
if (mFlags & nsIDocumentEncoder::OutputLineBreaksWhenClosingLI) {
943
if (type == eHTMLTag_li)
949
if (mFlags & nsIDocumentEncoder::OutputRaw) {
950
// Raw means raw. Don't even think about doing anything fancy
951
// here like indenting, adding line breaks or any other
952
// characters such as list item bullets, quote characters
953
// around <q>, etc. I mean it! Don't make me smack you!
958
if (mTagStackIndex > 0) {
962
// At or above the ignore index means this close tag is part of an ignored
// subtree.
if (mTagStackIndex >= mIgnoreAboveIndex) {
963
if (mTagStackIndex == mIgnoreAboveIndex) {
964
// We're dealing with the close tag whose matching
965
// open tag had set the mIgnoreAboveIndex value.
966
// Reset mIgnoreAboveIndex before discarding this tag.
967
mIgnoreAboveIndex = (PRUint32)kNotFound;
972
// End current line if we're ending a block level tag
973
if((type == eHTMLTag_body) || (type == eHTMLTag_html)) {
974
// We want the output to end with a new line,
975
// but in preformatted areas like text fields,
976
// we can't emit newlines that weren't there.
977
// So add the newline only in the case of formatted output.
978
if (mFlags & nsIDocumentEncoder::OutputFormatted) {
979
EnsureVerticalSpace(0);
984
// We won't want to do anything with these in formatted mode either,
985
// so just return now:
988
else if (type == eHTMLTag_tr) {
989
// Drop the per-row flag pushed when the <tr> was opened.
PopBool(mHasWrittenCellsForRow);
990
// Should always end a line, but get no more whitespace
991
if (mFloatingLines < 0)
993
mLineBreakDue = PR_TRUE;
995
else if ((type == eHTMLTag_li) ||
996
(type == eHTMLTag_dt)) {
997
// Items that should always end a line, but get no more whitespace
998
if (mFloatingLines < 0)
1000
mLineBreakDue = PR_TRUE;
1002
else if (type == eHTMLTag_pre) {
1004
mLineBreakDue = PR_TRUE;
1006
else if (type == eHTMLTag_ul) {
1008
mIndent -= kIndentSizeList;
1009
// mULCount is decremented as part of the test; the branch is taken when
// no list remains open after this one.
if (--mULCount + mOLStackIndex == 0) {
1011
mLineBreakDue = PR_TRUE;
1014
else if (type == eHTMLTag_ol) {
1015
FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
1016
mIndent -= kIndentSizeList;
1018
if (mULCount + mOLStackIndex == 0) {
1020
mLineBreakDue = PR_TRUE;
1023
else if (type == eHTMLTag_dl) {
1025
mLineBreakDue = PR_TRUE;
1027
else if (type == eHTMLTag_dd) {
1029
mIndent -= kIndentSizeDD;
1031
else if (type == eHTMLTag_span) {
1034
else if (type == eHTMLTag_div) {
1035
if (mFloatingLines < 0)
1037
mLineBreakDue = PR_TRUE;
1039
else if (type == eHTMLTag_blockquote) {
1040
FlushLine(); // Is this needed?
1043
// Matches the PushBool done for this blockquote in DoOpenContainer.
PRBool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
1045
if (isInCiteBlockquote) {
1049
mIndent -= kTabSize;
1053
mLineBreakDue = PR_TRUE;
1055
else if (IsBlockLevel(aTag)
1056
&& type != eHTMLTag_script
1057
&& type != eHTMLTag_doctypeDecl
1058
&& type != eHTMLTag_markupDecl) {
1059
// All other blocks get 1 vertical space after them
1060
// in formatted mode, otherwise 0.
1061
// This is hard. Sometimes 0 is a better number, but
1063
if (mFlags & nsIDocumentEncoder::OutputFormatted)
1064
EnsureVerticalSpace(1);
1066
if (mFloatingLines < 0)
1068
mLineBreakDue = PR_TRUE;
1072
//////////////////////////////////////////////////////////////
1073
if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
1076
//////////////////////////////////////////////////////////////
1077
// The rest of this routine is formatted output stuff,
1078
// which we should skip if we're not formatted:
1079
//////////////////////////////////////////////////////////////
1081
// Pop the currentConverted stack
1082
PRBool currentNodeIsConverted = PopBool(mCurrentNodeIsConverted);
1084
if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
1085
type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
1086
type == eHTMLTag_h5 || type == eHTMLTag_h6) {
1088
// Undo the header indentation applied in DoOpenContainer.
if (mHeaderStrategy) { /*numbered or indent increasingly*/
1089
mIndent -= kIndentSizeHeaders;
1091
if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
1092
for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
1093
// for h(x), run x-1 times
1094
mIndent -= kIndentIncrementHeaders;
1097
EnsureVerticalSpace(1);
1099
// Link with a remembered URL: the URL is written after the link text,
// wrapped in " <" and ">".
else if (type == eHTMLTag_a && !currentNodeIsConverted && !mURL.IsEmpty()) {
1101
temp.Assign(NS_LITERAL_STRING(" <"));
1103
temp.Append(PRUnichar('>'));
1107
// Closing markers mirror the opening markers written in DoOpenContainer.
else if (type == eHTMLTag_q) {
1108
Write(NS_LITERAL_STRING("\""));
1110
else if ((type == eHTMLTag_sup || type == eHTMLTag_sub)
1111
&& mStructs && !currentNodeIsConverted) {
1114
else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
1115
Write(NS_LITERAL_STRING("|"));
1117
else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
1118
&& mStructs && !currentNodeIsConverted) {
1119
Write(NS_LITERAL_STRING("*"));
1121
else if ((type == eHTMLTag_em || type == eHTMLTag_i)
1122
&& mStructs && !currentNodeIsConverted) {
1123
Write(NS_LITERAL_STRING("/"));
1125
else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
1126
Write(NS_LITERAL_STRING("_"));
1133
* aNode may be null when we're working with the DOM, but then mContent is
1137
// Handles a leaf node (text, entity, whitespace, newline, <br>, <hr>, <img>).
//   aNode - parser node for the leaf; may be null when serializing from the
//           DOM.
//   aTag  - the leaf's tag id (an eHTMLTags value).
//   aText - the leaf's text; used for text and entity leaves.
// Content inside <select> and <script> is not output. Text matching the
// pending link URL clears mURL so the URL is not printed twice. <hr> is drawn
// as a row of dashes in formatted mode and <img> is replaced by its
// description text.
nsPlainTextSerializer::DoAddLeaf(const nsIParserNode *aNode, PRInt32 aTag,
1138
const nsAString& aText)
1140
// If we don't want any output, just return
1146
EnsureVerticalSpace(mFloatingLines);
1148
eHTMLTags type = (eHTMLTags)aTag;
1150
// Checks the innermost open tag and its parent; each index is bounds-checked
// against mTagStackIndex first.
if ((mTagStackIndex > 1 &&
1151
mTagStack[mTagStackIndex-2] == eHTMLTag_select) ||
1152
(mTagStackIndex > 0 &&
1153
mTagStack[mTagStackIndex-1] == eHTMLTag_select)) {
1154
// Don't output the contents of SELECT elements;
1155
// Might be nice, eventually, to output just the selected element.
1156
// Read more in bug 31994.
1159
else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) {
1160
// Don't output the contents of <script> tags;
1163
else if (type == eHTMLTag_text) {
1164
/* Check, if we are in a link (symbolized with mURL containing the URL)
1165
and the text is equal to the URL. In that case we don't want to output
1166
the URL twice so we scrap the text in mURL. */
1167
if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1172
else if (type == eHTMLTag_entity) {
1173
nsIParserService* parserService =
1174
nsContentUtils::GetParserServiceWeakRef();
1175
if (parserService) {
1176
nsAutoString str(aText);
1178
// Named entities are resolved by the parser service; text starting with
// '#' is parsed as a numeric character reference instead.
parserService->HTMLConvertEntityToUnicode(str, &entity);
1181
str.First() == (PRUnichar) '#') {
1183
entity = str.ToInteger(&err, kAutoDetect); // NCR
1186
// NOTE(review): the cast keeps only a single PRUnichar, so values that do
// not fit in one are truncated.
temp.Append(PRUnichar(entity));
1190
else if (type == eHTMLTag_br) {
1191
// Another egregious editor workaround, see bug 38194:
1192
// ignore the bogus br tags that the editor sticks here and there.
1193
nsAutoString typeAttr;
1194
if (NS_FAILED(GetAttributeValue(aNode, nsHTMLAtoms::type, typeAttr))
1195
|| !typeAttr.Equals(NS_LITERAL_STRING("_moz"))) {
1196
EnsureVerticalSpace(mEmptyLines+1);
1199
else if (type == eHTMLTag_whitespace) {
1200
// The only times we want to pass along whitespace from the original
1201
// html source are if we're forced into preformatted mode via flags,
1202
// or if we're prettyprinting and we're inside a <pre>.
1203
// Otherwise, either we're collapsing to minimal text, or we're
1204
// prettyprinting to mimic the html format, and in neither case
1205
// does the formatting of the html source help us.
1206
// One exception: at the very beginning of a selection,
1207
// we want to preserve whitespace.
1208
if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
1209
(mPreFormatted && !mWrapColumn) ||
1213
// The selection test must use '&'. The earlier 'mFlags | flag' was
// non-zero for any flag value, so the "selection only" restriction
// described above never applied.
else if(!mInWhitespace ||
1215
&& (mFlags & nsIDocumentEncoder::OutputSelectionOnly))) {
1216
mInWhitespace = PR_FALSE;
1218
mInWhitespace = PR_TRUE;
1221
else if (type == eHTMLTag_newline) {
1222
if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
1223
(mPreFormatted && !mWrapColumn) ||
1225
EnsureVerticalSpace(mEmptyLines+1);
1231
else if (type == eHTMLTag_hr &&
1232
(mFlags & nsIDocumentEncoder::OutputFormatted)) {
1233
EnsureVerticalSpace(0);
1235
// Make a line of dashes as wide as the wrap width
1236
// XXX honoring percentage would be nice
1238
// Falls back to 25 dashes when no wrap column is set.
PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);
1239
while (line.Length() < width) {
1240
line.Append(PRUnichar('-'));
1244
EnsureVerticalSpace(0);
1246
else if (type == eHTMLTag_img) {
1247
/* Output (in decreasing order of preference)
1248
alt, title or nothing */
1249
// See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1250
nsAutoString imageDescription;
1251
if (NS_SUCCEEDED(GetAttributeValue(aNode,
1253
imageDescription))) {
1254
// If the alt attribute has an empty value (|alt=""|), output nothing
1256
else if (NS_SUCCEEDED(GetAttributeValue(aNode,
1259
&& !imageDescription.IsEmpty()) {
1260
// The description from this branch is wrapped in " [" and "] ".
imageDescription = NS_LITERAL_STRING(" [") +
1262
NS_LITERAL_STRING("] ");
1265
Write(imageDescription);
1273
* Adds as many newlines as necessary to get |noOfRows| empty lines
1275
* noOfRows = -1 : Being in the middle of some line of text
1276
* noOfRows = 0 : Being at the start of a line
1277
* noOfRows = n>0 : Having n empty lines before the current line.
1280
// Brings the output to at least |noOfRows| empty lines (looping while
// mEmptyLines is smaller), then clears the pending line-break state.
nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
1282
// If we have something in the indent we probably want to output
1283
// it and it's not included in the count for empty lines so we don't
1284
// realize that we should start a new line.
1285
if(noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1289
while(mEmptyLines < noOfRows) {
1292
// The request is satisfied: no break is pending and no floating lines remain.
mLineBreakDue = PR_FALSE;
1293
mFloatingLines = -1;
1297
* This empties the current line cache without adding a NEWLINE.
1298
* Should not be used if line wrapping is of importance since
1299
* this function destroys the cache information.
1301
* It will also write indentation and quotes if we believe us to be
1302
* at the start of the line.
1305
nsPlainTextSerializer::FlushLine()
1307
if(!mCurrentLine.IsEmpty()) {
1308
if(mAtFirstColumn) {
1309
OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1312
Output(mCurrentLine);
1313
mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1314
mCurrentLine.Truncate();
1315
mCurrentLineWidth = 0;
1320
* Prints the text to output to our current output device (the string mOutputString).
1321
* The only logic here is to replace non breaking spaces with a normal space since
1322
* most (all?) receivers of the result won't understand the nbsp and even be
1326
nsPlainTextSerializer::Output(nsString& aString)
1328
if (!aString.IsEmpty()) {
1329
mStartedOutput = PR_TRUE;
1332
// First, replace all nbsp characters with spaces,
1333
// which the unicode encoder won't do for us.
1334
static PRUnichar nbsp = 160;
1335
static PRUnichar space = ' ';
1336
aString.ReplaceChar(nbsp, space);
1338
mOutputString->Append(aString);
1342
* This function adds a piece of text to the current stored line. If we are
1343
* wrapping text and the stored line will become too long, a suitable
1344
* location to wrap will be found and the line that's complete will be
1348
nsPlainTextSerializer::AddToLine(const PRUnichar * aLineFragment,
1349
PRInt32 aLineFragmentLength)
1351
PRUint32 prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
1354
EnsureVerticalSpace(mFloatingLines);
1356
PRInt32 linelength = mCurrentLine.Length();
1357
if(0 == linelength) {
1358
if(0 == aLineFragmentLength) {
1359
// Nothing at all. Are you kidding me?
1363
if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1366
'>' == aLineFragment[0] ||
1367
' ' == aLineFragment[0] ||
1368
!nsCRT::strncmp(aLineFragment, NS_LITERAL_STRING("From ").get(), 5)
1370
&& mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1373
// Space stuffing a la RFC 2646 (format=flowed).
1374
mCurrentLine.Append(PRUnichar(' '));
1377
mCurrentLineWidth += GetUnicharWidth(' ');
1378
#ifdef DEBUG_wrapping
1379
NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1380
mCurrentLine.Length()) ==
1381
(PRInt32)mCurrentLineWidth,
1382
"mCurrentLineWidth and reality out of sync!");
1390
mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1392
mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
1393
aLineFragmentLength);
1394
#ifdef DEBUG_wrapping
1395
NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1396
mCurrentLine.Length()) ==
1397
(PRInt32)mCurrentLineWidth,
1398
"mCurrentLineWidth and reality out of sync!");
1402
linelength = mCurrentLine.Length();
1407
#ifdef DEBUG_wrapping
1408
NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1409
mCurrentLine.Length()) ==
1410
(PRInt32)mCurrentLineWidth,
1411
"mCurrentLineWidth and reality out of sync!");
1414
// The "+4" is to avoid wrap lines that only would be a couple
1415
// of letters too long. We give this bonus only if the
1416
// wrapcolumn is more than 20.
1417
PRUint32 bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1419
// XXX: Should calculate prefixwidth with GetUnicharStringWidth
1420
while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
1421
// Must wrap. Let's find a good place to do that.
1422
nsresult result = NS_OK;
1424
// We go from the end removing one letter at a time until
1425
// we have a reasonable width
1426
PRInt32 goodSpace = mCurrentLine.Length();
1427
PRUint32 width = mCurrentLineWidth;
1428
while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
1430
width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1435
PRBool oNeedMoreText;
1436
if (nsnull != mLineBreaker) {
1437
result = mLineBreaker->Prev(mCurrentLine.get(),
1438
mCurrentLine.Length(), goodSpace,
1439
(PRUint32 *) &goodSpace, &oNeedMoreText);
1440
if (oNeedMoreText) {
1443
else if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
1444
--goodSpace; // adjust the position since line breaker returns a position next to space
1447
// fallback if the line breaker is unavailable or failed
1448
if (nsnull == mLineBreaker || NS_FAILED(result)) {
1449
goodSpace = mWrapColumn-prefixwidth;
1450
while (goodSpace >= 0 &&
1451
!nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1456
nsAutoString restOfLine;
1457
if (goodSpace < 0) {
1458
// If we don't found a good place to break, accept long line and
1459
// try to find another place to break
1460
goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
1462
if (nsnull != mLineBreaker) {
1463
result = mLineBreaker->Next(mCurrentLine.get(),
1464
mCurrentLine.Length(), goodSpace,
1465
(PRUint32 *) &goodSpace, &oNeedMoreText);
1467
// fallback if the line breaker is unavailable or failed
1468
if (nsnull == mLineBreaker || NS_FAILED(result)) {
1469
goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
1470
while (goodSpace < linelength &&
1471
!nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1477
if((goodSpace < linelength) && (goodSpace > 0)) {
1478
// Found a place to break
1480
// -1 (trim a char at the break position)
1481
// only if the line break was a space.
1482
if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1483
mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
1486
mCurrentLine.Right(restOfLine, linelength-goodSpace);
1488
mCurrentLine.Truncate(goodSpace);
1490
mCurrentLine.Truncate();
1491
// Space stuff new line?
1492
if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1494
!restOfLine.IsEmpty()
1497
restOfLine[0] == '>' ||
1498
restOfLine[0] == ' ' ||
1499
StringBeginsWith(restOfLine, NS_LITERAL_STRING("From "))
1501
&& mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1504
// Space stuffing a la RFC 2646 (format=flowed).
1505
mCurrentLine.Append(PRUnichar(' '));
1506
//XXX doesn't seem to work correctly for ' '
1509
mCurrentLine.Append(restOfLine);
1510
mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
1511
mCurrentLine.Length());
1512
linelength = mCurrentLine.Length();
1516
// Nothing to do. Hopefully we get more data later
1517
// to use for a place to break line
1528
* Outputs the contents of mCurrentLine, and resets line specific
1529
* variables. Also adds an indentation and prefix if there is
1530
* one specified. Strips ending spaces from the line if it isn't
1534
nsPlainTextSerializer::EndLine(PRBool aSoftlinebreak)
1536
PRUint32 currentlinelength = mCurrentLine.Length();
1538
if(aSoftlinebreak && 0 == currentlinelength) {
1543
// In non-preformatted mode, remove SPACE from the end
1544
// of the line, unless we got "-- " in a format=flowed
1545
// output. "-- " is the sig delimiter by convention and
1546
// shouldn't be touched even in format=flowed
1547
// (see RFC 2646). We only check for "-- " when it's a hard line
1548
// break for obvious reasons.
1549
if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1550
(aSoftlinebreak || !mCurrentLine.Equals(NS_LITERAL_STRING("-- ")))) {
1551
// Remove SPACE:s from the end of the line.
1552
while(currentlinelength > 0 &&
1553
mCurrentLine[currentlinelength-1] == ' ') {
1554
--currentlinelength;
1556
mCurrentLine.SetLength(currentlinelength);
1559
if(aSoftlinebreak &&
1560
(mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1562
// Add the soft part of the soft linebreak (RFC 2646 4.1)
1563
// We only do this when there is no indentation since format=flowed
1564
// lines and indentation doesn't work well together.
1565
mCurrentLine.Append(PRUnichar(' '));
1568
if(aSoftlinebreak) {
1573
if(!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1580
if(mAtFirstColumn) {
1581
// If we don't have anything "real" to output we have to
1582
// make sure the indent doesn't end in a space since that
1583
// would trick a format=flowed-aware receiver.
1584
PRBool stripTrailingSpaces = mCurrentLine.IsEmpty();
1585
OutputQuotesAndIndent(stripTrailingSpaces);
1588
mCurrentLine.Append(mLineBreak);
1589
Output(mCurrentLine);
1590
mCurrentLine.Truncate();
1591
mCurrentLineWidth = 0;
1592
mAtFirstColumn=PR_TRUE;
1593
mInWhitespace=PR_TRUE;
1594
mLineBreakDue = PR_FALSE;
1595
mFloatingLines = -1;
1600
* Outputs the calculated and stored indent and text in the indentation. That is
1601
* quote chars and numbers for numbered lists and such. It will also reset any
1602
* stored text to put in the indentation after using it.
1605
nsPlainTextSerializer::OutputQuotesAndIndent(PRBool stripTrailingSpaces /* = PR_FALSE */)
1607
nsAutoString stringToOutput;
1609
// Put the mail quote "> " chars in, if appropriate:
1610
if (mCiteQuoteLevel > 0) {
1611
nsAutoString quotes;
1612
for(int i=0; i < mCiteQuoteLevel; i++) {
1613
quotes.Append(PRUnichar('>'));
1615
if (!mCurrentLine.IsEmpty()) {
1616
/* Better don't output a space here, if the line is empty,
1617
in case a recieving f=f-aware UA thinks, this were a flowed line,
1618
which it isn't - it's just empty.
1619
(Flowed lines may be joined with the following one,
1620
so the empty line may be lost completely.) */
1621
quotes.Append(PRUnichar(' '));
1623
stringToOutput = quotes;
1624
mAtFirstColumn = PR_FALSE;
1627
// Indent if necessary
1628
PRInt32 indentwidth = mIndent - mInIndentString.Length();
1630
&& (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1631
// Don't make empty lines look flowed
1633
nsAutoString spaces;
1634
for (int i=0; i < indentwidth; ++i)
1635
spaces.Append(PRUnichar(' '));
1636
stringToOutput += spaces;
1637
mAtFirstColumn = PR_FALSE;
1640
if(!mInIndentString.IsEmpty()) {
1641
stringToOutput += mInIndentString;
1642
mAtFirstColumn = PR_FALSE;
1643
mInIndentString.Truncate();
1646
if(stripTrailingSpaces) {
1647
PRInt32 lineLength = stringToOutput.Length();
1648
while(lineLength > 0 &&
1649
' ' == stringToOutput[lineLength-1]) {
1652
stringToOutput.SetLength(lineLength);
1655
if(!stringToOutput.IsEmpty()) {
1656
Output(stringToOutput);
1662
* Write a string. This is the highlevel function to use to get text output.
1663
* By using AddToLine, Output, EndLine and other functions it handles quotation,
1664
* line wrapping, indentation, whitespace compression and other things.
1667
// High-level text output. Depending on the current state, |aString| is either
// split at '\n' / '\r' without intelligent wrapping (first code path below),
// or copied and fed piecewise to AddToLine(), which handles wrapping and,
// outside preformatted mode, whitespace compression (second code path).
// NOTE(review): this chunk of the file is interleaved with bare line numbers
// and several statements between the visible lines appear to be missing
// (e.g. the declarations of |bol|, |newline|, |nextpos| and |foo|, and the
// loop heads / closing braces); confirm against a pristine copy of the file
// before changing any logic here.
nsPlainTextSerializer::Write(const nsAString& aString)
1669
#ifdef DEBUG_wrapping
1670
printf("Write(%s): wrap col = %d\n",
1671
NS_ConvertUCS2toUTF8(aString).get(), mWrapColumn);
1677
PRInt32 totLen = aString.Length();
1679
// If the string is empty, do nothing:
1680
if (totLen <= 0) return;
1682
// We have two major codepaths here. One that does preformatted text and one
1683
// that does normal formatted text. The one for preformatted text calls
1684
// Output directly while the other code path goes through AddToLine.
1685
if ((mPreFormatted && !mWrapColumn) || IsInPre()
1686
|| ((((!mQuotesPreformatted && mSpanLevel > 0) || mDontWrapAnyQuotes))
1687
&& mEmptyLines >= 0 && aString.First() == PRUnichar('>'))) {
1688
// No intelligent wrapping.
1690
// This mustn't be mixed with intelligent wrapping without clearing
1691
// the mCurrentLine buffer before!!!
1692
NS_WARN_IF_FALSE(mCurrentLine.IsEmpty(),
1693
"Mixed wrapping data and nonwrapping data on the same line");
1694
if (!mCurrentLine.IsEmpty()) {
1698
// Put the mail quote "> " chars in, if appropriate.
1699
// Have to put it in before every line.
1701
if(mAtFirstColumn) {
1702
OutputQuotesAndIndent();
1705
// Find one of '\n' or '\r' using iterators since nsAString
1706
// doesn't have the old FindCharInSet function.
1707
nsAString::const_iterator iter; aString.BeginReading(iter);
1708
nsAString::const_iterator done_searching; aString.EndReading(done_searching);
1710
// |new_newline| tracks the index being examined; |newline| receives the
// index of the first line-break character found, or stays kNotFound.
PRInt32 new_newline = bol;
1711
newline = kNotFound;
1712
while(iter != done_searching) {
1713
if('\n' == *iter || '\r' == *iter) {
1714
newline = new_newline;
1722
if(newline == kNotFound) {
1724
// No line break in the rest of the string: take everything from |bol| on.
nsAutoString stringpart(Substring(aString, bol, totLen - bol));
1725
if(!stringpart.IsEmpty()) {
1726
// Remember whether this part ended in whitespace.
PRUnichar lastchar = stringpart[stringpart.Length()-1];
1727
if((lastchar == '\t') || (lastchar == ' ') ||
1728
(lastchar == '\r') ||(lastchar == '\n')) {
1729
mInWhitespace = PR_TRUE;
1732
mInWhitespace = PR_FALSE;
1737
mAtFirstColumn = mAtFirstColumn && (totLen-bol)==0;
1741
// There is a newline
1742
nsAutoString stringpart(Substring(aString, bol, newline-bol));
1743
mInWhitespace = PR_TRUE;
1745
// and write the newline
1748
mAtFirstColumn = PR_TRUE;
1750
if('\r' == *iter && bol < totLen && '\n' == *++iter) {
1751
// There was a CRLF in the input. This used to be illegal and
1752
// stripped by the parser. Apparently not anymore. Let's skip
1759
#ifdef DEBUG_wrapping
1760
printf("No wrapping: newline is %d, totLen is %d\n",
1766
// XXX Copy necessary to use nsString methods and gain
1767
// access to underlying buffer
1768
nsAutoString str(aString);
1770
// Intelligent handling of text
1771
// If needed, strip out all "end of lines"
1772
// and multiple whitespace between words
1774
nsAutoString tempstr;
1775
const PRUnichar * offsetIntoBuffer = nsnull;
1777
while (bol < totLen) { // Loop over lines
1778
// Find a place where we may have to do whitespace compression
1779
nextpos = str.FindCharInSet(" \t\n\r", bol);
1780
#ifdef DEBUG_wrapping
1781
nsAutoString remaining;
1782
str.Right(remaining, totLen - bol);
1783
// NOTE(review): no declaration of |foo| is visible in this chunk - verify
// that DEBUG_wrapping builds still compile.
foo = ToNewCString(remaining);
1784
// printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
1785
// bol, nextpos, totLen, foo);
1786
nsMemory::Free(foo);
1789
if(nextpos == kNotFound) {
1790
// The rest of the string
1791
offsetIntoBuffer = str.get() + bol;
1792
AddToLine(offsetIntoBuffer, totLen-bol);
1794
mInWhitespace=PR_FALSE;
1797
// There's still whitespace left in the string
1798
// The guard keeps the [-1] and [1] accesses below inside the string.
if (nextpos != 0 && (nextpos + 1) < totLen) {
1799
offsetIntoBuffer = str.get() + nextpos;
1800
// skip '\n' if it is between CJ chars
1801
if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
1802
offsetIntoBuffer = str.get() + bol;
1803
AddToLine(offsetIntoBuffer, nextpos-bol);
1808
// If we're already in whitespace and not preformatted, just skip it:
1809
if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
1810
!(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1816
if(nextpos == bol) {
1817
// Note that we are in whitespace.
1818
mInWhitespace = PR_TRUE;
1819
offsetIntoBuffer = str.get() + nextpos;
1820
// Add just the single whitespace character found at |nextpos|.
AddToLine(offsetIntoBuffer, 1);
1825
mInWhitespace = PR_TRUE;
1827
offsetIntoBuffer = str.get() + bol;
1828
if(mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1829
// Preserve the real whitespace character
1831
AddToLine(offsetIntoBuffer, nextpos-bol);
1835
// Replace the whitespace with a space
1836
AddToLine(offsetIntoBuffer, nextpos-bol);
1837
AddToLine(kSpace.get(),1);
1838
bol = nextpos + 1; // Let's eat the whitespace
1841
} // Continue looping over the string
1846
* Gets the value of an attribute in a string. If the function returns
1847
* NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1850
nsPlainTextSerializer::GetAttributeValue(const nsIParserNode* aNode,
1852
nsString& aValueRet)
1855
if (NS_CONTENT_ATTR_NOT_THERE != mContent->GetAttr(kNameSpaceID_None,
1856
aName, aValueRet)) {
1862
aName->ToString(name);
1864
PRInt32 count = aNode->GetAttributeCount();
1865
for (PRInt32 i=0;i<count;i++) {
1866
const nsAString& key = aNode->GetKeyAt(i);
1867
if (key.Equals(name, nsCaseInsensitiveStringComparator())) {
1868
aValueRet = aNode->GetValueAt(i);
1874
return NS_ERROR_NOT_AVAILABLE;
1878
* Returns true, if the element was inserted by Moz' TXT->HTML converter.
1879
* In this case, we should ignore it.
1882
nsPlainTextSerializer::IsCurrentNodeConverted(const nsIParserNode* aNode)
1885
nsresult rv = GetAttributeValue(aNode, nsHTMLAtoms::kClass, value);
1886
return (NS_SUCCEEDED(rv) &&
1887
(value.EqualsIgnoreCase("moz-txt", 7) ||
1888
value.EqualsIgnoreCase("\"moz-txt", 8)));
1894
nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
1896
if (!aContent->IsContentOfType(nsIContent::eHTML)) {
1897
return eHTMLTag_unknown;
1900
nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
1903
nsresult rv = parserService->HTMLAtomTagToId(aContent->Tag(), &id);
1904
NS_ASSERTION(NS_SUCCEEDED(rv), "Can't map HTML tag to id!");
1910
* Returns true if the id represents an element of block type.
1911
* Can be used to determine if a new paragraph should be started.
1914
nsPlainTextSerializer::IsBlockLevel(PRInt32 aId)
1916
PRBool isBlock = PR_FALSE;
1918
nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
1919
if (parserService) {
1920
parserService->IsBlock(aId, isBlock);
1927
* Returns true if the id represents a container.
1930
nsPlainTextSerializer::IsContainer(PRInt32 aId)
1932
PRBool isContainer = PR_FALSE;
1934
nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
1935
if (parserService) {
1936
parserService->IsContainer(aId, isContainer);
1943
* Returns true if we currently are inside a <pre>. The check is done
1944
* by traversing the tag stack looking for <pre> until we hit a block
1945
* level tag which is assumed to override any <pre>:s below it in
1946
* the stack. To do this correctly to a 100% would require access
1947
* to style which we don't support in this converter.
1950
nsPlainTextSerializer::IsInPre()
1952
PRInt32 i = mTagStackIndex;
1954
if(mTagStack[i-1] == eHTMLTag_pre)
1956
if(IsBlockLevel(mTagStack[i-1])) {
1957
// We assume that every other block overrides a <pre>
1963
// Not a <pre> in the whole stack
1968
* This method is required only to indentify LI's inside OL.
1969
* Returns TRUE if we are inside an OL tag and FALSE otherwise.
1972
nsPlainTextSerializer::IsInOL()
1974
PRInt32 i = mTagStackIndex;
1976
if(mTagStack[i] == eHTMLTag_ol)
1978
if (mTagStack[i] == eHTMLTag_ul) {
1979
// If a UL is reached first, LI belongs the UL nested in OL.
1983
// We may reach here for orphan LI's.
1988
@return 0 = no header, 1 = h1, ..., 6 = h6
1990
// Maps an HTML tag to a header level; per the accompanying note the result
// is 0 = no header, 1 = h1, ..., 6 = h6.
// NOTE(review): the body of this function is not visible in this chunk of
// the file - confirm it against a pristine copy before relying on the above.
PRInt32 HeaderLevel(eHTMLTags aTag)
2015
* This is an implementation of GetUnicharWidth() and
2016
* GetUnicharStringWidth() as defined in
2017
* "The Single UNIX Specification, Version 2, The Open Group, 1997"
2018
* <http://www.UNIX-systems.org/online.html>
2020
* Markus Kuhn -- 2000-02-08 -- public domain
2022
* Minor alterations to fit Mozilla's data types by Daniel Bratell
2025
/* These functions define the column width of an ISO 10646 character
2028
* - The null character (U+0000) has a column width of 0.
2030
* - Other C0/C1 control characters and DEL will lead to a return
2033
* - Non-spacing and enclosing combining characters (general
2034
* category code Mn or Me in the Unicode database) have a
2035
* column width of 0.
2037
* - Spacing characters in the East Asian Wide (W) or East Asian
2038
* FullWidth (F) category as defined in Unicode Technical
2039
* Report #11 have a column width of 2.
2041
* - All remaining characters (including all printable
2042
* ISO 8859-1 and WGL4 characters, Unicode control characters,
2043
* etc.) have a column width of 1.
2045
* This implementation assumes that wchar_t characters are encoded
2049
PRInt32 GetUnicharWidth(PRUnichar ucs)
2051
/* sorted list of non-overlapping intervals of non-spacing characters */
2052
static const struct interval {
2056
{ 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
2057
{ 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
2058
{ 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
2059
{ 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
2060
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
2061
{ 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
2062
{ 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
2063
{ 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
2064
{ 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
2065
{ 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
2066
{ 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
2067
{ 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
2068
{ 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
2069
{ 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
2070
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
2071
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
2072
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
2073
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
2074
{ 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
2075
{ 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
2076
{ 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
2077
{ 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
2078
{ 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
2079
{ 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
2080
{ 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
2081
{ 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
2082
{ 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
2083
{ 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
2084
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
2085
{ 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
2086
{ 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
2089
PRInt32 max = sizeof(combining) / sizeof(struct interval) - 1;
2092
/* test for 8-bit control characters */
2095
if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
2098
/* first quick check for Latin-1 etc. characters */
2099
if (ucs < combining[0].first)
2102
/* binary search in table of non-spacing characters */
2103
while (max >= min) {
2104
mid = (min + max) / 2;
2105
if (combining[mid].last < ucs)
2107
else if (combining[mid].first > ucs)
2109
else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
2113
/* if we arrive here, ucs is not a combining or C0/C1 control character */
2115
/* fast test for majority of non-wide scripts */
2120
((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
2121
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
2122
ucs != 0x303f) || /* CJK ... Yi */
2123
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
2124
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
2125
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
2126
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
2127
(ucs >= 0xffe0 && ucs <= 0xffe6));
2131
PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n)
2133
PRInt32 w, width = 0;
2135
for (;*pwcs && n-- > 0; pwcs++)
2136
if ((w = GetUnicharWidth(*pwcs)) < 0)
2137
++width; // Taking 1 as the width of non-printable character, for bug# 94475.
2145
nsPlainTextSerializer::PreserveSelection(nsIDOMNode * aStartContainer,
2146
PRInt32 aStartOffset,
2147
nsIDOMNode * aEndContainer,