1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Netscape Public License
6
* Version 1.1 (the "License"); you may not use this file except in
7
* compliance with the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/NPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is mozilla.org code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
25
* Alternatively, the contents of this file may be used under the terms of
26
* either the GNU General Public License Version 2 or later (the "GPL"), or
27
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
* in which case the provisions of the GPL or the LGPL are applicable instead
29
* of those above. If you wish to allow use of your version of this file only
30
* under the terms of either the GPL or the LGPL, and not to allow others to
31
* use your version of this file under the terms of the NPL, indicate your
32
* decision by deleting the provisions above and replace them with the notice
33
* and other provisions required by the GPL or the LGPL. If you do not delete
34
* the provisions above, a recipient may use your version of this file under
35
* the terms of any one of the NPL, the GPL or the LGPL.
37
* ***** END LICENSE BLOCK ***** */
39
//#define __INCREMENTAL 1
41
#include "nsScanner.h"
43
#include "nsIServiceManager.h"
44
#include "nsICharsetConverterManager.h"
45
#include "nsICharsetAlias.h"
46
#include "nsReadableUtils.h"
47
#include "nsIInputStream.h"
48
#include "nsILocalFile.h"
49
#include "nsNetUtil.h"
50
#include "nsUTF8Utils.h" // for LossyConvertEncoding
54
// Class ID that SetDocumentCharset() hands to do_GetService() to obtain the
// nsICharsetAlias service.
static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
56
// Stores the terminator set in mChars and derives mFilter from it: mFilter
// starts with every bit set and has the bits of each terminator cleared.
// The ReadUntil() variants test (ch & mFilter) == 0 as a cheap pre-check
// before comparing ch against the individual characters in mChars.
// aTerminateChars is walked until a zero character is read.
nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
57
mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set
59
// Build filter that will be used to filter out characters with
60
// bits that none of the terminal chars have. This works very well
61
// because terminal chars often have only the last 4-6 bits set and
62
// normal ascii letters have bit 7 set. Other letters have even higher
66
const PRUnichar *current = aTerminateChars;
67
PRUnichar terminalChar = *current;
68
while (terminalChar) {
69
// Clear from the filter every bit this terminator has set.
mFilter &= ~terminalChar;
71
terminalChar = *current;
75
// Class ID that SetDocumentCharset() uses to obtain the
// nsICharsetConverterManager service.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
77
// HTML fragment that FillBuffer() appends to mBuffer, followed by the
// filename, on its debug path.
static const char kBadHTMLText[] ="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
78
static const char kUnorderedStringError[] = "String argument must be ordered. Don't you read API's?";
83
// Number of bytes FillBuffer() asks the input stream for per Read() call.
const int kBufsize=64;
86
MOZ_DECL_CTOR_COUNTER(nsScanner)
89
* Use this constructor if you want i/o to be based on
90
* a single string you hand in during construction.
91
* This short cut was added for Javascript.
93
* @update gess 5/12/98
94
* @param aMode represents the parser mode (nav, other)
97
// Builds a non-incremental scanner whose input is the single string
// anHTMLString.
nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
101
MOZ_COUNT_CTOR(nsScanner);
103
mTotalRead = anHTMLString.Length();
104
mSlidingBuffer = nsnull;
106
// -1 means "no non-whitespace character seen yet"; AppendToBuffer() checks
// for this value before scanning.
mFirstNonWhitespacePosition = -1;
107
AppendToBuffer(anHTMLString);
108
// NOTE(review): mSlidingBuffer is dereferenced unconditionally, so this
// relies on the AppendToBuffer() call above having created it -- confirm.
mSlidingBuffer->BeginReading(mCurrentPosition);
109
mMarkPosition = mCurrentPosition;
110
mIncremental = PR_FALSE;
112
// Presumably set first so that the aSource < mCharsetSource check inside
// SetDocumentCharset() does not reject the initial charset -- confirm.
mCharsetSource = kCharsetUninitialized;
113
SetDocumentCharset(aCharset, aSource);
117
* Use this constructor if you want i/o to be based on strings
118
* the scanner receives. If you pass a null filename, you
119
* can still provide data to the scanner via append.
121
* @update gess 5/12/98
122
* @param aFilename -- path handed to NS_NewLocalFile to open the input file
125
// Builds an incremental scanner that starts with no buffered data; the
// visible code opens aFilename as a local file input stream stored in
// mInputStream.
nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,
126
const nsACString& aCharset, PRInt32 aSource)
127
: mFilename(aFilename), mParser(nsnull)
129
MOZ_COUNT_CTOR(nsScanner);
131
mSlidingBuffer = nsnull;
133
// XXX This is a big hack.  We need to initialize the iterators to something.
134
// What matters is that mCurrentPosition == mEndPosition, so that our methods
135
// believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
136
// so that we have some hope of catching null pointer dereferences associated
137
// with this hack. --darin
138
memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
139
mMarkPosition = mCurrentPosition;
140
mEndPosition = mCurrentPosition;
142
mIncremental = PR_TRUE;
143
mFirstNonWhitespacePosition = -1;
148
nsCOMPtr<nsILocalFile> file;
149
nsCOMPtr<nsIInputStream> fileStream;
151
// NOTE(review): the results of both NS_New* calls are not captured here, so
// a failed open is not detected at this point.
NS_NewLocalFile(aFilename, PR_TRUE, getter_AddRefs(file));
153
NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), file);
157
mCharsetSource = kCharsetUninitialized;
158
SetDocumentCharset(aCharset, aSource);
162
* Use this constructor if you want i/o to be stream based.
164
* @update gess 5/12/98
166
* @param assumeOwnership --
167
* @param aFilename --
170
// Builds a non-incremental scanner that reads from the caller-supplied
// stream aStream; no data is buffered yet.
nsScanner::nsScanner(const nsAString& aFilename, nsIInputStream* aStream,
171
const nsACString& aCharset, PRInt32 aSource)
172
: mFilename(aFilename), mParser(nsnull)
174
MOZ_COUNT_CTOR(nsScanner);
176
mSlidingBuffer = nsnull;
178
// XXX This is a big hack.  We need to initialize the iterators to something.
179
// What matters is that mCurrentPosition == mEndPosition, so that our methods
180
// believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
181
// so that we have some hope of catching null pointer dereferences associated
182
// with this hack. --darin
183
memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
184
mMarkPosition = mCurrentPosition;
185
mEndPosition = mCurrentPosition;
187
mIncremental = PR_FALSE;
188
mFirstNonWhitespacePosition = -1;
191
// The stream is stored as given; the destructor later calls Close() on it.
mInputStream=aStream;
193
mCharsetSource = kCharsetUninitialized;
194
SetDocumentCharset(aCharset, aSource);
198
// Changes the document charset to aCharset, subject to the source-priority
// check on aSource, and installs a matching unicode decoder in
// mUnicodeDecoder. If the alias service reports aCharset equal to the
// current mCharset, NS_OK is returned without changing anything.
nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource) {
200
nsresult res = NS_OK;
202
if( aSource < mCharsetSource) // priority is lower than the current one , just
205
nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res));
206
NS_ASSERTION( nsnull != calias, "cannot find charset alias");
207
if( NS_SUCCEEDED(res) && (nsnull != calias))
209
PRBool same = PR_FALSE;
210
// Compare via the alias service rather than by raw string comparison.
res = calias->Equals(aCharset, mCharset, &same);
211
if(NS_SUCCEEDED(res) && same)
213
return NS_OK; // no difference, don't change it
215
// different, need to change it
216
nsCAutoString charsetName;
217
res = calias->GetPreferred(aCharset, charsetName);
219
// The ISO-8859-1 fallback applies only while no charset has been set yet.
if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
221
// failed - unknown alias , fallback to ISO-8859-1
222
charsetName.Assign(NS_LITERAL_CSTRING("ISO-8859-1"));
224
mCharset = charsetName;
225
mCharsetSource = aSource;
227
nsCOMPtr<nsICharsetConverterManager> ccm =
228
do_GetService(kCharsetConverterManagerCID, &res);
229
if(NS_SUCCEEDED(res) && (nsnull != ccm))
231
nsIUnicodeDecoder * decoder = nsnull;
232
res = ccm->GetUnicodeDecoderRaw(mCharset.get(), &decoder);
233
if(NS_SUCCEEDED(res) && (nsnull != decoder))
235
// Drop the reference to the previous decoder before storing the new raw
// pointer; the destructor releases whichever decoder is held last.
NS_IF_RELEASE(mUnicodeDecoder);
237
mUnicodeDecoder = decoder;
248
* @update gess 3/25/98
252
// Frees the sliding buffer (allocated with new in AppendToBuffer()), closes
// the input stream and releases the unicode decoder.
nsScanner::~nsScanner() {
254
if (mSlidingBuffer) {
255
delete mSlidingBuffer;
258
MOZ_COUNT_DTOR(nsScanner);
261
mInputStream->Close();
265
// Balances the reference taken via GetUnicodeDecoderRaw() in
// SetDocumentCharset().
NS_IF_RELEASE(mUnicodeDecoder);
269
* Resets current offset position of input stream to marked position.
270
* This allows us to back up to this point if the need should arise,
271
* such as when tokenization gets interrupted.
272
* NOTE: IT IS REALLY BAD FORM TO CALL RewindToMark WITHOUT CALLING MARK FIRST!
274
* @update gess 5/12/98
278
void nsScanner::RewindToMark(void){
279
mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
280
mCurrentPosition = mMarkPosition;
285
* Records current offset position in input stream. This allows us
286
* to back up to this point if the need should arise, such as when
287
* tokenization gets interrupted.
289
* @update gess 7/29/98
293
// Records the current position as the mark. Does nothing until a sliding
// buffer exists.
void nsScanner::Mark() {
294
if (mSlidingBuffer) {
295
// Presumably releases the buffered data in front of the current position
// -- confirm against nsScannerString::DiscardPrefix.
mSlidingBuffer->DiscardPrefix(mCurrentPosition);
296
// Re-fetch the current position from the start of the buffer, then use it
// as the mark.
mSlidingBuffer->BeginReading(mCurrentPosition);
297
mMarkPosition = mCurrentPosition;
303
* Insert data to our underlying input buffer as
304
* if it were read from an input stream.
306
* @update harishd 01/12/99
309
// Inserts aBuffer into the sliding buffer at the current position, refreshes
// the current and end iterators, and adds the inserted length to both
// mCountRemaining and mTotalRead.
// NOTE(review): mSlidingBuffer is dereferenced without a null check, unlike
// Mark() and SetPosition() -- confirm callers only use this after data has
// been appended.
PRBool nsScanner::UngetReadable(const nsAString& aBuffer) {
311
mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
312
mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
313
mSlidingBuffer->EndReading(mEndPosition);
315
PRUint32 length = aBuffer.Length();
316
mCountRemaining += length; // Ref. bug 117441
317
mTotalRead += length;
322
* Append data to our underlying input buffer as
323
* if it were read from an input stream.
328
// Appends already-decoded text: adds its length to mTotalRead and passes it
// to AppendToBuffer().
nsresult nsScanner::Append(const nsAString& aBuffer) {
330
mTotalRead += aBuffer.Length();
331
AppendToBuffer(aBuffer);
338
* @update gess 5/21/98
342
// Appends aLen raw bytes. With a decoder set, the bytes are converted into a
// newly allocated scanner buffer, and bytes the decoder rejects are replaced
// by U+FFFD before conversion is retried. Without a decoder, the bytes are
// passed to AppendASCIItoBuffer().
// Returns NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be allocated.
nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
343
nsIRequest *aRequest)
346
PRUnichar *unichars, *start;
347
if(mUnicodeDecoder) {
348
PRInt32 unicharBufLen = 0;
349
// NOTE(review): the result code of GetMaxLength() is not checked.
mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
350
// Output buffer is sized from the decoder's answer plus one extra unit.
nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
351
NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
352
start = unichars = buffer->DataStart();
354
// totalChars counts the PRUnichars written so far across all attempts.
PRInt32 totalChars = 0;
355
PRInt32 unicharLength = unicharBufLen;
357
// Convert() receives the lengths by pointer (&srcLength, &unicharLength);
// the code below reads both back after the call.
PRInt32 srcLength = aLen;
358
res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
360
totalChars += unicharLength;
361
// Continuation of failure case
363
// if we failed, we consume one byte, replace it with U+FFFD
364
// and try the conversion again.
365
unichars[unicharLength++] = (PRUnichar)0xFFFD;
366
unichars = unichars + unicharLength;
367
// Remaining output capacity, after also counting the U+FFFD just written.
unicharLength = unicharBufLen - (++totalChars);
369
mUnicodeDecoder->Reset();
371
if(((PRUint32) (srcLength + 1)) > aLen) {
378
aBuffer += srcLength;
381
} while (NS_FAILED(res) && (aLen > 0));
383
buffer->SetDataLength(totalChars);
384
AppendToBuffer(buffer, aRequest);
385
mTotalRead += totalChars;
387
// Don't propagate return code of unicode decoder
388
// since it doesn't reflect on our success or failure
393
AppendASCIItoBuffer(aBuffer, aLen, aRequest);
402
* Grab data from underlying stream.
407
// Reads up to kBufsize bytes from mInputStream and appends them to the
// scanner's buffer; also contains a debug path that emits an HTML error
// snippet naming the file.
nsresult nsScanner::FillBuffer(void) {
408
nsresult result=NS_OK;
412
//This is DEBUG code!!!!!! XXX DEBUG XXX
413
//If you're here, it means someone tried to load a
414
//non-existent document. So as a favor, we emit a
415
//little bit of HTML explaining the error.
417
mBuffer.Append((const char*)kBadHTMLText);
418
mBuffer.Append(mFilename);
419
mTotalRead+=mBuffer.Length();
427
// One spare byte beyond the kBufsize bytes requested from the stream.
char buf[kBufsize+1];
430
// XXX use ReadSegments to avoid extra buffer copy? --darin
432
result = mInputStream->Read(buf, kBufsize, &numread);
437
// Append only when the read succeeded and returned data. The bytes go
// through AppendASCIItoBuffer(), i.e. not through mUnicodeDecoder.
if((0<numread) && (0==result)) {
438
AppendASCIItoBuffer(buf, numread, nsnull);
447
* determine if the scanner has reached EOF
449
* @update gess 5/12/98
451
* @return 0=!eof 1=eof
453
// Reports whether the scanner is out of data; the visible code tries
// FillBuffer() and then checks whether the sliding buffer is empty.
nsresult nsScanner::Eof() {
454
nsresult theError=NS_OK;
456
// No sliding buffer means nothing has been appended yet.
if (!mSlidingBuffer) {
460
theError=FillBuffer();
462
if(NS_OK==theError) {
463
// A successful fill that still leaves a zero-length buffer.
if (0==(PRUint32)mSlidingBuffer->Length()) {
472
* retrieve next char from scanners internal input stream
474
* @update gess 3/25/98
476
* @return error code reflecting read status
478
// Reads the character at the current position into aChar and advances the
// current position by one.
nsresult nsScanner::GetChar(PRUnichar& aChar) {
479
nsresult result=NS_OK;
482
// No sliding buffer means nothing has been appended yet.
if (!mSlidingBuffer) {
486
// Current position has reached the end of the buffered data.
if (mCurrentPosition == mEndPosition) {
491
aChar=*mCurrentPosition++;
499
* peek ahead to consume next char from scanner's internal
502
* @update gess 3/25/98
506
// Looks at a character without consuming it: the current position is not
// modified by any visible statement. aOffset selects a character that many
// positions ahead of the current one.
nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) {
507
nsresult result=NS_OK;
510
if (!mSlidingBuffer) {
514
if (mCurrentPosition == mEndPosition) {
520
// Loops while fewer than aOffset + 1 characters remain and no error has
// occurred.
while ((NS_OK == result) && (mCountRemaining <= aOffset)) {
524
if (NS_OK == result) {
525
// Work on a copy so the scanner's own position is left alone.
nsScannerIterator pos = mCurrentPosition;
526
pos.advance(aOffset);
531
aChar=*mCurrentPosition;
538
// Copies a run of upcoming characters into aStr without consuming them: the
// range starts aOffset characters past the current position and is aNumChars
// long in the visible path.
nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset)
540
if (!mSlidingBuffer) {
544
if (mCurrentPosition == mEndPosition) {
548
nsScannerIterator start, end;
550
start = mCurrentPosition;
552
// Offset lies at or beyond the last buffered character.
if (mCountRemaining <= aOffset) {
557
start.advance(aOffset);
560
// Fewer characters remain than offset plus requested count.
if (mCountRemaining < PRUint32(aNumChars + aOffset)) {
565
end.advance(aNumChars);
568
CopyUnicodeTo(start, end, aStr);
575
* Skip whitespace on scanner input stream
577
* @update gess 3/25/98
579
* @return error status
581
// Advances past whitespace starting at the current position, incrementing
// aNewlinesSkipped as line breaks are passed, then commits the new position
// with SetPosition().
nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) {
583
if (!mSlidingBuffer) {
587
PRUnichar theChar = 0;
588
nsresult result = Peek(theChar);
590
if (result == kEOF) {
591
// XXX why wouldn't Eof() return kEOF?? --darin
595
// Scan with a local iterator; the scanner position is only updated by the
// SetPosition() call after the loop.
nsScannerIterator current = mCurrentPosition;
596
PRBool done = PR_FALSE;
597
PRBool skipped = PR_FALSE;
599
while (!done && current != mEndPosition) {
602
case '\r': ++aNewlinesSkipped;
608
PRUnichar thePrevChar = theChar;
609
// '\0' stands in for "no next character" at the end of the buffer.
theChar = (++current != mEndPosition) ? *current : '\0';
610
// A CR/LF or LF/CR pair is consumed as a single line break.
if ((thePrevChar == '\r' && theChar == '\n') ||
611
(thePrevChar == '\n' && theChar == '\r')) {
612
theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
623
SetPosition(current);
624
if (current == mEndPosition) {
633
* Skip over chars as long as they equal given char
635
* @update gess 3/25/98
639
// Skips over characters equal to aSkipChar. The visible code loops for as
// long as the running result stays NS_OK.
nsresult nsScanner::SkipOver(PRUnichar aSkipChar){
641
if (!mSlidingBuffer) {
646
nsresult result=NS_OK;
648
while(NS_OK==result) {
650
if(NS_OK == result) {
663
* Skip over chars as long as they're in aSkipSet
665
* @update gess 3/25/98
666
* @param aSkipSet is an ordered string.
669
// Skips over characters contained in aSkipSet: each iteration peeks at the
// next character and looks it up in the set with FindChar().
nsresult nsScanner::SkipOver(nsString& aSkipSet){
671
if (!mSlidingBuffer) {
676
nsresult result=NS_OK;
678
while(NS_OK==result) {
679
result=Peek(theChar);
680
if(NS_OK == result) {
681
// Position of the peeked character within the skip set.
PRInt32 pos=aSkipSet.FindChar(theChar);
695
* Skip over chars until they're in aValidSet
697
* @update gess 3/25/98
698
* @param aValid set is an ordered string that
699
* contains chars you're looking for
702
// Skips ahead until a character contained in aValidSet is reached; each
// character is looked up in the set with FindChar().
nsresult nsScanner::SkipTo(nsString& aValidSet){
703
if (!mSlidingBuffer) {
708
nsresult result=NS_OK;
710
while(NS_OK==result) {
712
if(NS_OK == result) {
713
PRInt32 pos=aValidSet.FindChar(ch);
725
// Checks where the first NUL character of aString sits. Every call to this
// helper visible in the file is commented out.
void DoErrTest(nsString& aString) {
726
PRInt32 pos=aString.FindChar(0);
728
// True when the first NUL is not at the last index of the string.
if(aString.Length()-1!=pos) {
733
// nsCString overload of the same check: locates the first NUL character of
// aString.
void DoErrTest(nsCString& aString) {
734
PRInt32 pos=aString.FindChar(0);
736
// True when the first NUL is not at the last index of the string.
if(aString.Length()-1!=pos) {
743
* Skip over chars as long as they're in aValidSet
745
* @update gess 3/25/98
746
* @param aValidSet is an ordered string containing the
747
* characters you want to skip
750
// Placeholder: the only visible statement reports that the method is not
// yet implemented.
nsresult nsScanner::SkipPast(nsString& aValidSet){
751
NS_NOTYETIMPLEMENTED("Error: SkipPast not yet implemented.");
756
* Consume characters for as long as they are valid identifier characters
758
* @update gess 3/25/98
759
* @param aString - receives new data from stream
760
* @param allowPunct - If set ignores ':','-','_','.'
763
// Scans an identifier starting at the current position and copies it into
// aString with CopyUnicodeTo(). ASCII letters and digits are identifier
// characters in the visible code.
nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
765
if (!mSlidingBuffer) {
770
nsresult result=Peek(theChar);
771
nsScannerIterator current, end;
772
PRBool found=PR_FALSE;
774
current = mCurrentPosition;
777
while(current != end) {
789
// ASCII alphanumerics count as identifier characters.
found = ('a'<=theChar && theChar<='z') ||
790
('A'<=theChar && theChar<='Z') ||
791
('0'<=theChar && theChar<='9');
796
// If the current character isn't a valid character for
797
// the identifier, we're done. Copy the results into
798
// the string passed in.
799
CopyUnicodeTo(mCurrentPosition, current, aString);
805
// Drop NULs on the floor since nobody really likes them.
806
while (current != end && !*current) {
810
// Commit the scanned-to position as the scanner's current position.
SetPosition(current);
811
if (current == end) {
815
//DoErrTest(aString);
821
* Consume characters for as long as they are valid identifier characters
823
* @update gess 3/25/98
824
* @param aString - receives new data from stream
825
* @param allowPunct - If set ignores ':','-','_','.'
828
// Scans an identifier starting at the current position and appends it to
// aString with AppendUnicodeTo(); GetIdentifier() uses CopyUnicodeTo()
// instead. ASCII letters and digits are identifier characters in the
// visible code.
nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
830
if (!mSlidingBuffer) {
835
nsresult result=Peek(theChar);
836
nsScannerIterator origin, current, end;
837
PRBool found=PR_FALSE;
839
// origin remembers where the scan started; current is the moving cursor.
origin = mCurrentPosition;
840
current = mCurrentPosition;
843
while(current != end) {
855
found = ('a'<=theChar && theChar<='z') ||
856
('A'<=theChar && theChar<='Z') ||
857
('0'<=theChar && theChar<='9');
862
AppendUnicodeTo(mCurrentPosition, current, aString);
869
// Drop NULs on the floor since nobody really likes them
870
while (current != end && !*current) {
874
SetPosition(current);
875
// The scan ran to the end of the buffered data: append everything from
// origin onward.
if (current == end) {
876
AppendUnicodeTo(origin, current, aString);
880
//DoErrTest(aString);
885
// Iterator-returning variant of ReadIdentifier: the visible code sets aStart
// to the position where the scan began and does not copy characters into a
// string.
nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart,
886
nsScannerIterator& aEnd,
889
if (!mSlidingBuffer) {
894
nsresult result=Peek(theChar);
895
nsScannerIterator origin, current, end;
896
PRBool found=PR_FALSE;
898
origin = mCurrentPosition;
899
current = mCurrentPosition;
902
while(current != end) {
914
// Character classes tested: lowercase, uppercase, then digits.
if(('a'<=theChar) && (theChar<='z'))
916
else if(('A'<=theChar) && (theChar<='Z'))
918
else if(('0'<=theChar) && (theChar<='9'))
924
aStart = mCurrentPosition;
932
SetPosition(current);
933
if (current == end) {
939
//DoErrTest(aString);
947
* @param aString - should contain digits
950
// Reads a run of digits starting at the current position and appends it to
// aString. aBase must be 10 or 16 (asserted); for base 16 the letters A-F
// and a-f also count as digits.
nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
952
if (!mSlidingBuffer) {
956
NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
959
nsresult result=Peek(theChar);
960
nsScannerIterator origin, current, end;
962
origin = mCurrentPosition;
966
PRBool done = PR_FALSE;
967
while(current != end) {
970
// done becomes true when theChar is not a decimal digit and, for base 16,
// not a hex letter either.
done = (theChar < '0' || theChar > '9') &&
971
((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
972
(theChar < 'a' || theChar > 'f')
975
AppendUnicodeTo(origin, current, aString);
982
SetPosition(current);
983
// The scan ran to the end of the buffered data.
if (current == end) {
984
AppendUnicodeTo(origin, current, aString);
988
//DoErrTest(aString);
994
* Consume characters until you find the terminal char
996
* @update gess 3/25/98
997
* @param aString receives new data from stream
998
* @param aNewlinesSkipped is incremented as newlines are consumed
1001
// Consumes whitespace starting at the current position, appends the consumed
// range to aString, and increments aNewlinesSkipped as line breaks are
// passed.
nsresult nsScanner::ReadWhitespace(nsString& aString,
1002
PRInt32& aNewlinesSkipped) {
1004
if (!mSlidingBuffer) {
1008
PRUnichar theChar = 0;
1009
nsresult result = Peek(theChar);
1011
if (result == kEOF) {
1015
nsScannerIterator origin, current, end;
1016
PRBool done = PR_FALSE;
1018
origin = mCurrentPosition;
1022
while(!done && current != end) {
1025
case '\r': ++aNewlinesSkipped;
1030
PRUnichar thePrevChar = theChar;
1031
// '\0' stands in for "no next character" at the end of the buffer.
theChar = (++current != end) ? *current : '\0';
1032
// A CR/LF or LF/CR pair is consumed as a single line break.
if ((thePrevChar == '\r' && theChar == '\n') ||
1033
(thePrevChar == '\n' && theChar == '\r')) {
1034
theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
1040
AppendUnicodeTo(origin, current, aString);
1045
SetPosition(current);
1046
// The scan ran to the end of the buffered data.
if (current == end) {
1047
AppendUnicodeTo(origin, current, aString);
1054
// Iterator-returning variant of ReadWhitespace: it has the same scanning
// loop, but no visible statement copies characters into a string.
// aNewlinesSkipped is incremented as line breaks are passed.
nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,
1055
nsScannerIterator& aEnd,
1056
PRInt32& aNewlinesSkipped) {
1058
if (!mSlidingBuffer) {
1062
PRUnichar theChar = 0;
1063
nsresult result = Peek(theChar);
1065
if (result == kEOF) {
1069
nsScannerIterator origin, current, end;
1070
PRBool done = PR_FALSE;
1072
origin = mCurrentPosition;
1076
while(!done && current != end) {
1079
case '\r': ++aNewlinesSkipped;
1084
PRUnichar thePrevChar = theChar;
1085
// '\0' stands in for "no next character" at the end of the buffer.
theChar = (++current != end) ? *current : '\0';
1086
// A CR/LF or LF/CR pair is consumed as a single line break.
if ((thePrevChar == '\r' && theChar == '\n') ||
1087
(thePrevChar == '\n' && theChar == '\r')) {
1088
theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
1100
SetPosition(current);
1101
if (current == end) {
1111
* Consume chars as long as they are <i>in</i> the
1112
* given validSet of input chars.
1114
* @update gess 3/25/98
1115
* @param aString will contain the result of this method
1116
* @param aValidSet is an ordered string that contains the
1118
* @return error code
1120
// Consumes characters for as long as they are found in aValidSet and appends
// the consumed range to aString.
nsresult nsScanner::ReadWhile(nsString& aString,
1121
nsString& aValidSet,
1122
PRBool addTerminal){
1124
if (!mSlidingBuffer) {
1128
PRUnichar theChar=0;
1129
nsresult result=Peek(theChar);
1130
nsScannerIterator origin, current, end;
1132
origin = mCurrentPosition;
1136
while(current != end) {
1140
PRInt32 pos=aValidSet.FindChar(theChar);
1141
// First character that is not in the valid set ends the run.
if(kNotFound==pos) {
1144
AppendUnicodeTo(origin, current, aString);
1151
SetPosition(current);
1152
// The scan ran to the end of the buffered data.
if (current == end) {
1153
AppendUnicodeTo(origin, current, aString);
1157
//DoErrTest(aString);
1163
// Compiled only when MOZ_STANDALONE_COMPOSER is defined.
#ifdef MOZ_STANDALONE_COMPOSER
1164
// Walks the buffered input from the current position and compares
// characters against the set in aEndCondition.mChars, using
// aEndCondition.mFilter as a cheap pre-check.
nsresult nsScanner::Checks(const nsReadEndCondition& aEndCondition)
1166
if (!mSlidingBuffer) {
1170
nsScannerIterator origin, current;
1171
const PRUnichar* setstart = aEndCondition.mChars;
1172
const PRUnichar* setcurrent = setstart;
1174
origin = mCurrentPosition;
1177
PRUnichar theChar = 0;
1178
nsresult result = Peek(theChar);
1180
if (result == kEOF) {
1184
while (current != mEndPosition) {
1185
// Filter out completely wrong characters
1186
// Check if all bits are in the required area
1187
if(!(theChar & aEndCondition.mFilter)) {
1188
// They were. Do a thorough check.
1189
if (*setcurrent == theChar)
1209
* Consume characters until you encounter one contained in given
1212
* @update gess 3/25/98
1213
* @param aString will contain the result of this method
1214
* @param aTerminalSet is an ordered string that contains
1215
* the set of INVALID characters
1216
* @return error code
1218
// Consumes characters until one from aEndCondition's terminator set is
// found (or the buffered data ends) and appends the consumed range to
// aString. aAllTerminators selects a different matching path that steps
// through the set with setcurrent instead of searching the whole set per
// character.
nsresult nsScanner::ReadUntil(nsAString& aString,
1219
const nsReadEndCondition& aEndCondition,
1221
PRBool aAllTerminators)
1223
if (!mSlidingBuffer) {
1227
nsScannerIterator origin, current;
1228
const PRUnichar* setstart = aEndCondition.mChars;
1229
const PRUnichar* setcurrent = setstart;
1231
origin = mCurrentPosition;
1234
PRUnichar theChar=0;
1235
nsresult result=Peek(theChar);
1237
if (result == kEOF) {
1241
while (current != mEndPosition) {
1242
// Filter out completely wrong characters
1243
// Check if all bits are in the required area
1244
if(!(theChar & aEndCondition.mFilter)) {
1245
// They were. Do a thorough check.
1247
if (aAllTerminators) {
1248
// Compare against the terminator setcurrent points at, if any.
if (*setcurrent && *setcurrent == theChar)
1251
// Restart from the first terminator in the set and compare again.
setcurrent = setstart;
1252
if (*setcurrent && *setcurrent == theChar)
1259
// Search the whole terminator set for theChar.
setcurrent = setstart;
1260
while (*setcurrent) {
1261
if (*setcurrent == theChar) {
1273
// If we are here, we didn't find any terminator in the string and
1274
// current = mEndPosition
1275
SetPosition(current);
1276
AppendUnicodeTo(origin, current, aString);
1282
AppendUnicodeTo(origin, current, aString);
1283
SetPosition(current);
1285
//DoErrTest(aString);
1290
// Iterator-returning variant of ReadUntil: scans for a character from
// aEndCondition's terminator set. No visible statement copies characters
// into a string; on kEOF from the initial Peek(), aStart and aEnd are both
// set to the same position.
nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,
1291
nsScannerIterator& aEnd,
1292
const nsReadEndCondition &aEndCondition,
1295
if (!mSlidingBuffer) {
1299
nsScannerIterator origin, current;
1300
const PRUnichar* setstart = aEndCondition.mChars;
1301
const PRUnichar* setcurrent;
1303
origin = mCurrentPosition;
1306
PRUnichar theChar=0;
1307
nsresult result=Peek(theChar);
1309
if (result == kEOF) {
1310
// Empty range: start and end coincide.
aStart = aEnd = current;
1314
while (current != mEndPosition) {
1315
// Filter out completely wrong characters
1316
// Check if all bits are in the required area
1317
if(!(theChar & aEndCondition.mFilter)) {
1318
// They were. Do a thorough check.
1319
setcurrent = setstart;
1320
while (*setcurrent) {
1321
if (*setcurrent == theChar) {
1332
// If we are here, we didn't find any terminator in the string and
1333
// current = mEndPosition
1334
SetPosition(current);
1344
SetPosition(current);
1350
* Consumes chars until you see the given terminalChar
1352
* @update gess 3/25/98
1354
* @return error code
1356
// Consumes characters until aTerminalChar is found (or the buffered data
// ends) and appends the consumed range to aString.
nsresult nsScanner::ReadUntil(nsAString& aString,
1357
PRUnichar aTerminalChar,
1360
if (!mSlidingBuffer) {
1364
nsScannerIterator origin, current;
1366
origin = mCurrentPosition;
1372
while (current != mEndPosition) {
1373
// Terminator found: append what was scanned and commit the position.
if (aTerminalChar == theChar) {
1376
AppendUnicodeTo(origin, current, aString);
1377
SetPosition(current);
1384
// If we are here, we didn't find any terminator in the string and
1385
// current = mEndPosition
1386
AppendUnicodeTo(origin, current, aString);
1387
SetPosition(current);
1392
// Rebinds aSubstring to the range [aStart, aEnd) of the sliding buffer.
// NOTE(review): mSlidingBuffer is dereferenced without a null check --
// confirm callers only pass iterators obtained after data was appended.
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
1394
aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
1397
// Copies the scanner's current read position into aPosition.
void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
1399
aPosition = mCurrentPosition;
1402
// Copies the end-of-buffered-data position into aPosition.
void nsScanner::EndReading(nsScannerIterator& aPosition)
1404
aPosition = mEndPosition;
1407
// Moves the current position to aPosition and adjusts mCountRemaining by the
// distance moved. Does nothing when there is no sliding buffer.
// With aTerminate set, reaching the end position also moves the mark there
// and calls DiscardPrefix() on the buffer.
void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse)
1409
if (mSlidingBuffer) {
1411
// Moving backwards: more characters remain. (Presumably the aReverse
// branch -- confirm.)
mCountRemaining += (Distance(aPosition, mCurrentPosition));
1414
// Moving forwards: fewer characters remain.
mCountRemaining -= (Distance(mCurrentPosition, aPosition));
1416
mCurrentPosition = aPosition;
1417
if (aTerminate && (mCurrentPosition == mEndPosition)) {
1418
mMarkPosition = mCurrentPosition;
1419
mSlidingBuffer->DiscardPrefix(mCurrentPosition);
1424
// Overwrites the character at aPosition in the sliding buffer with aChar;
// does nothing when there is no sliding buffer.
void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
1427
if (mSlidingBuffer) {
1428
mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
1432
// Appends aBuf to the sliding buffer, creating the buffer on the first call.
// It then refreshes the position iterators and mCountRemaining and, until
// one has been found, looks for the first non-whitespace character.
// When parser data listeners are registered and the parser's DataAdded()
// call fails, the data is not appended.
void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
1433
nsIRequest *aRequest)
1435
if (nsParser::sParserDataListeners && mParser &&
1436
NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
1437
aBuf->DataEnd()), aRequest))) {
1438
// Don't actually append on failure.
1443
// First append: create the sliding buffer around aBuf and initialise the
// current, mark and end positions plus the remaining count.
if (!mSlidingBuffer) {
1444
mSlidingBuffer = new nsScannerString(aBuf);
1445
mSlidingBuffer->BeginReading(mCurrentPosition);
1446
mMarkPosition = mCurrentPosition;
1447
mSlidingBuffer->EndReading(mEndPosition);
1448
mCountRemaining = aBuf->DataLength();
1451
// Later appends: add aBuf to the existing buffer.
mSlidingBuffer->AppendBuffer(aBuf);
1452
// The scanner had consumed everything before this append; re-fetch the
// current position from the buffer.
if (mCurrentPosition == mEndPosition) {
1453
mSlidingBuffer->BeginReading(mCurrentPosition);
1455
mSlidingBuffer->EndReading(mEndPosition);
1456
mCountRemaining += aBuf->DataLength();
1459
// -1 means no non-whitespace character has been recorded yet.
if (mFirstNonWhitespacePosition == -1) {
1460
nsScannerIterator iter(mCurrentPosition);
1461
nsScannerIterator end(mEndPosition);
1463
while (iter != end) {
1464
if (!nsCRT::IsAsciiSpace(*iter)) {
1465
// Stored as a distance from the current position.
mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
1475
// Converts aLen bytes of aData to PRUnichar with LossyConvertEncoding, into
// a newly allocated scanner buffer, then appends that buffer via
// AppendToBuffer().
void nsScanner::AppendASCIItoBuffer(const char* aData, PRUint32 aLen,
1476
nsIRequest *aRequest)
1478
nsScannerString::Buffer* buf = nsScannerString::AllocBuffer(aLen);
1481
LossyConvertEncoding<char, PRUnichar> converter(buf->DataStart());
1482
converter.write(aData, aLen);
1483
// NOTE(review): the buffer was requested with AllocBuffer(aLen); confirm
// AllocBuffer leaves room for this terminator.
converter.write_terminator();
1484
AppendToBuffer(buf, aRequest);
1489
* call this to copy bytes out of the scanner that have not yet been consumed
1490
* by the tokenization process.
1492
* @update gess 5/12/98
1493
* @param aCopyBuffer is where the scanner buffer will be copied to
1496
// Copies not-yet-consumed data into aCopyBuffer: the copied range starts at
// the current read position.
void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
1497
nsScannerIterator start, end;
1498
start = mCurrentPosition;
1501
CopyUnicodeTo(start, end, aCopyBuffer);
1505
* Retrieve the name of the file that the scanner is reading from.
1506
* In some cases, it's just a given name, because the scanner isn't
1507
* really reading from a file.
1509
* @update gess 5/12/98
1512
nsString& nsScanner::GetFilename(void) {
1517
* Conduct self test. Actually, selftesting for this class
1518
* occurs in the parser selftest.
1520
* @update gess 3/25/98
1525
void nsScanner::SelfTest(void) {