2
www.sourceforge.net/projects/tinyxml
3
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
5
This software is provided 'as-is', without any express or implied
6
warranty. In no event will the authors be held liable for any
7
damages arising from the use of this software.
9
Permission is granted to anyone to use this software for any
10
purpose, including commercial applications, and to alter it and
11
redistribute it freely, subject to the following restrictions:
13
1. The origin of this software must not be misrepresented; you must
14
not claim that you wrote the original software. If you use this
15
software in a product, an acknowledgment in the product documentation
16
would be appreciated but is not required.
18
2. Altered source versions must be plainly marked as such, and
19
must not be misrepresented as being the original software.
21
3. This notice may not be removed or altered from any source
30
//#define DEBUG_PARSER
31
#if defined( DEBUG_PARSER )
32
# if defined( DEBUG ) && defined( _MSC_VER )
34
# define TIXML_LOG OutputDebugString
36
# define TIXML_LOG printf
42
// Note that "PutString" hardcodes the same list. This
43
// is less flexible than it appears. Changing the entries
44
// or order will break putstring.
45
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
50
{ """, 6, '\"' },
54
// Bunch of unicode info at:
55
// http://www.unicode.org/faq/utf_bom.html
56
// Including the basis of this table, which determines the #bytes in the
57
// sequence from the lead byte. 1 placed for invalid sequences --
58
// although the result will be junk, pass it through as much as possible.
59
// Beware of the non-characters in UTF-8:
60
// ef bb bf (Microsoft "lead bytes")
64
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
65
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
66
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
68
const int TiXmlBase::utf8ByteTable[256] =
70
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
71
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
72
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
73
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
74
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
75
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
76
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
77
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
78
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
79
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
80
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
81
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
82
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
83
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
84
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
85
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
86
4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
90
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
92
const unsigned long BYTE_MASK = 0xBF;
93
const unsigned long BYTE_MARK = 0x80;
94
const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
98
else if ( input < 0x800 )
100
else if ( input < 0x10000 )
102
else if ( input < 0x200000 )
105
{ *length = 0; return; } // This code won't covert this correctly anyway.
109
// Scary scary fall throughs.
114
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
118
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
122
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
126
*output = (char)(input | FIRST_BYTE_MARK[*length]);
131
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
133
// This will only work for low-ascii, everything else is assumed to be a valid
134
// letter. I'm not sure this is the best approach, but it is quite tricky trying
135
// to figure out alphabetical vs. not across encoding. So take a very
136
// conservative approach.
138
// if ( encoding == TIXML_ENCODING_UTF8 )
141
return isalpha( anyByte );
143
return 1; // What else to do? The unicode set is huge...get the english ones right.
147
// return isalpha( anyByte );
152
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
154
// This will only work for low-ascii, everything else is assumed to be a valid
155
// letter. I'm not sure this is the best approach, but it is quite tricky trying
156
// to figure out alphabetical vs. not across encoding. So take a very
157
// conservative approach.
159
// if ( encoding == TIXML_ENCODING_UTF8 )
162
return isalnum( anyByte );
164
return 1; // What else to do? The unicode set is huge...get the english ones right.
168
// return isalnum( anyByte );
173
class TiXmlParsingData
175
friend class TiXmlDocument;
177
void Stamp( const char* now, TiXmlEncoding encoding );
179
const TiXmlCursor& Cursor() { return cursor; }
182
// Only used by the document!
183
TiXmlParsingData( const char* start, int _tabsize, int row, int col )
198
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
202
// Do nothing if the tabsize is 0.
208
// Get the current row, column.
209
int row = cursor.row;
210
int col = cursor.col;
211
const char* p = stamp;
216
// Treat p as unsigned, so we have a happy compiler.
217
const unsigned char* pU = (const unsigned char*)p;
219
// Code contributed by Fletcher Dunn: (modified by lee)
222
// We *should* never get here, but in case we do, don't
223
// advance past the terminating null character, ever
227
// bump down to the next line
233
// Check for \r\n sequence, and treat this as a single character
240
// bump down to the next line
247
// Check for \n\r sequence, and treat this as a single
248
// character. (Yes, this bizarre thing does occur still
249
// on some arcane platforms...)
259
// Skip to next tab stop
260
col = (col / tabsize + 1) * tabsize;
263
case TIXML_UTF_LEAD_0:
264
if ( encoding == TIXML_ENCODING_UTF8 )
266
if ( *(p+1) && *(p+2) )
268
// In these cases, don't advance the column. These are
270
if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
272
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
274
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
277
{ p +=3; ++col; } // A normal character.
288
if ( encoding == TIXML_ENCODING_UTF8 )
290
// Eat the 1 to 4 byte utf8 character.
291
int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
293
step = 1; // Error case from bad encoding, but handle gracefully.
296
// Just advance one column, of course.
309
assert( cursor.row >= -1 );
310
assert( cursor.col >= -1 );
316
const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
322
if ( encoding == TIXML_ENCODING_UTF8 )
326
const unsigned char* pU = (const unsigned char*)p;
328
// Skip the stupid Microsoft UTF-8 Byte order marks
329
if ( *(pU+0)==TIXML_UTF_LEAD_0
330
&& *(pU+1)==TIXML_UTF_LEAD_1
331
&& *(pU+2)==TIXML_UTF_LEAD_2 )
336
else if(*(pU+0)==TIXML_UTF_LEAD_0
343
else if(*(pU+0)==TIXML_UTF_LEAD_0
351
if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
359
while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
367
/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
371
if ( !in->good() ) return false;
374
// At this scope, we can't get to a document. So fail silently.
375
if ( !IsWhiteSpace( c ) || c <= 0 )
378
*tag += (char) in->get();
382
/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
384
//assert( character > 0 && character < 128 ); // else it won't work in utf-8
388
if ( c == character )
390
if ( c <= 0 ) // Silent failure: can't get document at this scope
400
// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
401
// "assign" optimization removes over 10% of the execution time.
403
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
405
// Oddly, not supported on some compilers,
411
// Names start with letters or underscores.
412
// Of course, in unicode, tinyxml has no idea what a letter *is*. The
413
// algorithm is generous.
415
// After that, they can be letters, underscores, numbers,
416
// hyphens, or colons. (Colons are valid only for namespaces,
417
// but tinyxml can't tell namespaces from names.)
419
&& ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
421
const char* start = p;
423
&& ( IsAlphaNum( (unsigned char ) *p, encoding )
429
//(*name) += *p; // expensive
433
name->assign( start, p-start );
440
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
442
// Presume an entity, and pull it out.
447
if ( *(p+1) && *(p+1) == '#' && *(p+2) )
449
unsigned long ucs = 0;
456
if ( !*(p+3) ) return 0;
459
q = strchr( q, ';' );
461
if ( !q || !*q ) return 0;
468
if ( *q >= '0' && *q <= '9' )
469
ucs += mult * (*q - '0');
470
else if ( *q >= 'a' && *q <= 'f' )
471
ucs += mult * (*q - 'a' + 10);
472
else if ( *q >= 'A' && *q <= 'F' )
473
ucs += mult * (*q - 'A' + 10 );
483
if ( !*(p+2) ) return 0;
486
q = strchr( q, ';' );
488
if ( !q || !*q ) return 0;
495
if ( *q >= '0' && *q <= '9' )
496
ucs += mult * (*q - '0');
503
if ( encoding == TIXML_ENCODING_UTF8 )
505
// convert the UCS to UTF-8
506
ConvertUTF32ToUTF8( ucs, value, length );
513
return p + delta + 1;
516
// Now try to match it.
517
for( i=0; i<NUM_ENTITY; ++i )
519
if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
521
assert( strlen( entity[i].str ) == entity[i].strLength );
522
*value = entity[i].chr;
524
return ( p + entity[i].strLength );
528
// So it wasn't an entity, its unrecognized, or something like that.
529
*value = *p; // Don't put back the last one, since we return it!
530
//*length = 1; // Leave unrecognized entities - this doesn't really work.
531
// Just writes strange XML.
536
bool TiXmlBase::StringEqual( const char* p,
539
TiXmlEncoding encoding )
553
while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
564
while ( *q && *tag && *q == *tag )
570
if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
576
const char* TiXmlBase::ReadText( const char* p,
580
bool caseInsensitive,
581
TiXmlEncoding encoding )
584
if ( !trimWhiteSpace // certain tags always keep whitespace
585
|| !condenseWhiteSpace ) // if true, whitespace is always kept
587
// Keep all the white space.
589
&& !StringEqual( p, endTag, caseInsensitive, encoding )
593
char cArr[4] = { 0, 0, 0, 0 };
594
p = GetChar( p, cArr, &len, encoding );
595
text->append( cArr, len );
600
bool whitespace = false;
602
// Remove leading white space:
603
p = SkipWhiteSpace( p, encoding );
605
&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
607
if ( *p == '\r' || *p == '\n' )
612
else if ( IsWhiteSpace( *p ) )
619
// If we've found whitespace, add it before the
620
// new character. Any whitespace just becomes a space.
627
char cArr[4] = { 0, 0, 0, 0 };
628
p = GetChar( p, cArr, &len, encoding );
630
(*text) += cArr[0]; // more efficient
632
text->append( cArr, len );
637
p += strlen( endTag );
643
void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
645
// The basic issue with a document is that we don't know what we're
646
// streaming. Read something presumed to be a tag (and hope), then
647
// identify it, and call the appropriate stream method on the tag.
649
// This "pre-streaming" will never read the closing ">" so the
650
// sub-tag can orient itself.
652
if ( !StreamTo( in, '<', tag ) )
654
SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
660
int tagIndex = (int) tag->length();
661
while ( in->good() && in->peek() != '>' )
666
SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
674
// We now have something we presume to be a node of
675
// some sort. Identify it, and call the node to
676
// continue streaming.
677
TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
681
node->StreamIn( in, tag );
682
bool isElement = node->ToElement() != 0;
686
// If this is the root element, we're done. Parsing will be
687
// done by the >> operator.
695
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
700
// We should have returned sooner.
701
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
706
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
710
// Parse away, at the document level. Since a document
711
// contains nothing but other tags, most of what happens
712
// here is skipping white space.
715
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
719
// Note that, for a document, this needs to come
720
// before the white space skip, so that parsing
721
// starts from the pointer we are given.
725
location.row = prevData->cursor.row;
726
location.col = prevData->cursor.col;
733
TiXmlParsingData data( p, TabSize(), location.row, location.col );
734
location = data.Cursor();
736
if ( encoding == TIXML_ENCODING_UNKNOWN )
738
// Check for the Microsoft UTF-8 lead bytes.
739
const unsigned char* pU = (const unsigned char*)p;
740
if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
741
&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
742
&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
744
encoding = TIXML_ENCODING_UTF8;
745
useMicrosoftBOM = true;
749
p = SkipWhiteSpace( p, encoding );
752
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
758
TiXmlNode* node = Identify( p, encoding );
761
p = node->Parse( p, &data, encoding );
762
LinkEndChild( node );
769
// Did we get encoding info?
770
if ( encoding == TIXML_ENCODING_UNKNOWN
771
&& node->ToDeclaration() )
773
TiXmlDeclaration* dec = node->ToDeclaration();
774
const char* enc = dec->Encoding();
778
encoding = TIXML_ENCODING_UTF8;
779
else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
780
encoding = TIXML_ENCODING_UTF8;
781
else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
782
encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
784
encoding = TIXML_ENCODING_LEGACY;
787
p = SkipWhiteSpace( p, encoding );
792
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
800
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
802
// The first error in a chain is more accurate - don't set again!
806
assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
809
errorDesc = errorString[ errorId ];
811
errorLocation.Clear();
812
if ( pError && data )
814
data->Stamp( pError, encoding );
815
errorLocation = data->Cursor();
820
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
822
TiXmlNode* returnNode = 0;
824
p = SkipWhiteSpace( p, encoding );
825
if( !p || !*p || *p != '<' )
830
TiXmlDocument* doc = GetDocument();
831
p = SkipWhiteSpace( p, encoding );
838
// What is this thing?
839
// - Elements start with a letter or underscore, but xml is reserved.
841
// - Declaration: <?xml
842
// - Everything else is unknown to tinyxml.
845
const char* xmlHeader = { "<?xml" };
846
const char* commentHeader = { "<!--" };
847
const char* dtdHeader = { "<!" };
848
const char* cdataHeader = { "<![CDATA[" };
850
if ( StringEqual( p, xmlHeader, true, encoding ) )
853
TIXML_LOG( "XML parsing Declaration\n" );
855
returnNode = new TiXmlDeclaration();
857
else if ( StringEqual( p, commentHeader, false, encoding ) )
860
TIXML_LOG( "XML parsing Comment\n" );
862
returnNode = new TiXmlComment();
864
else if ( StringEqual( p, cdataHeader, false, encoding ) )
867
TIXML_LOG( "XML parsing CDATA\n" );
869
TiXmlText* text = new TiXmlText( "" );
870
text->SetCDATA( true );
873
else if ( StringEqual( p, dtdHeader, false, encoding ) )
876
TIXML_LOG( "XML parsing Unknown(1)\n" );
878
returnNode = new TiXmlUnknown();
880
else if ( IsAlpha( *(p+1), encoding )
884
TIXML_LOG( "XML parsing Element\n" );
886
returnNode = new TiXmlElement( "" );
891
TIXML_LOG( "XML parsing Unknown(2)\n" );
893
returnNode = new TiXmlUnknown();
898
// Set the parent, so it can report errors
899
returnNode->parent = this;
904
doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
911
void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
913
// We're called with some amount of pre-parsing. That is, some of "this"
914
// element is in "tag". Go ahead and stream to the closing ">"
920
TiXmlDocument* document = GetDocument();
922
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
931
if ( tag->length() < 3 ) return;
933
// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
934
// If not, identify and stream.
936
if ( tag->at( tag->length() - 1 ) == '>'
937
&& tag->at( tag->length() - 2 ) == '/' )
942
else if ( tag->at( tag->length() - 1 ) == '>' )
944
// There is more. Could be:
946
// cdata text (which looks like another node)
951
StreamWhiteSpace( in, tag );
954
if ( in->good() && in->peek() != '<' )
957
TiXmlText text( "" );
958
text.StreamIn( in, tag );
960
// What follows text is a closing tag or another node.
961
// Go around again and figure it out.
965
// We now have either a closing tag...or another node.
966
// We should be at a "<", regardless.
967
if ( !in->good() ) return;
968
assert( in->peek() == '<' );
969
int tagIndex = (int) tag->length();
971
bool closingTag = false;
972
bool firstCharFound = false;
982
TiXmlDocument* document = GetDocument();
984
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
994
// Early out if we find the CDATA id.
995
if ( c == '[' && tag->size() >= 9 )
997
size_t len = tag->size();
998
const char* start = tag->c_str() + len - 9;
999
if ( strcmp( start, "<![CDATA[" ) == 0 ) {
1000
assert( !closingTag );
1005
if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1007
firstCharFound = true;
1012
// If it was a closing tag, then read in the closing '>' to clean up the input stream.
1013
// If it was not, the streaming will be done by the tag.
1022
TiXmlDocument* document = GetDocument();
1024
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1030
// We are done, once we've found our closing tag.
1035
// If not a closing tag, id it, and stream.
1036
const char* tagloc = tag->c_str() + tagIndex;
1037
TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1040
node->StreamIn( in, tag );
1044
// No return: go around from the beginning: text, closing tag, or node.
1051
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1053
p = SkipWhiteSpace( p, encoding );
1054
TiXmlDocument* document = GetDocument();
1058
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1064
data->Stamp( p, encoding );
1065
location = data->Cursor();
1070
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1074
p = SkipWhiteSpace( p+1, encoding );
1077
const char* pErr = p;
1079
p = ReadName( p, &value, encoding );
1082
if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1086
TIXML_STRING endTag ("</");
1090
// Check for and read attributes. Also look for an empty
1091
// tag or an end tag.
1095
p = SkipWhiteSpace( p, encoding );
1098
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1107
if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1112
else if ( *p == '>' )
1114
// Done with attributes (if there were any.)
1115
// Read the value -- which can include other
1116
// elements -- read the end tag, and return.
1118
p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1122
// We should find the end tag now
1123
if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1125
p += endTag.length();
1130
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1136
// Try to read an attribute:
1137
TiXmlAttribute* attrib = new TiXmlAttribute();
1140
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1144
attrib->SetDocument( document );
1146
p = attrib->Parse( p, data, encoding );
1150
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1155
// Handle the strange case of double attributes:
1156
#ifdef TIXML_USE_STL
1157
TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1159
TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1163
node->SetValue( attrib->Value() );
1168
attributeSet.Add( attrib );
1175
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1177
TiXmlDocument* document = GetDocument();
1179
// Read in text and elements in any order.
1180
const char* pWithWhiteSpace = p;
1181
p = SkipWhiteSpace( p, encoding );
1187
// Take what we have, make a text element.
1188
TiXmlText* textNode = new TiXmlText( "" );
1192
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1196
if ( TiXmlBase::IsWhiteSpaceCondensed() )
1198
p = textNode->Parse( p, data, encoding );
1202
// Special case: we want to keep the white space
1203
// so that leading spaces aren't removed.
1204
p = textNode->Parse( pWithWhiteSpace, data, encoding );
1207
if ( !textNode->Blank() )
1208
LinkEndChild( textNode );
1215
// Have we hit a new element or an end tag? This could also be
1216
// a TiXmlText in the "CDATA" style.
1217
if ( StringEqual( p, "</", false, encoding ) )
1223
TiXmlNode* node = Identify( p, encoding );
1226
p = node->Parse( p, data, encoding );
1227
LinkEndChild( node );
1235
pWithWhiteSpace = p;
1236
p = SkipWhiteSpace( p, encoding );
1241
if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1247
#ifdef TIXML_USE_STL
1248
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1250
while ( in->good() )
1255
TiXmlDocument* document = GetDocument();
1257
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1272
const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1274
TiXmlDocument* document = GetDocument();
1275
p = SkipWhiteSpace( p, encoding );
1279
data->Stamp( p, encoding );
1280
location = data->Cursor();
1282
if ( !p || !*p || *p != '<' )
1284
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1290
while ( p && *p && *p != '>' )
1298
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1305
#ifdef TIXML_USE_STL
1306
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1308
while ( in->good() )
1313
TiXmlDocument* document = GetDocument();
1315
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1322
&& tag->at( tag->length() - 2 ) == '-'
1323
&& tag->at( tag->length() - 3 ) == '-' )
1333
const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1335
TiXmlDocument* document = GetDocument();
1338
p = SkipWhiteSpace( p, encoding );
1342
data->Stamp( p, encoding );
1343
location = data->Cursor();
1345
const char* startTag = "<!--";
1346
const char* endTag = "-->";
1348
if ( !StringEqual( p, startTag, false, encoding ) )
1350
document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1353
p += strlen( startTag );
1354
p = ReadText( p, &value, false, endTag, false, encoding );
1359
const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1361
p = SkipWhiteSpace( p, encoding );
1362
if ( !p || !*p ) return 0;
1366
// tabsize = document->TabSize();
1370
data->Stamp( p, encoding );
1371
location = data->Cursor();
1373
// Read the name, the '=' and the value.
1374
const char* pErr = p;
1375
p = ReadName( p, &name, encoding );
1378
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1381
p = SkipWhiteSpace( p, encoding );
1382
if ( !p || !*p || *p != '=' )
1384
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1389
p = SkipWhiteSpace( p, encoding );
1392
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1397
const char SINGLE_QUOTE = '\'';
1398
const char DOUBLE_QUOTE = '\"';
1400
if ( *p == SINGLE_QUOTE )
1403
end = "\'"; // single quote in string
1404
p = ReadText( p, &value, false, end, false, encoding );
1406
else if ( *p == DOUBLE_QUOTE )
1409
end = "\""; // double quote in string
1410
p = ReadText( p, &value, false, end, false, encoding );
1414
// All attribute values should be in single or double quotes.
1415
// But this is such a common error that the parser will try
1416
// its best, even without them.
1418
while ( p && *p // existence
1419
&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace
1420
&& *p != '/' && *p != '>' ) // tag end
1422
if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1423
// [ 1451649 ] Attribute values with trailing quotes not handled correctly
1424
// We did not have an opening quote but seem to have a
1425
// closing one. Give up and throw an error.
1426
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1436
#ifdef TIXML_USE_STL
1437
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1439
while ( in->good() )
1442
if ( !cdata && (c == '<' ) )
1448
TiXmlDocument* document = GetDocument();
1450
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1455
in->get(); // "commits" the peek made above
1457
if ( cdata && c == '>' && tag->size() >= 3 ) {
1458
size_t len = tag->size();
1459
if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1460
// terminator of cdata.
1468
const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1471
TiXmlDocument* document = GetDocument();
1475
data->Stamp( p, encoding );
1476
location = data->Cursor();
1479
const char* const startTag = "<![CDATA[";
1480
const char* const endTag = "]]>";
1482
if ( cdata || StringEqual( p, startTag, false, encoding ) )
1486
if ( !StringEqual( p, startTag, false, encoding ) )
1488
document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1491
p += strlen( startTag );
1493
// Keep all the white space, ignore the encoding, etc.
1495
&& !StringEqual( p, endTag, false, encoding )
1503
p = ReadText( p, &dummy, false, endTag, false, encoding );
1508
bool ignoreWhite = true;
1510
const char* end = "<";
1511
p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1513
return p-1; // don't truncate the '<'
1518
#ifdef TIXML_USE_STL
1519
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1521
while ( in->good() )
1526
TiXmlDocument* document = GetDocument();
1528
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1542
const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1544
p = SkipWhiteSpace( p, _encoding );
1545
// Find the beginning, find the end, and look for
1546
// the stuff in-between.
1547
TiXmlDocument* document = GetDocument();
1548
if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1550
if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1555
data->Stamp( p, _encoding );
1556
location = data->Cursor();
1572
p = SkipWhiteSpace( p, _encoding );
1573
if ( StringEqual( p, "version", true, _encoding ) )
1575
TiXmlAttribute attrib;
1576
p = attrib.Parse( p, data, _encoding );
1577
version = attrib.Value();
1579
else if ( StringEqual( p, "encoding", true, _encoding ) )
1581
TiXmlAttribute attrib;
1582
p = attrib.Parse( p, data, _encoding );
1583
encoding = attrib.Value();
1585
else if ( StringEqual( p, "standalone", true, _encoding ) )
1587
TiXmlAttribute attrib;
1588
p = attrib.Parse( p, data, _encoding );
1589
standalone = attrib.Value();
1593
// Read over whatever it is.
1594
while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1601
bool TiXmlText::Blank() const
1603
for ( unsigned i=0; i<value.length(); i++ )
1604
if ( !IsWhiteSpace( value[i] ) )