2
www.sourceforge.net/projects/tinyxml
3
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
5
This software is provided 'as-is', without any express or implied
6
warranty. In no event will the authors be held liable for any
7
damages arising from the use of this software.
9
Permission is granted to anyone to use this software for any
10
purpose, including commercial applications, and to alter it and
11
redistribute it freely, subject to the following restrictions:
13
1. The origin of this software must not be misrepresented; you must
14
not claim that you wrote the original software. If you use this
15
software in a product, an acknowledgment in the product documentation
16
would be appreciated but is not required.
18
2. Altered source versions must be plainly marked as such, and
19
must not be misrepresented as being the original software.
21
3. This notice may not be removed or altered from any source
30
//#define DEBUG_PARSER
31
#if defined( DEBUG_PARSER )
32
# if defined( DEBUG ) && defined( _MSC_VER )
34
# define TIXML_LOG OutputDebugString
36
# define TIXML_LOG printf
40
// Note that "PutString" hardcodes the same list. This
41
// is less flexible than it appears. Changing the entries
42
// or order will break putstring.
43
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
48
{ "&quot;", 6, '\"' },
52
// Bunch of unicode info at:
53
// http://www.unicode.org/faq/utf_bom.html
54
// Including the basics of this table, which determines the #bytes in the
55
// sequence from the lead byte. 1 placed for invalid sequences --
56
// although the result will be junk, pass it through as much as possible.
57
// Beware of the non-characters in UTF-8:
58
// ef bb bf (Microsoft "lead bytes")
62
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
63
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
64
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
66
const int TiXmlBase::utf8ByteTable[256] =
68
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
69
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
70
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
71
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
72
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
73
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
74
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
75
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
76
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
77
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
78
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
79
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
80
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
81
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
82
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
83
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
84
4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
88
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
90
const unsigned long BYTE_MASK = 0xBF;
91
const unsigned long BYTE_MARK = 0x80;
92
const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
96
else if ( input < 0x800 )
98
else if ( input < 0x10000 )
100
else if ( input < 0x200000 )
103
{ *length = 0; return; } // This code won't convert this correctly anyway.
107
// Scary scary fall throughs.
112
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
116
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
120
*output = (char)((input | BYTE_MARK) & BYTE_MASK);
124
*output = (char)(input | FIRST_BYTE_MARK[*length]);
129
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
131
// This will only work for low-ascii, everything else is assumed to be a valid
132
// letter. I'm not sure this is the best approach, but it is quite tricky trying
133
// to figure out alphabetical vs. not across encoding. So take a very
134
// conservative approach.
136
// if ( encoding == TIXML_ENCODING_UTF8 )
139
return isalpha( anyByte );
141
return 1; // What else to do? The unicode set is huge...get the english ones right.
145
// return isalpha( anyByte );
150
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
152
// This will only work for low-ascii, everything else is assumed to be a valid
153
// letter. I'm not sure this is the best approach, but it is quite tricky trying
154
// to figure out alphabetical vs. not across encoding. So take a very
155
// conservative approach.
157
// if ( encoding == TIXML_ENCODING_UTF8 )
160
return isalnum( anyByte );
162
return 1; // What else to do? The unicode set is huge...get the english ones right.
166
// return isalnum( anyByte );
171
class TiXmlParsingData
173
friend class TiXmlDocument;
175
void Stamp( const char* now, TiXmlEncoding encoding );
177
const TiXmlCursor& Cursor() { return cursor; }
180
// Only used by the document!
181
TiXmlParsingData( const char* start, int _tabsize, int row, int col )
196
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
200
// Do nothing if the tabsize is 0.
206
// Get the current row, column.
207
int row = cursor.row;
208
int col = cursor.col;
209
const char* p = stamp;
214
// Treat p as unsigned, so we have a happy compiler.
215
const unsigned char* pU = (const unsigned char*)p;
217
// Code contributed by Fletcher Dunn: (modified by lee)
220
// We *should* never get here, but in case we do, don't
221
// advance past the terminating null character, ever
225
// bump down to the next line
231
// Check for \r\n sequence, and treat this as a single character
238
// bump down to the next line
245
// Check for \n\r sequence, and treat this as a single
246
// character. (Yes, this bizarre thing does occur still
247
// on some arcane platforms...)
257
// Skip to next tab stop
258
col = (col / tabsize + 1) * tabsize;
261
case TIXML_UTF_LEAD_0:
262
if ( encoding == TIXML_ENCODING_UTF8 )
264
if ( *(p+1) && *(p+2) )
266
// In these cases, don't advance the column. These are
268
if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
270
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
272
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
275
{ p +=3; ++col; } // A normal character.
286
if ( encoding == TIXML_ENCODING_UTF8 )
288
// Eat the 1 to 4 byte utf8 character.
289
int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
291
step = 1; // Error case from bad encoding, but handle gracefully.
294
// Just advance one column, of course.
307
assert( cursor.row >= -1 );
308
assert( cursor.col >= -1 );
314
const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
320
if ( encoding == TIXML_ENCODING_UTF8 )
324
const unsigned char* pU = (const unsigned char*)p;
326
// Skip the stupid Microsoft UTF-8 Byte order marks
327
if ( *(pU+0)==TIXML_UTF_LEAD_0
328
&& *(pU+1)==TIXML_UTF_LEAD_1
329
&& *(pU+2)==TIXML_UTF_LEAD_2 )
334
else if(*(pU+0)==TIXML_UTF_LEAD_0
341
else if(*(pU+0)==TIXML_UTF_LEAD_0
349
if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
357
while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
365
/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
369
if ( !in->good() ) return false;
372
// At this scope, we can't get to a document. So fail silently.
373
if ( !IsWhiteSpace( c ) || c <= 0 )
376
*tag += (char) in->get();
380
/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
382
//assert( character > 0 && character < 128 ); // else it won't work in utf-8
386
if ( c == character )
388
if ( c <= 0 ) // Silent failure: can't get document at this scope
398
// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399
// "assign" optimization removes over 10% of the execution time.
401
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
403
// Oddly, not supported on some compilers,
409
// Names start with letters or underscores.
410
// Of course, in unicode, tinyxml has no idea what a letter *is*. The
411
// algorithm is generous.
413
// After that, they can be letters, underscores, numbers,
414
// hyphens, or colons. (Colons are valid only for namespaces,
415
// but tinyxml can't tell namespaces from names.)
417
&& ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
419
const char* start = p;
421
&& ( IsAlphaNum( (unsigned char ) *p, encoding )
427
//(*name) += *p; // expensive
431
name->assign( start, p-start );
438
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
440
// Presume an entity, and pull it out.
445
if ( *(p+1) && *(p+1) == '#' && *(p+2) )
447
unsigned long ucs = 0;
454
if ( !*(p+3) ) return 0;
457
q = strchr( q, ';' );
459
if ( !q || !*q ) return 0;
466
if ( *q >= '0' && *q <= '9' )
467
ucs += mult * (*q - '0');
468
else if ( *q >= 'a' && *q <= 'f' )
469
ucs += mult * (*q - 'a' + 10);
470
else if ( *q >= 'A' && *q <= 'F' )
471
ucs += mult * (*q - 'A' + 10 );
481
if ( !*(p+2) ) return 0;
484
q = strchr( q, ';' );
486
if ( !q || !*q ) return 0;
493
if ( *q >= '0' && *q <= '9' )
494
ucs += mult * (*q - '0');
501
if ( encoding == TIXML_ENCODING_UTF8 )
503
// convert the UCS to UTF-8
504
ConvertUTF32ToUTF8( ucs, value, length );
511
return p + delta + 1;
514
// Now try to match it.
515
for( i=0; i<NUM_ENTITY; ++i )
517
if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
519
assert( strlen( entity[i].str ) == entity[i].strLength );
520
*value = entity[i].chr;
522
return ( p + entity[i].strLength );
526
// So it wasn't an entity, it's unrecognized, or something like that.
527
*value = *p; // Don't put back the last one, since we return it!
528
//*length = 1; // Leave unrecognized entities - this doesn't really work.
529
// Just writes strange XML.
534
bool TiXmlBase::StringEqual( const char* p,
537
TiXmlEncoding encoding )
551
while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
562
while ( *q && *tag && *q == *tag )
568
if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
574
const char* TiXmlBase::ReadText( const char* p,
578
bool caseInsensitive,
579
TiXmlEncoding encoding )
582
if ( !trimWhiteSpace // certain tags always keep whitespace
583
|| !condenseWhiteSpace ) // if true, whitespace is always kept
585
// Keep all the white space.
587
&& !StringEqual( p, endTag, caseInsensitive, encoding )
591
char cArr[4] = { 0, 0, 0, 0 };
592
p = GetChar( p, cArr, &len, encoding );
593
text->append( cArr, len );
598
bool whitespace = false;
600
// Remove leading white space:
601
p = SkipWhiteSpace( p, encoding );
603
&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
605
if ( *p == '\r' || *p == '\n' )
610
else if ( IsWhiteSpace( *p ) )
617
// If we've found whitespace, add it before the
618
// new character. Any whitespace just becomes a space.
625
char cArr[4] = { 0, 0, 0, 0 };
626
p = GetChar( p, cArr, &len, encoding );
628
(*text) += cArr[0]; // more efficient
630
text->append( cArr, len );
635
p += strlen( endTag );
641
void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
643
// The basic issue with a document is that we don't know what we're
644
// streaming. Read something presumed to be a tag (and hope), then
645
// identify it, and call the appropriate stream method on the tag.
647
// This "pre-streaming" will never read the closing ">" so the
648
// sub-tag can orient itself.
650
if ( !StreamTo( in, '<', tag ) )
652
SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
658
int tagIndex = (int) tag->length();
659
while ( in->good() && in->peek() != '>' )
664
SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
672
// We now have something we presume to be a node of
673
// some sort. Identify it, and call the node to
674
// continue streaming.
675
TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
679
node->StreamIn( in, tag );
680
bool isElement = node->ToElement() != 0;
684
// If this is the root element, we're done. Parsing will be
685
// done by the >> operator.
693
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
698
// We should have returned sooner.
699
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
704
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
708
// Parse away, at the document level. Since a document
709
// contains nothing but other tags, most of what happens
710
// here is skipping white space.
713
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
717
// Note that, for a document, this needs to come
718
// before the white space skip, so that parsing
719
// starts from the pointer we are given.
723
location.row = prevData->cursor.row;
724
location.col = prevData->cursor.col;
731
TiXmlParsingData data( p, TabSize(), location.row, location.col );
732
location = data.Cursor();
734
if ( encoding == TIXML_ENCODING_UNKNOWN )
736
// Check for the Microsoft UTF-8 lead bytes.
737
const unsigned char* pU = (const unsigned char*)p;
738
if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739
&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740
&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
742
encoding = TIXML_ENCODING_UTF8;
743
useMicrosoftBOM = true;
747
p = SkipWhiteSpace( p, encoding );
750
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
756
TiXmlNode* node = Identify( p, encoding );
759
p = node->Parse( p, &data, encoding );
760
LinkEndChild( node );
767
// Did we get encoding info?
768
if ( encoding == TIXML_ENCODING_UNKNOWN
769
&& node->ToDeclaration() )
771
TiXmlDeclaration* dec = node->ToDeclaration();
772
const char* enc = dec->Encoding();
776
encoding = TIXML_ENCODING_UTF8;
777
else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778
encoding = TIXML_ENCODING_UTF8;
779
else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780
encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
782
encoding = TIXML_ENCODING_LEGACY;
785
p = SkipWhiteSpace( p, encoding );
790
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
798
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
800
// The first error in a chain is more accurate - don't set again!
804
assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
807
errorDesc = errorString[ errorId ];
809
errorLocation.Clear();
810
if ( pError && data )
812
data->Stamp( pError, encoding );
813
errorLocation = data->Cursor();
818
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
820
TiXmlNode* returnNode = 0;
822
p = SkipWhiteSpace( p, encoding );
823
if( !p || !*p || *p != '<' )
828
TiXmlDocument* doc = GetDocument();
829
p = SkipWhiteSpace( p, encoding );
836
// What is this thing?
837
// - Elements start with a letter or underscore, but xml is reserved.
839
// - Declaration: <?xml
840
// - Everything else is unknown to tinyxml.
843
const char* xmlHeader = { "<?xml" };
844
const char* commentHeader = { "<!--" };
845
const char* dtdHeader = { "<!" };
846
const char* cdataHeader = { "<![CDATA[" };
848
if ( StringEqual( p, xmlHeader, true, encoding ) )
851
TIXML_LOG( "XML parsing Declaration\n" );
853
returnNode = new TiXmlDeclaration();
855
else if ( StringEqual( p, commentHeader, false, encoding ) )
858
TIXML_LOG( "XML parsing Comment\n" );
860
returnNode = new TiXmlComment();
862
else if ( StringEqual( p, cdataHeader, false, encoding ) )
865
TIXML_LOG( "XML parsing CDATA\n" );
867
TiXmlText* text = new TiXmlText( "" );
868
text->SetCDATA( true );
871
else if ( StringEqual( p, dtdHeader, false, encoding ) )
874
TIXML_LOG( "XML parsing Unknown(1)\n" );
876
returnNode = new TiXmlUnknown();
878
else if ( IsAlpha( *(p+1), encoding )
882
TIXML_LOG( "XML parsing Element\n" );
884
returnNode = new TiXmlElement( "" );
889
TIXML_LOG( "XML parsing Unknown(2)\n" );
891
returnNode = new TiXmlUnknown();
896
// Set the parent, so it can report errors
897
returnNode->parent = this;
902
doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
909
void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
911
// We're called with some amount of pre-parsing. That is, some of "this"
912
// element is in "tag". Go ahead and stream to the closing ">"
918
TiXmlDocument* document = GetDocument();
920
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
929
if ( tag->length() < 3 ) return;
931
// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
932
// If not, identify and stream.
934
if ( tag->at( tag->length() - 1 ) == '>'
935
&& tag->at( tag->length() - 2 ) == '/' )
940
else if ( tag->at( tag->length() - 1 ) == '>' )
942
// There is more. Could be:
944
// cdata text (which looks like another node)
949
StreamWhiteSpace( in, tag );
952
if ( in->good() && in->peek() != '<' )
955
TiXmlText text( "" );
956
text.StreamIn( in, tag );
958
// What follows text is a closing tag or another node.
959
// Go around again and figure it out.
963
// We now have either a closing tag...or another node.
964
// We should be at a "<", regardless.
965
if ( !in->good() ) return;
966
assert( in->peek() == '<' );
967
int tagIndex = (int) tag->length();
969
bool closingTag = false;
970
bool firstCharFound = false;
980
TiXmlDocument* document = GetDocument();
982
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
992
// Early out if we find the CDATA id.
993
if ( c == '[' && tag->size() >= 9 )
995
size_t len = tag->size();
996
const char* start = tag->c_str() + len - 9;
997
if ( strcmp( start, "<![CDATA[" ) == 0 ) {
998
assert( !closingTag );
1003
if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1005
firstCharFound = true;
1010
// If it was a closing tag, then read in the closing '>' to clean up the input stream.
1011
// If it was not, the streaming will be done by the tag.
1020
TiXmlDocument* document = GetDocument();
1022
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1028
// We are done, once we've found our closing tag.
1033
// If not a closing tag, id it, and stream.
1034
const char* tagloc = tag->c_str() + tagIndex;
1035
TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1038
node->StreamIn( in, tag );
1042
// No return: go around from the beginning: text, closing tag, or node.
1049
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1051
p = SkipWhiteSpace( p, encoding );
1052
TiXmlDocument* document = GetDocument();
1056
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1062
data->Stamp( p, encoding );
1063
location = data->Cursor();
1068
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1072
p = SkipWhiteSpace( p+1, encoding );
1075
const char* pErr = p;
1077
p = ReadName( p, &value, encoding );
1080
if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1084
TIXML_STRING endTag ("</");
1088
// Check for and read attributes. Also look for an empty
1089
// tag or an end tag.
1093
p = SkipWhiteSpace( p, encoding );
1096
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1105
if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1110
else if ( *p == '>' )
1112
// Done with attributes (if there were any.)
1113
// Read the value -- which can include other
1114
// elements -- read the end tag, and return.
1116
p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1118
// We were looking for the end tag, but found nothing.
1119
// Fix for [ 1663758 ] Failure to report error on bad XML
1120
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1124
// We should find the end tag now
1125
if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1127
p += endTag.length();
1132
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1138
// Try to read an attribute:
1139
TiXmlAttribute* attrib = new TiXmlAttribute();
1142
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1146
attrib->SetDocument( document );
1148
p = attrib->Parse( p, data, encoding );
1152
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1157
// Handle the strange case of double attributes:
1158
#ifdef TIXML_USE_STL
1159
TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1161
TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1165
node->SetValue( attrib->Value() );
1170
attributeSet.Add( attrib );
1177
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1179
TiXmlDocument* document = GetDocument();
1181
// Read in text and elements in any order.
1182
const char* pWithWhiteSpace = p;
1183
p = SkipWhiteSpace( p, encoding );
1189
// Take what we have, make a text element.
1190
TiXmlText* textNode = new TiXmlText( "" );
1194
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1198
if ( TiXmlBase::IsWhiteSpaceCondensed() )
1200
p = textNode->Parse( p, data, encoding );
1204
// Special case: we want to keep the white space
1205
// so that leading spaces aren't removed.
1206
p = textNode->Parse( pWithWhiteSpace, data, encoding );
1209
if ( !textNode->Blank() )
1210
LinkEndChild( textNode );
1217
// Have we hit a new element or an end tag? This could also be
1218
// a TiXmlText in the "CDATA" style.
1219
if ( StringEqual( p, "</", false, encoding ) )
1225
TiXmlNode* node = Identify( p, encoding );
1228
p = node->Parse( p, data, encoding );
1229
LinkEndChild( node );
1237
pWithWhiteSpace = p;
1238
p = SkipWhiteSpace( p, encoding );
1243
if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1249
#ifdef TIXML_USE_STL
1250
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1252
while ( in->good() )
1257
TiXmlDocument* document = GetDocument();
1259
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1274
const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1276
TiXmlDocument* document = GetDocument();
1277
p = SkipWhiteSpace( p, encoding );
1281
data->Stamp( p, encoding );
1282
location = data->Cursor();
1284
if ( !p || !*p || *p != '<' )
1286
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1292
while ( p && *p && *p != '>' )
1300
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1307
#ifdef TIXML_USE_STL
1308
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1310
while ( in->good() )
1315
TiXmlDocument* document = GetDocument();
1317
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1324
&& tag->at( tag->length() - 2 ) == '-'
1325
&& tag->at( tag->length() - 3 ) == '-' )
1335
const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1337
TiXmlDocument* document = GetDocument();
1340
p = SkipWhiteSpace( p, encoding );
1344
data->Stamp( p, encoding );
1345
location = data->Cursor();
1347
const char* startTag = "<!--";
1348
const char* endTag = "-->";
1350
if ( !StringEqual( p, startTag, false, encoding ) )
1352
document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1355
p += strlen( startTag );
1357
// [ 1475201 ] TinyXML parses entities in comments
1358
// Oops - ReadText doesn't work, because we don't want to parse the entities.
1359
// p = ReadText( p, &value, false, endTag, false, encoding );
1361
// from the XML spec:
1363
[Definition: Comments may appear anywhere in a document outside other markup; in addition,
1364
they may appear within the document type declaration at places allowed by the grammar.
1365
They are not part of the document's character data; an XML processor MAY, but need not,
1366
make it possible for an application to retrieve the text of comments. For compatibility,
1367
the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1368
references MUST NOT be recognized within comments.
1370
An example of a comment:
1372
<!-- declarations for <head> & <body> -->
1376
// Keep all the white space.
1377
while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1379
value.append( p, 1 );
1383
p += strlen( endTag );
1389
const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1391
p = SkipWhiteSpace( p, encoding );
1392
if ( !p || !*p ) return 0;
1396
// tabsize = document->TabSize();
1400
data->Stamp( p, encoding );
1401
location = data->Cursor();
1403
// Read the name, the '=' and the value.
1404
const char* pErr = p;
1405
p = ReadName( p, &name, encoding );
1408
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1411
p = SkipWhiteSpace( p, encoding );
1412
if ( !p || !*p || *p != '=' )
1414
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1419
p = SkipWhiteSpace( p, encoding );
1422
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1427
const char SINGLE_QUOTE = '\'';
1428
const char DOUBLE_QUOTE = '\"';
1430
if ( *p == SINGLE_QUOTE )
1433
end = "\'"; // single quote in string
1434
p = ReadText( p, &value, false, end, false, encoding );
1436
else if ( *p == DOUBLE_QUOTE )
1439
end = "\""; // double quote in string
1440
p = ReadText( p, &value, false, end, false, encoding );
1444
// All attribute values should be in single or double quotes.
1445
// But this is such a common error that the parser will try
1446
// its best, even without them.
1448
while ( p && *p // existence
1449
&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace
1450
&& *p != '/' && *p != '>' ) // tag end
1452
if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1453
// [ 1451649 ] Attribute values with trailing quotes not handled correctly
1454
// We did not have an opening quote but seem to have a
1455
// closing one. Give up and throw an error.
1456
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1466
#ifdef TIXML_USE_STL
1467
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1469
while ( in->good() )
1472
if ( !cdata && (c == '<' ) )
1478
TiXmlDocument* document = GetDocument();
1480
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1485
in->get(); // "commits" the peek made above
1487
if ( cdata && c == '>' && tag->size() >= 3 ) {
1488
size_t len = tag->size();
1489
if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1490
// terminator of cdata.
1498
const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1501
TiXmlDocument* document = GetDocument();
1505
data->Stamp( p, encoding );
1506
location = data->Cursor();
1509
const char* const startTag = "<![CDATA[";
1510
const char* const endTag = "]]>";
1512
if ( cdata || StringEqual( p, startTag, false, encoding ) )
1516
if ( !StringEqual( p, startTag, false, encoding ) )
1518
document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1521
p += strlen( startTag );
1523
// Keep all the white space, ignore the encoding, etc.
1525
&& !StringEqual( p, endTag, false, encoding )
1533
p = ReadText( p, &dummy, false, endTag, false, encoding );
1538
bool ignoreWhite = true;
1540
const char* end = "<";
1541
p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1543
return p-1; // don't truncate the '<'
1548
#ifdef TIXML_USE_STL
1549
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1551
while ( in->good() )
1556
TiXmlDocument* document = GetDocument();
1558
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1572
const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1574
p = SkipWhiteSpace( p, _encoding );
1575
// Find the beginning, find the end, and look for
1576
// the stuff in-between.
1577
TiXmlDocument* document = GetDocument();
1578
if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1580
if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1585
data->Stamp( p, _encoding );
1586
location = data->Cursor();
1602
p = SkipWhiteSpace( p, _encoding );
1603
if ( StringEqual( p, "version", true, _encoding ) )
1605
TiXmlAttribute attrib;
1606
p = attrib.Parse( p, data, _encoding );
1607
version = attrib.Value();
1609
else if ( StringEqual( p, "encoding", true, _encoding ) )
1611
TiXmlAttribute attrib;
1612
p = attrib.Parse( p, data, _encoding );
1613
encoding = attrib.Value();
1615
else if ( StringEqual( p, "standalone", true, _encoding ) )
1617
TiXmlAttribute attrib;
1618
p = attrib.Parse( p, data, _encoding );
1619
standalone = attrib.Value();
1623
// Read over whatever it is.
1624
while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1631
bool TiXmlText::Blank() const
1633
for ( unsigned i=0; i<value.length(); i++ )
1634
if ( !IsWhiteSpace( value[i] ) )